Coverage for /home/ubuntu/Documents/Research/mut_p6/sacred/sacred/observers/tinydb_hashfs/bases.py: 0%


73 statements  

import datetime as dt
import json
import os
from io import BufferedReader, FileIO
from pathlib import Path
from typing import Tuple

from hashfs import HashFS
from tinydb import TinyDB
from tinydb_serialization import Serializer, SerializationMiddleware

import sacred.optional as opt

# Set data type values for abstract properties in Serializers
series_type = opt.pandas.Series if opt.has_pandas else None
dataframe_type = opt.pandas.DataFrame if opt.has_pandas else None
ndarray_type = opt.np.ndarray if opt.has_numpy else None

class BufferedReaderWrapper(BufferedReader):
    """Custom wrapper to allow for copying of a file handle.

    tinydb_serialization currently does a deepcopy on all the content of the
    dictionary before serialisation. By default, file handles are not
    copyable, so this wrapper is necessary to create a duplicate of the
    file handle passed in.

    Note that the file passed in will therefore remain open, as the copy is
    the one that gets closed.
    """

    def __init__(self, f_obj):
        f_obj = FileIO(f_obj.name)
        super().__init__(f_obj)

    def __copy__(self):
        f = open(self.name, self.mode)
        return BufferedReaderWrapper(f)

    def __deepcopy__(self, memo):
        f = open(self.name, self.mode)
        return BufferedReaderWrapper(f)
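A minimal sketch of the copy behaviour described in the docstring above; the temporary file and the variable names are illustrative assumptions, not part of the module:

import copy
import tempfile

with tempfile.NamedTemporaryFile("wb", delete=False) as tmp:
    tmp.write(b"example payload")

handle = open(tmp.name, "rb")
original = BufferedReaderWrapper(handle)  # reopens tmp.name; `handle` itself stays open
duplicate = copy.deepcopy(original)       # __deepcopy__ reopens the file by name
duplicate.close()
assert not original.closed                # closing the copy leaves the original open
original.close()
handle.close()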

class DateTimeSerializer(Serializer):
    OBJ_CLASS = dt.datetime  # The class this serializer handles

    def encode(self, obj):
        return obj.strftime("%Y-%m-%dT%H:%M:%S.%f")

    def decode(self, s):
        return dt.datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f")


class NdArraySerializer(Serializer):
    OBJ_CLASS = ndarray_type

    def encode(self, obj):
        return json.dumps(obj.tolist(), check_circular=True)

    def decode(self, s):
        return opt.np.array(json.loads(s))


class DataFrameSerializer(Serializer):
    OBJ_CLASS = dataframe_type

    def encode(self, obj):
        return obj.to_json()

    def decode(self, s):
        return opt.pandas.read_json(s)


class SeriesSerializer(Serializer):
    OBJ_CLASS = series_type

    def encode(self, obj):
        return obj.to_json()

    def decode(self, s):
        return opt.pandas.read_json(s, typ="series")


class FileSerializer(Serializer):
    OBJ_CLASS = BufferedReaderWrapper

    def __init__(self, fs):
        self.fs = fs

    def encode(self, obj):
        address = self.fs.put(obj)
        return json.dumps(address.id)

    def decode(self, s):
        id_ = json.loads(s)
        file_reader = self.fs.open(id_)
        file_reader = BufferedReaderWrapper(file_reader)
        file_reader.hash = id_
        return file_reader

def get_db_file_manager(root_dir) -> Tuple[TinyDB, HashFS]:
    root_dir = Path(root_dir)
    fs = HashFS(root_dir / "hashfs", depth=3, width=2, algorithm="md5")

    # Set up the serialisation middleware for non-list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), "TinyDate")
    serialization_store.register_serializer(FileSerializer(fs), "TinyFile")

    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), "TinyArray")
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), "TinyDataFrame")
        serialization_store.register_serializer(SeriesSerializer(), "TinySeries")

    db = TinyDB(os.path.join(root_dir, "metadata.json"), storage=serialization_store)
    return db, fs
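
A minimal usage sketch, assuming the module above is importable; the temporary directory and the example record are illustrative assumptions:

import tempfile

root = tempfile.mkdtemp()
db, fs = get_db_file_manager(root)

# Datetimes round-trip through DateTimeSerializer via the registered middleware.
db.insert({"name": "demo", "start_time": dt.datetime(2020, 1, 1, 12, 0)})
print(db.all())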