Coverage for /home/ubuntu/Documents/Research/mut_p6/sacred/sacred/observers/tinydb_hashfs/bases.py: 0%

import datetime as dt
import json
import os
from io import BufferedReader, FileIO
from pathlib import Path
from typing import Tuple

from hashfs import HashFS
from tinydb import TinyDB
from tinydb_serialization import Serializer, SerializationMiddleware

import sacred.optional as opt

# Set data type values for abstract properties in Serializers
series_type = opt.pandas.Series if opt.has_pandas else None
dataframe_type = opt.pandas.DataFrame if opt.has_pandas else None
ndarray_type = opt.np.ndarray if opt.has_numpy else None


class BufferedReaderWrapper(BufferedReader):
    """Custom wrapper to allow for copying of a file handle.

    tinydb_serialization currently does a deepcopy on all the content of the
    dictionary before serialisation. By default, file handles are not
    copiable, so this wrapper is necessary to create a duplicate of the
    file handle passed in.

    Note that the file passed in will therefore remain open, as the copy is
    the one that gets closed.
    """

    def __init__(self, f_obj):
        f_obj = FileIO(f_obj.name)
        super().__init__(f_obj)

    def __copy__(self):
        f = open(self.name, self.mode)
        return BufferedReaderWrapper(f)

    def __deepcopy__(self, memo):
        f = open(self.name, self.mode)
        return BufferedReaderWrapper(f)
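

# --- Illustrative sketch, not part of the original module ---
# A minimal example of the copy behaviour described in the docstring:
# deepcopying the wrapper re-opens the underlying file, so the handle passed
# in stays usable. The path "example.txt" is a hypothetical placeholder.
def _example_copy_wrapper():
    import copy

    with open("example.txt", "rb") as raw:
        wrapped = BufferedReaderWrapper(raw)
        duplicate = copy.deepcopy(wrapped)
        # Both handles read the same bytes independently.
        assert duplicate.read() == wrapped.read()
        duplicate.close()  # closing the copy leaves `wrapped` readable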


class DateTimeSerializer(Serializer):
    OBJ_CLASS = dt.datetime  # The class this serializer handles

    def encode(self, obj):
        return obj.strftime("%Y-%m-%dT%H:%M:%S.%f")

    def decode(self, s):
        return dt.datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f")


class NdArraySerializer(Serializer):
    OBJ_CLASS = ndarray_type

    def encode(self, obj):
        return json.dumps(obj.tolist(), check_circular=True)

    def decode(self, s):
        return opt.np.array(json.loads(s))


class DataFrameSerializer(Serializer):
    OBJ_CLASS = dataframe_type

    def encode(self, obj):
        return obj.to_json()

    def decode(self, s):
        return opt.pandas.read_json(s)


class SeriesSerializer(Serializer):
    OBJ_CLASS = series_type

    def encode(self, obj):
        return obj.to_json()

    def decode(self, s):
        return opt.pandas.read_json(s, typ="series")
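

# --- Illustrative sketch, not part of the original module ---
# Round-trips a small numpy array through NdArraySerializer; it only does
# anything when numpy is available, hence the guard on opt.has_numpy.
def _example_ndarray_roundtrip():
    if not opt.has_numpy:
        return
    serializer = NdArraySerializer()
    original = opt.np.arange(6).reshape(2, 3)
    # encode() produces a JSON string; decode() rebuilds the array from it.
    restored = serializer.decode(serializer.encode(original))
    assert (restored == original).all()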


class FileSerializer(Serializer):
    OBJ_CLASS = BufferedReaderWrapper

    def __init__(self, fs):
        self.fs = fs

    def encode(self, obj):
        address = self.fs.put(obj)
        return json.dumps(address.id)

    def decode(self, s):
        id_ = json.loads(s)
        file_reader = self.fs.open(id_)
        file_reader = BufferedReaderWrapper(file_reader)
        file_reader.hash = id_
        return file_reader
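

# --- Illustrative sketch, not part of the original module ---
# Stores a small file in a throw-away HashFS store via FileSerializer and
# reads it back. The file name "artifact.bin" is a hypothetical placeholder;
# only fs.put / fs.open from hashfs are relied upon.
def _example_file_roundtrip():
    import tempfile

    with tempfile.TemporaryDirectory() as root:
        fs = HashFS(os.path.join(root, "hashfs"), depth=3, width=2, algorithm="md5")
        serializer = FileSerializer(fs)
        src = os.path.join(root, "artifact.bin")
        with open(src, "wb") as f:
            f.write(b"payload")
        with open(src, "rb") as f:
            # encode() stores the content and returns the hash id as JSON.
            encoded = serializer.encode(BufferedReaderWrapper(f))
        # decode() reopens the stored content from the hash store.
        restored = serializer.decode(encoded)
        assert restored.read() == b"payload"
        restored.close()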


def get_db_file_manager(root_dir) -> Tuple[TinyDB, HashFS]:
    root_dir = Path(root_dir)
    fs = HashFS(root_dir / "hashfs", depth=3, width=2, algorithm="md5")

    # Set up serialisation middleware for non list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), "TinyDate")
    serialization_store.register_serializer(FileSerializer(fs), "TinyFile")

    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), "TinyArray")
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), "TinyDataFrame")
        serialization_store.register_serializer(SeriesSerializer(), "TinySeries")

    db = TinyDB(os.path.join(root_dir, "metadata.json"), storage=serialization_store)
    return db, fs
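

# --- Illustrative usage sketch, not part of the original module ---
# Builds the TinyDB / HashFS pair in a temporary directory and inserts a
# record containing a datetime, which exercises the registered
# DateTimeSerializer on both write and read.
def _example_db_file_manager():
    import tempfile

    with tempfile.TemporaryDirectory() as root:
        db, _fs = get_db_file_manager(root)
        started = dt.datetime(2020, 1, 1, 12, 0, 0)
        db.insert({"start_time": started})
        # The middleware decodes the stored string back into a datetime.
        assert db.all()[0]["start_time"] == started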