-
Notifications
You must be signed in to change notification settings - Fork 2
/
h5file.py
218 lines (175 loc) · 5.76 KB
/
h5file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""
Easier access to hdf5 subgroups/datasets
by using `group.key` instead of `group['key']`.
It has been deprecated in favor of h5attr.H5Attr, because the latter
is more efficient: it does not create `f['a']` when accessing `f['a/b']`.
Usage:
hg = H5File('data.h5')
a = hg.a
a = hg['a']
a_b = hg.a.b
a_b = hg['a/b']
a_b_arr = hg.a.b.value # using .value to load the array
# list available properties
dir(hg)
# or print
print(hg)
# attrs
a = hg.attrs.a
# non-lazy mode
hg = H5File('data.h5', lazy=False)
a_b_arr = hg.a.b # no need of .value
# add new property (original file will not be changed)
hg.b = 1
# access properties whose names start with a non-alphabetic character
a_1 = hg.a.1 # SyntaxError: invalid syntax
a_1 = hg.a['1']
a_1 = hg['a/1']
# slicing
sl = hg[slice]
sl.x == hg.x[slice]
sl.y == hg.y[slice]
# slicing only takes effect on direct dataset
sl.dataset == hg.dataset[slice]
sl.group.dataset == hg.group.dataset
# slice of slice
hg[slice1][slice2].x == hg.x[slice1][slice2]
# slicing a slice is inefficient; use it sparingly.
"""
from __future__ import print_function
import os
import h5py
import numpy as np
from six import string_types
__all__ = ['H5File']
class H5Group(object):
    '''Wrap of hdf5 group for quick access.

    Members of the wrapped object are reachable as attributes
    (``group.key``) and as items (``group['key']``, ``group['a/b']``).
    Loaded or assigned values are cached in the instance ``__dict__``;
    assignment and deletion only touch that cache, never the wrapped object.

    Internal state is stored directly in ``__dict__`` under ``_data_``
    (the wrapped object), ``_lazy_`` (bool) and ``_keys_`` (list of names).
    '''

    def __init__(self, file, lazy=True):
        """
        Parameters
        ----------
        file : h5py.Group or file path.
            A string is passed through ``os.path.expanduser`` and opened
            with ``h5py.File(..., 'r')``.
        lazy : bool
            If False, a dataset is read with ``dataset[()]`` when it is
            first loaded; otherwise the ``h5py.Dataset`` itself is returned.
        """
        if isinstance(file, string_types):
            file = h5py.File(os.path.expanduser(file), 'r')

        keys = list(file.keys())
        # 'attrs' is listed as a pseudo-member only when it is non-empty.
        if hasattr(file, 'attrs') and file.attrs:
            keys.append('attrs')

        # Write through __dict__: __setattr__ is redirected to __setitem__.
        self.__dict__['_data_'] = file
        self.__dict__['_lazy_'] = lazy
        self.__dict__['_keys_'] = keys

    def __dir__(self):
        """Return a copy of the known names, so callers cannot alter it."""
        return list(self._keys_)

    def __contains__(self, key):
        """Return True if `key` is one of the known (top-level) names."""
        # Without this, `in` falls back to __getitem__(0), (1), ... which
        # never raises IndexError here and therefore never terminates.
        return key in self._keys_

    def __iter__(self):
        """Iterate over a snapshot of the known names."""
        # Same reason as __contains__: avoid the endless __getitem__
        # fallback of the legacy sequence protocol.
        return iter(list(self._keys_))

    def __str__(self):
        return "file:\t{file}\nname:\t{name}\nkeys:\t{keys}".format(
            file=self._data_.file.filename,
            name=self._data_.name,
            keys="\n\t".join(self._keys_)
        )

    def __getattr__(self, key):
        """Resolve `key` through item access.

        Only called when normal attribute lookup fails.

        Raises
        ------
        AttributeError
            If `key` is unknown, or if the instance has not been
            initialised yet.
        """
        # On a bare instance (e.g. the one copy/pickle create before the
        # state is restored) `_keys_` is missing; going through
        # __getitem__ would look up `self._keys_` and recurse forever.
        if '_keys_' not in self.__dict__:
            raise AttributeError(key)
        return self[key]

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        del self[key]

    def __getitem__(self, key):
        """Return the member named `key`, or a slice view of the group.

        A non-string `key` is treated as an index and yields
        ``H5Slice(self, key)``. A key containing '/' is resolved one
        component at a time.

        Raises
        ------
        AttributeError
            If a (component of the) string key is not a known name.
        """
        # slice
        if not isinstance(key, string_types):
            return H5Slice(self, key)

        # hierarchical key
        if '/' in key:
            value = self
            for part in key.strip('/').split('/'):
                value = value[part]
            return value

        # simple key
        if key not in self._keys_:
            raise AttributeError("no attribute: '%s'" % key)
        if key in self.__dict__:
            return self.__dict__[key]  # cached or user-assigned value
        return self._load_(key)

    def __setitem__(self, key, value):
        """Store `value` under `key` in the instance cache only.

        Raises
        ------
        TypeError
            If `key` is not a string.
        ValueError
            If `key` contains '/'.
        """
        if not isinstance(key, string_types):
            raise TypeError("key must be a string")
        if '/' in key:
            raise ValueError("key with '/' is not supported")

        self.__dict__[key] = value
        if key not in self._keys_:
            self._keys_.append(key)

    def __delitem__(self, key):
        """Drop the cached/assigned value of `key`.

        The name stays listed in `_keys_`, so a later access goes through
        `_load_` again.

        Raises
        ------
        TypeError
            If `key` is not a string.
        ValueError
            If `key` contains '/'.
        AttributeError
            If `key` is not a known name.
        """
        if not isinstance(key, string_types):
            raise TypeError("key must be a string")
        if '/' in key:
            raise ValueError("key with '/' is not supported")

        if key not in self._keys_:
            raise AttributeError("No attribute: '%s'" % key)
        if key in self.__dict__:
            del self.__dict__[key]

    def _load_(self, key):
        """Fetch `key` from the wrapped object, wrap it, and cache it."""
        if key == 'attrs':
            value = H5Attrs(self._data_.attrs)
        else:
            value = self._data_[key]
            if isinstance(value, h5py.Group):
                # sub-groups inherit the laziness of their parent
                value = H5Group(value, lazy=self._lazy_)
            elif not self._lazy_ and isinstance(value, h5py.Dataset):
                value = value[()]
        self.__dict__[key] = value
        return value

    def _show_(self):
        """Print every member; datasets/arrays are shown as dtype + shape."""
        for key in self._keys_:
            value = self[key]
            if isinstance(value, (h5py.Dataset, np.ndarray)):
                print("{}:\n\t{:>5s} {}".format(
                    key, value.dtype.str.strip(">|<"), value.shape)
                )
            else:
                print("{}:\n\t{}".format(key, value))
class H5Slice(H5Group):
    '''Slice of H5Group

    A view on a group-like wrapper that applies a fixed index to every
    dataset/array member with a non-empty shape when it is accessed.
    Other members are passed through unchanged.
    '''

    def __init__(self, group, slice):
        """
        Parameters
        ----------
        group : H5Group
            The wrapper whose members are indexed on access.
        slice : index or tuple of indices
            Anything that is not a tuple is wrapped into a 1-tuple.
        """
        slice = slice if isinstance(slice, tuple) else (slice,)

        # Original author's note: fancy array slice does not support lazy
        # mode. For a lazy parent combined with an ndarray index the
        # dataset is therefore read into memory before it is indexed.
        needs_load = group._lazy_ and any(
            isinstance(sl, np.ndarray) for sl in slice)

        self.__dict__['_data_'] = group
        self.__dict__['_lazy_'] = False
        self.__dict__['_keys_'] = dir(group)
        self.__dict__['_slice_'] = slice
        # NOTE: despite its name, `_fancy_` is True when the index can be
        # applied to the parent's value directly, and False when the value
        # has to be loaded first.
        self.__dict__['_fancy_'] = not needs_load

    def __str__(self):
        return "{original}\nslice:\t{slice}".format(
            original=str(self._data_),
            slice=self._slice_
        )

    def _load_(self, key):
        """Return member `key` of the parent, indexed where applicable.

        Datasets and arrays with a non-empty shape are indexed with the
        stored slice and the result is cached. Every other member
        (sub-group, attrs, scalar, user-assigned value) is returned as the
        parent provides it and is not cached here.
        """
        value = self._data_[key]
        # `Dataset.value` was removed in h5py 3.0; `[()]` reads the whole
        # dataset. Only datasets are read, so sub-groups and attrs no
        # longer fail on a missing `value` member.
        if not self._fancy_ and isinstance(value, h5py.Dataset):
            value = value[()]

        if isinstance(value, (h5py.Dataset, np.ndarray)) and value.shape:
            sliced = value[self._slice_]
            self.__dict__[key] = sliced  # only cache sliced dataset
            return sliced
        return value
class H5Attrs(H5Group):
    '''Wrap of hdf5 attrs for quick access.

    Inherits all access behaviour from `H5Group`; only the string form
    is overridden.
    '''

    def __str__(self):
        """Return one ``name:<TAB>value`` line per name listed by `dir`."""
        lines = []
        for name in dir(self):
            lines.append("%s:\t%s" % (name, getattr(self, name)))
        return "\n".join(lines)
class H5File(H5Group):
    '''Wrap of hdf5 file for quick access.

    Adds nothing to `H5Group`; it is the name exported through `__all__`.
    '''