From b8b5a483b880b09c5fd4e9b23e92fda2985b685b Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 17 Feb 2022 12:19:27 -0800 Subject: [PATCH 1/2] [Draft] Fix MR Reads Start to fix MR reads on higher levels, where we have declared but undefined regions and need to filter by valid boxes. --- .../data_reader/io_reader/utilities.py | 49 +++++++++++++++++-- setup.py | 4 +- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py index bb933287..19a727a9 100644 --- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py +++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py @@ -11,6 +11,15 @@ import numpy as np +def chunk_to_slice(chunk): + """ + Convert an openPMD_api.ChunkInfo to np.s_ + """ + stops = [a + b for a, b in zip(chunk.offset, chunk.extent)] + indices_per_dim = zip(chunk.offset, stops) + index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim) + return tuple(index_tuple) + def get_data(series, record_component, i_slice=None, pos_slice=None, output_type=np.float64): """ @@ -46,8 +55,17 @@ def get_data(series, record_component, i_slice=None, pos_slice=None, if i_slice is not None and not isinstance(i_slice, list): i_slice = [i_slice] + chunks = record_component.available_chunks() + if pos_slice is None: - data = record_component[()] + # mask invalid regions with zero + data = np.zeros_like(record_component) + for chunk in chunks: + chunk_slice = chunk_to_slice(chunk) + # read only valid region + x = record_component[chunk_slice] + series.flush() + data[chunk_slice] = x else: # Get largest element of pos_slice max_pos = max(pos_slice) @@ -60,8 +78,33 @@ def get_data(series, record_component, i_slice=None, pos_slice=None, list_index[dir_index] = i_slice[count] # Convert list_index into a tuple tuple_index = tuple(list_index) - # Slice dset according to tuple_index - data = record_component[tuple_index] + print("tuple_index={}".format(tuple_index)) + + # potentially a better approach as below, since we only slice + # out hyperplanes, planes & lines: + # - allocate zero array for result, which is a hyperplane/plane/line + # - iterate over slices in tuple_index + # - reduce selected read range to "valid" range + + # initial experiment: + # full_indices can be HUGE, avoid!! + full_indices = np.indices(record_component.shape)[0] + #full_shape = full_indices.shape + #print("full_shape.shape={}".format(full_shape)) + #print("full_shape={}".format(full_shape)) + + # prepare sliced data according to tuple_index + slice_indices = full_indices[tuple_index] + slice_shape = slice_indices.shape + data = np.zeros(slice_shape, dtype=output_type) + # write now in index space between intersection of slice_indices and chunk indices + for chunk in chunks: + chunk_slice = chunk_to_slice(chunk) + chunk_indices = full_indices[chunk_slice] + intersect_indices = np.intersect1d(chunk_indices, slice_indices) + print(intersect_indices) + data[slice_indices] = record_component[intersect_indices] + #data = np.zeros_like(record_component)[tuple_index] # just avoid invalid reads for now series.flush() diff --git a/setup.py b/setup.py index 71bc1356..2f96dbaa 100644 --- a/setup.py +++ b/setup.py @@ -38,12 +38,12 @@ def run_tests(self): tests_require=['pytest', 'jupyter'], install_requires=install_requires, extras_require = { - 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api", "wget"], + 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"], 'GUI': ["ipywidgets", "ipympl", "matplotlib"], 'plot': ["matplotlib"], 'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"], 'numba': ["numba"], - 'openpmd-api': ["openpmd-api"] + 'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"] }, cmdclass={'test': PyTest}, platforms='any', From e2217e231805758a417c02ae970e6eea49d9a9a4 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 17 Feb 2022 14:56:05 -0800 Subject: [PATCH 2/2] MR: Implement Chunked Slicing --- .../data_reader/io_reader/utilities.py | 88 +++++++++++-------- setup.py | 4 +- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py index 19a727a9..7b21cea5 100644 --- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py +++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py @@ -13,13 +13,14 @@ def chunk_to_slice(chunk): """ - Convert an openPMD_api.ChunkInfo to np.s_ + Convert an openPMD_api.ChunkInfo to slice """ stops = [a + b for a, b in zip(chunk.offset, chunk.extent)] indices_per_dim = zip(chunk.offset, stops) index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim) return tuple(index_tuple) + def get_data(series, record_component, i_slice=None, pos_slice=None, output_type=np.float64): """ @@ -67,46 +68,57 @@ def get_data(series, record_component, i_slice=None, pos_slice=None, series.flush() data[chunk_slice] = x else: - # Get largest element of pos_slice - max_pos = max(pos_slice) - # Create list of indices list_index of type - # [:, :, :, ...] where Ellipsis starts at max_pos + 1 - list_index = [np.s_[:]] * (max_pos + 2) - list_index[max_pos + 1] = np.s_[...] - # Fill list_index with elements of i_slice - for count, dir_index in enumerate(pos_slice): - list_index[dir_index] = i_slice[count] - # Convert list_index into a tuple - tuple_index = tuple(list_index) - print("tuple_index={}".format(tuple_index)) - - # potentially a better approach as below, since we only slice - # out hyperplanes, planes & lines: - # - allocate zero array for result, which is a hyperplane/plane/line - # - iterate over slices in tuple_index - # - reduce selected read range to "valid" range - - # initial experiment: - # full_indices can be HUGE, avoid!! - full_indices = np.indices(record_component.shape)[0] - #full_shape = full_indices.shape - #print("full_shape.shape={}".format(full_shape)) - #print("full_shape={}".format(full_shape)) - - # prepare sliced data according to tuple_index - slice_indices = full_indices[tuple_index] - slice_shape = slice_indices.shape + full_shape = record_component.shape + + slice_shape = list(full_shape) # copy + pos_slice_sorted = pos_slice.copy() # copy for in-place sort + pos_slice_sorted.sort(reverse=True) + for dir_index in pos_slice_sorted: # remove indices in list + del slice_shape[dir_index] + + # mask invalid regions with zero data = np.zeros(slice_shape, dtype=output_type) - # write now in index space between intersection of slice_indices and chunk indices + + # build requested ND slice with respect to full data + s = [] + for d in range(len(full_shape)): + if d in pos_slice: + s.append(i_slice[pos_slice.index(d)]) # one index in such directions + else: # all indices in other direction + s.append(slice(None, None, None)) + s = tuple(s) + + # now we check which chunks contribute to the slice for chunk in chunks: + skip_this_chunk = False + s_valid = list(s) # same as s but reduced to valid regions in chunk + s_target = [] # starts and stops in sliced array chunk_slice = chunk_to_slice(chunk) - chunk_indices = full_indices[chunk_slice] - intersect_indices = np.intersect1d(chunk_indices, slice_indices) - print(intersect_indices) - data[slice_indices] = record_component[intersect_indices] - #data = np.zeros_like(record_component)[tuple_index] # just avoid invalid reads for now - - series.flush() + # read only valid region + for d, slice_d in enumerate(s): + start = chunk_slice[d].start + stop = chunk_slice[d].stop + if isinstance(slice_d, int): + # Nothing to do for s_target (dimension sliced out) + # Nothing to do for s_valid (dimension index is set) + if slice_d < start or slice_d >= stop: + # chunk not in slice line/plane + skip_this_chunk = True + else: + if slice_d.start is None or slice_d.start < start: + s_valid[d] = slice(start, s_valid[d].stop) + if slice_d.stop is None or slice_d.stop > stop: + s_valid[d] = slice(s_valid[d].start, stop) + s_target.append(slice(start, stop)) + + s_valid = tuple(s_valid) + s_target = tuple(s_target) + + # read + if not skip_this_chunk: + x = record_component[s_valid] + series.flush() + data[s_target] = x # Convert to the right type if data.dtype != output_type: diff --git a/setup.py b/setup.py index 2f96dbaa..1da55e9d 100644 --- a/setup.py +++ b/setup.py @@ -38,12 +38,12 @@ def run_tests(self): tests_require=['pytest', 'jupyter'], install_requires=install_requires, extras_require = { - 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"], + 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.14.0", "wget"], 'GUI': ["ipywidgets", "ipympl", "matplotlib"], 'plot': ["matplotlib"], 'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"], 'numba': ["numba"], - 'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"] + 'openpmd-api': ["openpmd-api~=0.14.0"] }, cmdclass={'test': PyTest}, platforms='any',