From b8b5a483b880b09c5fd4e9b23e92fda2985b685b Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 17 Feb 2022 12:19:27 -0800
Subject: [PATCH 1/2] [Draft] Fix MR Reads

Start to fix MR reads on higher levels, where we have declared
but undefined regions and need to filter by valid boxes.
---
 .../data_reader/io_reader/utilities.py        | 49 +++++++++++++++++--
 setup.py                                      |  4 +-
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
index bb933287..19a727a9 100644
--- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
+++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
@@ -11,6 +11,15 @@
 import numpy as np
 
 
+def chunk_to_slice(chunk):
+    """
+    Convert an openPMD_api.ChunkInfo to np.s_
+    """
+    stops = [a + b for a, b in zip(chunk.offset, chunk.extent)]
+    indices_per_dim = zip(chunk.offset, stops)
+    index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim)
+    return tuple(index_tuple)
+
 def get_data(series, record_component, i_slice=None, pos_slice=None,
              output_type=np.float64):
     """
@@ -46,8 +55,17 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
     if i_slice is not None and not isinstance(i_slice, list):
         i_slice = [i_slice]
 
+    chunks = record_component.available_chunks()
+
     if pos_slice is None:
-        data = record_component[()]
+        # mask invalid regions with zero
+        data = np.zeros_like(record_component)
+        for chunk in chunks:
+            chunk_slice = chunk_to_slice(chunk)
+            # read only valid region
+            x = record_component[chunk_slice]
+            series.flush()
+            data[chunk_slice] = x
     else:
         # Get largest element of pos_slice
         max_pos = max(pos_slice)
@@ -60,8 +78,33 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
             list_index[dir_index] = i_slice[count]
         # Convert list_index into a tuple
         tuple_index = tuple(list_index)
-        # Slice dset according to tuple_index
-        data = record_component[tuple_index]
+        print("tuple_index={}".format(tuple_index))
+
+        # potentially a better approach as below, since we only slice
+        # out hyperplanes, planes & lines:
+        # - allocate zero array for result, which is a hyperplane/plane/line
+        # - iterate over slices in tuple_index
+        # - reduce selected read range to "valid" range
+
+        # initial experiment:
+        # full_indices can be HUGE, avoid!!
+        full_indices = np.indices(record_component.shape)[0]
+        #full_shape = full_indices.shape
+        #print("full_shape.shape={}".format(full_shape))
+        #print("full_shape={}".format(full_shape))
+
+        # prepare sliced data according to tuple_index
+        slice_indices = full_indices[tuple_index]
+        slice_shape = slice_indices.shape
+        data = np.zeros(slice_shape, dtype=output_type)
+        # write now in index space between intersection of slice_indices and chunk indices
+        for chunk in chunks:
+            chunk_slice = chunk_to_slice(chunk)
+            chunk_indices = full_indices[chunk_slice]
+            intersect_indices = np.intersect1d(chunk_indices, slice_indices)
+            print(intersect_indices)
+            data[slice_indices] = record_component[intersect_indices]
+        #data = np.zeros_like(record_component)[tuple_index]  # just avoid invalid reads for now
 
     series.flush()
 
diff --git a/setup.py b/setup.py
index 71bc1356..2f96dbaa 100644
--- a/setup.py
+++ b/setup.py
@@ -38,12 +38,12 @@ def run_tests(self):
       tests_require=['pytest', 'jupyter'],
       install_requires=install_requires,
       extras_require = {
-        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api", "wget"],
+        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"],
         'GUI':  ["ipywidgets", "ipympl", "matplotlib"],
         'plot': ["matplotlib"],
         'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"],
         'numba': ["numba"],
-        'openpmd-api': ["openpmd-api"]
+        'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"]
         },
       cmdclass={'test': PyTest},
       platforms='any',

From e2217e231805758a417c02ae970e6eea49d9a9a4 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 17 Feb 2022 14:56:05 -0800
Subject: [PATCH 2/2] MR: Implement Chunked Slicing

---
 .../data_reader/io_reader/utilities.py        | 88 +++++++++++--------
 setup.py                                      |  4 +-
 2 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
index 19a727a9..7b21cea5 100644
--- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
+++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
@@ -13,13 +13,14 @@
 
 def chunk_to_slice(chunk):
     """
-    Convert an openPMD_api.ChunkInfo to np.s_
+    Convert an openPMD_api.ChunkInfo to slice
     """
     stops = [a + b for a, b in zip(chunk.offset, chunk.extent)]
     indices_per_dim = zip(chunk.offset, stops)
     index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim)
     return tuple(index_tuple)
 
+
 def get_data(series, record_component, i_slice=None, pos_slice=None,
              output_type=np.float64):
     """
@@ -67,46 +68,57 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
             series.flush()
             data[chunk_slice] = x
     else:
-        # Get largest element of pos_slice
-        max_pos = max(pos_slice)
-        # Create list of indices list_index of type
-        # [:, :, :, ...] where Ellipsis starts at max_pos + 1
-        list_index = [np.s_[:]] * (max_pos + 2)
-        list_index[max_pos + 1] = np.s_[...]
-        # Fill list_index with elements of i_slice
-        for count, dir_index in enumerate(pos_slice):
-            list_index[dir_index] = i_slice[count]
-        # Convert list_index into a tuple
-        tuple_index = tuple(list_index)
-        print("tuple_index={}".format(tuple_index))
-
-        # potentially a better approach as below, since we only slice
-        # out hyperplanes, planes & lines:
-        # - allocate zero array for result, which is a hyperplane/plane/line
-        # - iterate over slices in tuple_index
-        # - reduce selected read range to "valid" range
-
-        # initial experiment:
-        # full_indices can be HUGE, avoid!!
-        full_indices = np.indices(record_component.shape)[0]
-        #full_shape = full_indices.shape
-        #print("full_shape.shape={}".format(full_shape))
-        #print("full_shape={}".format(full_shape))
-
-        # prepare sliced data according to tuple_index
-        slice_indices = full_indices[tuple_index]
-        slice_shape = slice_indices.shape
+        full_shape = record_component.shape
+
+        slice_shape = list(full_shape)      # copy
+        pos_slice_sorted = pos_slice.copy() # copy for in-place sort
+        pos_slice_sorted.sort(reverse=True)
+        for dir_index in pos_slice_sorted:  # remove indices in list
+            del slice_shape[dir_index]
+
+        # mask invalid regions with zero
         data = np.zeros(slice_shape, dtype=output_type)
-        # write now in index space between intersection of slice_indices and chunk indices
+
+        # build requested ND slice with respect to full data
+        s = []
+        for d in range(len(full_shape)):
+            if d in pos_slice:
+                s.append(i_slice[pos_slice.index(d)]) # one index in such directions
+            else: # all indices in other direction
+                s.append(slice(None, None, None))
+        s = tuple(s)
+
+        # now we check which chunks contribute to the slice
         for chunk in chunks:
+            skip_this_chunk = False
+            s_valid = list(s)  # same as s but reduced to valid regions in chunk
+            s_target = []  # starts and stops in sliced array
             chunk_slice = chunk_to_slice(chunk)
-            chunk_indices = full_indices[chunk_slice]
-            intersect_indices = np.intersect1d(chunk_indices, slice_indices)
-            print(intersect_indices)
-            data[slice_indices] = record_component[intersect_indices]
-        #data = np.zeros_like(record_component)[tuple_index]  # just avoid invalid reads for now
-
-    series.flush()
+            # read only valid region
+            for d, slice_d in enumerate(s):
+                start = chunk_slice[d].start
+                stop = chunk_slice[d].stop
+                if isinstance(slice_d, int):
+                    # Nothing to do for s_target (dimension sliced out)
+                    # Nothing to do for s_valid (dimension index is set)
+                    if slice_d < start or slice_d >= stop:
+                        # chunk not in slice line/plane
+                        skip_this_chunk = True
+                else:
+                    if slice_d.start is None or slice_d.start < start:
+                        s_valid[d] = slice(start, s_valid[d].stop)
+                    if slice_d.stop is None or slice_d.stop > stop:
+                        s_valid[d] = slice(s_valid[d].start, stop)
+                    s_target.append(slice(start, stop))
+
+            s_valid = tuple(s_valid)
+            s_target = tuple(s_target)
+
+            # read
+            if not skip_this_chunk:
+                x = record_component[s_valid]
+                series.flush()
+                data[s_target] = x
 
     # Convert to the right type
     if data.dtype != output_type:
diff --git a/setup.py b/setup.py
index 2f96dbaa..1da55e9d 100644
--- a/setup.py
+++ b/setup.py
@@ -38,12 +38,12 @@ def run_tests(self):
       tests_require=['pytest', 'jupyter'],
       install_requires=install_requires,
       extras_require = {
-        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"],
+        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.14.0", "wget"],
         'GUI':  ["ipywidgets", "ipympl", "matplotlib"],
         'plot': ["matplotlib"],
         'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"],
         'numba': ["numba"],
-        'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"]
+        'openpmd-api': ["openpmd-api~=0.14.0"]
         },
       cmdclass={'test': PyTest},
       platforms='any',