From fd41324296dce8cab5973c8a0c04621027d6e6d8 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 12 Apr 2022 14:33:50 -0700 Subject: [PATCH] Fix: Avoid `_like` function in Chunking When we prepare chunked reads, we assume a single chunk for all backends but ADIOS2. Preparing the returned data, we use `data = np.full_like(record_component, np.nan)`. It turns out that numpy seems to trigger a `__getitem__` access or full copy of our `record_component` at this point, which causes severe slowdown. This was first seen for particles, but affects every read where we do not slice a subset. Co-authored-by: AlexanderSinn --- .../openpmd_timeseries/data_reader/io_reader/utilities.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py index c5d8bf85..7a7360c2 100644 --- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py +++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py @@ -56,17 +56,21 @@ def get_data(series, record_component, i_slice=None, pos_slice=None, if i_slice is not None and not isinstance(i_slice, list): i_slice = [i_slice] + # ADIOS2: Actual chunks, all other: one chunk chunks = record_component.available_chunks() + # read whole data set if pos_slice is None: # mask invalid regions with NaN - data = np.full_like(record_component, np.nan) + # note: full_like triggers a full read, thus we avoid it #340 + data = np.full(record_component.shape, np.nan, record_component.dtype) for chunk in chunks: chunk_slice = chunk_to_slice(chunk) # read only valid region x = record_component[chunk_slice] series.flush() data[chunk_slice] = x + # slice: read only part of the data set else: full_shape = record_component.shape