Skip to content

Commit

Permalink
Add a cache for the 1-dim case of NDArray.__iter__. Fixes #330.
Browse files (browse the repository at this point in the history)
  • Loading branch information
FrancescAlted committed Nov 27, 2024
1 parent 3db7b94 commit 522b1e4
Showing 1 changed file with 19 additions and 3 deletions.
22 changes: 19 additions & 3 deletions src/blosc2/ndarray.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1021,11 +1021,15 @@ def extract_values(arr, indices: np.ndarray[np.int_], max_cache_size: int = 10)


class NDOuterIterator:
# Set up iteration over the first axis of `ndarray`, with an optional
# chunk cache that is only enabled for 1-D arrays.
#
# NOTE(review): this is a rendered diff, so two signatures appear back to
# back; presumably the first (cache_size=10) is the removed line and the
# second (cache_size=1) is its replacement -- confirm against the repository.
def __init__(self, ndarray: NDArray | NDField, cache_size=10):
def __init__(self, ndarray: NDArray | NDField, cache_size=1):
# Array being iterated.
self.ndarray = ndarray
# Length of the first axis.
self.outer_dim_size = ndarray.shape[0]
# Shape of the remaining axes (everything after the first).
self.inner_shape = ndarray.shape[1:]
# Index of the next item along the first axis; __next__ reads it and
# then increments it.
self.current_index = 0
# Cache for 1D arrays; for higher dimensions, the implementation should be more involved
# chunk_size is the first entry of ndarray.chunks; __next__ uses it to
# split an index into (index // chunk_size, index % chunk_size).
self.chunk_size = ndarray.chunks[0] if len(ndarray.shape) == 1 else None
# Dict keyed by chunk index, holding the array slice that spans
# chunk_size items; None means __next__ indexes the array directly.
self.cache = {} if len(ndarray.shape) == 1 else None
# Upper bound on cached entries: when the dict already holds this many,
# __next__ pops the first key in iteration order (the earliest-inserted
# one) before storing a new slice.
# NOTE(review): with cache_size <= 0 that eviction would call next() on
# an empty dict iterator; the StopIteration raised inside __next__ would
# look like a normal end of iteration -- consider validating the value.
self.cache_size = cache_size

# Iterator protocol: the object acts as its own iterator, so iter(obj)
# hands back the same instance (and the same current_index state).
def __iter__(self):
return self
Expand All @@ -1037,8 +1041,20 @@ def __next__(self):
outer_index = self.current_index
self.current_index += 1

# Return the outer dimension
return self.ndarray[outer_index]
if self.cache is not None:
chunk_index = outer_index // self.chunk_size
local_index = outer_index % self.chunk_size

if chunk_index not in self.cache:
if len(self.cache) >= self.cache_size:
self.cache.pop(next(iter(self.cache)))
self.cache[chunk_index] = self.ndarray[
chunk_index * self.chunk_size : (chunk_index + 1) * self.chunk_size
]

return self.cache[chunk_index][local_index]
else:
return self.ndarray[outer_index]


class NDArray(blosc2_ext.NDArray, Operand):
Expand Down

0 comments on commit 522b1e4

Please sign in to comment.