Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change nan default fill_value for kerchunk arrays #255

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_kerchunk_roundtrip_in_memory_no_concat():
chunks=(2, 2),
compressor=None,
filters=None,
fill_value=np.nan,
fill_value=None,
order="C",
),
chunkmanifest=manifest,
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_create_manifestarray_from_kerchunk_refs(self):
assert marr.chunks == (2, 3)
assert marr.dtype == np.dtype("int64")
assert marr.zarray.compressor is None
assert marr.zarray.fill_value is np.nan
assert marr.zarray.fill_value == 0
assert marr.zarray.filters is None
assert marr.zarray.order == "C"

Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_readers/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_dataset_from_df_refs():

assert da.data.zarray.compressor is None
assert da.data.zarray.filters is None
assert da.data.zarray.fill_value is np.nan
assert da.data.zarray.fill_value == 0
assert da.data.zarray.order == "C"

assert da.data.manifest.dict() == {
Expand Down
32 changes: 32 additions & 0 deletions virtualizarr/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,35 @@ def test_replace_total():
result = arr.replace(**kwargs)
expected = ZArray(**kwargs)
assert result == expected


def test_nan_fill_value_from_kerchunk():
    """Check how ``ZArray.from_kerchunk_refs`` resolves a null ``fill_value``.

    Two kerchunk-style ``.zarray`` dicts are built that differ only in
    ``dtype``; both carry ``"fill_value": None``. The resulting ``ZArray``
    is expected to expose ``0`` for the integer dtype (``<i8``) and the
    ``np.nan`` object for the floating dtype (``<f8``).
    """

    def _refs_with_dtype(dtype_str):
        # Identical metadata for both cases; only the dtype string varies.
        return {
            "chunks": [2, 3],
            "compressor": None,
            "dtype": dtype_str,
            "fill_value": None,
            "filters": None,
            "order": "C",
            "shape": [2, 3],
            "zarr_format": 2,
        }

    # Integer dtype: a null fill_value must not be coerced to NaN.
    int_zarray = ZArray.from_kerchunk_refs(_refs_with_dtype("<i8"))
    assert int_zarray.fill_value == 0

    # Floating dtype: a null fill_value is coerced to NaN.
    float_zarray = ZArray.from_kerchunk_refs(_refs_with_dtype("<f8"))
    assert float_zarray.fill_value is np.nan
9 changes: 6 additions & 3 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,12 @@ def codec(self) -> Codec:

@classmethod
def from_kerchunk_refs(cls, decoded_arr_refs_zarray) -> "ZArray":
# coerce type of fill_value as kerchunk can be inconsistent with this
# coerce type of fill_value for floats, as kerchunk can be inconsistent with this
dtype = np.dtype(decoded_arr_refs_zarray["dtype"])
fill_value = decoded_arr_refs_zarray["fill_value"]
if fill_value is None or fill_value == "NaN" or fill_value == "nan":
if np.issubdtype(dtype, np.floating) and (
fill_value is None or fill_value == "NaN" or fill_value == "nan"
):
fill_value = np.nan

compressor = decoded_arr_refs_zarray["compressor"]
Expand All @@ -86,7 +89,7 @@ def from_kerchunk_refs(cls, decoded_arr_refs_zarray) -> "ZArray":
return ZArray(
chunks=tuple(decoded_arr_refs_zarray["chunks"]),
compressor=compressor,
dtype=np.dtype(decoded_arr_refs_zarray["dtype"]),
dtype=dtype,
fill_value=fill_value,
filters=decoded_arr_refs_zarray["filters"],
order=decoded_arr_refs_zarray["order"],
Expand Down
Loading