Skip to content

Commit

Permalink
add support for references
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentsarago committed Nov 4, 2024
1 parent 3fefc67 commit 07e4beb
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/titiler/xarray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ test = [
"pytest-cov",
"pytest-asyncio",
"httpx",
"kerchunk",
]

[project.urls]
Expand Down
24 changes: 24 additions & 0 deletions src/titiler/xarray/tests/fixtures/generate_fixtures.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,30 @@
" ds.to_zarr(store=f\"pyramid.zarr\", mode=\"w\", group=ix)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import fsspec\n",
"from kerchunk.hdf import SingleHdf5ToZarr\n",
"\n",
"with fsspec.open(\"dataset_3d.nc\", mode=\"rb\", anon=True) as infile:\n",
" h5chunks = SingleHdf5ToZarr(infile, \"dataset_3d.nc\", inline_threshold=100)\n",
"\n",
" with open(\"reference.json\", 'w') as f:\n",
" f.write(json.dumps(h5chunks.translate()));\n"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
1 change: 1 addition & 0 deletions src/titiler/xarray/tests/fixtures/reference.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version": 1, "refs": {".zgroup": "{\"zarr_format\":2}", "dataset/.zarray": "{\"chunks\":[1,500,1000],\"compressor\":null,\"dtype\":\"<f8\",\"fill_value\":\"NaN\",\"filters\":[{\"elementsize\":8,\"id\":\"shuffle\"},{\"id\":\"zlib\",\"level\":9}],\"order\":\"C\",\"shape\":[2,1000,2000],\"zarr_format\":2}", "dataset/.zattrs": "{\"_ARRAY_DIMENSIONS\":[\"time\",\"y\",\"x\"],\"fill_value\":0,\"valid_max\":1000.0,\"valid_min\":1.0}", "dataset/0.0.0": ["tests/fixtures/dataset_3d.nc", 37134, 113251], "dataset/0.0.1": ["tests/fixtures/dataset_3d.nc", 150385, 112805], "dataset/0.1.0": ["tests/fixtures/dataset_3d.nc", 263190, 65106], "dataset/0.1.1": ["tests/fixtures/dataset_3d.nc", 328296, 65049], "dataset/1.0.0": ["tests/fixtures/dataset_3d.nc", 393345, 65468], "dataset/1.0.1": ["tests/fixtures/dataset_3d.nc", 458813, 65506], "dataset/1.1.0": ["tests/fixtures/dataset_3d.nc", 524319, 58101], "dataset/1.1.1": ["tests/fixtures/dataset_3d.nc", 582420, 58075], "time/.zarray": "{\"chunks\":[2],\"compressor\":null,\"dtype\":\"<i8\",\"fill_value\":null,\"filters\":null,\"order\":\"C\",\"shape\":[2],\"zarr_format\":2}", "time/.zattrs": "{\"_ARRAY_DIMENSIONS\":[\"time\"],\"calendar\":\"proleptic_gregorian\",\"units\":\"days since 2022-01-01 00:00:00\"}", "time/0": "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000m\u0001\u0000\u0000\u0000\u0000\u0000\u0000", "x/.zarray": "{\"chunks\":[2000],\"compressor\":null,\"dtype\":\"<f8\",\"fill_value\":\"NaN\",\"filters\":null,\"order\":\"C\",\"shape\":[2000],\"zarr_format\":2}", "x/.zattrs": "{\"_ARRAY_DIMENSIONS\":[\"x\"]}", "x/0": ["tests/fixtures/dataset_3d.nc", 1415, 16000], "y/.zarray": "{\"chunks\":[1000],\"compressor\":null,\"dtype\":\"<f8\",\"fill_value\":\"NaN\",\"filters\":null,\"order\":\"C\",\"shape\":[1000],\"zarr_format\":2}", "y/.zattrs": "{\"_ARRAY_DIMENSIONS\":[\"y\"]}", "y/0": ["tests/fixtures/dataset_3d.nc", 17570, 8000]}}
2 changes: 1 addition & 1 deletion src/titiler/xarray/tests/test_io_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_get_variable():

@pytest.mark.parametrize(
"filename",
["dataset_2d.nc", "dataset_3d.nc", "dataset_3d.zarr"],
["dataset_2d.nc", "dataset_3d.nc", "dataset_3d.zarr", "reference.json"],
)
def test_reader(filename):
"""test reader."""
Expand Down
26 changes: 20 additions & 6 deletions src/titiler/xarray/titiler/xarray/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ def xarray_engine(src_path: str) -> str:
# ".hdf", ".hdf5", ".h5" will be supported once we have tests and have expanded the types permitted for the group parameter
if any(src_path.lower().endswith(ext) for ext in [".nc", ".nc4"]):
return "h5netcdf"
else:
return "zarr"
return "zarr"


def get_filesystem(
Expand All @@ -68,8 +67,14 @@ def get_filesystem(
if xr_engine == "h5netcdf"
else s3fs.S3Map(root=src_path, s3=s3_filesystem)
)

elif protocol in ["https", "http", "file"]:
elif protocol == "reference" or src_path.lower().endswith(".json"):
reference_args = {
"fo": src_path.replace("reference://", ""),
"remote_options": {"anon": anon},
}
return fsspec.filesystem("reference", **reference_args).get_mapper("")

elif protocol in ["https", "http", "file", "reference"]:
if protocol.startswith("http"):
assert (
aiohttp is not None
Expand Down Expand Up @@ -121,6 +126,17 @@ def xarray_open_dataset(
xr_open_args["lock"] = False
ds = xarray.open_dataset(file_handler, **xr_open_args)

elif src_path.lower().endswith(".json"):
xr_open_args.update(
{
"engine": "zarr",
"consolidated": False,
"backend_kwargs": {"consolidated": False},
}
)

ds = xarray.open_dataset(file_handler, **xr_open_args)

# Fallback to Zarr
else:
ds = xarray.open_zarr(file_handler, **xr_open_args)
Expand Down Expand Up @@ -285,8 +301,6 @@ def list_variables(
cls,
src_path: str,
group: Optional[Any] = None,
reference: Optional[bool] = False,
consolidated: Optional[bool] = True,
) -> List[str]:
"""List available variable in a dataset."""
with xarray_open_dataset(
Expand Down

0 comments on commit 07e4beb

Please sign in to comment.