Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip tests that require kerchunk #259

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/min-deps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
name: min-deps
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was probably a better way to do this than just making an entire extra workflow file but my github-actions-fu is weak


# Triggers: pushes and PRs against main (ignoring docs-only changes),
# plus a daily scheduled run at midnight UTC.
on:
  push:
    branches: ["main"]
    paths-ignore:
      - "docs/**"
  pull_request:
    branches: ["main"]
    paths-ignore:
      - "docs/**"
  schedule:
    - cron: "0 0 * * *"

# Cancel in-flight runs superseded by a newer push to the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:

  test:
    name: ${{ matrix.python-version }}-build
    runs-on: ubuntu-latest
    defaults:
      run:
        # Login shell so the micromamba-activated environment is available
        # in every `run:` step.
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@v4

      # Create the minimal-dependency environment from ci/min-deps.yml.
      - name: Setup micromamba
        uses: mamba-org/setup-micromamba@v1
        with:
          environment-file: ci/min-deps.yml
          cache-environment: true
          create-args: >-
            python=${{matrix.python-version}}

      # --no-deps: the environment file, not pip, provides the dependencies.
      - name: Install virtualizarr
        run: |
          python -m pip install -e . --no-deps

      # Record the resolved environment in the job log for debugging.
      - name: Conda list information
        run: |
          conda env list
          conda list

      - name: Running Tests
        run: |
          python -m pytest ./virtualizarr --cov=./ --cov-report=xml --verbose

      - name: Upload code coverage to Codecov
        uses: codecov/[email protected]
        with:
          file: ./coverage.xml
          flags: unittests
          env_vars: OS,PYTHON
          name: codecov-umbrella
          fail_ci_if_error: false
25 changes: 25 additions & 0 deletions ci/min-deps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Minimal conda environment used by .github/workflows/min-deps.yml:
# required runtime dependencies plus test tooling only (optional extras
# such as kerchunk are deliberately omitted so tests that need them skip).
name: virtualizarr-min-deps
channels:
  - conda-forge
  - nodefaults
dependencies:
  - h5netcdf
  - h5py
  - hdf5
  - netcdf4
  - xarray>=2024.6.0
  - numpy>=2.0.0
  - numcodecs
  - packaging
  - universal_pathlib
  # Testing
  - codecov
  - pre-commit
  - mypy
  - ruff
  - pandas-stubs
  - pytest-mypy
  - pytest-cov
  - pytest
  - pooch
  - fsspec
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@ requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
"xarray>=2024.06.0",
"kerchunk>=0.2.5",
"h5netcdf",
"numpy>=2.0.0",
"ujson",
"packaging",
"universal-pathlib",
"numcodecs",
Comment on lines 24 to +28
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The minimum set of dependencies should look like this, and you could probably even make universal-pathlib optional for reading from non-remote files...

]

[project.optional-dependencies]
test = [
"codecov",
"fastparquet",
"fsspec",
"h5netcdf",
"h5py",
"kerchunk>=0.2.5",
"mypy",
"netcdf4",
"pandas-stubs",
Expand All @@ -47,6 +47,7 @@ test = [
"ruff",
"s3fs",
"scipy",
"ujson",
]


Expand Down
3 changes: 2 additions & 1 deletion virtualizarr/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
overload,
)

import ujson # type: ignore
from xarray import Dataset, register_dataset_accessor

from virtualizarr.manifests import ManifestArray
Expand Down Expand Up @@ -91,6 +90,8 @@ def to_kerchunk(
if format == "dict":
return refs
elif format == "json":
import ujson

if filepath is None:
raise ValueError("Filepath must be provided when format is 'json'")

Expand Down
3 changes: 2 additions & 1 deletion virtualizarr/manifests/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import Any, Callable, Dict, NewType, Tuple, TypedDict, cast

import numpy as np
from upath import UPath

from virtualizarr.types import ChunkKey

Expand Down Expand Up @@ -41,6 +40,8 @@ class ChunkEntry:
def from_kerchunk(
cls, path_and_byte_range_info: tuple[str] | tuple[str, int, int]
) -> "ChunkEntry":
from upath import UPath

if len(path_and_byte_range_info) == 1:
path = path_and_byte_range_info[0]
offset = 0
Expand Down
3 changes: 2 additions & 1 deletion virtualizarr/readers/kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pathlib import Path
from typing import Any, MutableMapping, Optional, cast

import ujson # type: ignore
from xarray import Dataset
from xarray.core.indexes import Index
from xarray.core.variable import Variable
Expand Down Expand Up @@ -300,6 +299,8 @@ def fully_decode_arr_refs(d: dict) -> KerchunkArrRefs:
"""
Only have to do this because kerchunk.SingleHdf5ToZarr apparently doesn't bother converting .zarray and .zattrs contents to dicts, see https://github.com/fsspec/kerchunk/issues/415 .
"""
import ujson

sanitized = d.copy()
for k, v in d.items():
if k.startswith("."):
Expand Down
2 changes: 2 additions & 0 deletions virtualizarr/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def _importorskip(


has_astropy, requires_astropy = _importorskip("astropy")
has_kerchunk, requires_kerchunk = _importorskip("kerchunk")
has_s3fs, requires_s3fs = _importorskip("s3fs")
has_scipy, requires_scipy = _importorskip("scipy")
has_tifffile, requires_tifffile = _importorskip("tifffile")


Expand Down
19 changes: 17 additions & 2 deletions virtualizarr/tests/test_backend.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from collections.abc import Mapping
from unittest.mock import patch

import fsspec
import numpy as np
import pytest
import xarray as xr
Expand All @@ -13,9 +12,17 @@
from virtualizarr.backend import FileType
from virtualizarr.manifests import ManifestArray
from virtualizarr.readers.kerchunk import _automatically_determine_filetype
from virtualizarr.tests import has_astropy, has_tifffile, network, requires_s3fs
from virtualizarr.tests import (
has_astropy,
has_tifffile,
network,
requires_kerchunk,
requires_s3fs,
requires_scipy,
)


@requires_scipy
def test_automatically_determine_filetype_netcdf3_netcdf4():
# test the NetCDF3 vs NetCDF4 automatic file type selection

Expand Down Expand Up @@ -75,6 +82,7 @@ def test_FileType():
FileType(None)


@requires_kerchunk
class TestOpenVirtualDatasetIndexes:
def test_no_indexes(self, netcdf4_file):
vds = open_virtual_dataset(netcdf4_file, indexes={})
Expand Down Expand Up @@ -105,6 +113,7 @@ def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, I
return True


@requires_kerchunk
def test_cftime_index(tmpdir):
"""Ensure a virtual dataset contains the same indexes as an Xarray dataset"""
# Note: Test was created to debug: https://github.com/zarr-developers/VirtualiZarr/issues/168
Expand All @@ -130,6 +139,7 @@ def test_cftime_index(tmpdir):
assert vds.attrs == ds.attrs


@requires_kerchunk
class TestOpenVirtualDatasetAttrs:
def test_drop_array_dimensions(self, netcdf4_file):
# regression test for GH issue #150
Expand Down Expand Up @@ -237,6 +247,8 @@ def test_read_from_url(self, filetype, url):
assert isinstance(vds, xr.Dataset)

def test_virtualizarr_vs_local_nisar(self):
import fsspec

# Open group directly from locally cached file with xarray
url = "https://nisar.asf.earthdatacloud.nasa.gov/NISAR-SAMPLE-DATA/GCOV/ALOS1_Rosamond_20081012/NISAR_L2_PR_GCOV_001_005_A_219_4020_SHNA_A_20081012T060910_20081012T060926_P01101_F_N_J_001.h5"
tmpfile = fsspec.open_local(
Expand Down Expand Up @@ -266,6 +278,7 @@ def test_virtualizarr_vs_local_nisar(self):
xrt.assert_equal(dsXR, dsV)


@requires_kerchunk
class TestLoadVirtualDataset:
def test_loadable_variables(self, netcdf4_file):
vars_to_load = ["air", "time"]
Expand Down Expand Up @@ -338,6 +351,7 @@ def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir):
assert vds.scalar.attrs == {"scalar": "true"}


@requires_kerchunk
@pytest.mark.parametrize(
"reference_format",
["json", "parquet", "invalid"],
Expand Down Expand Up @@ -395,6 +409,7 @@ def test_open_virtual_dataset_existing_kerchunk_refs(
assert set(vds.variables) == set(netcdf4_virtual_dataset.variables)


@requires_kerchunk
def test_notimplemented_read_inline_refs(tmp_path, netcdf4_inlined_ref):
# For now, we raise a NotImplementedError if we read existing references that have inlined data
# https://github.com/zarr-developers/VirtualiZarr/pull/251#pullrequestreview-2361916932
Expand Down
4 changes: 4 additions & 0 deletions virtualizarr/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from virtualizarr import open_virtual_dataset
from virtualizarr.manifests.array import ManifestArray
from virtualizarr.manifests.manifest import ChunkManifest
from virtualizarr.tests import requires_kerchunk
from virtualizarr.zarr import ZArray


@requires_kerchunk
@pytest.mark.parametrize(
"inline_threshold, vars_to_inline",
[
Expand Down Expand Up @@ -45,6 +47,7 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(
assert refs["refs"]["time/0"] == expected["refs"]["time/0"]


@requires_kerchunk
@pytest.mark.parametrize("format", ["dict", "json", "parquet"])
class TestKerchunkRoundtrip:
def test_kerchunk_roundtrip_no_concat(self, tmpdir, format):
Expand Down Expand Up @@ -212,6 +215,7 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format):
assert roundtrip.a.attrs == ds.a.attrs


@requires_kerchunk
def test_open_scalar_variable(tmpdir):
# regression test for GH issue #100

Expand Down
3 changes: 3 additions & 0 deletions virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
dataset_from_kerchunk_refs,
find_var_names,
)
from virtualizarr.tests import requires_kerchunk


@requires_kerchunk
def test_kerchunk_roundtrip_in_memory_no_concat():
# Set up example xarray dataset
chunks_dict = {
Expand Down Expand Up @@ -40,6 +42,7 @@ def test_kerchunk_roundtrip_in_memory_no_concat():
xrt.assert_equal(roundtrip, ds)


@requires_kerchunk
def test_no_duplicates_find_var_names():
"""Verify that we get a deduplicated list of var names"""
ref_dict = {"refs": {"x/something": {}, "x/otherthing": {}}}
Expand Down
4 changes: 3 additions & 1 deletion virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.tests import create_manifestarray
from virtualizarr.tests import create_manifestarray, requires_kerchunk
from virtualizarr.zarr import ZArray


Expand Down Expand Up @@ -35,6 +35,7 @@ def test_create_manifestarray(self):
assert marr.size == 5 * 2 * 20
assert marr.ndim == 3

@requires_kerchunk
def test_create_manifestarray_from_kerchunk_refs(self):
arr_refs = {
".zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
Expand All @@ -50,6 +51,7 @@ def test_create_manifestarray_from_kerchunk_refs(self):
assert marr.zarray.filters is None
assert marr.zarray.order == "C"

@requires_kerchunk
def test_create_scalar_manifestarray_from_kerchunk_refs(self):
arr_refs = {
".zarray": '{"chunks":[],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[],"zarr_format":2}',
Expand Down
6 changes: 5 additions & 1 deletion virtualizarr/tests/test_readers/test_kerchunk.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
import ujson # type: ignore

from virtualizarr.manifests import ManifestArray
from virtualizarr.readers.kerchunk import (
dataset_from_kerchunk_refs,
)
from virtualizarr.tests import requires_kerchunk


def gen_ds_refs(
Expand All @@ -24,6 +24,7 @@ def gen_ds_refs(
}


@requires_kerchunk
def test_dataset_from_df_refs():
ds_refs = gen_ds_refs()
ds = dataset_from_kerchunk_refs(ds_refs)
Expand All @@ -45,7 +46,10 @@ def test_dataset_from_df_refs():
}


@requires_kerchunk
def test_dataset_from_df_refs_with_filters():
import ujson

filters = [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}]
zarray = {
"chunks": [2, 3],
Expand Down
2 changes: 2 additions & 0 deletions virtualizarr/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest
import xarray as xr

from virtualizarr.tests import requires_scipy
from virtualizarr.utils import _FsspecFSFromFilepath


Expand All @@ -25,6 +26,7 @@ def test_fsspec_openfile_from_path(tmp_path: pathlib.Path, dataset: xr.Dataset)
assert isinstance(result, fsspec.implementations.local.LocalFileOpener)


@requires_scipy
def test_fsspec_openfile_memory(dataset: xr.Dataset):
fs = fsspec.filesystem("memory")
with contextlib.redirect_stderr(None):
Expand Down
Loading
Loading