diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml index d96d14a27..e28cacc57 100644 --- a/.github/workflows/kedro-datasets.yml +++ b/.github/workflows/kedro-datasets.yml @@ -59,7 +59,7 @@ jobs: - name: Install dependencies run: | cd kedro-datasets - uv pip install --system "kedro-datasets[docs,test] @ ." + uv pip install --system "kedro-datasets[docs,test,experimental] @ ." - name: Documentation check for kedro-datasets run: | make check-datasets-docs diff --git a/kedro-airflow/README.md b/kedro-airflow/README.md index cf405aa7a..b5f5cd651 100644 --- a/kedro-airflow/README.md +++ b/kedro-airflow/README.md @@ -1,7 +1,7 @@ # Kedro-Airflow [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro-airflow/) +[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-airflow/) [![PyPI Version](https://badge.fury.io/py/kedro-airflow.svg)](https://pypi.org/project/kedro-airflow/) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 2ad4e6d15..46e8f89bb 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -9,8 +9,12 @@ ## Bug fixes and other changes * Removed arbitrary upper bound for `s3fs`. 
+* Added `NetCDFDataset` support for NetCDF4 via `engine="netcdf4"` and `engine="h5netcdf"`. ## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: +* [Charles Guan](https://github.com/charlesbmi) + # Release 3.0.0 ## Major features and improvements diff --git a/kedro-datasets/kedro_datasets/netcdf/netcdf_dataset.py b/kedro-datasets/kedro_datasets/netcdf/netcdf_dataset.py index 11f8680e2..1f24e681a 100644 --- a/kedro-datasets/kedro_datasets/netcdf/netcdf_dataset.py +++ b/kedro-datasets/kedro_datasets/netcdf/netcdf_dataset.py @@ -1,4 +1,5 @@ """NetCDFDataset loads and saves data to a local netcdf (.nc) file.""" + from __future__ import annotations import logging @@ -56,7 +57,7 @@ class NetCDFDataset(AbstractDataset): ... [0, 1, 2], dims=["x"], coords={"x": [0, 1, 2]}, name="data" ... ).to_dataset() >>> dataset = NetCDFDataset( - ... filepath="path/to/folder", + ... filepath=tmp_path / "data.nc", ... save_args={"mode": "w"}, ... ) >>> dataset.save(ds) @@ -168,11 +169,15 @@ def _save(self, data: xr.Dataset): + "Create an alternate NetCDFDataset with a single .nc output file." 
) else: - save_path = self._filepath - bytes_buffer = data.to_netcdf(**self._save_args) - - with self._fs.open(save_path, mode="wb") as fs_file: - fs_file.write(bytes_buffer) + if self._protocol == "file": + data.to_netcdf(path=self._filepath, **self._save_args) + else: + if self._temppath is None: + raise DatasetError("_temppath should have been set in __init__") + temp_save_path = self._temppath / PurePosixPath(self._filepath).name + data.to_netcdf(path=str(temp_save_path), **self._save_args) + # Sync to remote storage + self._fs.put_file(str(temp_save_path), self._filepath) self._invalidate_cache() diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 9c16f8d48..21aa829ea 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -196,6 +196,7 @@ test = [ "geopandas>=0.6.0, <1.0", "hdfs>=2.5.8, <3.0", "holoviews>=1.13.0", + "h5netcdf>=1.2.0", "ibis-framework[duckdb,examples]", "import-linter[toml]==1.2.6", "ipython>=7.31.1, <8.0", @@ -209,6 +210,7 @@ test = [ "memory_profiler>=0.50.0, <1.0", "moto==5.0.0", "mypy~=1.0", + "netcdf4>=1.6.4", "networkx~=2.4", "opencv-python~=4.5.5.64", "openpyxl>=3.0.3, <4.0", diff --git a/kedro-datasets/tests/netcdf/test_netcdf_dataset.py b/kedro-datasets/tests/netcdf/test_netcdf_dataset.py index 51eea1e15..c2120b126 100644 --- a/kedro-datasets/tests/netcdf/test_netcdf_dataset.py +++ b/kedro-datasets/tests/netcdf/test_netcdf_dataset.py @@ -224,9 +224,18 @@ def test_exists_multi_locally(self, tmp_path, dummy_xr_dataset): NetCDFDataset(filepath=str(tmp_path / "test2.nc")).save(dummy_xr_dataset) assert dataset.exists() - def test_save_load_locally(self, tmp_path, dummy_xr_dataset): + @pytest.mark.parametrize( + "save_args, load_args", + [ + ({"engine": "netcdf4"}, {"engine": "netcdf4"}), + ({"engine": "scipy"}, {"engine": "scipy"}), + ({"engine": "h5netcdf"}, {"engine": "h5netcdf"}), + ], + indirect=True, + ) + def test_save_load_locally(self, tmp_path, dummy_xr_dataset, save_args, 
load_args): """Test loading and saving the a NetCDF file locally.""" - file_path = str(tmp_path / "some" / "dir" / FILE_NAME) + file_path = str(tmp_path / FILE_NAME) dataset = NetCDFDataset(filepath=file_path) assert not dataset.exists() @@ -239,18 +248,14 @@ def test_load_locally_multi( self, tmp_path, dummy_xr_dataset, dummy_xr_dataset_multi ): """Test loading multiple NetCDF files locally.""" - file_path = str(tmp_path / "some" / "dir" / MULTIFILE_NAME) + file_path = str(tmp_path / MULTIFILE_NAME) dataset = NetCDFDataset( filepath=file_path, load_args={"concat_dim": "dummy", "combine": "nested"} ) assert not dataset.exists() - NetCDFDataset(filepath=str(tmp_path / "some" / "dir" / "test1.nc")).save( - dummy_xr_dataset - ) - NetCDFDataset(filepath=str(tmp_path / "some" / "dir" / "test2.nc")).save( - dummy_xr_dataset - ) + NetCDFDataset(filepath=str(tmp_path / "test1.nc")).save(dummy_xr_dataset) + NetCDFDataset(filepath=str(tmp_path / "test2.nc")).save(dummy_xr_dataset) assert dataset.exists() loaded_data = dataset.load() dummy_xr_dataset_multi.equals(loaded_data.compute()) diff --git a/kedro-telemetry/README.md b/kedro-telemetry/README.md index db4a1f30e..47584a3e4 100644 --- a/kedro-telemetry/README.md +++ b/kedro-telemetry/README.md @@ -1,6 +1,6 @@ # Kedro-Telemetry -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue.svg)](https://pypi.org/project/kedro-telemetry/) +[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-telemetry/) [![PyPI version](https://badge.fury.io/py/kedro-telemetry.svg)](https://pypi.org/project/kedro-telemetry/) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black)