chore(datasets): replace "data set" with "dataset" (#867)
* chore(datasets): replace "data set" with "dataset"

Signed-off-by: Deepyaman Datta <[email protected]>

* style(datasets): reformat everything using `black`

Signed-off-by: Deepyaman Datta <[email protected]>

---------

Signed-off-by: Deepyaman Datta <[email protected]>
deepyaman authored Oct 10, 2024
1 parent 62a5808 commit 987dab9
Showing 56 changed files with 206 additions and 208 deletions.
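The change is mechanical: every occurrence of "data set" in docstrings, error messages, and test regexes becomes "dataset", followed by a `black` pass (the second commit in the squash) to keep formatting consistent. The script below is a hypothetical sketch of how such a bulk rename might be driven — it is not the tool actually used for this commit, and the paths and the `black` invocation are assumptions based on the repository layout.

# Hypothetical sketch -- not the script used for this commit.
# Rewrites "data set" to "dataset" in every .py file under the
# package and test trees, then runs `black` over the result.
import pathlib
import subprocess

for root in ("kedro-datasets/kedro_datasets", "kedro-datasets/tests"):
    for path in pathlib.Path(root).rglob("*.py"):
        text = path.read_text()
        if "data set" in text:
            path.write_text(text.replace("data set", "dataset"))

subprocess.run(["black", "kedro-datasets"], check=True)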
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/dask/csv_dataset.py
@@ -1,4 +1,4 @@
-"""``CSVDataset`` is a data set used to load and save data to CSV files using Dask
+"""``CSVDataset`` is a dataset used to load and save data to CSV files using Dask
dataframe"""
from __future__ import annotations

2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -1,4 +1,4 @@
-"""``ParquetDataset`` is a data set used to load and save data to parquet files using Dask
+"""``ParquetDataset`` is a dataset used to load and save data to parquet files using Dask
dataframe"""
from __future__ import annotations

@@ -242,7 +242,7 @@ def __init__( # noqa: PLR0913
database: the name of the database.
(also referred to as schema). Defaults to "default".
write_mode: the mode to write the data into the table. If not
-present, the data set is read-only.
+present, the dataset is read-only.
Options are:["overwrite", "append", "upsert"].
"upsert" mode requires primary_key field to be populated.
Defaults to None.
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/pandas/feather_dataset.py
@@ -1,4 +1,4 @@
-"""``FeatherDataset`` is a data set used to load and save data to feather files
+"""``FeatherDataset`` is a dataset used to load and save data to feather files
using an underlying filesystem (e.g.: local, S3, GCS). The underlying functionality
is supported by pandas, so it supports all operations the pandas supports.
"""
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/pandas/sql_dataset.py
@@ -309,7 +309,7 @@ class SQLQueryDataset(AbstractDataset[None, pd.DataFrame]):
by SQLAlchemy can be found here:
https://docs.sqlalchemy.org/core/engines.html#database-urls
-It does not support save method so it is a read only data set.
+It does not support save method so it is a read only dataset.
To save data to a SQL server use ``SQLTableDataset``.
Example usage for the
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
@@ -16,7 +16,7 @@

class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]):
"""``SparkHiveDataset`` loads and saves Spark dataframes stored on Hive.
-This data set also handles some incompatible file types such as using partitioned parquet on
+This dataset also handles some incompatible file types such as using partitioned parquet on
hive which will not normally allow upserts to existing data without a complete replacement
of the existing file/partition.
@@ -68,7 +68,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> ChatAnthropic:
return ChatAnthropic(
@@ -70,7 +70,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> ChatCohere:
return ChatCohere(cohere_api_key=self.cohere_api_key, base_url=self.cohere_api_url, **self.kwargs)
@@ -32,7 +32,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> OPENAI_TYPE:
return self.constructor(
@@ -153,7 +153,7 @@ def test_empty_credentials_load(self, bad_credentials, tmp_path):
netcdf_dataset = NetCDFDataset(
filepath=S3_PATH, temppath=tmp_path, credentials=bad_credentials
)
-pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)"
+pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
netcdf_dataset.load()

@@ -165,7 +165,7 @@ def test_pass_credentials(self, mocker, tmp_path):
s3_dataset = NetCDFDataset(
filepath=S3_PATH, temppath=tmp_path, credentials=AWS_CREDENTIALS
)
-pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)"
+pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load()

@@ -75,7 +75,7 @@ def test_open_extra_args(self, prophet_model_dataset, fs_args):

def test_load_missing_file(self, prophet_model_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set ProphetModelDataset\(.*\)"
+pattern = r"Failed while loading data from dataset ProphetModelDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
prophet_model_dataset.load()

@@ -65,7 +65,7 @@ def cog_geotiff_dataset(cog_file_path, save_args) -> GeoTIFFDataset:


def test_load_cog_geotiff(cog_geotiff_dataset):
"""Test loading cloud optimised geotiff reloading the data set."""
"""Test loading cloud optimised geotiff reloading the dataset."""
loaded_xr = cog_geotiff_dataset.load()
assert isinstance(loaded_xr.rio.crs, CRS)
assert isinstance(loaded_xr, xr.DataArray)
@@ -144,7 +144,7 @@ def test_load_not_tif():

def test_exists(tmp_path, synthetic_xarray):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
assert not dataset.exists()
dataset.save(synthetic_xarray)
@@ -155,7 +155,7 @@ def test_exists(tmp_path, synthetic_xarray):
"synthetic_xarray",
])
def test_save_and_load_geotiff(tmp_path, request, xarray_fixture):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
xarray_data = request.getfixturevalue(xarray_fixture)
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
dataset.save(xarray_data)
@@ -176,6 +176,6 @@ def test_load_missing_file(tmp_path):
"""Check the error when trying to load missing file."""
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
assert not dataset._exists(), "File unexpectedly exists"
-pattern = r"Failed while loading data from data set GeoTIFFDataset\(.*\)"
+pattern = r"Failed while loading data from dataset GeoTIFFDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
dataset.load()
6 changes: 3 additions & 3 deletions kedro-datasets/tests/biosequence/test_biosequence_dataset.py
@@ -38,7 +38,7 @@ def dummy_data():

class TestBioSequenceDataset:
def test_save_and_load(self, biosequence_dataset, dummy_data):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
biosequence_dataset.save(dummy_data)
reloaded = biosequence_dataset.load()
assert dummy_data[0].id, reloaded[0].id
@@ -49,7 +49,7 @@ def test_save_and_load(self, biosequence_dataset, dummy_data):

def test_exists(self, biosequence_dataset, dummy_data):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not biosequence_dataset.exists()
biosequence_dataset.save(dummy_data)
assert biosequence_dataset.exists()
@@ -75,7 +75,7 @@ def test_open_extra_args(self, biosequence_dataset, fs_args):

def test_load_missing_file(self, biosequence_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set BioSequenceDataset\(.*\)"
+pattern = r"Failed while loading data from dataset BioSequenceDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
biosequence_dataset.load()

6 changes: 3 additions & 3 deletions kedro-datasets/tests/dask/test_csv_dataset.py
@@ -84,7 +84,7 @@ def test_incorrect_credentials_load(self):
@pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
def test_empty_credentials_load(self, bad_credentials):
csv_dataset = CSVDataset(filepath=S3_PATH, credentials=bad_credentials)
-pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
csv_dataset.load().compute()

@@ -94,7 +94,7 @@ def test_pass_credentials(self, mocker):
client instantiation on creating S3 connection."""
client_mock = mocker.patch("botocore.session.Session.create_client")
s3_dataset = CSVDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load().compute()

@@ -121,7 +121,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not s3_dataset.exists()
s3_dataset.save(dummy_dd_dataframe)
assert s3_dataset.exists()
6 changes: 3 additions & 3 deletions kedro-datasets/tests/dask/test_parquet_dataset.py
@@ -87,7 +87,7 @@ def test_incorrect_credentials_load(self):
@pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
def test_empty_credentials_load(self, bad_credentials):
parquet_dataset = ParquetDataset(filepath=S3_PATH, credentials=bad_credentials)
-pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
parquet_dataset.load().compute()

@@ -97,7 +97,7 @@ def test_pass_credentials(self, mocker):
client instantiation on creating S3 connection."""
client_mock = mocker.patch("botocore.session.Session.create_client")
s3_dataset = ParquetDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load().compute()

@@ -124,7 +124,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not s3_dataset.exists()
s3_dataset.save(dummy_dd_dataframe)
assert s3_dataset.exists()
12 changes: 6 additions & 6 deletions kedro-datasets/tests/email/test_message_dataset.py
@@ -50,7 +50,7 @@ def dummy_msg():

class TestEmailMessageDataset:
def test_save_and_load(self, message_dataset, dummy_msg):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
message_dataset.save(dummy_msg)
reloaded = message_dataset.load()
assert dummy_msg.__dict__ == reloaded.__dict__
@@ -59,7 +59,7 @@ def test_save_and_load(self, message_dataset, dummy_msg):

def test_exists(self, message_dataset, dummy_msg):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not message_dataset.exists()
message_dataset.save(dummy_msg)
assert message_dataset.exists()
@@ -91,7 +91,7 @@ def test_open_extra_args(self, message_dataset, fs_args):

def test_load_missing_file(self, message_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set EmailMessageDataset\(.*\)"
+pattern = r"Failed while loading data from dataset EmailMessageDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
message_dataset.load()

@@ -149,7 +149,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_message_dataset, dummy_msg):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_message_dataset.save(dummy_msg)
reloaded = versioned_message_dataset.load()
assert dummy_msg.__dict__ == reloaded.__dict__
@@ -161,13 +161,13 @@ def test_no_versions(self, versioned_message_dataset):
versioned_message_dataset.load()

def test_exists(self, versioned_message_dataset, dummy_msg):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_message_dataset.exists()
versioned_message_dataset.save(dummy_msg)
assert versioned_message_dataset.exists()

def test_prevent_overwrite(self, versioned_message_dataset, dummy_msg):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding text file for a given save version already exists."""
versioned_message_dataset.save(dummy_msg)
pattern = (
8 changes: 4 additions & 4 deletions kedro-datasets/tests/geopandas/test_geojson_dataset.py
@@ -72,7 +72,7 @@ def test_save_and_load(self, geojson_dataset, dummy_dataframe):
@pytest.mark.parametrize("geojson_dataset", [{"index": False}], indirect=True)
def test_load_missing_file(self, geojson_dataset):
"""Check the error while trying to load from missing source."""
-pattern = r"Failed while loading data from data set GeoJSONDataset"
+pattern = r"Failed while loading data from dataset GeoJSONDataset"
with pytest.raises(DatasetError, match=pattern):
geojson_dataset.load()

@@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_geojson_dataset, dummy_dataframe):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_geojson_dataset.save(dummy_dataframe)
reloaded_df = versioned_geojson_dataset.load()
assert_frame_equal(reloaded_df, dummy_dataframe)
@@ -168,13 +168,13 @@ def test_no_versions(self, versioned_geojson_dataset):
versioned_geojson_dataset.load()

def test_exists(self, versioned_geojson_dataset, dummy_dataframe):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_geojson_dataset.exists()
versioned_geojson_dataset.save(dummy_dataframe)
assert versioned_geojson_dataset.exists()

def test_prevent_override(self, versioned_geojson_dataset, dummy_dataframe):
"""Check the error when attempt to override the same data set
"""Check the error when attempt to override the same dataset
version."""
versioned_geojson_dataset.save(dummy_dataframe)
pattern = (
4 changes: 2 additions & 2 deletions kedro-datasets/tests/holoviews/test_holoviews_writer.py
@@ -140,7 +140,7 @@ def test_version_str_repr(self, hv_writer, versioned_hv_writer):
assert "save_args" in str(versioned_hv_writer)

def test_prevent_overwrite(self, dummy_hv_object, versioned_hv_writer):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding file for a given save version already exists."""
versioned_hv_writer.save(dummy_hv_object)
pattern = (
@@ -185,7 +185,7 @@ def test_load_not_supported(self, versioned_hv_writer):
versioned_hv_writer.load()

def test_exists(self, versioned_hv_writer, dummy_hv_object):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_hv_writer.exists()
versioned_hv_writer.save(dummy_hv_object)
assert versioned_hv_writer.exists()
4 changes: 2 additions & 2 deletions kedro-datasets/tests/ibis/test_table_dataset.py
@@ -52,7 +52,7 @@ def dummy_table(table_dataset_from_csv):

class TestTableDataset:
def test_save_and_load(self, table_dataset, dummy_table, database):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
table_dataset.save(dummy_table)
reloaded = table_dataset.load()
assert_frame_equal(dummy_table.execute(), reloaded.execute())
@@ -64,7 +64,7 @@ def test_save_and_load(self, table_dataset, dummy_table, database):

def test_exists(self, table_dataset, dummy_table):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not table_dataset.exists()
table_dataset.save(dummy_table)
assert table_dataset.exists()
12 changes: 6 additions & 6 deletions kedro-datasets/tests/json/test_json_dataset.py
@@ -36,7 +36,7 @@ def dummy_data():

class TestJSONDataset:
def test_save_and_load(self, json_dataset, dummy_data):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
json_dataset.save(dummy_data)
reloaded = json_dataset.load()
assert dummy_data == reloaded
@@ -45,7 +45,7 @@ def test_save_and_load(self, json_dataset, dummy_data):

def test_exists(self, json_dataset, dummy_data):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not json_dataset.exists()
json_dataset.save(dummy_data)
assert json_dataset.exists()
@@ -69,7 +69,7 @@ def test_open_extra_args(self, json_dataset, fs_args):

def test_load_missing_file(self, json_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set JSONDataset\(.*\)"
+pattern = r"Failed while loading data from dataset JSONDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
json_dataset.load()

@@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_json_dataset, dummy_data):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_json_dataset.save(dummy_data)
reloaded = versioned_json_dataset.load()
assert dummy_data == reloaded
@@ -137,13 +137,13 @@ def test_no_versions(self, versioned_json_dataset):
versioned_json_dataset.load()

def test_exists(self, versioned_json_dataset, dummy_data):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_json_dataset.exists()
versioned_json_dataset.save(dummy_data)
assert versioned_json_dataset.exists()

def test_prevent_overwrite(self, versioned_json_dataset, dummy_data):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding json file for a given save version already exists."""
versioned_json_dataset.save(dummy_data)
pattern = (
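A recurring pattern in the test diffs above is the update of regexes such as r"Failed while loading data from dataset ...\(.+\)". These strings must track the message that DatasetError carries at runtime, because pytest.raises(..., match=...) applies re.search to the string representation of the raised exception; the parentheses in the dataset repr are escaped so they match literally. A minimal illustration follows — the message string here is a stand-in for demonstration, not taken from the commit.

# Why the escaped \( \) matter: pytest.raises(match=...) is re.search
# under the hood, so the pattern must treat the repr's parens as literals.
import re

message = "Failed while loading data from dataset CSVDataset(filepath=data.csv)"
pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
assert re.search(pattern, message)  # matches only with the new wording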
