diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index 31f20680f..053da6b00 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -1,4 +1,4 @@ -"""``CSVDataset`` is a data set used to load and save data to CSV files using Dask +"""``CSVDataset`` is a dataset used to load and save data to CSV files using Dask dataframe""" from __future__ import annotations diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 03082b341..1acfe7cda 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -1,4 +1,4 @@ -"""``ParquetDataset`` is a data set used to load and save data to parquet files using Dask +"""``ParquetDataset`` is a dataset used to load and save data to parquet files using Dask dataframe""" from __future__ import annotations diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py index 5783cebcc..ecca89f80 100644 --- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py +++ b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py @@ -242,7 +242,7 @@ def __init__( # noqa: PLR0913 database: the name of the database. (also referred to as schema). Defaults to "default". write_mode: the mode to write the data into the table. If not - present, the data set is read-only. + present, the dataset is read-only. Options are:["overwrite", "append", "upsert"]. "upsert" mode requires primary_key field to be populated. Defaults to None. diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py index 4c1f68fa8..56e3eab25 100644 --- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py @@ -1,4 +1,4 @@ -"""``FeatherDataset`` is a data set used to load and save data to feather files +"""``FeatherDataset`` is a dataset used to load and save data to feather files using an underlying filesystem (e.g.: local, S3, GCS). The underlying functionality is supported by pandas, so it supports all operations the pandas supports. """ diff --git a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py index 12f71e790..e34f3a257 100644 --- a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py @@ -309,7 +309,7 @@ class SQLQueryDataset(AbstractDataset[None, pd.DataFrame]): by SQLAlchemy can be found here: https://docs.sqlalchemy.org/core/engines.html#database-urls - It does not support save method so it is a read only data set. + It does not support save method so it is a read only dataset. To save data to a SQL server use ``SQLTableDataset``. Example usage for the diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py index 5886dd7ce..8908c0fac 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py @@ -16,7 +16,7 @@ class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]): """``SparkHiveDataset`` loads and saves Spark dataframes stored on Hive. - This data set also handles some incompatible file types such as using partitioned parquet on + This dataset also handles some incompatible file types such as using partitioned parquet on hive which will not normally allow upserts to existing data without a complete replacement of the existing file/partition. diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py b/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py index ea082dfd9..d643706e6 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py @@ -68,7 +68,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> ChatAnthropic: return ChatAnthropic( diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py b/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py index 38c33e48b..5b4e0eba7 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py @@ -70,7 +70,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> ChatCohere: return ChatCohere(cohere_api_key=self.cohere_api_key, base_url=self.cohere_api_url, **self.kwargs) diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py b/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py index 952ae7eb6..2c1ad002d 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py @@ -32,7 +32,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> OPENAI_TYPE: return self.constructor( diff --git a/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py index 1526f89a5..1e657e1d4 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py @@ -153,7 +153,7 @@ def test_empty_credentials_load(self, bad_credentials, tmp_path): netcdf_dataset = NetCDFDataset( filepath=S3_PATH, temppath=tmp_path, credentials=bad_credentials ) - pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)" + pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): netcdf_dataset.load() @@ -165,7 +165,7 @@ def test_pass_credentials(self, mocker, tmp_path): s3_dataset = NetCDFDataset( filepath=S3_PATH, temppath=tmp_path, credentials=AWS_CREDENTIALS ) - pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)" + pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): s3_dataset.load() diff --git a/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py index 88510a99b..668ae544c 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py @@ -75,7 +75,7 @@ def test_open_extra_args(self, prophet_model_dataset, fs_args): def test_load_missing_file(self, prophet_model_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set ProphetModelDataset\(.*\)" + pattern = r"Failed while loading data from dataset ProphetModelDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): prophet_model_dataset.load() diff --git a/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py index 7f217eee6..51dcc8596 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py @@ -65,7 +65,7 @@ def cog_geotiff_dataset(cog_file_path, save_args) -> GeoTIFFDataset: def test_load_cog_geotiff(cog_geotiff_dataset): - """Test loading cloud optimised geotiff reloading the data set.""" + """Test loading cloud optimised geotiff reloading the dataset.""" loaded_xr = cog_geotiff_dataset.load() assert isinstance(loaded_xr.rio.crs, CRS) assert isinstance(loaded_xr, xr.DataArray) @@ -144,7 +144,7 @@ def test_load_not_tif(): def test_exists(tmp_path, synthetic_xarray): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) assert not dataset.exists() dataset.save(synthetic_xarray) @@ -155,7 +155,7 @@ def test_exists(tmp_path, synthetic_xarray): "synthetic_xarray", ]) def test_save_and_load_geotiff(tmp_path, request, xarray_fixture): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" xarray_data = request.getfixturevalue(xarray_fixture) dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) dataset.save(xarray_data) @@ -176,6 +176,6 @@ def test_load_missing_file(tmp_path): """Check the error when trying to load missing file.""" dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) assert not dataset._exists(), "File unexpectedly exists" - pattern = r"Failed while loading data from data set GeoTIFFDataset\(.*\)" + pattern = r"Failed while loading data from dataset GeoTIFFDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): dataset.load() diff --git a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py index 7566a559f..b5c35bd8f 100644 --- a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py +++ b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py @@ -38,7 +38,7 @@ def dummy_data(): class TestBioSequenceDataset: def test_save_and_load(self, biosequence_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" biosequence_dataset.save(dummy_data) reloaded = biosequence_dataset.load() assert dummy_data[0].id, reloaded[0].id @@ -49,7 +49,7 @@ def test_save_and_load(self, biosequence_dataset, dummy_data): def test_exists(self, biosequence_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not biosequence_dataset.exists() biosequence_dataset.save(dummy_data) assert biosequence_dataset.exists() @@ -75,7 +75,7 @@ def test_open_extra_args(self, biosequence_dataset, fs_args): def test_load_missing_file(self, biosequence_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set BioSequenceDataset\(.*\)" + pattern = r"Failed while loading data from dataset BioSequenceDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): biosequence_dataset.load() diff --git a/kedro-datasets/tests/dask/test_csv_dataset.py b/kedro-datasets/tests/dask/test_csv_dataset.py index 898606ad3..f8fe8773b 100644 --- a/kedro-datasets/tests/dask/test_csv_dataset.py +++ b/kedro-datasets/tests/dask/test_csv_dataset.py @@ -84,7 +84,7 @@ def test_incorrect_credentials_load(self): @pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}]) def test_empty_credentials_load(self, bad_credentials): csv_dataset = CSVDataset(filepath=S3_PATH, credentials=bad_credentials) - pattern = r"Failed while loading data from data set CSVDataset\(.+\)" + pattern = r"Failed while loading data from dataset CSVDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load().compute() @@ -94,7 +94,7 @@ def test_pass_credentials(self, mocker): client instantiation on creating S3 connection.""" client_mock = mocker.patch("botocore.session.Session.create_client") s3_dataset = CSVDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS) - pattern = r"Failed while loading data from data set CSVDataset\(.+\)" + pattern = r"Failed while loading data from dataset CSVDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): s3_dataset.load().compute() @@ -121,7 +121,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object): def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not s3_dataset.exists() s3_dataset.save(dummy_dd_dataframe) assert s3_dataset.exists() diff --git a/kedro-datasets/tests/dask/test_parquet_dataset.py b/kedro-datasets/tests/dask/test_parquet_dataset.py index 5babced77..72d348e6b 100644 --- a/kedro-datasets/tests/dask/test_parquet_dataset.py +++ b/kedro-datasets/tests/dask/test_parquet_dataset.py @@ -87,7 +87,7 @@ def test_incorrect_credentials_load(self): @pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}]) def test_empty_credentials_load(self, bad_credentials): parquet_dataset = ParquetDataset(filepath=S3_PATH, credentials=bad_credentials) - pattern = r"Failed while loading data from data set ParquetDataset\(.+\)" + pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): parquet_dataset.load().compute() @@ -97,7 +97,7 @@ def test_pass_credentials(self, mocker): client instantiation on creating S3 connection.""" client_mock = mocker.patch("botocore.session.Session.create_client") s3_dataset = ParquetDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS) - pattern = r"Failed while loading data from data set ParquetDataset\(.+\)" + pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): s3_dataset.load().compute() @@ -124,7 +124,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object): def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not s3_dataset.exists() s3_dataset.save(dummy_dd_dataframe) assert s3_dataset.exists() diff --git a/kedro-datasets/tests/email/test_message_dataset.py b/kedro-datasets/tests/email/test_message_dataset.py index d35fa5f8d..f8cd6ec67 100644 --- a/kedro-datasets/tests/email/test_message_dataset.py +++ b/kedro-datasets/tests/email/test_message_dataset.py @@ -50,7 +50,7 @@ def dummy_msg(): class TestEmailMessageDataset: def test_save_and_load(self, message_dataset, dummy_msg): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" message_dataset.save(dummy_msg) reloaded = message_dataset.load() assert dummy_msg.__dict__ == reloaded.__dict__ @@ -59,7 +59,7 @@ def test_save_and_load(self, message_dataset, dummy_msg): def test_exists(self, message_dataset, dummy_msg): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not message_dataset.exists() message_dataset.save(dummy_msg) assert message_dataset.exists() @@ -91,7 +91,7 @@ def test_open_extra_args(self, message_dataset, fs_args): def test_load_missing_file(self, message_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set EmailMessageDataset\(.*\)" + pattern = r"Failed while loading data from dataset EmailMessageDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): message_dataset.load() @@ -149,7 +149,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_message_dataset, dummy_msg): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_message_dataset.save(dummy_msg) reloaded = versioned_message_dataset.load() assert dummy_msg.__dict__ == reloaded.__dict__ @@ -161,13 +161,13 @@ def test_no_versions(self, versioned_message_dataset): versioned_message_dataset.load() def test_exists(self, versioned_message_dataset, dummy_msg): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_message_dataset.exists() versioned_message_dataset.save(dummy_msg) assert versioned_message_dataset.exists() def test_prevent_overwrite(self, versioned_message_dataset, dummy_msg): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding text file for a given save version already exists.""" versioned_message_dataset.save(dummy_msg) pattern = ( diff --git a/kedro-datasets/tests/geopandas/test_geojson_dataset.py b/kedro-datasets/tests/geopandas/test_geojson_dataset.py index d2779e5c2..9c6cb49fe 100644 --- a/kedro-datasets/tests/geopandas/test_geojson_dataset.py +++ b/kedro-datasets/tests/geopandas/test_geojson_dataset.py @@ -72,7 +72,7 @@ def test_save_and_load(self, geojson_dataset, dummy_dataframe): @pytest.mark.parametrize("geojson_dataset", [{"index": False}], indirect=True) def test_load_missing_file(self, geojson_dataset): """Check the error while trying to load from missing source.""" - pattern = r"Failed while loading data from data set GeoJSONDataset" + pattern = r"Failed while loading data from dataset GeoJSONDataset" with pytest.raises(DatasetError, match=pattern): geojson_dataset.load() @@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_geojson_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_geojson_dataset.save(dummy_dataframe) reloaded_df = versioned_geojson_dataset.load() assert_frame_equal(reloaded_df, dummy_dataframe) @@ -168,13 +168,13 @@ def test_no_versions(self, versioned_geojson_dataset): versioned_geojson_dataset.load() def test_exists(self, versioned_geojson_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_geojson_dataset.exists() versioned_geojson_dataset.save(dummy_dataframe) assert versioned_geojson_dataset.exists() def test_prevent_override(self, versioned_geojson_dataset, dummy_dataframe): - """Check the error when attempt to override the same data set + """Check the error when attempt to override the same dataset version.""" versioned_geojson_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/holoviews/test_holoviews_writer.py b/kedro-datasets/tests/holoviews/test_holoviews_writer.py index 1426b2e28..94d722d1f 100644 --- a/kedro-datasets/tests/holoviews/test_holoviews_writer.py +++ b/kedro-datasets/tests/holoviews/test_holoviews_writer.py @@ -140,7 +140,7 @@ def test_version_str_repr(self, hv_writer, versioned_hv_writer): assert "save_args" in str(versioned_hv_writer) def test_prevent_overwrite(self, dummy_hv_object, versioned_hv_writer): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding file for a given save version already exists.""" versioned_hv_writer.save(dummy_hv_object) pattern = ( @@ -185,7 +185,7 @@ def test_load_not_supported(self, versioned_hv_writer): versioned_hv_writer.load() def test_exists(self, versioned_hv_writer, dummy_hv_object): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_hv_writer.exists() versioned_hv_writer.save(dummy_hv_object) assert versioned_hv_writer.exists() diff --git a/kedro-datasets/tests/ibis/test_table_dataset.py b/kedro-datasets/tests/ibis/test_table_dataset.py index b7ee7baca..644bbc127 100644 --- a/kedro-datasets/tests/ibis/test_table_dataset.py +++ b/kedro-datasets/tests/ibis/test_table_dataset.py @@ -52,7 +52,7 @@ def dummy_table(table_dataset_from_csv): class TestTableDataset: def test_save_and_load(self, table_dataset, dummy_table, database): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" table_dataset.save(dummy_table) reloaded = table_dataset.load() assert_frame_equal(dummy_table.execute(), reloaded.execute()) @@ -64,7 +64,7 @@ def test_save_and_load(self, table_dataset, dummy_table, database): def test_exists(self, table_dataset, dummy_table): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not table_dataset.exists() table_dataset.save(dummy_table) assert table_dataset.exists() diff --git a/kedro-datasets/tests/json/test_json_dataset.py b/kedro-datasets/tests/json/test_json_dataset.py index 52075266d..beaafc343 100644 --- a/kedro-datasets/tests/json/test_json_dataset.py +++ b/kedro-datasets/tests/json/test_json_dataset.py @@ -36,7 +36,7 @@ def dummy_data(): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset.save(dummy_data) reloaded = json_dataset.load() assert dummy_data == reloaded @@ -45,7 +45,7 @@ def test_save_and_load(self, json_dataset, dummy_data): def test_exists(self, json_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_data) assert json_dataset.exists() @@ -69,7 +69,7 @@ def test_open_extra_args(self, json_dataset, fs_args): def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): json_dataset.load() @@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_json_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_json_dataset.save(dummy_data) reloaded = versioned_json_dataset.load() assert dummy_data == reloaded @@ -137,13 +137,13 @@ def test_no_versions(self, versioned_json_dataset): versioned_json_dataset.load() def test_exists(self, versioned_json_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_json_dataset.exists() versioned_json_dataset.save(dummy_data) assert versioned_json_dataset.exists() def test_prevent_overwrite(self, versioned_json_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" versioned_json_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/matlab/test_matlab_dataset.py b/kedro-datasets/tests/matlab/test_matlab_dataset.py index 331702db9..284a0892f 100644 --- a/kedro-datasets/tests/matlab/test_matlab_dataset.py +++ b/kedro-datasets/tests/matlab/test_matlab_dataset.py @@ -36,7 +36,7 @@ def dummy_data(): class TestMatlabDataset: def test_save_and_load(self, matlab_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" matlab_dataset.save(dummy_data) reloaded = matlab_dataset.load() assert (dummy_data == reloaded["data"]).all() @@ -45,7 +45,7 @@ def test_save_and_load(self, matlab_dataset, dummy_data): def test_exists(self, matlab_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not matlab_dataset.exists() matlab_dataset.save(dummy_data) assert matlab_dataset.exists() @@ -69,7 +69,7 @@ def test_open_extra_args(self, matlab_dataset, fs_args): def test_load_missing_file(self, matlab_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set MatlabDataset\(.*\)" + pattern = r"Failed while loading data from dataset MatlabDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): matlab_dataset.load() @@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_matlab_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_matlab_dataset.save(dummy_data) reloaded = versioned_matlab_dataset.load() assert (dummy_data == reloaded["data"]).all() @@ -137,13 +137,13 @@ def test_no_versions(self, versioned_matlab_dataset): versioned_matlab_dataset.load() def test_exists(self, versioned_matlab_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_matlab_dataset.exists() versioned_matlab_dataset.save(dummy_data) assert versioned_matlab_dataset.exists() def test_prevent_overwrite(self, versioned_matlab_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" versioned_matlab_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py index 8b58ed9fc..4cdb58e92 100644 --- a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py +++ b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py @@ -282,7 +282,7 @@ def test_version_str_repr(self, load_version, save_version): assert ver_str in str(chart_versioned) def test_prevent_overwrite(self, mock_single_plot, versioned_plot_writer): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding matplotlib file for a given save version already exists.""" versioned_plot_writer.save(mock_single_plot) pattern = ( @@ -341,13 +341,13 @@ def test_load_not_supported(self, versioned_plot_writer): versioned_plot_writer.load() def test_exists(self, versioned_plot_writer, mock_single_plot): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_plot_writer.exists() versioned_plot_writer.save(mock_single_plot) assert versioned_plot_writer.exists() def test_exists_multiple(self, versioned_plot_writer, mock_list_plot): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_plot_writer.exists() versioned_plot_writer.save(mock_list_plot) assert versioned_plot_writer.exists() diff --git a/kedro-datasets/tests/networkx/test_gml_dataset.py b/kedro-datasets/tests/networkx/test_gml_dataset.py index 6d2d3cea7..8caf55654 100644 --- a/kedro-datasets/tests/networkx/test_gml_dataset.py +++ b/kedro-datasets/tests/networkx/test_gml_dataset.py @@ -51,7 +51,7 @@ def dummy_graph_data(): class TestGMLDataset: def test_save_and_load(self, gml_dataset, dummy_graph_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" gml_dataset.save(dummy_graph_data) reloaded = gml_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -60,7 +60,7 @@ def test_save_and_load(self, gml_dataset, dummy_graph_data): def test_load_missing_file(self, gml_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set GMLDataset\(.*\)" + pattern = r"Failed while loading data from dataset GMLDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): assert gml_dataset.load() @@ -100,7 +100,7 @@ def test_catalog_release(self, mocker): class TestGMLDatasetVersioned: def test_save_and_load(self, versioned_gml_dataset, dummy_graph_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_gml_dataset.save(dummy_graph_data) reloaded = versioned_gml_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -114,13 +114,13 @@ def test_no_versions(self, versioned_gml_dataset): versioned_gml_dataset.load() def test_exists(self, versioned_gml_dataset, dummy_graph_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_gml_dataset.exists() versioned_gml_dataset.save(dummy_graph_data) assert versioned_gml_dataset.exists() def test_prevent_override(self, versioned_gml_dataset, dummy_graph_data): - """Check the error when attempt to override the same data set + """Check the error when attempt to override the same dataset version.""" versioned_gml_dataset.save(dummy_graph_data) pattern = ( diff --git a/kedro-datasets/tests/networkx/test_graphml_dataset.py b/kedro-datasets/tests/networkx/test_graphml_dataset.py index acffd14b0..659c3d55b 100644 --- a/kedro-datasets/tests/networkx/test_graphml_dataset.py +++ b/kedro-datasets/tests/networkx/test_graphml_dataset.py @@ -51,7 +51,7 @@ def dummy_graph_data(): class TestGraphMLDataset: def test_save_and_load(self, graphml_dataset, dummy_graph_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" graphml_dataset.save(dummy_graph_data) reloaded = graphml_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -60,7 +60,7 @@ def test_save_and_load(self, graphml_dataset, dummy_graph_data): def test_load_missing_file(self, graphml_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set GraphMLDataset\(.*\)" + pattern = r"Failed while loading data from dataset GraphMLDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): assert graphml_dataset.load() @@ -100,7 +100,7 @@ def test_catalog_release(self, mocker): class TestGraphMLDatasetVersioned: def test_save_and_load(self, versioned_graphml_dataset, dummy_graph_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_graphml_dataset.save(dummy_graph_data) reloaded = versioned_graphml_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -114,13 +114,13 @@ def test_no_versions(self, versioned_graphml_dataset): versioned_graphml_dataset.load() def test_exists(self, versioned_graphml_dataset, dummy_graph_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_graphml_dataset.exists() versioned_graphml_dataset.save(dummy_graph_data) assert versioned_graphml_dataset.exists() def test_prevent_override(self, versioned_graphml_dataset, dummy_graph_data): - """Check the error when attempt to override the same data set + """Check the error when attempt to override the same dataset version.""" versioned_graphml_dataset.save(dummy_graph_data) pattern = ( diff --git a/kedro-datasets/tests/networkx/test_json_dataset.py b/kedro-datasets/tests/networkx/test_json_dataset.py index 53039c0c9..d61043126 100644 --- a/kedro-datasets/tests/networkx/test_json_dataset.py +++ b/kedro-datasets/tests/networkx/test_json_dataset.py @@ -51,7 +51,7 @@ def dummy_graph_data(): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_graph_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset.save(dummy_graph_data) reloaded = json_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -60,7 +60,7 @@ def test_save_and_load(self, json_dataset, dummy_graph_data): def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): assert json_dataset.load() @@ -140,7 +140,7 @@ def test_catalog_release(self, mocker): class TestJSONDatasetVersioned: def test_save_and_load(self, versioned_json_dataset, dummy_graph_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_json_dataset.save(dummy_graph_data) reloaded = versioned_json_dataset.load() assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True) @@ -152,13 +152,13 @@ def test_no_versions(self, versioned_json_dataset): versioned_json_dataset.load() def test_exists(self, versioned_json_dataset, dummy_graph_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_json_dataset.exists() versioned_json_dataset.save(dummy_graph_data) assert versioned_json_dataset.exists() def test_prevent_override(self, versioned_json_dataset, dummy_graph_data): - """Check the error when attempt to override the same data set + """Check the error when attempt to override the same dataset version.""" versioned_json_dataset.save(dummy_graph_data) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_csv_dataset.py b/kedro-datasets/tests/pandas/test_csv_dataset.py index 6a5c52464..449de4cfd 100644 --- a/kedro-datasets/tests/pandas/test_csv_dataset.py +++ b/kedro-datasets/tests/pandas/test_csv_dataset.py @@ -87,14 +87,14 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe): class TestCSVDataset: def test_save_and_load(self, csv_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" csv_dataset.save(dummy_dataframe) reloaded = csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not csv_dataset.exists() csv_dataset.save(dummy_dataframe) assert csv_dataset.exists() @@ -195,7 +195,7 @@ def test_preview(self, csv_dataset, dummy_dataframe, nrows, expected): def test_load_missing_file(self, csv_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set CSVDataset\(.*\)" + pattern = r"Failed while loading data from dataset CSVDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load() @@ -258,7 +258,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -337,13 +337,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding CSV file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_excel_dataset.py b/kedro-datasets/tests/pandas/test_excel_dataset.py index 16f0c8605..ba950d10a 100644 --- a/kedro-datasets/tests/pandas/test_excel_dataset.py +++ b/kedro-datasets/tests/pandas/test_excel_dataset.py @@ -58,7 +58,7 @@ def another_dummy_dataframe(): class TestExcelDataset: def test_save_and_load(self, excel_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" excel_dataset.save(dummy_dataframe) reloaded = excel_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) @@ -66,7 +66,7 @@ def test_save_and_load(self, excel_dataset, dummy_dataframe): def test_save_and_load_multiple_sheets( self, excel_multisheet_dataset, dummy_dataframe, another_dummy_dataframe ): - """Test saving and reloading the data set with multiple sheets.""" + """Test saving and reloading the dataset with multiple sheets.""" dummy_multisheet = { "sheet 1": dummy_dataframe, "sheet 2": another_dummy_dataframe, @@ -78,7 +78,7 @@ def test_save_and_load_multiple_sheets( def test_exists(self, excel_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not excel_dataset.exists() excel_dataset.save(dummy_dataframe) assert excel_dataset.exists() @@ -169,7 +169,7 @@ def test_preview(self, excel_dataset, dummy_dataframe, nrows, expected): def test_load_missing_file(self, excel_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set ExcelDataset\(.*\)" + pattern = r"Failed while loading data from dataset ExcelDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): excel_dataset.load() @@ -238,7 +238,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_excel_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_excel_dataset.save(dummy_dataframe) reloaded_df = versioned_excel_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -264,13 +264,13 @@ def test_versioning_not_supported_in_append_mode( ) def test_exists(self, versioned_excel_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_excel_dataset.exists() versioned_excel_dataset.save(dummy_dataframe) assert versioned_excel_dataset.exists() def test_prevent_overwrite(self, versioned_excel_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Excel file for a given save version already exists.""" versioned_excel_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_feather_dataset.py b/kedro-datasets/tests/pandas/test_feather_dataset.py index 38d1f0e31..5c2ef8190 100644 --- a/kedro-datasets/tests/pandas/test_feather_dataset.py +++ b/kedro-datasets/tests/pandas/test_feather_dataset.py @@ -38,14 +38,14 @@ def dummy_dataframe(): class TestFeatherDataset: def test_save_and_load(self, feather_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" feather_dataset.save(dummy_dataframe) reloaded = feather_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, feather_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not feather_dataset.exists() feather_dataset.save(dummy_dataframe) assert feather_dataset.exists() @@ -92,7 +92,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, feather_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set FeatherDataset\(.*\)" + pattern = r"Failed while loading data from dataset FeatherDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): feather_dataset.load() @@ -154,7 +154,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_feather_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_feather_dataset.save(dummy_dataframe) reloaded_df = versioned_feather_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -166,13 +166,13 @@ def test_no_versions(self, versioned_feather_dataset): versioned_feather_dataset.load() def test_exists(self, versioned_feather_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_feather_dataset.exists() versioned_feather_dataset.save(dummy_dataframe) assert versioned_feather_dataset.exists() def test_prevent_overwrite(self, versioned_feather_dataset, dummy_dataframe): - """Check the error when attempting to overwrite the data set if the + """Check the error when attempting to overwrite the dataset if the corresponding feather file for a given save version already exists.""" versioned_feather_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_gbq_dataset.py b/kedro-datasets/tests/pandas/test_gbq_dataset.py index 63095b74e..19767f15b 100644 --- a/kedro-datasets/tests/pandas/test_gbq_dataset.py +++ b/kedro-datasets/tests/pandas/test_gbq_dataset.py @@ -94,7 +94,7 @@ def test_save_extra_params(self, gbq_dataset, save_args): def test_load_missing_file(self, gbq_dataset, mocker): """Check the error when trying to load missing table.""" - pattern = r"Failed while loading data from data set GBQTableDataset\(.*\)" + pattern = r"Failed while loading data from dataset GBQTableDataset\(.*\)" mocked_read_gbq = mocker.patch( "kedro_datasets.pandas.gbq_dataset.pd_gbq.read_gbq" ) @@ -121,7 +121,7 @@ def test_invalid_location(self, save_args, load_args): @pytest.mark.parametrize("save_args", [{"option1": "value1"}], indirect=True) @pytest.mark.parametrize("load_args", [{"option2": "value2"}], indirect=True) def test_str_representation(self, gbq_dataset, save_args, load_args): - """Test string representation of the data set instance.""" + """Test string representation of the dataset instance.""" str_repr = str(gbq_dataset) assert "GBQTableDataset" in str_repr assert TABLE_NAME in str_repr @@ -132,7 +132,7 @@ def test_str_representation(self, gbq_dataset, save_args, load_args): assert k in str_repr def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" sql = f"select * from {DATASET}.{TABLE_NAME}" table_id = f"{DATASET}.{TABLE_NAME}" mocked_to_gbq = mocker.patch("kedro_datasets.pandas.gbq_dataset.pd_gbq.to_gbq") @@ -161,7 +161,7 @@ def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker): "load_args", [{"query_or_table": "Select 1"}], indirect=True ) def test_read_gbq_with_query(self, gbq_dataset, dummy_dataframe, mocker, load_args): - """Test loading data set with query in the argument.""" + """Test loading dataset with query in the argument.""" mocked_read_gbq = mocker.patch( "kedro_datasets.pandas.gbq_dataset.pd_gbq.read_gbq" ) @@ -283,13 +283,13 @@ def test_load_query_file(self, mocker, gbq_sql_file_dataset, dummy_dataframe): assert_frame_equal(dummy_dataframe, loaded_data) def test_save_error(self, gbq_sql_dataset, dummy_dataframe): - """Check the error when trying to save to the data set""" + """Check the error when trying to save to the dataset""" pattern = r"'save' is not supported on GBQQueryDataset" with pytest.raises(DatasetError, match=pattern): gbq_sql_dataset.save(dummy_dataframe) def test_str_representation_sql(self, gbq_sql_dataset, sql_file): - """Test the data set instance string representation""" + """Test the dataset instance string representation""" str_repr = str(gbq_sql_dataset) assert ( f"GBQQueryDataset(filepath=None, load_args={{}}, sql={SQL_QUERY})" @@ -298,7 +298,7 @@ def test_str_representation_sql(self, gbq_sql_dataset, sql_file): assert sql_file not in str_repr def test_str_representation_filepath(self, gbq_sql_file_dataset, sql_file): - """Test the data set instance string representation with filepath arg.""" + """Test the dataset instance string representation with filepath arg.""" str_repr = str(gbq_sql_file_dataset) assert ( f"GBQQueryDataset(filepath={str(sql_file)}, load_args={{}}, sql=None)" diff --git a/kedro-datasets/tests/pandas/test_generic_dataset.py b/kedro-datasets/tests/pandas/test_generic_dataset.py index a6436622d..f5b30e21b 100644 --- a/kedro-datasets/tests/pandas/test_generic_dataset.py +++ b/kedro-datasets/tests/pandas/test_generic_dataset.py @@ -170,7 +170,7 @@ def test_version_str_repr(self, filepath_csv, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -271,13 +271,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Generic (csv) file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_hdf_dataset.py b/kedro-datasets/tests/pandas/test_hdf_dataset.py index 997fa6ded..cad2f81ff 100644 --- a/kedro-datasets/tests/pandas/test_hdf_dataset.py +++ b/kedro-datasets/tests/pandas/test_hdf_dataset.py @@ -45,7 +45,7 @@ def dummy_dataframe(): class TestHDFDataset: def test_save_and_load(self, hdf_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" hdf_dataset.save(dummy_dataframe) reloaded = hdf_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) @@ -54,7 +54,7 @@ def test_save_and_load(self, hdf_dataset, dummy_dataframe): def test_exists(self, hdf_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not hdf_dataset.exists() hdf_dataset.save(dummy_dataframe) assert hdf_dataset.exists() @@ -86,7 +86,7 @@ def test_open_extra_args(self, hdf_dataset, fs_args): def test_load_missing_file(self, hdf_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set HDFDataset\(.*\)" + pattern = r"Failed while loading data from dataset HDFDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): hdf_dataset.load() @@ -117,7 +117,7 @@ def test_catalog_release(self, mocker): fs_mock.invalidate_cache.assert_called_once_with(filepath) def test_save_and_load_df_with_categorical_variables(self, hdf_dataset): - """Test saving and reloading the data set with categorical variables.""" + """Test saving and reloading the dataset with categorical variables.""" df = pd.DataFrame( {"A": [1, 2, 3], "B": pd.Series(list("aab")).astype("category")} ) @@ -166,7 +166,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_hdf_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_hdf_dataset.save(dummy_dataframe) reloaded_df = versioned_hdf_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -178,13 +178,13 @@ def test_no_versions(self, versioned_hdf_dataset): versioned_hdf_dataset.load() def test_exists(self, versioned_hdf_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_hdf_dataset.exists() versioned_hdf_dataset.save(dummy_dataframe) assert versioned_hdf_dataset.exists() def test_prevent_overwrite(self, versioned_hdf_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding hdf file for a given save version already exists.""" versioned_hdf_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_json_dataset.py b/kedro-datasets/tests/pandas/test_json_dataset.py index 20f0a1e21..1f8ceb0d8 100644 --- a/kedro-datasets/tests/pandas/test_json_dataset.py +++ b/kedro-datasets/tests/pandas/test_json_dataset.py @@ -55,14 +55,14 @@ def json_lines_data(tmp_path): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset.save(dummy_dataframe) reloaded = json_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, json_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_dataframe) assert json_dataset.exists() @@ -117,7 +117,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): json_dataset.load() @@ -208,7 +208,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_json_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_json_dataset.save(dummy_dataframe) reloaded_df = versioned_json_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -220,13 +220,13 @@ def test_no_versions(self, versioned_json_dataset): versioned_json_dataset.load() def test_exists(self, versioned_json_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_json_dataset.exists() versioned_json_dataset.save(dummy_dataframe) assert versioned_json_dataset.exists() def test_prevent_overwrite(self, versioned_json_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding hdf file for a given save version already exists.""" versioned_json_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_parquet_dataset.py b/kedro-datasets/tests/pandas/test_parquet_dataset.py index 74fb65252..7c7e98c98 100644 --- a/kedro-datasets/tests/pandas/test_parquet_dataset.py +++ b/kedro-datasets/tests/pandas/test_parquet_dataset.py @@ -65,7 +65,7 @@ def test_credentials_propagated(self, mocker): mock_fs.assert_called_once_with("file", auto_mkdir=True, **credentials) def test_save_and_load(self, tmp_path, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" filepath = (tmp_path / FILENAME).as_posix() dataset = ParquetDataset(filepath=filepath) dataset.save(dummy_dataframe) @@ -77,7 +77,7 @@ def test_save_and_load(self, tmp_path, dummy_dataframe): assert len(files) == 1 def test_save_and_load_non_existing_dir(self, tmp_path, dummy_dataframe): - """Test saving and reloading the data set to non-existing directory.""" + """Test saving and reloading the dataset to non-existing directory.""" filepath = (tmp_path / "non-existing" / FILENAME).as_posix() dataset = ParquetDataset(filepath=filepath) dataset.save(dummy_dataframe) @@ -86,7 +86,7 @@ def test_save_and_load_non_existing_dir(self, tmp_path, dummy_dataframe): def test_exists(self, parquet_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not parquet_dataset.exists() parquet_dataset.save(dummy_dataframe) assert parquet_dataset.exists() @@ -131,7 +131,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, parquet_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set ParquetDataset\(.*\)" + pattern = r"Failed while loading data from dataset ParquetDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): parquet_dataset.load() @@ -273,7 +273,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe, mocker): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" mocker.patch( "pyarrow.fs._ensure_filesystem", return_value=PyFileSystem(FSSpecHandler(versioned_parquet_dataset._fs)), @@ -289,7 +289,7 @@ def test_no_versions(self, versioned_parquet_dataset): versioned_parquet_dataset.load() def test_exists(self, versioned_parquet_dataset, dummy_dataframe, mocker): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_parquet_dataset.exists() mocker.patch( "pyarrow.fs._ensure_filesystem", @@ -301,7 +301,7 @@ def test_exists(self, versioned_parquet_dataset, dummy_dataframe, mocker): def test_prevent_overwrite( self, versioned_parquet_dataset, dummy_dataframe, mocker ): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding parquet file for a given save version already exists.""" mocker.patch( "pyarrow.fs._ensure_filesystem", diff --git a/kedro-datasets/tests/pandas/test_sql_dataset.py b/kedro-datasets/tests/pandas/test_sql_dataset.py index 0828f3f18..54698a409 100644 --- a/kedro-datasets/tests/pandas/test_sql_dataset.py +++ b/kedro-datasets/tests/pandas/test_sql_dataset.py @@ -138,7 +138,7 @@ def test_unknown_module(self, mocker): ).exists() def test_str_representation_table(self, table_dataset): - """Test the data set instance string representation""" + """Test the dataset instance string representation""" str_repr = str(table_dataset) assert ( "SQLTableDataset(load_args={}, save_args={'index': False}, " @@ -424,13 +424,13 @@ def test_load_unknown_sql(self): SQLQueryDataset(sql=SQL_QUERY, credentials={"con": FAKE_CONN_STR}).load() def test_save_error(self, query_dataset, dummy_dataframe): - """Check the error when trying to save to the data set""" + """Check the error when trying to save to the dataset""" pattern = r"'save' is not supported on SQLQueryDataset" with pytest.raises(DatasetError, match=pattern): query_dataset.save(dummy_dataframe) def test_str_representation_sql(self, query_dataset, sql_file): - """Test the data set instance string representation""" + """Test the dataset instance string representation""" str_repr = str(query_dataset) assert ( "SQLQueryDataset(execution_options={}, filepath=None, " @@ -440,7 +440,7 @@ def test_str_representation_sql(self, query_dataset, sql_file): assert sql_file not in str_repr def test_str_representation_filepath(self, query_file_dataset, sql_file): - """Test the data set instance string representation with filepath arg.""" + """Test the dataset instance string representation with filepath arg.""" str_repr = str(query_file_dataset) assert ( f"SQLQueryDataset(execution_options={{}}, filepath={str(sql_file)}, " diff --git a/kedro-datasets/tests/pandas/test_xml_dataset.py b/kedro-datasets/tests/pandas/test_xml_dataset.py index be57351ca..0ba840da2 100644 --- a/kedro-datasets/tests/pandas/test_xml_dataset.py +++ b/kedro-datasets/tests/pandas/test_xml_dataset.py @@ -42,14 +42,14 @@ def dummy_dataframe(): class TestXMLDataset: def test_save_and_load(self, xml_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" xml_dataset.save(dummy_dataframe) reloaded = xml_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, xml_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not xml_dataset.exists() xml_dataset.save(dummy_dataframe) assert xml_dataset.exists() @@ -94,7 +94,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, xml_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set XMLDataset\(.*\)" + pattern = r"Failed while loading data from dataset XMLDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): xml_dataset.load() @@ -165,7 +165,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_xml_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_xml_dataset.save(dummy_dataframe) reloaded_df = versioned_xml_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -177,13 +177,13 @@ def test_no_versions(self, versioned_xml_dataset): versioned_xml_dataset.load() def test_exists(self, versioned_xml_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_xml_dataset.exists() versioned_xml_dataset.save(dummy_dataframe) assert versioned_xml_dataset.exists() def test_prevent_overwrite(self, versioned_xml_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding hdf file for a given save version already exists.""" versioned_xml_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/partitions/test_partitioned_dataset.py b/kedro-datasets/tests/partitions/test_partitioned_dataset.py index 2d16665cb..f0126887d 100644 --- a/kedro-datasets/tests/partitions/test_partitioned_dataset.py +++ b/kedro-datasets/tests/partitions/test_partitioned_dataset.py @@ -272,7 +272,7 @@ def test_invalid_dataset(self, dataset, local_csvs): loaded_partitions = pds.load() for partition, df_loader in loaded_partitions.items(): - pattern = r"Failed while loading data from data set ParquetDataset(.*)" + pattern = r"Failed while loading data from dataset ParquetDataset(.*)" with pytest.raises(DatasetError, match=pattern) as exc_info: df_loader() error_message = str(exc_info.value) @@ -293,7 +293,7 @@ def test_invalid_dataset(self, dataset, local_csvs): ( FakeDataset, r"Dataset type 'tests\.partitions\.test_partitioned_dataset\.FakeDataset' " - r"is invalid\: all data set types must extend 'AbstractDataset'", + r"is invalid\: all dataset types must extend 'AbstractDataset'", ), ({}, "'type' is missing from dataset catalog configuration"), ], diff --git a/kedro-datasets/tests/pickle/test_pickle_dataset.py b/kedro-datasets/tests/pickle/test_pickle_dataset.py index 5e03f91cd..5d5b25bcb 100644 --- a/kedro-datasets/tests/pickle/test_pickle_dataset.py +++ b/kedro-datasets/tests/pickle/test_pickle_dataset.py @@ -59,7 +59,7 @@ class TestPickleDataset: indirect=True, ) def test_save_and_load(self, pickle_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" pickle_dataset.save(dummy_dataframe) reloaded = pickle_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) @@ -68,7 +68,7 @@ def test_save_and_load(self, pickle_dataset, dummy_dataframe): def test_exists(self, pickle_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not pickle_dataset.exists() pickle_dataset.save(dummy_dataframe) assert pickle_dataset.exists() @@ -98,7 +98,7 @@ def test_open_extra_args(self, pickle_dataset, fs_args): def test_load_missing_file(self, pickle_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set PickleDataset\(.*\)" + pattern = r"Failed while loading data from dataset PickleDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): pickle_dataset.load() @@ -189,7 +189,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_pickle_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_pickle_dataset.save(dummy_dataframe) reloaded_df = versioned_pickle_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -201,13 +201,13 @@ def test_no_versions(self, versioned_pickle_dataset): versioned_pickle_dataset.load() def test_exists(self, versioned_pickle_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_pickle_dataset.exists() versioned_pickle_dataset.save(dummy_dataframe) assert versioned_pickle_dataset.exists() def test_prevent_overwrite(self, versioned_pickle_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Pickle file for a given save version already exists.""" versioned_pickle_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pillow/test_image_dataset.py b/kedro-datasets/tests/pillow/test_image_dataset.py index 325d08aeb..2a3ef296a 100644 --- a/kedro-datasets/tests/pillow/test_image_dataset.py +++ b/kedro-datasets/tests/pillow/test_image_dataset.py @@ -41,7 +41,7 @@ def images_equal(image_1, image_2): class TestImageDataset: def test_save_and_load(self, image_dataset, image_object): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" image_dataset.save(image_object) reloaded_image = image_dataset.load() assert images_equal(image_object, reloaded_image) @@ -49,7 +49,7 @@ def test_save_and_load(self, image_dataset, image_object): def test_exists(self, image_dataset, image_object): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not image_dataset.exists() image_dataset.save(image_object) assert image_dataset.exists() @@ -80,7 +80,7 @@ def test_open_extra_args(self, image_dataset, fs_args): def test_load_missing_file(self, image_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set ImageDataset\(.*\)" + pattern = r"Failed while loading data from dataset ImageDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): image_dataset.load() @@ -147,7 +147,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_image_dataset, image_object): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_image_dataset.save(image_object) reloaded_image = versioned_image_dataset.load() assert images_equal(image_object, reloaded_image) @@ -182,13 +182,13 @@ def test_no_versions(self, versioned_image_dataset): versioned_image_dataset.load() def test_exists(self, versioned_image_dataset, image_object): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_image_dataset.exists() versioned_image_dataset.save(image_object) assert versioned_image_dataset.exists() def test_prevent_overwrite(self, versioned_image_dataset, image_object): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding image file for a given save version already exists.""" versioned_image_dataset.save(image_object) pattern = ( diff --git a/kedro-datasets/tests/plotly/test_html_dataset.py b/kedro-datasets/tests/plotly/test_html_dataset.py index 06ed7291b..a37bffcc9 100644 --- a/kedro-datasets/tests/plotly/test_html_dataset.py +++ b/kedro-datasets/tests/plotly/test_html_dataset.py @@ -33,13 +33,13 @@ def dummy_plot(): class TestHTMLDataset: def test_save(self, html_dataset, dummy_plot): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" html_dataset.save(dummy_plot) assert html_dataset._fs_open_args_save == {"mode": "w", "encoding": "utf-8"} def test_exists(self, html_dataset, dummy_plot): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not html_dataset.exists() html_dataset.save(dummy_plot) assert html_dataset.exists() diff --git a/kedro-datasets/tests/plotly/test_json_dataset.py b/kedro-datasets/tests/plotly/test_json_dataset.py index 571b0d048..0f1835e7a 100644 --- a/kedro-datasets/tests/plotly/test_json_dataset.py +++ b/kedro-datasets/tests/plotly/test_json_dataset.py @@ -35,7 +35,7 @@ def dummy_plot(): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_plot): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset.save(dummy_plot) reloaded = json_dataset.load() assert dummy_plot == reloaded @@ -44,14 +44,14 @@ def test_save_and_load(self, json_dataset, dummy_plot): def test_exists(self, json_dataset, dummy_plot): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_plot) assert json_dataset.exists() def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): json_dataset.load() diff --git a/kedro-datasets/tests/plotly/test_plotly_dataset.py b/kedro-datasets/tests/plotly/test_plotly_dataset.py index 7f1b0eae9..37ee92a40 100644 --- a/kedro-datasets/tests/plotly/test_plotly_dataset.py +++ b/kedro-datasets/tests/plotly/test_plotly_dataset.py @@ -47,7 +47,7 @@ def dummy_dataframe(): class TestPlotlyDataset: def test_save_and_load(self, plotly_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" plotly_dataset.save(dummy_dataframe) reloaded = plotly_dataset.load() assert isinstance(reloaded, graph_objects.Figure) @@ -56,14 +56,14 @@ def test_save_and_load(self, plotly_dataset, dummy_dataframe): def test_exists(self, plotly_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not plotly_dataset.exists() plotly_dataset.save(dummy_dataframe) assert plotly_dataset.exists() def test_load_missing_file(self, plotly_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set PlotlyDataset\(.*\)" + pattern = r"Failed while loading data from dataset PlotlyDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): plotly_dataset.load() diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py index 15abf3392..5312e9b48 100644 --- a/kedro-datasets/tests/polars/test_csv_dataset.py +++ b/kedro-datasets/tests/polars/test_csv_dataset.py @@ -89,14 +89,14 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame): class TestCSVDataset: def test_save_and_load(self, csv_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" csv_dataset.save(dummy_dataframe) reloaded = csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not csv_dataset.exists() csv_dataset.save(dummy_dataframe) assert csv_dataset.exists() @@ -141,7 +141,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, csv_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set CSVDataset\(.*\)" + pattern = r"Failed while loading data from dataset CSVDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load() @@ -204,7 +204,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -283,13 +283,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding CSV file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/polars/test_eager_polars_dataset.py b/kedro-datasets/tests/polars/test_eager_polars_dataset.py index 615f3a26e..6da005fb2 100644 --- a/kedro-datasets/tests/polars/test_eager_polars_dataset.py +++ b/kedro-datasets/tests/polars/test_eager_polars_dataset.py @@ -155,7 +155,7 @@ def test_load_args(self, parquet_dataset_ignore): assert df.shape == (2, 3) def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_parquet_dataset.save(dummy_dataframe) reloaded_df = versioned_parquet_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -241,7 +241,7 @@ def test_multiple_saves(self, dummy_dataframe, filepath_parquet): class TestEagerIPCDatasetVersioned: def test_save_and_load(self, versioned_ipc_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_ipc_dataset.save(dummy_dataframe) reloaded_df = versioned_ipc_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -345,7 +345,7 @@ def test_version_str_repr(self, filepath_csv, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -446,13 +446,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Generic (csv) file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py index e92dbfc50..50528c581 100644 --- a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py +++ b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py @@ -111,7 +111,7 @@ class TestLazyCSVDataset: def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set. + nonexistent dataset. """ assert not csv_dataset.exists() csv_dataset.save(dummy_dataframe) @@ -137,7 +137,7 @@ def test_save_and_load(self, csv_dataset, dummy_dataframe): def test_load_missing_file(self, csv_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set LazyPolarsDataset\(.*\)" + pattern = r"Failed while loading data from dataset LazyPolarsDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load() @@ -229,7 +229,7 @@ def test_load_args(self, parquet_dataset_ignore, dummy_dataframe, filepath_pq): assert df.shape == (2, 3) def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_parquet_dataset.save(dummy_dataframe.lazy()) reloaded_df = versioned_parquet_dataset.load().collect() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -350,7 +350,7 @@ def test_no_versions(self, versioned_parquet_dataset): versioned_parquet_dataset.load() def test_prevent_overwrite(self, versioned_parquet_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Generic (parquet) file for a given save version already exists.""" versioned_parquet_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/redis/test_redis_dataset.py b/kedro-datasets/tests/redis/test_redis_dataset.py index 322c35480..7b9dfeb65 100644 --- a/kedro-datasets/tests/redis/test_redis_dataset.py +++ b/kedro-datasets/tests/redis/test_redis_dataset.py @@ -77,7 +77,7 @@ def test_save_and_load( serialised_dummy_object, key, ): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" set_mocker = mocker.patch("redis.StrictRedis.set") get_mocker = mocker.patch( "redis.StrictRedis.get", return_value=serialised_dummy_object @@ -94,7 +94,7 @@ def test_save_and_load( def test_exists(self, mocker, pickle_dataset, dummy_object, key): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" mocker.patch("redis.StrictRedis.exists", return_value=False) assert not pickle_dataset.exists() mocker.patch("redis.StrictRedis.set") diff --git a/kedro-datasets/tests/spark/test_deltatable_dataset.py b/kedro-datasets/tests/spark/test_deltatable_dataset.py index 24ad7a3c6..938e90a31 100644 --- a/kedro-datasets/tests/spark/test_deltatable_dataset.py +++ b/kedro-datasets/tests/spark/test_deltatable_dataset.py @@ -94,7 +94,7 @@ def no_output(x): catalog = DataCatalog({"delta_in": delta_ds}) pipeline = modular_pipeline([node(no_output, "delta_in", None)]) pattern = ( - r"The following data sets cannot be used with " + r"The following datasets cannot be used with " r"multiprocessing: \['delta_in'\]" ) with pytest.raises(AttributeError, match=pattern): diff --git a/kedro-datasets/tests/spark/test_memory_dataset.py b/kedro-datasets/tests/spark/test_memory_dataset.py index 8dd469217..7f4f2c43b 100644 --- a/kedro-datasets/tests/spark/test_memory_dataset.py +++ b/kedro-datasets/tests/spark/test_memory_dataset.py @@ -35,14 +35,14 @@ def memory_dataset(spark_data_frame): def test_load_modify_original_data(memory_dataset, spark_data_frame): - """Check that the data set object is not updated when the original + """Check that the dataset object is not updated when the original SparkDataFrame is changed.""" spark_data_frame = _update_spark_df(spark_data_frame, 1, 1, -5) assert not _check_equals(memory_dataset.load(), spark_data_frame) def test_save_modify_original_data(spark_data_frame): - """Check that the data set object is not updated when the original + """Check that the dataset object is not updated when the original SparkDataFrame is changed.""" memory_dataset = MemoryDataset() memory_dataset.save(spark_data_frame) @@ -62,5 +62,5 @@ def test_load_returns_same_spark_object(memory_dataset, spark_data_frame): def test_str_representation(memory_dataset): - """Test string representation of the data set""" + """Test string representation of the dataset""" assert "MemoryDataset(data=)" in str(memory_dataset) diff --git a/kedro-datasets/tests/spark/test_spark_dataset.py b/kedro-datasets/tests/spark/test_spark_dataset.py index e4eed4481..bc40f9512 100644 --- a/kedro-datasets/tests/spark/test_spark_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_dataset.py @@ -427,7 +427,7 @@ def test_parallel_runner(self, is_async, spark_in): catalog = DataCatalog({"spark_in": spark_in}) pipeline = modular_pipeline([node(identity, "spark_in", "spark_out")]) pattern = ( - r"The following data sets cannot be used with " + r"The following datasets cannot be used with " r"multiprocessing: \['spark_in'\]" ) with pytest.raises(AttributeError, match=pattern): diff --git a/kedro-datasets/tests/spark/test_spark_hive_dataset.py b/kedro-datasets/tests/spark/test_spark_hive_dataset.py index 5f11674dd..6ea4be9e7 100644 --- a/kedro-datasets/tests/spark/test_spark_hive_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_hive_dataset.py @@ -294,7 +294,7 @@ def test_read_from_non_existent_table(self): ) with pytest.raises( DatasetError, - match=r"Failed while loading data from data set SparkHiveDataset" + match=r"Failed while loading data from dataset SparkHiveDataset" r"|table_doesnt_exist" r"|UnresolvedRelation", ): diff --git a/kedro-datasets/tests/svmlight/test_svmlight_dataset.py b/kedro-datasets/tests/svmlight/test_svmlight_dataset.py index ec504deda..7d9176a62 100644 --- a/kedro-datasets/tests/svmlight/test_svmlight_dataset.py +++ b/kedro-datasets/tests/svmlight/test_svmlight_dataset.py @@ -39,7 +39,7 @@ def dummy_data(): class TestSVMLightDataset: def test_save_and_load(self, svm_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" svm_dataset.save(dummy_data) reloaded_features, reloaded_label = svm_dataset.load() original_features, original_label = dummy_data @@ -50,7 +50,7 @@ def test_save_and_load(self, svm_dataset, dummy_data): def test_exists(self, svm_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not svm_dataset.exists() svm_dataset.save(dummy_data) assert svm_dataset.exists() @@ -82,7 +82,7 @@ def test_open_extra_args(self, svm_dataset, fs_args): def test_load_missing_file(self, svm_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set SVMLightDataset\(.*\)" + pattern = r"Failed while loading data from dataset SVMLightDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): svm_dataset.load() @@ -135,7 +135,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_svm_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_svm_dataset.save(dummy_data) reloaded_features, reloaded_label = versioned_svm_dataset.load() original_features, original_label = dummy_data @@ -149,13 +149,13 @@ def test_no_versions(self, versioned_svm_dataset): versioned_svm_dataset.load() def test_exists(self, versioned_svm_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_svm_dataset.exists() versioned_svm_dataset.save(dummy_data) assert versioned_svm_dataset.exists() def test_prevent_overwrite(self, versioned_svm_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" versioned_svm_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index ea22a3188..8855dc4f3 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -142,7 +142,7 @@ class TestTensorFlowModelDataset: """No versioning passed to creator""" def test_save_and_load(self, tf_model_dataset, dummy_tf_base_model, dummy_x_test): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" predictions = dummy_tf_base_model.predict(dummy_x_test) tf_model_dataset.save(dummy_tf_base_model) @@ -155,14 +155,12 @@ def test_save_and_load(self, tf_model_dataset, dummy_tf_base_model, dummy_x_test def test_load_missing_model(self, tf_model_dataset): """Test error message when trying to load missing model.""" - pattern = ( - r"Failed while loading data from data set TensorFlowModelDataset\(.*\)" - ) + pattern = r"Failed while loading data from dataset TensorFlowModelDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): tf_model_dataset.load() def test_exists(self, tf_model_dataset, dummy_tf_base_model): - """Test `exists` method invocation for both existing and nonexistent data set.""" + """Test `exists` method invocation for both existing and nonexistent dataset.""" assert not tf_model_dataset.exists() tf_model_dataset.save(dummy_tf_base_model) assert tf_model_dataset.exists() @@ -301,7 +299,7 @@ def test_save_and_load( load_version, save_version, ): - """Test saving and reloading the versioned data set.""" + """Test saving and reloading the versioned dataset.""" predictions = dummy_tf_base_model.predict(dummy_x_test) versioned_tf_model_dataset.save(dummy_tf_base_model) @@ -334,7 +332,7 @@ def test_hdf5_save_format( np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6) def test_prevent_overwrite(self, dummy_tf_base_model, versioned_tf_model_dataset): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding file for a given save version already exists.""" versioned_tf_model_dataset.save(dummy_tf_base_model) pattern = ( @@ -374,7 +372,7 @@ def test_http_filesystem_no_versioning(self, tensorflow_model_dataset): ) def test_exists(self, versioned_tf_model_dataset, dummy_tf_base_model): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_tf_model_dataset.exists() versioned_tf_model_dataset.save(dummy_tf_base_model) assert versioned_tf_model_dataset.exists() diff --git a/kedro-datasets/tests/text/test_text_dataset.py b/kedro-datasets/tests/text/test_text_dataset.py index 4109c163e..a883fa552 100644 --- a/kedro-datasets/tests/text/test_text_dataset.py +++ b/kedro-datasets/tests/text/test_text_dataset.py @@ -31,7 +31,7 @@ def versioned_txt_dataset(filepath_txt, load_version, save_version): class TestTextDataset: def test_save_and_load(self, txt_dataset): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" txt_dataset.save(STRING) reloaded = txt_dataset.load() assert STRING == reloaded @@ -40,7 +40,7 @@ def test_save_and_load(self, txt_dataset): def test_exists(self, txt_dataset): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not txt_dataset.exists() txt_dataset.save(STRING) assert txt_dataset.exists() @@ -56,7 +56,7 @@ def test_open_extra_args(self, txt_dataset, fs_args): def test_load_missing_file(self, txt_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set TextDataset\(.*\)" + pattern = r"Failed while loading data from dataset TextDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): txt_dataset.load() @@ -109,7 +109,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_txt_dataset): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_txt_dataset.save(STRING) reloaded_df = versioned_txt_dataset.load() assert STRING == reloaded_df @@ -121,13 +121,13 @@ def test_no_versions(self, versioned_txt_dataset): versioned_txt_dataset.load() def test_exists(self, versioned_txt_dataset): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_txt_dataset.exists() versioned_txt_dataset.save(STRING) assert versioned_txt_dataset.exists() def test_prevent_overwrite(self, versioned_txt_dataset): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding text file for a given save version already exists.""" versioned_txt_dataset.save(STRING) pattern = ( diff --git a/kedro-datasets/tests/tracking/test_json_dataset.py b/kedro-datasets/tests/tracking/test_json_dataset.py index 0353d4669..de24ba9b9 100644 --- a/kedro-datasets/tests/tracking/test_json_dataset.py +++ b/kedro-datasets/tests/tracking/test_json_dataset.py @@ -35,7 +35,7 @@ def dummy_data(): class TestJSONDataset: def test_save(self, filepath_json, dummy_data, tmp_path, save_version): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset = JSONDataset( filepath=filepath_json, version=Version(None, save_version) ) @@ -68,7 +68,7 @@ def test_load_fail(self, json_dataset, dummy_data): def test_exists(self, json_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_data) assert json_dataset.exists() @@ -143,7 +143,7 @@ def test_version_str_repr(self, load_version, save_version): assert "save_args={'indent': 2}" in str(ds_versioned) def test_prevent_overwrite(self, explicit_versioned_json_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" explicit_versioned_json_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/tracking/test_metrics_dataset.py b/kedro-datasets/tests/tracking/test_metrics_dataset.py index 0f6e07c37..b638fcdfd 100644 --- a/kedro-datasets/tests/tracking/test_metrics_dataset.py +++ b/kedro-datasets/tests/tracking/test_metrics_dataset.py @@ -41,7 +41,7 @@ def test_save_data( filepath_json, save_version, ): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" metrics_dataset = MetricsDataset( filepath=filepath_json, version=Version(None, save_version) ) @@ -74,7 +74,7 @@ def test_load_fail(self, metrics_dataset, dummy_data): def test_exists(self, metrics_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not metrics_dataset.exists() metrics_dataset.save(dummy_data) assert metrics_dataset.exists() @@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version): assert "save_args={'indent': 2}" in str(ds_versioned) def test_prevent_overwrite(self, explicit_versioned_metrics_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" explicit_versioned_metrics_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/video/test_video_dataset.py b/kedro-datasets/tests/video/test_video_dataset.py index 357cfd001..d37fae3ca 100644 --- a/kedro-datasets/tests/video/test_video_dataset.py +++ b/kedro-datasets/tests/video/test_video_dataset.py @@ -58,7 +58,7 @@ def test_load_mp4(self, filepath_mp4, mp4_object): assert_videos_equal(loaded_video, mp4_object) def test_save_and_load_mp4(self, empty_dataset_mp4, mp4_object): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" empty_dataset_mp4.save(mp4_object) reloaded_video = empty_dataset_mp4.load() assert_videos_equal(mp4_object, reloaded_video) @@ -109,7 +109,7 @@ def test_save_generator_video( def test_exists(self, empty_dataset_mp4, mp4_object): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not empty_dataset_mp4.exists() empty_dataset_mp4.save(mp4_object) assert empty_dataset_mp4.exists() @@ -123,7 +123,7 @@ def test_convert_video(self, empty_dataset_mp4, mjpeg_object): def test_load_missing_file(self, empty_dataset_mp4): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set VideoDataset\(.*\)" + pattern = r"Failed while loading data from dataset VideoDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): empty_dataset_mp4.load() diff --git a/kedro-datasets/tests/yaml/test_yaml_dataset.py b/kedro-datasets/tests/yaml/test_yaml_dataset.py index 243f0e0d5..611baee0c 100644 --- a/kedro-datasets/tests/yaml/test_yaml_dataset.py +++ b/kedro-datasets/tests/yaml/test_yaml_dataset.py @@ -39,7 +39,7 @@ def dummy_data(): class TestYAMLDataset: def test_save_and_load(self, yaml_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" yaml_dataset.save(dummy_data) reloaded = yaml_dataset.load() assert dummy_data == reloaded @@ -48,7 +48,7 @@ def test_save_and_load(self, yaml_dataset, dummy_data): def test_exists(self, yaml_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not yaml_dataset.exists() yaml_dataset.save(dummy_data) assert yaml_dataset.exists() @@ -72,7 +72,7 @@ def test_open_extra_args(self, yaml_dataset, fs_args): def test_load_missing_file(self, yaml_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set YAMLDataset\(.*\)" + pattern = r"Failed while loading data from dataset YAMLDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): yaml_dataset.load() @@ -137,7 +137,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_yaml_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_yaml_dataset.save(dummy_data) reloaded = versioned_yaml_dataset.load() assert dummy_data == reloaded @@ -149,13 +149,13 @@ def test_no_versions(self, versioned_yaml_dataset): versioned_yaml_dataset.load() def test_exists(self, versioned_yaml_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_yaml_dataset.exists() versioned_yaml_dataset.save(dummy_data) assert versioned_yaml_dataset.exists() def test_prevent_overwrite(self, versioned_yaml_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding yaml file for a given save version already exists.""" versioned_yaml_dataset.save(dummy_data) pattern = (