chore(datasets): replace "data set" with "dataset" (#867)
* chore(datasets): replace "data set" with "dataset"

Signed-off-by: Deepyaman Datta <[email protected]>

* style(datasets): reformat everything using `black`

Signed-off-by: Deepyaman Datta <[email protected]>

---------

Signed-off-by: Deepyaman Datta <[email protected]>
deepyaman authored Oct 10, 2024
1 parent 62a5808 commit 987dab9
Showing 56 changed files with 206 additions and 208 deletions.
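The change is mechanical: every occurrence of "data set" in docstrings, error messages, and test regexes becomes "dataset", followed by a `black` pass (the second commit in the squash) to keep formatting consistent. The script below is a hypothetical sketch of how such a bulk rename might be driven — it is not the tool actually used for this commit, and the paths and the `black` invocation are assumptions based on the repository layout.

# Hypothetical sketch -- not the script used for this commit.
# Rewrites "data set" to "dataset" in every .py file under the
# package and test trees, then runs `black` over the result.
import pathlib
import subprocess

for root in ("kedro-datasets/kedro_datasets", "kedro-datasets/tests"):
    for path in pathlib.Path(root).rglob("*.py"):
        text = path.read_text()
        if "data set" in text:
            path.write_text(text.replace("data set", "dataset"))

subprocess.run(["black", "kedro-datasets"], check=True)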
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/dask/csv_dataset.py
@@ -1,4 +1,4 @@
-"""``CSVDataset`` is a data set used to load and save data to CSV files using Dask
+"""``CSVDataset`` is a dataset used to load and save data to CSV files using Dask
dataframe"""
from __future__ import annotations

2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/dask/parquet_dataset.py
@@ -1,4 +1,4 @@
-"""``ParquetDataset`` is a data set used to load and save data to parquet files using Dask
+"""``ParquetDataset`` is a dataset used to load and save data to parquet files using Dask
dataframe"""
from __future__ import annotations

@@ -242,7 +242,7 @@ def __init__( # noqa: PLR0913
database: the name of the database.
(also referred to as schema). Defaults to "default".
write_mode: the mode to write the data into the table. If not
-present, the data set is read-only.
+present, the dataset is read-only.
Options are:["overwrite", "append", "upsert"].
"upsert" mode requires primary_key field to be populated.
Defaults to None.
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/pandas/feather_dataset.py
@@ -1,4 +1,4 @@
-"""``FeatherDataset`` is a data set used to load and save data to feather files
+"""``FeatherDataset`` is a dataset used to load and save data to feather files
using an underlying filesystem (e.g.: local, S3, GCS). The underlying functionality
is supported by pandas, so it supports all operations the pandas supports.
"""
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/pandas/sql_dataset.py
@@ -309,7 +309,7 @@ class SQLQueryDataset(AbstractDataset[None, pd.DataFrame]):
by SQLAlchemy can be found here:
https://docs.sqlalchemy.org/core/engines.html#database-urls
-It does not support save method so it is a read only data set.
+It does not support save method so it is a read only dataset.
To save data to a SQL server use ``SQLTableDataset``.
Example usage for the
2 changes: 1 addition & 1 deletion kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py
@@ -16,7 +16,7 @@

class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]):
"""``SparkHiveDataset`` loads and saves Spark dataframes stored on Hive.
-This data set also handles some incompatible file types such as using partitioned parquet on
+This dataset also handles some incompatible file types such as using partitioned parquet on
hive which will not normally allow upserts to existing data without a complete replacement
of the existing file/partition.
@@ -68,7 +68,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> ChatAnthropic:
return ChatAnthropic(
@@ -70,7 +70,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> ChatCohere:
return ChatCohere(cohere_api_key=self.cohere_api_key, base_url=self.cohere_api_url, **self.kwargs)
@@ -32,7 +32,7 @@ def _describe(self) -> dict[str, Any]:
return {**self.kwargs}

def save(self, data: None) -> NoReturn:
-raise DatasetError(f"{self.__class__.__name__} is a read only data set type")
+raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

def load(self) -> OPENAI_TYPE:
return self.constructor(
@@ -153,7 +153,7 @@ def test_empty_credentials_load(self, bad_credentials, tmp_path):
netcdf_dataset = NetCDFDataset(
filepath=S3_PATH, temppath=tmp_path, credentials=bad_credentials
)
-pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)"
+pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
netcdf_dataset.load()

@@ -165,7 +165,7 @@ def test_pass_credentials(self, mocker, tmp_path):
s3_dataset = NetCDFDataset(
filepath=S3_PATH, temppath=tmp_path, credentials=AWS_CREDENTIALS
)
-pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)"
+pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load()

@@ -75,7 +75,7 @@ def test_open_extra_args(self, prophet_model_dataset, fs_args):

def test_load_missing_file(self, prophet_model_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set ProphetModelDataset\(.*\)"
+pattern = r"Failed while loading data from dataset ProphetModelDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
prophet_model_dataset.load()

@@ -65,7 +65,7 @@ def cog_geotiff_dataset(cog_file_path, save_args) -> GeoTIFFDataset:


def test_load_cog_geotiff(cog_geotiff_dataset):
"""Test loading cloud optimised geotiff reloading the data set."""
"""Test loading cloud optimised geotiff reloading the dataset."""
loaded_xr = cog_geotiff_dataset.load()
assert isinstance(loaded_xr.rio.crs, CRS)
assert isinstance(loaded_xr, xr.DataArray)
@@ -144,7 +144,7 @@ def test_load_not_tif():

def test_exists(tmp_path, synthetic_xarray):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
assert not dataset.exists()
dataset.save(synthetic_xarray)
@@ -155,7 +155,7 @@ def test_exists(tmp_path, synthetic_xarray):
"synthetic_xarray",
])
def test_save_and_load_geotiff(tmp_path, request, xarray_fixture):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
xarray_data = request.getfixturevalue(xarray_fixture)
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
dataset.save(xarray_data)
@@ -176,6 +176,6 @@ def test_load_missing_file(tmp_path):
"""Check the error when trying to load missing file."""
dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif"))
assert not dataset._exists(), "File unexpectedly exists"
-pattern = r"Failed while loading data from data set GeoTIFFDataset\(.*\)"
+pattern = r"Failed while loading data from dataset GeoTIFFDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
dataset.load()
6 changes: 3 additions & 3 deletions kedro-datasets/tests/biosequence/test_biosequence_dataset.py
@@ -38,7 +38,7 @@ def dummy_data():

class TestBioSequenceDataset:
def test_save_and_load(self, biosequence_dataset, dummy_data):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
biosequence_dataset.save(dummy_data)
reloaded = biosequence_dataset.load()
assert dummy_data[0].id, reloaded[0].id
@@ -49,7 +49,7 @@ def test_save_and_load(self, biosequence_dataset, dummy_data):

def test_exists(self, biosequence_dataset, dummy_data):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not biosequence_dataset.exists()
biosequence_dataset.save(dummy_data)
assert biosequence_dataset.exists()
@@ -75,7 +75,7 @@ def test_open_extra_args(self, biosequence_dataset, fs_args):

def test_load_missing_file(self, biosequence_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set BioSequenceDataset\(.*\)"
+pattern = r"Failed while loading data from dataset BioSequenceDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
biosequence_dataset.load()

6 changes: 3 additions & 3 deletions kedro-datasets/tests/dask/test_csv_dataset.py
@@ -84,7 +84,7 @@ def test_incorrect_credentials_load(self):
@pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
def test_empty_credentials_load(self, bad_credentials):
csv_dataset = CSVDataset(filepath=S3_PATH, credentials=bad_credentials)
-pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
csv_dataset.load().compute()

@@ -94,7 +94,7 @@ def test_pass_credentials(self, mocker):
client instantiation on creating S3 connection."""
client_mock = mocker.patch("botocore.session.Session.create_client")
s3_dataset = CSVDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load().compute()

@@ -121,7 +121,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not s3_dataset.exists()
s3_dataset.save(dummy_dd_dataframe)
assert s3_dataset.exists()
6 changes: 3 additions & 3 deletions kedro-datasets/tests/dask/test_parquet_dataset.py
@@ -87,7 +87,7 @@ def test_incorrect_credentials_load(self):
@pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
def test_empty_credentials_load(self, bad_credentials):
parquet_dataset = ParquetDataset(filepath=S3_PATH, credentials=bad_credentials)
-pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
parquet_dataset.load().compute()

@@ -97,7 +97,7 @@ def test_pass_credentials(self, mocker):
client instantiation on creating S3 connection."""
client_mock = mocker.patch("botocore.session.Session.create_client")
s3_dataset = ParquetDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
with pytest.raises(DatasetError, match=pattern):
s3_dataset.load().compute()

@@ -124,7 +124,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not s3_dataset.exists()
s3_dataset.save(dummy_dd_dataframe)
assert s3_dataset.exists()
12 changes: 6 additions & 6 deletions kedro-datasets/tests/email/test_message_dataset.py
@@ -50,7 +50,7 @@ def dummy_msg():

class TestEmailMessageDataset:
def test_save_and_load(self, message_dataset, dummy_msg):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
message_dataset.save(dummy_msg)
reloaded = message_dataset.load()
assert dummy_msg.__dict__ == reloaded.__dict__
@@ -59,7 +59,7 @@ def test_save_and_load(self, message_dataset, dummy_msg):

def test_exists(self, message_dataset, dummy_msg):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not message_dataset.exists()
message_dataset.save(dummy_msg)
assert message_dataset.exists()
@@ -91,7 +91,7 @@ def test_open_extra_args(self, message_dataset, fs_args):

def test_load_missing_file(self, message_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set EmailMessageDataset\(.*\)"
+pattern = r"Failed while loading data from dataset EmailMessageDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
message_dataset.load()

@@ -149,7 +149,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_message_dataset, dummy_msg):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_message_dataset.save(dummy_msg)
reloaded = versioned_message_dataset.load()
assert dummy_msg.__dict__ == reloaded.__dict__
@@ -161,13 +161,13 @@ def test_no_versions(self, versioned_message_dataset):
versioned_message_dataset.load()

def test_exists(self, versioned_message_dataset, dummy_msg):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_message_dataset.exists()
versioned_message_dataset.save(dummy_msg)
assert versioned_message_dataset.exists()

def test_prevent_overwrite(self, versioned_message_dataset, dummy_msg):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding text file for a given save version already exists."""
versioned_message_dataset.save(dummy_msg)
pattern = (
8 changes: 4 additions & 4 deletions kedro-datasets/tests/geopandas/test_geojson_dataset.py
@@ -72,7 +72,7 @@ def test_save_and_load(self, geojson_dataset, dummy_dataframe):
@pytest.mark.parametrize("geojson_dataset", [{"index": False}], indirect=True)
def test_load_missing_file(self, geojson_dataset):
"""Check the error while trying to load from missing source."""
-pattern = r"Failed while loading data from data set GeoJSONDataset"
+pattern = r"Failed while loading data from dataset GeoJSONDataset"
with pytest.raises(DatasetError, match=pattern):
geojson_dataset.load()

@@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_geojson_dataset, dummy_dataframe):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_geojson_dataset.save(dummy_dataframe)
reloaded_df = versioned_geojson_dataset.load()
assert_frame_equal(reloaded_df, dummy_dataframe)
@@ -168,13 +168,13 @@ def test_no_versions(self, versioned_geojson_dataset):
versioned_geojson_dataset.load()

def test_exists(self, versioned_geojson_dataset, dummy_dataframe):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_geojson_dataset.exists()
versioned_geojson_dataset.save(dummy_dataframe)
assert versioned_geojson_dataset.exists()

def test_prevent_override(self, versioned_geojson_dataset, dummy_dataframe):
"""Check the error when attempt to override the same data set
"""Check the error when attempt to override the same dataset
version."""
versioned_geojson_dataset.save(dummy_dataframe)
pattern = (
4 changes: 2 additions & 2 deletions kedro-datasets/tests/holoviews/test_holoviews_writer.py
@@ -140,7 +140,7 @@ def test_version_str_repr(self, hv_writer, versioned_hv_writer):
assert "save_args" in str(versioned_hv_writer)

def test_prevent_overwrite(self, dummy_hv_object, versioned_hv_writer):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding file for a given save version already exists."""
versioned_hv_writer.save(dummy_hv_object)
pattern = (
@@ -185,7 +185,7 @@ def test_load_not_supported(self, versioned_hv_writer):
versioned_hv_writer.load()

def test_exists(self, versioned_hv_writer, dummy_hv_object):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_hv_writer.exists()
versioned_hv_writer.save(dummy_hv_object)
assert versioned_hv_writer.exists()
4 changes: 2 additions & 2 deletions kedro-datasets/tests/ibis/test_table_dataset.py
@@ -52,7 +52,7 @@ def dummy_table(table_dataset_from_csv):

class TestTableDataset:
def test_save_and_load(self, table_dataset, dummy_table, database):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
table_dataset.save(dummy_table)
reloaded = table_dataset.load()
assert_frame_equal(dummy_table.execute(), reloaded.execute())
@@ -64,7 +64,7 @@ def test_save_and_load(self, table_dataset, dummy_table, database):

def test_exists(self, table_dataset, dummy_table):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not table_dataset.exists()
table_dataset.save(dummy_table)
assert table_dataset.exists()
12 changes: 6 additions & 6 deletions kedro-datasets/tests/json/test_json_dataset.py
@@ -36,7 +36,7 @@ def dummy_data():

class TestJSONDataset:
def test_save_and_load(self, json_dataset, dummy_data):
"""Test saving and reloading the data set."""
"""Test saving and reloading the dataset."""
json_dataset.save(dummy_data)
reloaded = json_dataset.load()
assert dummy_data == reloaded
@@ -45,7 +45,7 @@ def test_save_and_load(self, json_dataset, dummy_data):

def test_exists(self, json_dataset, dummy_data):
"""Test `exists` method invocation for both existing and
-nonexistent data set."""
+nonexistent dataset."""
assert not json_dataset.exists()
json_dataset.save(dummy_data)
assert json_dataset.exists()
@@ -69,7 +69,7 @@ def test_open_extra_args(self, json_dataset, fs_args):

def test_load_missing_file(self, json_dataset):
"""Check the error when trying to load missing file."""
-pattern = r"Failed while loading data from data set JSONDataset\(.*\)"
+pattern = r"Failed while loading data from dataset JSONDataset\(.*\)"
with pytest.raises(DatasetError, match=pattern):
json_dataset.load()

@@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version):

def test_save_and_load(self, versioned_json_dataset, dummy_data):
"""Test that saved and reloaded data matches the original one for
-the versioned data set."""
+the versioned dataset."""
versioned_json_dataset.save(dummy_data)
reloaded = versioned_json_dataset.load()
assert dummy_data == reloaded
@@ -137,13 +137,13 @@ def test_no_versions(self, versioned_json_dataset):
versioned_json_dataset.load()

def test_exists(self, versioned_json_dataset, dummy_data):
"""Test `exists` method invocation for versioned data set."""
"""Test `exists` method invocation for versioned dataset."""
assert not versioned_json_dataset.exists()
versioned_json_dataset.save(dummy_data)
assert versioned_json_dataset.exists()

def test_prevent_overwrite(self, versioned_json_dataset, dummy_data):
"""Check the error when attempting to override the data set if the
"""Check the error when attempting to override the dataset if the
corresponding json file for a given save version already exists."""
versioned_json_dataset.save(dummy_data)
pattern = (
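A recurring pattern in the test diffs above is the update of regexes such as r"Failed while loading data from dataset ...\(.+\)". These strings must track the message that DatasetError carries at runtime, because pytest.raises(..., match=...) applies re.search to the string representation of the raised exception; the parentheses in the dataset repr are escaped so they match literally. A minimal illustration follows — the message string here is a stand-in for demonstration, not taken from the commit.

# Why the escaped \( \) matter: pytest.raises(match=...) is re.search
# under the hood, so the pattern must treat the repr's parens as literals.
import re

message = "Failed while loading data from dataset CSVDataset(filepath=data.csv)"
pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
assert re.search(pattern, message)  # matches only with the new wording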
