From 3093af4d364093341f1ca5fa4251976a4348c484 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Thu, 10 Oct 2024 04:04:52 -0600 Subject: [PATCH 1/7] build(datasets): relax bounds for holoviews, scipy (#868) Signed-off-by: Deepyaman Datta --- kedro-datasets/pyproject.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 156938986..87228f81c 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -17,13 +17,13 @@ dependencies = [ dynamic = ["readme", "version"] [project.optional-dependencies] -pandas-base = ["pandas>=1.3, <3.0",] -spark-base = ["pyspark>=2.2, <4.0",] -hdfs-base = ["hdfs>=2.5.8, <3.0",] -s3fs-base = ["s3fs>=2021.04",] -polars-base = ["polars>=0.18.0",] +pandas-base = ["pandas>=1.3, <3.0"] +spark-base = ["pyspark>=2.2, <4.0"] +hdfs-base = ["hdfs>=2.5.8, <3.0"] +s3fs-base = ["s3fs>=2021.4"] +polars-base = ["polars>=0.18.0"] plotly-base = ["plotly>=4.8.0, <6.0"] -delta-base = ["delta-spark>=1.0, <4.0",] +delta-base = ["delta-spark>=1.0, <4.0"] networkx-base = ["networkx~=2.4"] # Individual Datasets @@ -43,7 +43,7 @@ databricks = ["kedro-datasets[databricks-managedtabledataset]"] geopandas-geojsondataset = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"] geopandas = ["kedro-datasets[geopandas-geojsondataset]"] -holoviews-holoviewswriter = ["holoviews~=1.13.0"] +holoviews-holoviewswriter = ["holoviews>=1.13.0"] holoviews = ["kedro-datasets[holoviews-holoviewswriter]"] huggingface-hfdataset = ["datasets", "huggingface_hub"] @@ -154,10 +154,10 @@ spark = [ spark-sparkstreamingdataset]""" ] -svmlight-svmlightdataset = ["scikit-learn>=1.0.2", "scipy~=1.7.3"] +svmlight-svmlightdataset = ["scikit-learn>=1.0.2", "scipy>=1.7.3"] svmlight = ["kedro-datasets[svmlight-svmlightdataset]"] -tensorflow-tensorflowmodeldataset = ["tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'",] +tensorflow-tensorflowmodeldataset = ["tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'"] tensorflow = ["kedro-datasets[tensorflow-tensorflowmodeldataset]"] text-textdataset = [] @@ -178,7 +178,7 @@ langchain-chatopenaidataset = ["langchain-openai~=0.1.7"] langchain-openaiembeddingsdataset = ["langchain-openai~=0.1.7"] langchain-chatanthropicdataset = ["langchain-anthropic~=0.1.13", "langchain-community~=0.2.0"] langchain-chatcoheredataset = ["langchain-cohere~=0.1.5", "langchain-community~=0.2.0"] -langchain = ["kedro-datasets[langchain-chatopenaidataset,langchain-openaiembeddingsdataset,langchain-chatanthropicdataset,langchain-chatcoheredataset ]"] +langchain = ["kedro-datasets[langchain-chatopenaidataset,langchain-openaiembeddingsdataset,langchain-chatanthropicdataset,langchain-chatcoheredataset]"] netcdf-netcdfdataset = ["h5netcdf>=1.2.0","netcdf4>=1.6.4","xarray>=2023.1.0"] netcdf = ["kedro-datasets[netcdf-netcdfdataset]"] From 8cbdd4dfad129486f24cb4a9293b77e5168a3624 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:47:18 +0100 Subject: [PATCH 2/7] chore(datasets): bump `pillow` (#871) * chore(datasets): bump pillow Signed-off-by: Ankita Katiyar * Apply suggestions from code review Co-authored-by: Deepyaman Datta Signed-off-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> 
--------- Signed-off-by: Ankita Katiyar Signed-off-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Co-authored-by: Deepyaman Datta --- kedro-datasets/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index 87228f81c..d8863cb73 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -118,7 +118,7 @@ pandas = [ pickle-pickledataset = ["compress-pickle[lz4]~=2.1.0"] pickle = ["kedro-datasets[pickle-pickledataset]"] -pillow-imagedataset = ["Pillow~=9.0"] +pillow-imagedataset = ["Pillow>=9.0"] pillow = ["kedro-datasets[pillow-imagedataset]"] plotly-htmldataset = ["kedro-datasets[plotly-base]"] @@ -237,7 +237,7 @@ test = [ "openpyxl>=3.0.3, <4.0", "pandas-gbq>=0.12.0", "pandas>=2.0", - "Pillow~=9.0", + "Pillow~=10.0", "plotly>=4.8.0, <6.0", "polars[xlsx2csv, deltalake]~=0.18.0", "pre-commit>=2.9.2", From 6aea78e2c204deeff49b5ab1bc970b63cd2cab4a Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Thu, 10 Oct 2024 13:17:31 +0200 Subject: [PATCH 3/7] chore: remove support for python 3.8 for kedro-telemetry (#878) * chore: remove support for python 3.8 for kedro-telemetry Signed-off-by: Felix Scherz * ci: remove python 3.8 from airflow and docker Signed-off-by: Felix Scherz * docs: remove python 3.8 from readme badge Signed-off-by: Felix Scherz --------- Signed-off-by: Felix Scherz Co-authored-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> --- .github/workflows/kedro-airflow.yml | 4 ++-- .github/workflows/kedro-docker.yml | 4 ++-- .github/workflows/kedro-telemetry.yml | 2 +- .pre-commit-config.yaml | 2 +- kedro-telemetry/README.md | 2 +- kedro-telemetry/RELEASE.md | 3 +++ kedro-telemetry/kedro_telemetry/masking.py | 1 + kedro-telemetry/pyproject.toml | 2 +- .../integration/dummy-project/src/dummy_project/__main__.py | 1 + .../dummy-project/src/dummy_project/pipeline_registry.py | 3 +-- 10 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/kedro-airflow.yml b/.github/workflows/kedro-airflow.yml index 87aa5b0df..85e7ca62d 100644 --- a/.github/workflows/kedro-airflow.yml +++ b/.github/workflows/kedro-airflow.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] uses: ./.github/workflows/unit-tests.yml with: plugin: kedro-airflow @@ -40,7 +40,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] uses: ./.github/workflows/e2e-tests.yml with: plugin: kedro-airflow diff --git a/.github/workflows/kedro-docker.yml b/.github/workflows/kedro-docker.yml index 8c04d89ab..92558891d 100644 --- a/.github/workflows/kedro-docker.yml +++ b/.github/workflows/kedro-docker.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] uses: ./.github/workflows/unit-tests.yml with: plugin: kedro-docker @@ -40,7 +40,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] uses: ./.github/workflows/e2e-tests.yml with: plugin: kedro-docker diff --git a/.github/workflows/kedro-telemetry.yml b/.github/workflows/kedro-telemetry.yml index f4fb3db9b..d870d1b83 100644 
--- a/.github/workflows/kedro-telemetry.yml +++ b/.github/workflows/kedro-telemetry.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] uses: ./.github/workflows/unit-tests.yml with: plugin: kedro-telemetry diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0afe0c87c..a6f101bdf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -60,7 +60,7 @@ repos: exclude: ^(?!kedro-telemetry/kedro_telemetry/).*\.py$ pass_filenames: false stages: [manual] - entry: ruff kedro-telemetry --fix --exit-non-zero-on-fix + entry: ruff check kedro-telemetry --fix --exit-non-zero-on-fix - id: black-kedro-datasets name: "Black" diff --git a/kedro-telemetry/README.md b/kedro-telemetry/README.md index 5a588650f..79e513ea4 100644 --- a/kedro-telemetry/README.md +++ b/kedro-telemetry/README.md @@ -1,6 +1,6 @@ # Kedro-Telemetry -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-telemetry/) +[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-telemetry/) [![PyPI version](https://badge.fury.io/py/kedro-telemetry.svg)](https://pypi.org/project/kedro-telemetry/) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black) diff --git a/kedro-telemetry/RELEASE.md b/kedro-telemetry/RELEASE.md index 27b620e64..504ca2c34 100644 --- a/kedro-telemetry/RELEASE.md +++ b/kedro-telemetry/RELEASE.md @@ -1,5 +1,8 @@ # Upcoming release +# Release 0.7.0 +* Removed support for Python 3.8 + # Release 0.6.1 * Changed Kedro CLI loading method to improve loading times. * Changed logging level from error to debug for most logging messages. 
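The `requires-python = ">=3.9"` bump in this patch (visible in the `pyproject.toml` hunk below) means pip will refuse to install new releases on Python 3.8 interpreters. A minimal sketch of how such specifiers evaluate, using the `packaging` library (the same machinery pip relies on); the library versions here are illustrative and also contrast with the `~=` pins relaxed to `>=` floors in patch 1:

```python
from packaging.specifiers import SpecifierSet

# `requires-python = ">=3.9"`: Python 3.8 interpreters are now rejected.
requires_python = SpecifierSet(">=3.9")
assert "3.8.10" not in requires_python
assert "3.12.0" in requires_python

# Contrast with the compatible-release pins relaxed in patch 1:
assert "1.13.5" in SpecifierSet("~=1.13.0")      # ~= allows 1.13.x patch releases...
assert "1.14.0" not in SpecifierSet("~=1.13.0")  # ...but not the next minor
assert "1.14.0" in SpecifierSet(">=1.13.0")      # >= is only a lower bound
```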
diff --git a/kedro-telemetry/kedro_telemetry/masking.py b/kedro-telemetry/kedro_telemetry/masking.py index 9308dc771..475f4de2b 100644 --- a/kedro-telemetry/kedro_telemetry/masking.py +++ b/kedro-telemetry/kedro_telemetry/masking.py @@ -1,4 +1,5 @@ """Module containing command masking functionality.""" + from __future__ import annotations from typing import Any diff --git a/kedro-telemetry/pyproject.toml b/kedro-telemetry/pyproject.toml index 32fb3d0b8..fcb56fcde 100644 --- a/kedro-telemetry/pyproject.toml +++ b/kedro-telemetry/pyproject.toml @@ -8,7 +8,7 @@ authors = [ {name = "Kedro"} ] description = "Kedro-Telemetry" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "Apache Software License (Apache 2.0)"} dependencies = [ "kedro>=0.18.0", diff --git a/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/__main__.py b/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/__main__.py index 56cef4b26..c051c7809 100644 --- a/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/__main__.py +++ b/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/__main__.py @@ -1,6 +1,7 @@ """dummy_project file for ensuring the package is executable as `dummy-project` and `python -m dummy_project` """ + import importlib from pathlib import Path diff --git a/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/pipeline_registry.py b/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/pipeline_registry.py index 2d4272e31..d3aa4d381 100644 --- a/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/pipeline_registry.py +++ b/kedro-telemetry/tests/integration/dummy-project/src/dummy_project/pipeline_registry.py @@ -1,11 +1,10 @@ """Project pipelines.""" -from typing import Dict from kedro.framework.project import find_pipelines from kedro.pipeline import Pipeline -def register_pipelines() -> Dict[str, Pipeline]: +def register_pipelines() -> dict[str, Pipeline]: """Register the project's pipelines. 
Returns: From 6adb8235867bba69e02eb3c9daa9269731e922fd Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Thu, 10 Oct 2024 14:31:55 +0200 Subject: [PATCH 4/7] chore: remove support for python 3.8 for kedro-airflow (#877) * chore: removed support for python 3.8 for kedro-airflow Signed-off-by: Felix Scherz * ci: remove python 3.8 from docker and telemetry Signed-off-by: Felix Scherz * docs: remove python 3.8 from readme badge Signed-off-by: Felix Scherz --------- Signed-off-by: Felix Scherz --- .pre-commit-config.yaml | 2 +- kedro-airflow/README.md | 2 +- kedro-airflow/RELEASE.md | 3 +++ kedro-airflow/features/steps/sh_run.py | 3 +-- kedro-airflow/pyproject.toml | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6f101bdf..a39779940 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,7 @@ repos: exclude: ^(?!kedro-airflow/kedro_airflow/).*\.py$ pass_filenames: false stages: [ manual ] - entry: ruff kedro-airflow --fix --exit-non-zero-on-fix + entry: ruff check kedro-airflow --fix --exit-non-zero-on-fix - id: ruff-kedro-docker name: "Ruff on kedro_docker/*" diff --git a/kedro-airflow/README.md b/kedro-airflow/README.md index 1ecaa8d8b..c67aba52e 100644 --- a/kedro-airflow/README.md +++ b/kedro-airflow/README.md @@ -1,7 +1,7 @@ # Kedro-Airflow [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-airflow/) +[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-airflow/) [![PyPI Version](https://badge.fury.io/py/kedro-airflow.svg)](https://pypi.org/project/kedro-airflow/) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black) diff --git a/kedro-airflow/RELEASE.md b/kedro-airflow/RELEASE.md index 105b3df13..356a16731 100755 --- a/kedro-airflow/RELEASE.md +++ b/kedro-airflow/RELEASE.md @@ -1,5 +1,8 @@ # Upcoming Release +# Release 0.10.0 +* Removed support for Python 3.8 + # Release 0.9.1 * Added support to specify `--conf-source` which would point to the runtime configuration directory to be used for running the DAG in airflow. This configuration path is added to the generated DAG. 
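The `sh_run.py` hunk below swaps `typing.Dict` for the builtin `dict` in the `**kwargs` annotation, the PEP 585 style that is only valid from Python 3.9. A self-contained sketch of such a helper with the new-style annotation; the signature follows the diff, but the body is an assumption based on what a typical shell-run helper does:

```python
import shlex
import subprocess


def run(cmd: str, split: bool = True, print_output: bool = False, **kwargs: dict) -> int:
    """Run a shell command and return its exit code (body assumed for illustration)."""
    args = shlex.split(cmd) if split else cmd  # split "kedro run --async" into argv
    result = subprocess.run(args, capture_output=not print_output, check=False, **kwargs)
    return result.returncode


print(run("python --version"))  # 0 if the interpreter is on PATH
```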
diff --git a/kedro-airflow/features/steps/sh_run.py b/kedro-airflow/features/steps/sh_run.py index cc8afc413..0c80e3686 100644 --- a/kedro-airflow/features/steps/sh_run.py +++ b/kedro-airflow/features/steps/sh_run.py @@ -1,10 +1,9 @@ import shlex import subprocess -from typing import Dict def run( - cmd: str, split: bool = True, print_output: bool = False, **kwargs: Dict + cmd: str, split: bool = True, print_output: bool = False, **kwargs: dict ) -> int: """ Args: diff --git a/kedro-airflow/pyproject.toml b/kedro-airflow/pyproject.toml index 1aa39f9f6..5f5a9be65 100644 --- a/kedro-airflow/pyproject.toml +++ b/kedro-airflow/pyproject.toml @@ -8,7 +8,7 @@ authors = [ {name = "Kedro"} ] description = "Kedro-Airflow makes it easy to deploy Kedro projects to Airflow" -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "Apache Software License (Apache 2.0)"} dependencies = [ "kedro>=0.19.0", From 9f9c1c360ab7e3d06f4bf37aa763535bc33f74d7 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Thu, 10 Oct 2024 14:49:13 +0200 Subject: [PATCH 5/7] chore: remove python 3.8 support for kedro-docker (#876) * chore: remove python 3.8 support for kedro-docker Signed-off-by: Felix Scherz * docs: update release notes Signed-off-by: Felix Scherz * ci: remove python 3.8 from airflow and telemetry Signed-off-by: Felix Scherz * docs: remove Python3.8 from README Signed-off-by: Felix Scherz --------- Signed-off-by: Felix Scherz Co-authored-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- kedro-docker/README.md | 4 ++-- kedro-docker/RELEASE.md | 4 ++++ kedro-docker/features/steps/sh_run.py | 3 ++- kedro-docker/features/steps/util.py | 6 +++--- kedro-docker/kedro_docker/helpers.py | 2 +- kedro-docker/kedro_docker/plugin.py | 4 ++-- kedro-docker/pyproject.toml | 2 +- 8 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a39779940..9f9706a34 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: exclude: ^(?!kedro-docker/kedro_docker/).*\.py$ pass_filenames: false stages: [ manual ] - entry: ruff kedro-docker --fix --exit-non-zero-on-fix + entry: ruff check kedro-docker --fix --exit-non-zero-on-fix - id: ruff-kedro-telemetry name: "Ruff on kedro_telemetry/*" diff --git a/kedro-docker/README.md b/kedro-docker/README.md index 7d4e9b9b5..fd3150172 100644 --- a/kedro-docker/README.md +++ b/kedro-docker/README.md @@ -1,6 +1,6 @@ # Kedro-Docker -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-docker/) +[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-docker/) [![PyPI version](https://badge.fury.io/py/kedro-docker.svg)](https://pypi.org/project/kedro-docker/) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black) @@ -63,7 +63,7 @@ Behind the scenes Kedro does the following: > *Note:* By default, `kedro docker build` creates an image without Spark and Hadoop. -> *Note:* By default, when calling `kedro docker build` image is built with `python:VERSION-buster` image, where VERSION is Python (major + minor) version from the current environment. 
By specifying `--base-image` option, different base image can be used. For example `kedro docker build --base-image="python:3.8-buster"`. +> *Note:* By default, when calling `kedro docker build` image is built with `python:VERSION-buster` image, where VERSION is Python (major + minor) version from the current environment. By specifying `--base-image` option, different base image can be used. For example `kedro docker build --base-image="python:3.9-buster"`. > *Note:* You can generate the `Dockerfile`, `.dockerignore` or `.dive-ci` files without building the image by running `kedro docker init`. This might be of use in case you would like to modify these files before the first build. diff --git a/kedro-docker/RELEASE.md b/kedro-docker/RELEASE.md index 725ee0f96..0d1045270 100644 --- a/kedro-docker/RELEASE.md +++ b/kedro-docker/RELEASE.md @@ -1,5 +1,9 @@ # Upcoming Release +# Release 0.7.0 +## Major features and improvements +* Removed support for python 3.8 + # Release 0.6.1 * Unpinned pip version requirement diff --git a/kedro-docker/features/steps/sh_run.py b/kedro-docker/features/steps/sh_run.py index 7d9f6152a..6eb3b2e4e 100644 --- a/kedro-docker/features/steps/sh_run.py +++ b/kedro-docker/features/steps/sh_run.py @@ -1,6 +1,7 @@ import shlex import subprocess -from typing import Sequence, Union +from collections.abc import Sequence +from typing import Union import psutil diff --git a/kedro-docker/features/steps/util.py b/kedro-docker/features/steps/util.py index 8362d0a45..ec3b2d535 100644 --- a/kedro-docker/features/steps/util.py +++ b/kedro-docker/features/steps/util.py @@ -6,7 +6,7 @@ from pathlib import Path from threading import Thread from time import sleep, time -from typing import Any, Callable, List +from typing import Any, Callable import docker from kedro.framework.cli.utils import get_pkg_version @@ -130,7 +130,7 @@ def init_docker_client(**kwargs) -> docker.client.DockerClient: return docker.from_env(**kwargs) -def get_docker_containers(name: str) -> List[docker.models.containers.Container]: +def get_docker_containers(name: str) -> list[docker.models.containers.Container]: """ Get list of docker containers which contain `name` in their names. @@ -164,7 +164,7 @@ def docker_prune(): client.images.prune() -def get_docker_images(name: str) -> List[docker.models.images.Image]: +def get_docker_images(name: str) -> list[docker.models.images.Image]: """ Get docker images with `name` in their names. 
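The import moves in `helpers.py` and `plugin.py` below follow the same Python 3.9 modernization: abstract container types such as `Sequence` and `Callable` now come from `collections.abc`, since their `typing` counterparts are deprecated aliases that are subscriptable directly on 3.9+. A minimal sketch of the new style:

```python
from collections.abc import Callable, Sequence


def apply_all(funcs: Sequence[Callable[[int], int]], value: int) -> list[int]:
    # On Python 3.9+, collections.abc types are subscriptable directly,
    # so the typing.Sequence / typing.Callable aliases are no longer needed.
    return [f(value) for f in funcs]


print(apply_all([abs, lambda x: x * 2], -3))  # [3, -6]
```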
diff --git a/kedro-docker/kedro_docker/helpers.py b/kedro-docker/kedro_docker/helpers.py index 280e198b2..21655923f 100644 --- a/kedro-docker/kedro_docker/helpers.py +++ b/kedro-docker/kedro_docker/helpers.py @@ -7,11 +7,11 @@ import shutil import socket import subprocess +from collections.abc import Sequence from importlib import import_module from itertools import chain from pathlib import Path, PurePosixPath from subprocess import DEVNULL, PIPE -from typing import Sequence from click import secho from kedro.framework.cli.utils import KedroCliError diff --git a/kedro-docker/kedro_docker/plugin.py b/kedro-docker/kedro_docker/plugin.py index c78c35dd1..908c187e8 100644 --- a/kedro-docker/kedro_docker/plugin.py +++ b/kedro-docker/kedro_docker/plugin.py @@ -2,9 +2,9 @@ import shlex import subprocess +from collections.abc import Sequence from pathlib import Path from sys import version_info -from typing import Dict, Sequence import click from kedro import __version__ as kedro_version @@ -191,7 +191,7 @@ def docker_build(ctx, uid, gid, spark, base_image, image, docker_args): # noqa: call(command) -def _mount_info() -> Dict[str, Sequence[str]]: +def _mount_info() -> dict[str, Sequence[str]]: res = { "host_root": str(Path.cwd()), "container_root": "/home/kedro_docker", diff --git a/kedro-docker/pyproject.toml b/kedro-docker/pyproject.toml index 1f065a5fe..c607ef9a2 100644 --- a/kedro-docker/pyproject.toml +++ b/kedro-docker/pyproject.toml @@ -8,7 +8,7 @@ authors = [ {name = "Kedro"} ] description = "Kedro-Docker makes it easy to package Kedro projects with Docker." -requires-python = ">=3.8" +requires-python = ">=3.9" license = {text = "Apache Software License (Apache 2.0)"} dependencies = [ "anyconfig~=0.10.0", # not directly required, pinned by Snyk to avoid a vulnerability From 62a5808b66d1545eaf3eb922c789ddcb4c75a125 Mon Sep 17 00:00:00 2001 From: Felix Scherz Date: Thu, 10 Oct 2024 15:48:49 +0200 Subject: [PATCH 6/7] chore: remove support for python 3.9 (#870) * chore: remove support for python 3.9 Signed-off-by: Felix Scherz * chore: remove Union in favor of | operator Signed-off-by: Felix Scherz * chore: use collections.abc.Callable instead of typing.Callable Signed-off-by: Felix Scherz * chore: remove python 3.9 from CI Signed-off-by: Felix Scherz * docs: document removal of python 3.9 support Signed-off-by: Felix Scherz * chore: use `|` in favor of tuples for isinstance checks Signed-off-by: Felix Scherz * chore: run formatter Signed-off-by: Felix Scherz * chore: remove environment markers for python 3.9 Signed-off-by: Felix Scherz * docs: add to contributer list Signed-off-by: Felix Scherz --------- Signed-off-by: Felix Scherz --- .github/workflows/kedro-datasets.yml | 2 +- kedro-datasets/CONTRIBUTING.md | 2 +- kedro-datasets/README.md | 2 +- kedro-datasets/RELEASE.md | 2 ++ kedro-datasets/docs/source/conf.py | 3 +-- .../kedro_datasets/databricks/managed_table_dataset.py | 2 +- kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py | 4 ++-- .../kedro_datasets/matplotlib/matplotlib_writer.py | 6 +++--- kedro-datasets/kedro_datasets/pandas/excel_dataset.py | 6 +++--- .../kedro_datasets/partitions/incremental_dataset.py | 3 ++- .../kedro_datasets/partitions/partitioned_dataset.py | 3 ++- kedro-datasets/kedro_datasets/plotly/html_dataset.py | 6 ++---- kedro-datasets/kedro_datasets/plotly/json_dataset.py | 6 ++---- kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py | 4 ++-- kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py | 4 ++-- kedro-datasets/pyproject.toml | 5 
++--- kedro-datasets/tests/conftest.py | 2 +- kedro-datasets/tests/spark/conftest.py | 6 +----- 18 files changed, 31 insertions(+), 37 deletions(-) diff --git a/.github/workflows/kedro-datasets.yml b/.github/workflows/kedro-datasets.yml index 991a12731..d5aae0282 100644 --- a/.github/workflows/kedro-datasets.yml +++ b/.github/workflows/kedro-datasets.yml @@ -22,7 +22,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest, windows-latest ] - python-version: [ "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.10", "3.11", "3.12" ] uses: ./.github/workflows/unit-tests.yml with: plugin: kedro-datasets diff --git a/kedro-datasets/CONTRIBUTING.md b/kedro-datasets/CONTRIBUTING.md index 0ccc1b2a5..41454f598 100644 --- a/kedro-datasets/CONTRIBUTING.md +++ b/kedro-datasets/CONTRIBUTING.md @@ -41,7 +41,7 @@ Core datasets are maintained by the [Kedro Technical Steering Committee (TSC)](h 3. Must have working doctests (unless complex cloud/DB setup required, which can be discussed in the review). 4. Must run as part of the regular CI/CD jobs. 5. Must have 100% test coverage. -6. Should support all Python versions under NEP 29 (3.9+ currently). +6. Should support all Python versions under NEP 29 (3.10+ currently). 7. Should work on Linux, macOS, and Windows. #### Experimental datasets diff --git a/kedro-datasets/README.md b/kedro-datasets/README.md index 3bf6cd8d6..aa4531e5b 100644 --- a/kedro-datasets/README.md +++ b/kedro-datasets/README.md @@ -3,7 +3,7 @@ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-datasets/) +[![Python Version](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue.svg)](https://pypi.org/project/kedro-datasets/) [![PyPI Version](https://badge.fury.io/py/kedro-datasets.svg)](https://pypi.org/project/kedro-datasets/) [![Code Style: Black](https://img.shields.io/badge/code%20style-black-black.svg)](https://github.com/ambv/black) diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index 51c4ae9bb..e333d2787 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -1,5 +1,6 @@ # Upcoming Release ## Major features and improvements +* Removed support for Python 3.9 * Added the following new **experimental** datasets: | Type | Description | Location | @@ -31,6 +32,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: * [janickspirig](https://github.com/janickspirig) * [Galen Seilis](https://github.com/galenseilis) * [Mariusz Wojakowski](https://github.com/mariusz89016) +* [Felix Scherz](https://github.com/felixscherz) # Release 4.1.0 diff --git a/kedro-datasets/docs/source/conf.py b/kedro-datasets/docs/source/conf.py index 09524612a..318cd8884 100644 --- a/kedro-datasets/docs/source/conf.py +++ b/kedro-datasets/docs/source/conf.py @@ -97,7 +97,7 @@ intersphinx_mapping = { "kedro": ("https://docs.kedro.org/en/stable/", None), - "python": ("https://docs.python.org/3.9/", None), + "python": ("https://docs.python.org/3.10/", None), } type_targets = { @@ -145,7 +145,6 @@ ), "py:data": ( "typing.Any", - "typing.Union", "typing.Optional", "typing.Tuple", ), diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py index e15230b02..5783cebcc 100644 --- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py +++ 
b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py @@ -297,7 +297,7 @@ def load(self) -> DataFrame | pd.DataFrame: the init doesn't exist Returns: - Union[DataFrame, pd.DataFrame]: Returns a dataframe + DataFrame | pd.DataFrame: Returns a dataframe in the format defined in the init """ if self._version and self._version.load >= 0: diff --git a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py index 17ade1ded..322fc147c 100644 --- a/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py +++ b/kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py @@ -6,7 +6,7 @@ import copy from pathlib import PurePosixPath -from typing import Any, Union +from typing import Any import fsspec import geopandas as gpd @@ -21,7 +21,7 @@ class GeoJSONDataset( AbstractVersionedDataset[ - gpd.GeoDataFrame, Union[gpd.GeoDataFrame, dict[str, gpd.GeoDataFrame]] + gpd.GeoDataFrame, gpd.GeoDataFrame | dict[str, gpd.GeoDataFrame] ] ): """``GeoJSONDataset`` loads/saves data to a GeoJSON file using an underlying filesystem diff --git a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py index 4e37dd9d1..335109b5d 100644 --- a/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py +++ b/kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py @@ -6,7 +6,7 @@ import io from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, NoReturn, Union +from typing import Any, NoReturn from warnings import warn import fsspec @@ -24,7 +24,7 @@ class MatplotlibWriter( - AbstractVersionedDataset[Union[Figure, list[Figure], dict[str, Figure]], NoReturn] + AbstractVersionedDataset[Figure | list[Figure] | dict[str, Figure], NoReturn] ): """``MatplotlibWriter`` saves one or more Matplotlib objects as image files to an underlying filesystem (e.g. local, S3, GCS). 
@@ -203,7 +203,7 @@ def load(self) -> NoReturn: def save(self, data: Figure | (list[Figure] | dict[str, Figure])) -> None: save_path = self._get_save_path() - if isinstance(data, (list, dict)) and self._overwrite and self._exists(): + if isinstance(data, list | dict) and self._overwrite and self._exists(): self._fs.rm(get_filepath_str(save_path, self._protocol), recursive=True) if isinstance(data, list): diff --git a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py index c1c657900..42c204b84 100644 --- a/kedro-datasets/kedro_datasets/pandas/excel_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/excel_dataset.py @@ -6,7 +6,7 @@ import logging from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, Union +from typing import Any import fsspec import pandas as pd @@ -26,8 +26,8 @@ class ExcelDataset( AbstractVersionedDataset[ - Union[pd.DataFrame, dict[str, pd.DataFrame]], - Union[pd.DataFrame, dict[str, pd.DataFrame]], + pd.DataFrame | dict[str, pd.DataFrame], + pd.DataFrame | dict[str, pd.DataFrame], ] ): """``ExcelDataset`` loads/saves data from/to a Excel file using an underlying diff --git a/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py b/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py index ae33850e4..65ef4e55f 100644 --- a/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py +++ b/kedro-datasets/kedro_datasets/partitions/incremental_dataset.py @@ -9,8 +9,9 @@ from __future__ import annotations import operator +from collections.abc import Callable from copy import deepcopy -from typing import Any, Callable +from typing import Any from cachetools import cachedmethod from kedro.io.core import ( diff --git a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py index a488ba7af..ea2461034 100644 --- a/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py +++ b/kedro-datasets/kedro_datasets/partitions/partitioned_dataset.py @@ -5,9 +5,10 @@ from __future__ import annotations import operator +from collections.abc import Callable from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, Callable +from typing import Any from urllib.parse import urlparse from warnings import warn diff --git a/kedro-datasets/kedro_datasets/plotly/html_dataset.py b/kedro-datasets/kedro_datasets/plotly/html_dataset.py index d61cb39b9..83585d57b 100644 --- a/kedro-datasets/kedro_datasets/plotly/html_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/html_dataset.py @@ -5,7 +5,7 @@ from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, NoReturn, Union +from typing import Any, NoReturn import fsspec from kedro.io.core import ( @@ -18,9 +18,7 @@ from plotly import graph_objects as go -class HTMLDataset( - AbstractVersionedDataset[go.Figure, Union[go.Figure, go.FigureWidget]] -): +class HTMLDataset(AbstractVersionedDataset[go.Figure, go.Figure | go.FigureWidget]): """``HTMLDataset`` saves a plotly figure to an HTML file using an underlying filesystem (e.g.: local, S3, GCS). 
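The `Union` rewrites in these hunks rely on PEP 604: on Python 3.10+, `X | Y` produces a runtime `types.UnionType`, so it works not only in annotations but also in class subscripts, type aliases like `PolarsFrame`, and `isinstance` checks such as the one changed in `matplotlib_writer.py`. A minimal sketch with stand-in types:

```python
# A runtime type alias built with `|` (types.UnionType, Python 3.10+),
# standing in for aliases like `PolarsFrame = pl.LazyFrame | pl.DataFrame`:
IntOrStr = int | str

data = [1, 2, 3]
assert isinstance(data, list | dict)   # new style used in matplotlib_writer.py
assert isinstance(data, (list, dict))  # the tuple form it replaces still works
assert isinstance("x", IntOrStr)
```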
diff --git a/kedro-datasets/kedro_datasets/plotly/json_dataset.py b/kedro-datasets/kedro_datasets/plotly/json_dataset.py index 5e4cdbb65..2ce626a8b 100644 --- a/kedro-datasets/kedro_datasets/plotly/json_dataset.py +++ b/kedro-datasets/kedro_datasets/plotly/json_dataset.py @@ -6,7 +6,7 @@ import json from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, Union +from typing import Any import fsspec import plotly.io as pio @@ -21,9 +21,7 @@ from kedro_datasets._typing import PlotlyPreview -class JSONDataset( - AbstractVersionedDataset[go.Figure, Union[go.Figure, go.FigureWidget]] -): +class JSONDataset(AbstractVersionedDataset[go.Figure, go.Figure | go.FigureWidget]): """``JSONDataset`` loads/saves a plotly figure from/to a JSON file using an underlying filesystem (e.g.: local, S3, GCS). diff --git a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py index 14ff601ce..d1c59e049 100644 --- a/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py +++ b/kedro-datasets/kedro_datasets/polars/lazy_polars_dataset.py @@ -7,7 +7,7 @@ import logging from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, ClassVar, Union +from typing import Any, ClassVar import fsspec import polars as pl @@ -22,7 +22,7 @@ ACCEPTED_FILE_FORMATS = ["csv", "parquet"] -PolarsFrame = Union[pl.LazyFrame, pl.DataFrame] +PolarsFrame = pl.LazyFrame | pl.DataFrame logger = logging.getLogger(__name__) diff --git a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py index 93d980f67..85247ee68 100644 --- a/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py +++ b/kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py @@ -6,7 +6,7 @@ from copy import deepcopy from pathlib import PurePosixPath -from typing import Any, Union +from typing import Any import fsspec from kedro.io.core import ( @@ -25,7 +25,7 @@ # in kedro-plugins (https://github.com/kedro-org/kedro-plugins) # Type of data input -_DI = tuple[Union[ndarray, csr_matrix], ndarray] +_DI = tuple[ndarray | csr_matrix, ndarray] # Type of data output _DO = tuple[csr_matrix, ndarray] diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index d8863cb73..6d882a2a0 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -8,7 +8,7 @@ authors = [ {name = "Kedro"} ] description = "Kedro-Datasets is where you can find all of Kedro's data connectors." 
-requires-python = ">=3.9" +requires-python = ">=3.10" license = {text = "Apache Software License (Apache 2.0)"} dependencies = [ "kedro>=0.19.7", @@ -227,8 +227,7 @@ test = [ "jupyterlab>=3.0", "jupyter~=1.0", "lxml~=4.6", - "matplotlib>=3.0.3, <3.4; python_version < '3.10'", # 3.4.0 breaks holoviews - "matplotlib>=3.5, <3.6; python_version >= '3.10'", + "matplotlib>=3.5, <3.6", "memory_profiler>=0.50.0, <1.0", "moto==5.0.0", "mypy~=1.0", diff --git a/kedro-datasets/tests/conftest.py b/kedro-datasets/tests/conftest.py index 67237b574..c493539f3 100644 --- a/kedro-datasets/tests/conftest.py +++ b/kedro-datasets/tests/conftest.py @@ -5,7 +5,7 @@ https://docs.pytest.org/en/latest/fixture.html """ -from typing import Callable +from collections.abc import Callable from unittest.mock import MagicMock import aiobotocore.awsrequest diff --git a/kedro-datasets/tests/spark/conftest.py b/kedro-datasets/tests/spark/conftest.py index fa7504f0a..d0bf31976 100644 --- a/kedro-datasets/tests/spark/conftest.py +++ b/kedro-datasets/tests/spark/conftest.py @@ -8,11 +8,7 @@ import pytest from delta import configure_spark_with_delta_pip from filelock import FileLock - -try: - from pyspark.sql import SparkSession -except ImportError: # pragma: no cover - pass # this is only for test discovery to succeed on Python 3.8, 3.9 +from pyspark.sql import SparkSession def _setup_spark_session(): From 987dab9038360ca2f95fb846d08b86ca040e23f6 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Thu, 10 Oct 2024 09:46:42 -0600 Subject: [PATCH 7/7] chore(datasets): replace "data set" with "dataset" (#867) * chore(datasets): replace "data set" with "dataset" Signed-off-by: Deepyaman Datta * style(datasets): reformat everything using `black` Signed-off-by: Deepyaman Datta --------- Signed-off-by: Deepyaman Datta --- kedro-datasets/kedro_datasets/dask/csv_dataset.py | 2 +- .../kedro_datasets/dask/parquet_dataset.py | 2 +- .../databricks/managed_table_dataset.py | 2 +- .../kedro_datasets/pandas/feather_dataset.py | 2 +- .../kedro_datasets/pandas/sql_dataset.py | 2 +- .../kedro_datasets/spark/spark_hive_dataset.py | 2 +- .../langchain/_anthropic.py | 2 +- .../langchain/_cohere.py | 2 +- .../langchain/_openai.py | 2 +- .../tests/netcdf/test_netcdf_dataset.py | 4 ++-- .../tests/prophet/test_prophet_dataset.py | 2 +- .../tests/rioxarray/test_geotiff_dataset.py | 8 ++++---- .../tests/biosequence/test_biosequence_dataset.py | 6 +++--- kedro-datasets/tests/dask/test_csv_dataset.py | 6 +++--- kedro-datasets/tests/dask/test_parquet_dataset.py | 6 +++--- kedro-datasets/tests/email/test_message_dataset.py | 12 ++++++------ .../tests/geopandas/test_geojson_dataset.py | 8 ++++---- .../tests/holoviews/test_holoviews_writer.py | 4 ++-- kedro-datasets/tests/ibis/test_table_dataset.py | 4 ++-- kedro-datasets/tests/json/test_json_dataset.py | 12 ++++++------ kedro-datasets/tests/matlab/test_matlab_dataset.py | 12 ++++++------ .../tests/matplotlib/test_matplotlib_writer.py | 6 +++--- kedro-datasets/tests/networkx/test_gml_dataset.py | 10 +++++----- .../tests/networkx/test_graphml_dataset.py | 10 +++++----- kedro-datasets/tests/networkx/test_json_dataset.py | 10 +++++----- kedro-datasets/tests/pandas/test_csv_dataset.py | 12 ++++++------ kedro-datasets/tests/pandas/test_excel_dataset.py | 14 +++++++------- .../tests/pandas/test_feather_dataset.py | 12 ++++++------ kedro-datasets/tests/pandas/test_gbq_dataset.py | 14 +++++++------- .../tests/pandas/test_generic_dataset.py | 6 +++--- kedro-datasets/tests/pandas/test_hdf_dataset.py | 14 
+++++++------- kedro-datasets/tests/pandas/test_json_dataset.py | 12 ++++++------ .../tests/pandas/test_parquet_dataset.py | 14 +++++++------- kedro-datasets/tests/pandas/test_sql_dataset.py | 8 ++++---- kedro-datasets/tests/pandas/test_xml_dataset.py | 12 ++++++------ .../tests/partitions/test_partitioned_dataset.py | 4 ++-- kedro-datasets/tests/pickle/test_pickle_dataset.py | 12 ++++++------ kedro-datasets/tests/pillow/test_image_dataset.py | 12 ++++++------ kedro-datasets/tests/plotly/test_html_dataset.py | 4 ++-- kedro-datasets/tests/plotly/test_json_dataset.py | 6 +++--- kedro-datasets/tests/plotly/test_plotly_dataset.py | 6 +++--- kedro-datasets/tests/polars/test_csv_dataset.py | 12 ++++++------ .../tests/polars/test_eager_polars_dataset.py | 10 +++++----- .../tests/polars/test_lazy_polars_dataset.py | 8 ++++---- kedro-datasets/tests/redis/test_redis_dataset.py | 4 ++-- .../tests/spark/test_deltatable_dataset.py | 2 +- kedro-datasets/tests/spark/test_memory_dataset.py | 6 +++--- kedro-datasets/tests/spark/test_spark_dataset.py | 2 +- .../tests/spark/test_spark_hive_dataset.py | 2 +- .../tests/svmlight/test_svmlight_dataset.py | 12 ++++++------ .../tensorflow/test_tensorflow_model_dataset.py | 14 ++++++-------- kedro-datasets/tests/text/test_text_dataset.py | 12 ++++++------ kedro-datasets/tests/tracking/test_json_dataset.py | 6 +++--- .../tests/tracking/test_metrics_dataset.py | 6 +++--- kedro-datasets/tests/video/test_video_dataset.py | 6 +++--- kedro-datasets/tests/yaml/test_yaml_dataset.py | 12 ++++++------ 56 files changed, 206 insertions(+), 208 deletions(-) diff --git a/kedro-datasets/kedro_datasets/dask/csv_dataset.py b/kedro-datasets/kedro_datasets/dask/csv_dataset.py index 31f20680f..053da6b00 100644 --- a/kedro-datasets/kedro_datasets/dask/csv_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/csv_dataset.py @@ -1,4 +1,4 @@ -"""``CSVDataset`` is a data set used to load and save data to CSV files using Dask +"""``CSVDataset`` is a dataset used to load and save data to CSV files using Dask dataframe""" from __future__ import annotations diff --git a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py index 03082b341..1acfe7cda 100644 --- a/kedro-datasets/kedro_datasets/dask/parquet_dataset.py +++ b/kedro-datasets/kedro_datasets/dask/parquet_dataset.py @@ -1,4 +1,4 @@ -"""``ParquetDataset`` is a data set used to load and save data to parquet files using Dask +"""``ParquetDataset`` is a dataset used to load and save data to parquet files using Dask dataframe""" from __future__ import annotations diff --git a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py index 5783cebcc..ecca89f80 100644 --- a/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py +++ b/kedro-datasets/kedro_datasets/databricks/managed_table_dataset.py @@ -242,7 +242,7 @@ def __init__( # noqa: PLR0913 database: the name of the database. (also referred to as schema). Defaults to "default". write_mode: the mode to write the data into the table. If not - present, the data set is read-only. + present, the dataset is read-only. Options are:["overwrite", "append", "upsert"]. "upsert" mode requires primary_key field to be populated. Defaults to None. 
diff --git a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py index 4c1f68fa8..56e3eab25 100644 --- a/kedro-datasets/kedro_datasets/pandas/feather_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/feather_dataset.py @@ -1,4 +1,4 @@ -"""``FeatherDataset`` is a data set used to load and save data to feather files +"""``FeatherDataset`` is a dataset used to load and save data to feather files using an underlying filesystem (e.g.: local, S3, GCS). The underlying functionality is supported by pandas, so it supports all operations the pandas supports. """ diff --git a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py index 12f71e790..e34f3a257 100644 --- a/kedro-datasets/kedro_datasets/pandas/sql_dataset.py +++ b/kedro-datasets/kedro_datasets/pandas/sql_dataset.py @@ -309,7 +309,7 @@ class SQLQueryDataset(AbstractDataset[None, pd.DataFrame]): by SQLAlchemy can be found here: https://docs.sqlalchemy.org/core/engines.html#database-urls - It does not support save method so it is a read only data set. + It does not support save method so it is a read only dataset. To save data to a SQL server use ``SQLTableDataset``. Example usage for the diff --git a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py index 5886dd7ce..8908c0fac 100644 --- a/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py +++ b/kedro-datasets/kedro_datasets/spark/spark_hive_dataset.py @@ -16,7 +16,7 @@ class SparkHiveDataset(AbstractDataset[DataFrame, DataFrame]): """``SparkHiveDataset`` loads and saves Spark dataframes stored on Hive. - This data set also handles some incompatible file types such as using partitioned parquet on + This dataset also handles some incompatible file types such as using partitioned parquet on hive which will not normally allow upserts to existing data without a complete replacement of the existing file/partition. 
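The langchain hunks below all touch the same read-only pattern: `load` constructs an API client, and `save` raises immediately with the reworded "read only dataset type" message. A hypothetical, self-contained sketch of that pattern, assuming kedro's `AbstractDataset` base with the public `load`/`save` overrides used throughout these diffs (the class and return value are invented for illustration; the error wording matches the patches):

```python
from typing import Any, NoReturn

from kedro.io.core import AbstractDataset, DatasetError


class ReadOnlyClientDataset(AbstractDataset[None, dict]):
    """Hypothetical read-only dataset: `load` builds an object, `save` refuses."""

    def __init__(self, **kwargs: Any) -> None:
        self.kwargs = kwargs

    def load(self) -> dict:
        # A real dataset (e.g. the langchain ones) would construct an API client here.
        return {"client": "stub", **self.kwargs}

    def save(self, data: None) -> NoReturn:
        raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

    def _describe(self) -> dict[str, Any]:
        return {**self.kwargs}
```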
diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py b/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py index ea082dfd9..d643706e6 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_anthropic.py @@ -68,7 +68,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> ChatAnthropic: return ChatAnthropic( diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py b/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py index 38c33e48b..5b4e0eba7 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_cohere.py @@ -70,7 +70,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> ChatCohere: return ChatCohere(cohere_api_key=self.cohere_api_key, base_url=self.cohere_api_url, **self.kwargs) diff --git a/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py b/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py index 952ae7eb6..2c1ad002d 100644 --- a/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py +++ b/kedro-datasets/kedro_datasets_experimental/langchain/_openai.py @@ -32,7 +32,7 @@ def _describe(self) -> dict[str, Any]: return {**self.kwargs} def save(self, data: None) -> NoReturn: - raise DatasetError(f"{self.__class__.__name__} is a read only data set type") + raise DatasetError(f"{self.__class__.__name__} is a read only dataset type") def load(self) -> OPENAI_TYPE: return self.constructor( diff --git a/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py index 1526f89a5..1e657e1d4 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/tests/netcdf/test_netcdf_dataset.py @@ -153,7 +153,7 @@ def test_empty_credentials_load(self, bad_credentials, tmp_path): netcdf_dataset = NetCDFDataset( filepath=S3_PATH, temppath=tmp_path, credentials=bad_credentials ) - pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)" + pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): netcdf_dataset.load() @@ -165,7 +165,7 @@ def test_pass_credentials(self, mocker, tmp_path): s3_dataset = NetCDFDataset( filepath=S3_PATH, temppath=tmp_path, credentials=AWS_CREDENTIALS ) - pattern = r"Failed while loading data from data set NetCDFDataset\(.+\)" + pattern = r"Failed while loading data from dataset NetCDFDataset\(.+\)" with pytest.raises(DatasetError, match=pattern): s3_dataset.load() diff --git a/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py index 88510a99b..668ae544c 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py +++ 
b/kedro-datasets/kedro_datasets_experimental/tests/prophet/test_prophet_dataset.py @@ -75,7 +75,7 @@ def test_open_extra_args(self, prophet_model_dataset, fs_args): def test_load_missing_file(self, prophet_model_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set ProphetModelDataset\(.*\)" + pattern = r"Failed while loading data from dataset ProphetModelDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): prophet_model_dataset.load() diff --git a/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py b/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py index 7f217eee6..51dcc8596 100644 --- a/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py +++ b/kedro-datasets/kedro_datasets_experimental/tests/rioxarray/test_geotiff_dataset.py @@ -65,7 +65,7 @@ def cog_geotiff_dataset(cog_file_path, save_args) -> GeoTIFFDataset: def test_load_cog_geotiff(cog_geotiff_dataset): - """Test loading cloud optimised geotiff reloading the data set.""" + """Test loading cloud optimised geotiff reloading the dataset.""" loaded_xr = cog_geotiff_dataset.load() assert isinstance(loaded_xr.rio.crs, CRS) assert isinstance(loaded_xr, xr.DataArray) @@ -144,7 +144,7 @@ def test_load_not_tif(): def test_exists(tmp_path, synthetic_xarray): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) assert not dataset.exists() dataset.save(synthetic_xarray) @@ -155,7 +155,7 @@ def test_exists(tmp_path, synthetic_xarray): "synthetic_xarray", ]) def test_save_and_load_geotiff(tmp_path, request, xarray_fixture): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" xarray_data = request.getfixturevalue(xarray_fixture) dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) dataset.save(xarray_data) @@ -176,6 +176,6 @@ def test_load_missing_file(tmp_path): """Check the error when trying to load missing file.""" dataset = GeoTIFFDataset(filepath=str(tmp_path / "tmp.tif")) assert not dataset._exists(), "File unexpectedly exists" - pattern = r"Failed while loading data from data set GeoTIFFDataset\(.*\)" + pattern = r"Failed while loading data from dataset GeoTIFFDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): dataset.load() diff --git a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py index 7566a559f..b5c35bd8f 100644 --- a/kedro-datasets/tests/biosequence/test_biosequence_dataset.py +++ b/kedro-datasets/tests/biosequence/test_biosequence_dataset.py @@ -38,7 +38,7 @@ def dummy_data(): class TestBioSequenceDataset: def test_save_and_load(self, biosequence_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" biosequence_dataset.save(dummy_data) reloaded = biosequence_dataset.load() assert dummy_data[0].id, reloaded[0].id @@ -49,7 +49,7 @@ def test_save_and_load(self, biosequence_dataset, dummy_data): def test_exists(self, biosequence_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not biosequence_dataset.exists() biosequence_dataset.save(dummy_data) assert biosequence_dataset.exists() @@ -75,7 +75,7 @@ def test_open_extra_args(self, biosequence_dataset, fs_args): def 
test_load_missing_file(self, biosequence_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set BioSequenceDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset BioSequenceDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             biosequence_dataset.load()

diff --git a/kedro-datasets/tests/dask/test_csv_dataset.py b/kedro-datasets/tests/dask/test_csv_dataset.py
index 898606ad3..f8fe8773b 100644
--- a/kedro-datasets/tests/dask/test_csv_dataset.py
+++ b/kedro-datasets/tests/dask/test_csv_dataset.py
@@ -84,7 +84,7 @@ def test_incorrect_credentials_load(self):
     @pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
     def test_empty_credentials_load(self, bad_credentials):
         csv_dataset = CSVDataset(filepath=S3_PATH, credentials=bad_credentials)
-        pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+        pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
         with pytest.raises(DatasetError, match=pattern):
             csv_dataset.load().compute()

@@ -94,7 +94,7 @@ def test_pass_credentials(self, mocker):
         client instantiation on creating S3 connection."""
         client_mock = mocker.patch("botocore.session.Session.create_client")
         s3_dataset = CSVDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-        pattern = r"Failed while loading data from data set CSVDataset\(.+\)"
+        pattern = r"Failed while loading data from dataset CSVDataset\(.+\)"
         with pytest.raises(DatasetError, match=pattern):
             s3_dataset.load().compute()

@@ -121,7 +121,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

     def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not s3_dataset.exists()
         s3_dataset.save(dummy_dd_dataframe)
         assert s3_dataset.exists()
diff --git a/kedro-datasets/tests/dask/test_parquet_dataset.py b/kedro-datasets/tests/dask/test_parquet_dataset.py
index 5babced77..72d348e6b 100644
--- a/kedro-datasets/tests/dask/test_parquet_dataset.py
+++ b/kedro-datasets/tests/dask/test_parquet_dataset.py
@@ -87,7 +87,7 @@ def test_incorrect_credentials_load(self):
     @pytest.mark.parametrize("bad_credentials", [{"key": None, "secret": None}])
     def test_empty_credentials_load(self, bad_credentials):
         parquet_dataset = ParquetDataset(filepath=S3_PATH, credentials=bad_credentials)
-        pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+        pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
         with pytest.raises(DatasetError, match=pattern):
             parquet_dataset.load().compute()

@@ -97,7 +97,7 @@ def test_pass_credentials(self, mocker):
         client instantiation on creating S3 connection."""
         client_mock = mocker.patch("botocore.session.Session.create_client")
         s3_dataset = ParquetDataset(filepath=S3_PATH, credentials=AWS_CREDENTIALS)
-        pattern = r"Failed while loading data from data set ParquetDataset\(.+\)"
+        pattern = r"Failed while loading data from dataset ParquetDataset\(.+\)"
         with pytest.raises(DatasetError, match=pattern):
             s3_dataset.load().compute()

@@ -124,7 +124,7 @@ def test_load_data(self, s3_dataset, dummy_dd_dataframe, mocked_s3_object):

     def test_exists(self, s3_dataset, dummy_dd_dataframe, mocked_s3_bucket):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not s3_dataset.exists()
         s3_dataset.save(dummy_dd_dataframe)
         assert s3_dataset.exists()
diff --git a/kedro-datasets/tests/email/test_message_dataset.py b/kedro-datasets/tests/email/test_message_dataset.py
index d35fa5f8d..f8cd6ec67 100644
--- a/kedro-datasets/tests/email/test_message_dataset.py
+++ b/kedro-datasets/tests/email/test_message_dataset.py
@@ -50,7 +50,7 @@ def dummy_msg():

 class TestEmailMessageDataset:
     def test_save_and_load(self, message_dataset, dummy_msg):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         message_dataset.save(dummy_msg)
         reloaded = message_dataset.load()
         assert dummy_msg.__dict__ == reloaded.__dict__
@@ -59,7 +59,7 @@ def test_save_and_load(self, message_dataset, dummy_msg):

     def test_exists(self, message_dataset, dummy_msg):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not message_dataset.exists()
         message_dataset.save(dummy_msg)
         assert message_dataset.exists()
@@ -91,7 +91,7 @@ def test_open_extra_args(self, message_dataset, fs_args):

     def test_load_missing_file(self, message_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set EmailMessageDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset EmailMessageDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             message_dataset.load()

@@ -149,7 +149,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_message_dataset, dummy_msg):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_message_dataset.save(dummy_msg)
         reloaded = versioned_message_dataset.load()
         assert dummy_msg.__dict__ == reloaded.__dict__
@@ -161,13 +161,13 @@ def test_no_versions(self, versioned_message_dataset):
         versioned_message_dataset.load()

     def test_exists(self, versioned_message_dataset, dummy_msg):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_message_dataset.exists()
         versioned_message_dataset.save(dummy_msg)
         assert versioned_message_dataset.exists()

     def test_prevent_overwrite(self, versioned_message_dataset, dummy_msg):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding text file for a given save version already exists."""
         versioned_message_dataset.save(dummy_msg)
         pattern = (
diff --git a/kedro-datasets/tests/geopandas/test_geojson_dataset.py b/kedro-datasets/tests/geopandas/test_geojson_dataset.py
index d2779e5c2..9c6cb49fe 100644
--- a/kedro-datasets/tests/geopandas/test_geojson_dataset.py
+++ b/kedro-datasets/tests/geopandas/test_geojson_dataset.py
@@ -72,7 +72,7 @@ def test_save_and_load(self, geojson_dataset, dummy_dataframe):
     @pytest.mark.parametrize("geojson_dataset", [{"index": False}], indirect=True)
     def test_load_missing_file(self, geojson_dataset):
         """Check the error while trying to load from missing source."""
-        pattern = r"Failed while loading data from data set GeoJSONDataset"
+        pattern = r"Failed while loading data from dataset GeoJSONDataset"
         with pytest.raises(DatasetError, match=pattern):
             geojson_dataset.load()

@@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_geojson_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_geojson_dataset.save(dummy_dataframe)
         reloaded_df = versioned_geojson_dataset.load()
         assert_frame_equal(reloaded_df, dummy_dataframe)
@@ -168,13 +168,13 @@ def test_no_versions(self, versioned_geojson_dataset):
         versioned_geojson_dataset.load()

     def test_exists(self, versioned_geojson_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_geojson_dataset.exists()
         versioned_geojson_dataset.save(dummy_dataframe)
         assert versioned_geojson_dataset.exists()

     def test_prevent_override(self, versioned_geojson_dataset, dummy_dataframe):
-        """Check the error when attempt to override the same data set
+        """Check the error when attempt to override the same dataset
         version."""
         versioned_geojson_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/holoviews/test_holoviews_writer.py b/kedro-datasets/tests/holoviews/test_holoviews_writer.py
index 1426b2e28..94d722d1f 100644
--- a/kedro-datasets/tests/holoviews/test_holoviews_writer.py
+++ b/kedro-datasets/tests/holoviews/test_holoviews_writer.py
@@ -140,7 +140,7 @@ def test_version_str_repr(self, hv_writer, versioned_hv_writer):
         assert "save_args" in str(versioned_hv_writer)

     def test_prevent_overwrite(self, dummy_hv_object, versioned_hv_writer):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding file for a given save version already exists."""
         versioned_hv_writer.save(dummy_hv_object)
         pattern = (
@@ -185,7 +185,7 @@ def test_load_not_supported(self, versioned_hv_writer):
         versioned_hv_writer.load()

     def test_exists(self, versioned_hv_writer, dummy_hv_object):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_hv_writer.exists()
         versioned_hv_writer.save(dummy_hv_object)
         assert versioned_hv_writer.exists()
diff --git a/kedro-datasets/tests/ibis/test_table_dataset.py b/kedro-datasets/tests/ibis/test_table_dataset.py
index b7ee7baca..644bbc127 100644
--- a/kedro-datasets/tests/ibis/test_table_dataset.py
+++ b/kedro-datasets/tests/ibis/test_table_dataset.py
@@ -52,7 +52,7 @@ def dummy_table(table_dataset_from_csv):

 class TestTableDataset:
     def test_save_and_load(self, table_dataset, dummy_table, database):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         table_dataset.save(dummy_table)
         reloaded = table_dataset.load()
         assert_frame_equal(dummy_table.execute(), reloaded.execute())
@@ -64,7 +64,7 @@ def test_save_and_load(self, table_dataset, dummy_table, database):

     def test_exists(self, table_dataset, dummy_table):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not table_dataset.exists()
         table_dataset.save(dummy_table)
         assert table_dataset.exists()
diff --git a/kedro-datasets/tests/json/test_json_dataset.py b/kedro-datasets/tests/json/test_json_dataset.py
index 52075266d..beaafc343 100644
--- a/kedro-datasets/tests/json/test_json_dataset.py
+++ b/kedro-datasets/tests/json/test_json_dataset.py
@@ -36,7 +36,7 @@ def dummy_data():

 class TestJSONDataset:
     def test_save_and_load(self, json_dataset, dummy_data):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         json_dataset.save(dummy_data)
         reloaded = json_dataset.load()
         assert dummy_data == reloaded
@@ -45,7 +45,7 @@ def test_save_and_load(self, json_dataset, dummy_data):

     def test_exists(self, json_dataset, dummy_data):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not json_dataset.exists()
         json_dataset.save(dummy_data)
         assert json_dataset.exists()
@@ -69,7 +69,7 @@ def test_open_extra_args(self, json_dataset, fs_args):

     def test_load_missing_file(self, json_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set JSONDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset JSONDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             json_dataset.load()

@@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_json_dataset, dummy_data):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_json_dataset.save(dummy_data)
         reloaded = versioned_json_dataset.load()
         assert dummy_data == reloaded
@@ -137,13 +137,13 @@ def test_no_versions(self, versioned_json_dataset):
         versioned_json_dataset.load()

     def test_exists(self, versioned_json_dataset, dummy_data):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_json_dataset.exists()
         versioned_json_dataset.save(dummy_data)
         assert versioned_json_dataset.exists()

     def test_prevent_overwrite(self, versioned_json_dataset, dummy_data):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding json file for a given save version already exists."""
         versioned_json_dataset.save(dummy_data)
         pattern = (
diff --git a/kedro-datasets/tests/matlab/test_matlab_dataset.py b/kedro-datasets/tests/matlab/test_matlab_dataset.py
index 331702db9..284a0892f 100644
--- a/kedro-datasets/tests/matlab/test_matlab_dataset.py
+++ b/kedro-datasets/tests/matlab/test_matlab_dataset.py
@@ -36,7 +36,7 @@ def dummy_data():

 class TestMatlabDataset:
     def test_save_and_load(self, matlab_dataset, dummy_data):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         matlab_dataset.save(dummy_data)
         reloaded = matlab_dataset.load()
         assert (dummy_data == reloaded["data"]).all()
@@ -45,7 +45,7 @@ def test_save_and_load(self, matlab_dataset, dummy_data):

     def test_exists(self, matlab_dataset, dummy_data):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not matlab_dataset.exists()
         matlab_dataset.save(dummy_data)
         assert matlab_dataset.exists()
@@ -69,7 +69,7 @@ def test_open_extra_args(self, matlab_dataset, fs_args):

     def test_load_missing_file(self, matlab_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set MatlabDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset MatlabDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             matlab_dataset.load()

@@ -125,7 +125,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_matlab_dataset, dummy_data):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_matlab_dataset.save(dummy_data)
         reloaded = versioned_matlab_dataset.load()
         assert (dummy_data == reloaded["data"]).all()
@@ -137,13 +137,13 @@ def test_no_versions(self, versioned_matlab_dataset):
         versioned_matlab_dataset.load()

     def test_exists(self, versioned_matlab_dataset, dummy_data):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_matlab_dataset.exists()
         versioned_matlab_dataset.save(dummy_data)
         assert versioned_matlab_dataset.exists()

     def test_prevent_overwrite(self, versioned_matlab_dataset, dummy_data):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding json file for a given save version already exists."""
         versioned_matlab_dataset.save(dummy_data)
         pattern = (
diff --git a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py
index 8b58ed9fc..4cdb58e92 100644
--- a/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py
+++ b/kedro-datasets/tests/matplotlib/test_matplotlib_writer.py
@@ -282,7 +282,7 @@ def test_version_str_repr(self, load_version, save_version):
         assert ver_str in str(chart_versioned)

     def test_prevent_overwrite(self, mock_single_plot, versioned_plot_writer):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding matplotlib file for a given save version already exists."""
         versioned_plot_writer.save(mock_single_plot)
         pattern = (
@@ -341,13 +341,13 @@ def test_load_not_supported(self, versioned_plot_writer):
         versioned_plot_writer.load()

     def test_exists(self, versioned_plot_writer, mock_single_plot):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_plot_writer.exists()
         versioned_plot_writer.save(mock_single_plot)
         assert versioned_plot_writer.exists()

     def test_exists_multiple(self, versioned_plot_writer, mock_list_plot):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_plot_writer.exists()
         versioned_plot_writer.save(mock_list_plot)
         assert versioned_plot_writer.exists()
diff --git a/kedro-datasets/tests/networkx/test_gml_dataset.py b/kedro-datasets/tests/networkx/test_gml_dataset.py
index 6d2d3cea7..8caf55654 100644
--- a/kedro-datasets/tests/networkx/test_gml_dataset.py
+++ b/kedro-datasets/tests/networkx/test_gml_dataset.py
@@ -51,7 +51,7 @@ def dummy_graph_data():

 class TestGMLDataset:
     def test_save_and_load(self, gml_dataset, dummy_graph_data):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         gml_dataset.save(dummy_graph_data)
         reloaded = gml_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -60,7 +60,7 @@ def test_save_and_load(self, gml_dataset, dummy_graph_data):

     def test_load_missing_file(self, gml_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set GMLDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset GMLDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             assert gml_dataset.load()

@@ -100,7 +100,7 @@ def test_catalog_release(self, mocker):
 class TestGMLDatasetVersioned:
     def test_save_and_load(self, versioned_gml_dataset, dummy_graph_data):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_gml_dataset.save(dummy_graph_data)
         reloaded = versioned_gml_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -114,13 +114,13 @@ def test_no_versions(self, versioned_gml_dataset):
         versioned_gml_dataset.load()

     def test_exists(self, versioned_gml_dataset, dummy_graph_data):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_gml_dataset.exists()
         versioned_gml_dataset.save(dummy_graph_data)
         assert versioned_gml_dataset.exists()

     def test_prevent_override(self, versioned_gml_dataset, dummy_graph_data):
-        """Check the error when attempt to override the same data set
+        """Check the error when attempt to override the same dataset
         version."""
         versioned_gml_dataset.save(dummy_graph_data)
         pattern = (
diff --git a/kedro-datasets/tests/networkx/test_graphml_dataset.py b/kedro-datasets/tests/networkx/test_graphml_dataset.py
index acffd14b0..659c3d55b 100644
--- a/kedro-datasets/tests/networkx/test_graphml_dataset.py
+++ b/kedro-datasets/tests/networkx/test_graphml_dataset.py
@@ -51,7 +51,7 @@ def dummy_graph_data():

 class TestGraphMLDataset:
     def test_save_and_load(self, graphml_dataset, dummy_graph_data):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         graphml_dataset.save(dummy_graph_data)
         reloaded = graphml_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -60,7 +60,7 @@ def test_save_and_load(self, graphml_dataset, dummy_graph_data):

     def test_load_missing_file(self, graphml_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set GraphMLDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset GraphMLDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             assert graphml_dataset.load()

@@ -100,7 +100,7 @@ def test_catalog_release(self, mocker):
 class TestGraphMLDatasetVersioned:
     def test_save_and_load(self, versioned_graphml_dataset, dummy_graph_data):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_graphml_dataset.save(dummy_graph_data)
         reloaded = versioned_graphml_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -114,13 +114,13 @@ def test_no_versions(self, versioned_graphml_dataset):
         versioned_graphml_dataset.load()

     def test_exists(self, versioned_graphml_dataset, dummy_graph_data):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_graphml_dataset.exists()
         versioned_graphml_dataset.save(dummy_graph_data)
         assert versioned_graphml_dataset.exists()

     def test_prevent_override(self, versioned_graphml_dataset, dummy_graph_data):
-        """Check the error when attempt to override the same data set
+        """Check the error when attempt to override the same dataset
         version."""
         versioned_graphml_dataset.save(dummy_graph_data)
         pattern = (
diff --git a/kedro-datasets/tests/networkx/test_json_dataset.py b/kedro-datasets/tests/networkx/test_json_dataset.py
index 53039c0c9..d61043126 100644
--- a/kedro-datasets/tests/networkx/test_json_dataset.py
+++ b/kedro-datasets/tests/networkx/test_json_dataset.py
@@ -51,7 +51,7 @@ def dummy_graph_data():

 class TestJSONDataset:
     def test_save_and_load(self, json_dataset, dummy_graph_data):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         json_dataset.save(dummy_graph_data)
         reloaded = json_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -60,7 +60,7 @@ def test_save_and_load(self, json_dataset, dummy_graph_data):

     def test_load_missing_file(self, json_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set JSONDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset JSONDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             assert json_dataset.load()

@@ -140,7 +140,7 @@ def test_catalog_release(self, mocker):
 class TestJSONDatasetVersioned:
     def test_save_and_load(self, versioned_json_dataset, dummy_graph_data):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_json_dataset.save(dummy_graph_data)
         reloaded = versioned_json_dataset.load()
         assert dummy_graph_data.nodes(data=True) == reloaded.nodes(data=True)
@@ -152,13 +152,13 @@ def test_no_versions(self, versioned_json_dataset):
         versioned_json_dataset.load()

     def test_exists(self, versioned_json_dataset, dummy_graph_data):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_json_dataset.exists()
         versioned_json_dataset.save(dummy_graph_data)
         assert versioned_json_dataset.exists()

     def test_prevent_override(self, versioned_json_dataset, dummy_graph_data):
-        """Check the error when attempt to override the same data set
+        """Check the error when attempt to override the same dataset
         version."""
         versioned_json_dataset.save(dummy_graph_data)
         pattern = (
diff --git a/kedro-datasets/tests/pandas/test_csv_dataset.py b/kedro-datasets/tests/pandas/test_csv_dataset.py
index 6a5c52464..449de4cfd 100644
--- a/kedro-datasets/tests/pandas/test_csv_dataset.py
+++ b/kedro-datasets/tests/pandas/test_csv_dataset.py
@@ -87,14 +87,14 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe):

 class TestCSVDataset:
     def test_save_and_load(self, csv_dataset, dummy_dataframe):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         csv_dataset.save(dummy_dataframe)
         reloaded = csv_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)

     def test_exists(self, csv_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not csv_dataset.exists()
         csv_dataset.save(dummy_dataframe)
         assert csv_dataset.exists()
@@ -195,7 +195,7 @@ def test_preview(self, csv_dataset, dummy_dataframe, nrows, expected):

     def test_load_missing_file(self, csv_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set CSVDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset CSVDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             csv_dataset.load()

@@ -258,7 +258,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_csv_dataset.save(dummy_dataframe)
         reloaded_df = versioned_csv_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
@@ -337,13 +337,13 @@ def test_no_versions(self, versioned_csv_dataset):
         versioned_csv_dataset.load()

     def test_exists(self, versioned_csv_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_csv_dataset.exists()
         versioned_csv_dataset.save(dummy_dataframe)
         assert versioned_csv_dataset.exists()

     def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding CSV file for a given save version already exists."""
         versioned_csv_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/pandas/test_excel_dataset.py b/kedro-datasets/tests/pandas/test_excel_dataset.py
index 16f0c8605..ba950d10a 100644
--- a/kedro-datasets/tests/pandas/test_excel_dataset.py
+++ b/kedro-datasets/tests/pandas/test_excel_dataset.py
@@ -58,7 +58,7 @@ def another_dummy_dataframe():

 class TestExcelDataset:
     def test_save_and_load(self, excel_dataset, dummy_dataframe):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         excel_dataset.save(dummy_dataframe)
         reloaded = excel_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)
@@ -66,7 +66,7 @@ def test_save_and_load(self, excel_dataset, dummy_dataframe):
     def test_save_and_load_multiple_sheets(
         self, excel_multisheet_dataset, dummy_dataframe, another_dummy_dataframe
     ):
-        """Test saving and reloading the data set with multiple sheets."""
+        """Test saving and reloading the dataset with multiple sheets."""
         dummy_multisheet = {
             "sheet 1": dummy_dataframe,
             "sheet 2": another_dummy_dataframe,
@@ -78,7 +78,7 @@ def test_save_and_load_multiple_sheets(

     def test_exists(self, excel_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not excel_dataset.exists()
         excel_dataset.save(dummy_dataframe)
         assert excel_dataset.exists()
@@ -169,7 +169,7 @@ def test_preview(self, excel_dataset, dummy_dataframe, nrows, expected):

     def test_load_missing_file(self, excel_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set ExcelDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset ExcelDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             excel_dataset.load()

@@ -238,7 +238,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_excel_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_excel_dataset.save(dummy_dataframe)
         reloaded_df = versioned_excel_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
@@ -264,13 +264,13 @@ def test_versioning_not_supported_in_append_mode(
         )

     def test_exists(self, versioned_excel_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_excel_dataset.exists()
         versioned_excel_dataset.save(dummy_dataframe)
         assert versioned_excel_dataset.exists()

     def test_prevent_overwrite(self, versioned_excel_dataset, dummy_dataframe):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding Excel file for a given save version already exists."""
         versioned_excel_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/pandas/test_feather_dataset.py b/kedro-datasets/tests/pandas/test_feather_dataset.py
index 38d1f0e31..5c2ef8190 100644
--- a/kedro-datasets/tests/pandas/test_feather_dataset.py
+++ b/kedro-datasets/tests/pandas/test_feather_dataset.py
@@ -38,14 +38,14 @@ def dummy_dataframe():

 class TestFeatherDataset:
     def test_save_and_load(self, feather_dataset, dummy_dataframe):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         feather_dataset.save(dummy_dataframe)
         reloaded = feather_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)

     def test_exists(self, feather_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not feather_dataset.exists()
         feather_dataset.save(dummy_dataframe)
         assert feather_dataset.exists()
@@ -92,7 +92,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path):

     def test_load_missing_file(self, feather_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set FeatherDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset FeatherDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             feather_dataset.load()

@@ -154,7 +154,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_feather_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_feather_dataset.save(dummy_dataframe)
         reloaded_df = versioned_feather_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
@@ -166,13 +166,13 @@ def test_no_versions(self, versioned_feather_dataset):
         versioned_feather_dataset.load()

     def test_exists(self, versioned_feather_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_feather_dataset.exists()
         versioned_feather_dataset.save(dummy_dataframe)
         assert versioned_feather_dataset.exists()

     def test_prevent_overwrite(self, versioned_feather_dataset, dummy_dataframe):
-        """Check the error when attempting to overwrite the data set if the
+        """Check the error when attempting to overwrite the dataset if the
         corresponding feather file for a given save version already exists."""
         versioned_feather_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/pandas/test_gbq_dataset.py b/kedro-datasets/tests/pandas/test_gbq_dataset.py
index 63095b74e..19767f15b 100644
--- a/kedro-datasets/tests/pandas/test_gbq_dataset.py
+++ b/kedro-datasets/tests/pandas/test_gbq_dataset.py
@@ -94,7 +94,7 @@ def test_save_extra_params(self, gbq_dataset, save_args):

     def test_load_missing_file(self, gbq_dataset, mocker):
         """Check the error when trying to load missing table."""
-        pattern = r"Failed while loading data from data set GBQTableDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset GBQTableDataset\(.*\)"
         mocked_read_gbq = mocker.patch(
             "kedro_datasets.pandas.gbq_dataset.pd_gbq.read_gbq"
         )
@@ -121,7 +121,7 @@ def test_invalid_location(self, save_args, load_args):
     @pytest.mark.parametrize("save_args", [{"option1": "value1"}], indirect=True)
     @pytest.mark.parametrize("load_args", [{"option2": "value2"}], indirect=True)
     def test_str_representation(self, gbq_dataset, save_args, load_args):
-        """Test string representation of the data set instance."""
+        """Test string representation of the dataset instance."""
instance.""" + """Test string representation of the dataset instance.""" str_repr = str(gbq_dataset) assert "GBQTableDataset" in str_repr assert TABLE_NAME in str_repr @@ -132,7 +132,7 @@ def test_str_representation(self, gbq_dataset, save_args, load_args): assert k in str_repr def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" sql = f"select * from {DATASET}.{TABLE_NAME}" table_id = f"{DATASET}.{TABLE_NAME}" mocked_to_gbq = mocker.patch("kedro_datasets.pandas.gbq_dataset.pd_gbq.to_gbq") @@ -161,7 +161,7 @@ def test_save_load_data(self, gbq_dataset, dummy_dataframe, mocker): "load_args", [{"query_or_table": "Select 1"}], indirect=True ) def test_read_gbq_with_query(self, gbq_dataset, dummy_dataframe, mocker, load_args): - """Test loading data set with query in the argument.""" + """Test loading dataset with query in the argument.""" mocked_read_gbq = mocker.patch( "kedro_datasets.pandas.gbq_dataset.pd_gbq.read_gbq" ) @@ -283,13 +283,13 @@ def test_load_query_file(self, mocker, gbq_sql_file_dataset, dummy_dataframe): assert_frame_equal(dummy_dataframe, loaded_data) def test_save_error(self, gbq_sql_dataset, dummy_dataframe): - """Check the error when trying to save to the data set""" + """Check the error when trying to save to the dataset""" pattern = r"'save' is not supported on GBQQueryDataset" with pytest.raises(DatasetError, match=pattern): gbq_sql_dataset.save(dummy_dataframe) def test_str_representation_sql(self, gbq_sql_dataset, sql_file): - """Test the data set instance string representation""" + """Test the dataset instance string representation""" str_repr = str(gbq_sql_dataset) assert ( f"GBQQueryDataset(filepath=None, load_args={{}}, sql={SQL_QUERY})" @@ -298,7 +298,7 @@ def test_str_representation_sql(self, gbq_sql_dataset, sql_file): assert sql_file not in str_repr def test_str_representation_filepath(self, gbq_sql_file_dataset, sql_file): - """Test the data set instance string representation with filepath arg.""" + """Test the dataset instance string representation with filepath arg.""" str_repr = str(gbq_sql_file_dataset) assert ( f"GBQQueryDataset(filepath={str(sql_file)}, load_args={{}}, sql=None)" diff --git a/kedro-datasets/tests/pandas/test_generic_dataset.py b/kedro-datasets/tests/pandas/test_generic_dataset.py index a6436622d..f5b30e21b 100644 --- a/kedro-datasets/tests/pandas/test_generic_dataset.py +++ b/kedro-datasets/tests/pandas/test_generic_dataset.py @@ -170,7 +170,7 @@ def test_version_str_repr(self, filepath_csv, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -271,13 +271,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting 
to override the dataset if the corresponding Generic (csv) file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_hdf_dataset.py b/kedro-datasets/tests/pandas/test_hdf_dataset.py index 997fa6ded..cad2f81ff 100644 --- a/kedro-datasets/tests/pandas/test_hdf_dataset.py +++ b/kedro-datasets/tests/pandas/test_hdf_dataset.py @@ -45,7 +45,7 @@ def dummy_dataframe(): class TestHDFDataset: def test_save_and_load(self, hdf_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" hdf_dataset.save(dummy_dataframe) reloaded = hdf_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) @@ -54,7 +54,7 @@ def test_save_and_load(self, hdf_dataset, dummy_dataframe): def test_exists(self, hdf_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not hdf_dataset.exists() hdf_dataset.save(dummy_dataframe) assert hdf_dataset.exists() @@ -86,7 +86,7 @@ def test_open_extra_args(self, hdf_dataset, fs_args): def test_load_missing_file(self, hdf_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set HDFDataset\(.*\)" + pattern = r"Failed while loading data from dataset HDFDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): hdf_dataset.load() @@ -117,7 +117,7 @@ def test_catalog_release(self, mocker): fs_mock.invalidate_cache.assert_called_once_with(filepath) def test_save_and_load_df_with_categorical_variables(self, hdf_dataset): - """Test saving and reloading the data set with categorical variables.""" + """Test saving and reloading the dataset with categorical variables.""" df = pd.DataFrame( {"A": [1, 2, 3], "B": pd.Series(list("aab")).astype("category")} ) @@ -166,7 +166,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_hdf_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_hdf_dataset.save(dummy_dataframe) reloaded_df = versioned_hdf_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -178,13 +178,13 @@ def test_no_versions(self, versioned_hdf_dataset): versioned_hdf_dataset.load() def test_exists(self, versioned_hdf_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_hdf_dataset.exists() versioned_hdf_dataset.save(dummy_dataframe) assert versioned_hdf_dataset.exists() def test_prevent_overwrite(self, versioned_hdf_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding hdf file for a given save version already exists.""" versioned_hdf_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_json_dataset.py b/kedro-datasets/tests/pandas/test_json_dataset.py index 20f0a1e21..1f8ceb0d8 100644 --- a/kedro-datasets/tests/pandas/test_json_dataset.py +++ b/kedro-datasets/tests/pandas/test_json_dataset.py @@ -55,14 +55,14 @@ def json_lines_data(tmp_path): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" 
json_dataset.save(dummy_dataframe) reloaded = json_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, json_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_dataframe) assert json_dataset.exists() @@ -117,7 +117,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): json_dataset.load() @@ -208,7 +208,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_json_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_json_dataset.save(dummy_dataframe) reloaded_df = versioned_json_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -220,13 +220,13 @@ def test_no_versions(self, versioned_json_dataset): versioned_json_dataset.load() def test_exists(self, versioned_json_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_json_dataset.exists() versioned_json_dataset.save(dummy_dataframe) assert versioned_json_dataset.exists() def test_prevent_overwrite(self, versioned_json_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding hdf file for a given save version already exists.""" versioned_json_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/pandas/test_parquet_dataset.py b/kedro-datasets/tests/pandas/test_parquet_dataset.py index 74fb65252..7c7e98c98 100644 --- a/kedro-datasets/tests/pandas/test_parquet_dataset.py +++ b/kedro-datasets/tests/pandas/test_parquet_dataset.py @@ -65,7 +65,7 @@ def test_credentials_propagated(self, mocker): mock_fs.assert_called_once_with("file", auto_mkdir=True, **credentials) def test_save_and_load(self, tmp_path, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" filepath = (tmp_path / FILENAME).as_posix() dataset = ParquetDataset(filepath=filepath) dataset.save(dummy_dataframe) @@ -77,7 +77,7 @@ def test_save_and_load(self, tmp_path, dummy_dataframe): assert len(files) == 1 def test_save_and_load_non_existing_dir(self, tmp_path, dummy_dataframe): - """Test saving and reloading the data set to non-existing directory.""" + """Test saving and reloading the dataset to non-existing directory.""" filepath = (tmp_path / "non-existing" / FILENAME).as_posix() dataset = ParquetDataset(filepath=filepath) dataset.save(dummy_dataframe) @@ -86,7 +86,7 @@ def test_save_and_load_non_existing_dir(self, tmp_path, dummy_dataframe): def test_exists(self, parquet_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not parquet_dataset.exists() parquet_dataset.save(dummy_dataframe) assert parquet_dataset.exists() @@ -131,7 +131,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, 

     def test_load_missing_file(self, parquet_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set ParquetDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset ParquetDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             parquet_dataset.load()

@@ -273,7 +273,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe, mocker):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         mocker.patch(
             "pyarrow.fs._ensure_filesystem",
             return_value=PyFileSystem(FSSpecHandler(versioned_parquet_dataset._fs)),
         )
@@ -289,7 +289,7 @@ def test_no_versions(self, versioned_parquet_dataset):
         versioned_parquet_dataset.load()

     def test_exists(self, versioned_parquet_dataset, dummy_dataframe, mocker):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_parquet_dataset.exists()
         mocker.patch(
             "pyarrow.fs._ensure_filesystem",
@@ -301,7 +301,7 @@ def test_exists(self, versioned_parquet_dataset, dummy_dataframe, mocker):
     def test_prevent_overwrite(
         self, versioned_parquet_dataset, dummy_dataframe, mocker
     ):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding parquet file for a given save version already exists."""
         mocker.patch(
             "pyarrow.fs._ensure_filesystem",
diff --git a/kedro-datasets/tests/pandas/test_sql_dataset.py b/kedro-datasets/tests/pandas/test_sql_dataset.py
index 0828f3f18..54698a409 100644
--- a/kedro-datasets/tests/pandas/test_sql_dataset.py
+++ b/kedro-datasets/tests/pandas/test_sql_dataset.py
@@ -138,7 +138,7 @@ def test_unknown_module(self, mocker):
         ).exists()

     def test_str_representation_table(self, table_dataset):
-        """Test the data set instance string representation"""
+        """Test the dataset instance string representation"""
         str_repr = str(table_dataset)
         assert (
             "SQLTableDataset(load_args={}, save_args={'index': False}, "
@@ -424,13 +424,13 @@ def test_load_unknown_sql(self):
         SQLQueryDataset(sql=SQL_QUERY, credentials={"con": FAKE_CONN_STR}).load()

     def test_save_error(self, query_dataset, dummy_dataframe):
-        """Check the error when trying to save to the data set"""
+        """Check the error when trying to save to the dataset"""
         pattern = r"'save' is not supported on SQLQueryDataset"
         with pytest.raises(DatasetError, match=pattern):
             query_dataset.save(dummy_dataframe)

     def test_str_representation_sql(self, query_dataset, sql_file):
-        """Test the data set instance string representation"""
+        """Test the dataset instance string representation"""
         str_repr = str(query_dataset)
         assert (
             "SQLQueryDataset(execution_options={}, filepath=None, "
@@ -440,7 +440,7 @@ def test_str_representation_sql(self, query_dataset, sql_file):
         assert sql_file not in str_repr

     def test_str_representation_filepath(self, query_file_dataset, sql_file):
-        """Test the data set instance string representation with filepath arg."""
+        """Test the dataset instance string representation with filepath arg."""
         str_repr = str(query_file_dataset)
         assert (
             f"SQLQueryDataset(execution_options={{}}, filepath={str(sql_file)}, "
diff --git a/kedro-datasets/tests/pandas/test_xml_dataset.py b/kedro-datasets/tests/pandas/test_xml_dataset.py
index be57351ca..0ba840da2 100644
--- a/kedro-datasets/tests/pandas/test_xml_dataset.py
+++ b/kedro-datasets/tests/pandas/test_xml_dataset.py
@@ -42,14 +42,14 @@ def dummy_dataframe():

 class TestXMLDataset:
     def test_save_and_load(self, xml_dataset, dummy_dataframe):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         xml_dataset.save(dummy_dataframe)
         reloaded = xml_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)

     def test_exists(self, xml_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not xml_dataset.exists()
         xml_dataset.save(dummy_dataframe)
         assert xml_dataset.exists()
@@ -94,7 +94,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path):

     def test_load_missing_file(self, xml_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set XMLDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset XMLDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             xml_dataset.load()

@@ -165,7 +165,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_xml_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_xml_dataset.save(dummy_dataframe)
         reloaded_df = versioned_xml_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
@@ -177,13 +177,13 @@ def test_no_versions(self, versioned_xml_dataset):
         versioned_xml_dataset.load()

     def test_exists(self, versioned_xml_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_xml_dataset.exists()
         versioned_xml_dataset.save(dummy_dataframe)
         assert versioned_xml_dataset.exists()

     def test_prevent_overwrite(self, versioned_xml_dataset, dummy_dataframe):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding hdf file for a given save version already exists."""
         versioned_xml_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/partitions/test_partitioned_dataset.py b/kedro-datasets/tests/partitions/test_partitioned_dataset.py
index 2d16665cb..f0126887d 100644
--- a/kedro-datasets/tests/partitions/test_partitioned_dataset.py
+++ b/kedro-datasets/tests/partitions/test_partitioned_dataset.py
@@ -272,7 +272,7 @@ def test_invalid_dataset(self, dataset, local_csvs):
         loaded_partitions = pds.load()

         for partition, df_loader in loaded_partitions.items():
-            pattern = r"Failed while loading data from data set ParquetDataset(.*)"
+            pattern = r"Failed while loading data from dataset ParquetDataset(.*)"
             with pytest.raises(DatasetError, match=pattern) as exc_info:
                 df_loader()
             error_message = str(exc_info.value)
@@ -293,7 +293,7 @@ def test_invalid_dataset(self, dataset, local_csvs):
             (
                 FakeDataset,
                 r"Dataset type 'tests\.partitions\.test_partitioned_dataset\.FakeDataset' "
-                r"is invalid\: all data set types must extend 'AbstractDataset'",
+                r"is invalid\: all dataset types must extend 'AbstractDataset'",
             ),
             ({}, "'type' is missing from dataset catalog configuration"),
         ],
diff --git a/kedro-datasets/tests/pickle/test_pickle_dataset.py b/kedro-datasets/tests/pickle/test_pickle_dataset.py
index 5e03f91cd..5d5b25bcb 100644
--- a/kedro-datasets/tests/pickle/test_pickle_dataset.py
+++ b/kedro-datasets/tests/pickle/test_pickle_dataset.py
@@ -59,7 +59,7 @@ class TestPickleDataset:
         indirect=True,
     )
     def test_save_and_load(self, pickle_dataset, dummy_dataframe):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         pickle_dataset.save(dummy_dataframe)
         reloaded = pickle_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded)
@@ -68,7 +68,7 @@ def test_save_and_load(self, pickle_dataset, dummy_dataframe):

     def test_exists(self, pickle_dataset, dummy_dataframe):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not pickle_dataset.exists()
         pickle_dataset.save(dummy_dataframe)
         assert pickle_dataset.exists()
@@ -98,7 +98,7 @@ def test_open_extra_args(self, pickle_dataset, fs_args):

     def test_load_missing_file(self, pickle_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set PickleDataset\(.*\)"
+        pattern = r"Failed while loading data from dataset PickleDataset\(.*\)"
         with pytest.raises(DatasetError, match=pattern):
             pickle_dataset.load()

@@ -189,7 +189,7 @@ def test_version_str_repr(self, load_version, save_version):

     def test_save_and_load(self, versioned_pickle_dataset, dummy_dataframe):
         """Test that saved and reloaded data matches the original one for
-        the versioned data set."""
+        the versioned dataset."""
         versioned_pickle_dataset.save(dummy_dataframe)
         reloaded_df = versioned_pickle_dataset.load()
         assert_frame_equal(dummy_dataframe, reloaded_df)
@@ -201,13 +201,13 @@ def test_no_versions(self, versioned_pickle_dataset):
         versioned_pickle_dataset.load()

     def test_exists(self, versioned_pickle_dataset, dummy_dataframe):
-        """Test `exists` method invocation for versioned data set."""
+        """Test `exists` method invocation for versioned dataset."""
         assert not versioned_pickle_dataset.exists()
         versioned_pickle_dataset.save(dummy_dataframe)
         assert versioned_pickle_dataset.exists()

     def test_prevent_overwrite(self, versioned_pickle_dataset, dummy_dataframe):
-        """Check the error when attempting to override the data set if the
+        """Check the error when attempting to override the dataset if the
         corresponding Pickle file for a given save version already exists."""
         versioned_pickle_dataset.save(dummy_dataframe)
         pattern = (
diff --git a/kedro-datasets/tests/pillow/test_image_dataset.py b/kedro-datasets/tests/pillow/test_image_dataset.py
index 325d08aeb..2a3ef296a 100644
--- a/kedro-datasets/tests/pillow/test_image_dataset.py
+++ b/kedro-datasets/tests/pillow/test_image_dataset.py
@@ -41,7 +41,7 @@ def images_equal(image_1, image_2):

 class TestImageDataset:
     def test_save_and_load(self, image_dataset, image_object):
-        """Test saving and reloading the data set."""
+        """Test saving and reloading the dataset."""
         image_dataset.save(image_object)
         reloaded_image = image_dataset.load()
         assert images_equal(image_object, reloaded_image)
@@ -49,7 +49,7 @@ def test_save_and_load(self, image_dataset, image_object):

     def test_exists(self, image_dataset, image_object):
         """Test `exists` method invocation for both existing and
-        nonexistent data set."""
+        nonexistent dataset."""
         assert not image_dataset.exists()
         image_dataset.save(image_object)
         assert image_dataset.exists()
@@ -80,7 +80,7 @@ def test_open_extra_args(self, image_dataset, fs_args):

     def test_load_missing_file(self, image_dataset):
         """Check the error when trying to load missing file."""
-        pattern = r"Failed while loading data from data set ImageDataset\(.*\)"
r"Failed while loading data from dataset ImageDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): image_dataset.load() @@ -147,7 +147,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_image_dataset, image_object): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_image_dataset.save(image_object) reloaded_image = versioned_image_dataset.load() assert images_equal(image_object, reloaded_image) @@ -182,13 +182,13 @@ def test_no_versions(self, versioned_image_dataset): versioned_image_dataset.load() def test_exists(self, versioned_image_dataset, image_object): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_image_dataset.exists() versioned_image_dataset.save(image_object) assert versioned_image_dataset.exists() def test_prevent_overwrite(self, versioned_image_dataset, image_object): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding image file for a given save version already exists.""" versioned_image_dataset.save(image_object) pattern = ( diff --git a/kedro-datasets/tests/plotly/test_html_dataset.py b/kedro-datasets/tests/plotly/test_html_dataset.py index 06ed7291b..a37bffcc9 100644 --- a/kedro-datasets/tests/plotly/test_html_dataset.py +++ b/kedro-datasets/tests/plotly/test_html_dataset.py @@ -33,13 +33,13 @@ def dummy_plot(): class TestHTMLDataset: def test_save(self, html_dataset, dummy_plot): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" html_dataset.save(dummy_plot) assert html_dataset._fs_open_args_save == {"mode": "w", "encoding": "utf-8"} def test_exists(self, html_dataset, dummy_plot): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not html_dataset.exists() html_dataset.save(dummy_plot) assert html_dataset.exists() diff --git a/kedro-datasets/tests/plotly/test_json_dataset.py b/kedro-datasets/tests/plotly/test_json_dataset.py index 571b0d048..0f1835e7a 100644 --- a/kedro-datasets/tests/plotly/test_json_dataset.py +++ b/kedro-datasets/tests/plotly/test_json_dataset.py @@ -35,7 +35,7 @@ def dummy_plot(): class TestJSONDataset: def test_save_and_load(self, json_dataset, dummy_plot): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset.save(dummy_plot) reloaded = json_dataset.load() assert dummy_plot == reloaded @@ -44,14 +44,14 @@ def test_save_and_load(self, json_dataset, dummy_plot): def test_exists(self, json_dataset, dummy_plot): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_plot) assert json_dataset.exists() def test_load_missing_file(self, json_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set JSONDataset\(.*\)" + pattern = r"Failed while loading data from dataset JSONDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): json_dataset.load() diff --git a/kedro-datasets/tests/plotly/test_plotly_dataset.py b/kedro-datasets/tests/plotly/test_plotly_dataset.py index 7f1b0eae9..37ee92a40 100644 --- a/kedro-datasets/tests/plotly/test_plotly_dataset.py +++ 
b/kedro-datasets/tests/plotly/test_plotly_dataset.py @@ -47,7 +47,7 @@ def dummy_dataframe(): class TestPlotlyDataset: def test_save_and_load(self, plotly_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" plotly_dataset.save(dummy_dataframe) reloaded = plotly_dataset.load() assert isinstance(reloaded, graph_objects.Figure) @@ -56,14 +56,14 @@ def test_save_and_load(self, plotly_dataset, dummy_dataframe): def test_exists(self, plotly_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not plotly_dataset.exists() plotly_dataset.save(dummy_dataframe) assert plotly_dataset.exists() def test_load_missing_file(self, plotly_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set PlotlyDataset\(.*\)" + pattern = r"Failed while loading data from dataset PlotlyDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): plotly_dataset.load() diff --git a/kedro-datasets/tests/polars/test_csv_dataset.py b/kedro-datasets/tests/polars/test_csv_dataset.py index 15abf3392..5312e9b48 100644 --- a/kedro-datasets/tests/polars/test_csv_dataset.py +++ b/kedro-datasets/tests/polars/test_csv_dataset.py @@ -89,14 +89,14 @@ def mocked_csv_in_s3(mocked_s3_bucket, mocked_dataframe: pl.DataFrame): class TestCSVDataset: def test_save_and_load(self, csv_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" csv_dataset.save(dummy_dataframe) reloaded = csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded) def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not csv_dataset.exists() csv_dataset.save(dummy_dataframe) assert csv_dataset.exists() @@ -141,7 +141,7 @@ def test_storage_options_dropped(self, load_args, save_args, caplog, tmp_path): def test_load_missing_file(self, csv_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set CSVDataset\(.*\)" + pattern = r"Failed while loading data from dataset CSVDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load() @@ -204,7 +204,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -283,13 +283,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding CSV file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git 
a/kedro-datasets/tests/polars/test_eager_polars_dataset.py b/kedro-datasets/tests/polars/test_eager_polars_dataset.py index 615f3a26e..6da005fb2 100644 --- a/kedro-datasets/tests/polars/test_eager_polars_dataset.py +++ b/kedro-datasets/tests/polars/test_eager_polars_dataset.py @@ -155,7 +155,7 @@ def test_load_args(self, parquet_dataset_ignore): assert df.shape == (2, 3) def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_parquet_dataset.save(dummy_dataframe) reloaded_df = versioned_parquet_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -241,7 +241,7 @@ def test_multiple_saves(self, dummy_dataframe, filepath_parquet): class TestEagerIPCDatasetVersioned: def test_save_and_load(self, versioned_ipc_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_ipc_dataset.save(dummy_dataframe) reloaded_df = versioned_ipc_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -345,7 +345,7 @@ def test_version_str_repr(self, filepath_csv, load_version, save_version): def test_save_and_load(self, versioned_csv_dataset, dummy_dataframe): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_csv_dataset.save(dummy_dataframe) reloaded_df = versioned_csv_dataset.load() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -446,13 +446,13 @@ def test_no_versions(self, versioned_csv_dataset): versioned_csv_dataset.load() def test_exists(self, versioned_csv_dataset, dummy_dataframe): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_csv_dataset.exists() versioned_csv_dataset.save(dummy_dataframe) assert versioned_csv_dataset.exists() def test_prevent_overwrite(self, versioned_csv_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Generic (csv) file for a given save version already exists.""" versioned_csv_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py index e92dbfc50..50528c581 100644 --- a/kedro-datasets/tests/polars/test_lazy_polars_dataset.py +++ b/kedro-datasets/tests/polars/test_lazy_polars_dataset.py @@ -111,7 +111,7 @@ class TestLazyCSVDataset: def test_exists(self, csv_dataset, dummy_dataframe): """Test `exists` method invocation for both existing and - nonexistent data set. + nonexistent dataset. 
""" assert not csv_dataset.exists() csv_dataset.save(dummy_dataframe) @@ -137,7 +137,7 @@ def test_save_and_load(self, csv_dataset, dummy_dataframe): def test_load_missing_file(self, csv_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set LazyPolarsDataset\(.*\)" + pattern = r"Failed while loading data from dataset LazyPolarsDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): csv_dataset.load() @@ -229,7 +229,7 @@ def test_load_args(self, parquet_dataset_ignore, dummy_dataframe, filepath_pq): assert df.shape == (2, 3) def test_save_and_load(self, versioned_parquet_dataset, dummy_dataframe): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" versioned_parquet_dataset.save(dummy_dataframe.lazy()) reloaded_df = versioned_parquet_dataset.load().collect() assert_frame_equal(dummy_dataframe, reloaded_df) @@ -350,7 +350,7 @@ def test_no_versions(self, versioned_parquet_dataset): versioned_parquet_dataset.load() def test_prevent_overwrite(self, versioned_parquet_dataset, dummy_dataframe): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding Generic (parquet) file for a given save version already exists.""" versioned_parquet_dataset.save(dummy_dataframe) pattern = ( diff --git a/kedro-datasets/tests/redis/test_redis_dataset.py b/kedro-datasets/tests/redis/test_redis_dataset.py index 322c35480..7b9dfeb65 100644 --- a/kedro-datasets/tests/redis/test_redis_dataset.py +++ b/kedro-datasets/tests/redis/test_redis_dataset.py @@ -77,7 +77,7 @@ def test_save_and_load( serialised_dummy_object, key, ): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" set_mocker = mocker.patch("redis.StrictRedis.set") get_mocker = mocker.patch( "redis.StrictRedis.get", return_value=serialised_dummy_object @@ -94,7 +94,7 @@ def test_save_and_load( def test_exists(self, mocker, pickle_dataset, dummy_object, key): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" mocker.patch("redis.StrictRedis.exists", return_value=False) assert not pickle_dataset.exists() mocker.patch("redis.StrictRedis.set") diff --git a/kedro-datasets/tests/spark/test_deltatable_dataset.py b/kedro-datasets/tests/spark/test_deltatable_dataset.py index 24ad7a3c6..938e90a31 100644 --- a/kedro-datasets/tests/spark/test_deltatable_dataset.py +++ b/kedro-datasets/tests/spark/test_deltatable_dataset.py @@ -94,7 +94,7 @@ def no_output(x): catalog = DataCatalog({"delta_in": delta_ds}) pipeline = modular_pipeline([node(no_output, "delta_in", None)]) pattern = ( - r"The following data sets cannot be used with " + r"The following datasets cannot be used with " r"multiprocessing: \['delta_in'\]" ) with pytest.raises(AttributeError, match=pattern): diff --git a/kedro-datasets/tests/spark/test_memory_dataset.py b/kedro-datasets/tests/spark/test_memory_dataset.py index 8dd469217..7f4f2c43b 100644 --- a/kedro-datasets/tests/spark/test_memory_dataset.py +++ b/kedro-datasets/tests/spark/test_memory_dataset.py @@ -35,14 +35,14 @@ def memory_dataset(spark_data_frame): def test_load_modify_original_data(memory_dataset, spark_data_frame): - """Check that the data set object is not updated when the original + """Check that the dataset object is not updated when the original SparkDataFrame is changed.""" spark_data_frame = _update_spark_df(spark_data_frame, 1, 
1, -5) assert not _check_equals(memory_dataset.load(), spark_data_frame) def test_save_modify_original_data(spark_data_frame): - """Check that the data set object is not updated when the original + """Check that the dataset object is not updated when the original SparkDataFrame is changed.""" memory_dataset = MemoryDataset() memory_dataset.save(spark_data_frame) @@ -62,5 +62,5 @@ def test_load_returns_same_spark_object(memory_dataset, spark_data_frame): def test_str_representation(memory_dataset): - """Test string representation of the data set""" + """Test string representation of the dataset""" assert "MemoryDataset(data=)" in str(memory_dataset) diff --git a/kedro-datasets/tests/spark/test_spark_dataset.py b/kedro-datasets/tests/spark/test_spark_dataset.py index e4eed4481..bc40f9512 100644 --- a/kedro-datasets/tests/spark/test_spark_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_dataset.py @@ -427,7 +427,7 @@ def test_parallel_runner(self, is_async, spark_in): catalog = DataCatalog({"spark_in": spark_in}) pipeline = modular_pipeline([node(identity, "spark_in", "spark_out")]) pattern = ( - r"The following data sets cannot be used with " + r"The following datasets cannot be used with " r"multiprocessing: \['spark_in'\]" ) with pytest.raises(AttributeError, match=pattern): diff --git a/kedro-datasets/tests/spark/test_spark_hive_dataset.py b/kedro-datasets/tests/spark/test_spark_hive_dataset.py index 5f11674dd..6ea4be9e7 100644 --- a/kedro-datasets/tests/spark/test_spark_hive_dataset.py +++ b/kedro-datasets/tests/spark/test_spark_hive_dataset.py @@ -294,7 +294,7 @@ def test_read_from_non_existent_table(self): ) with pytest.raises( DatasetError, - match=r"Failed while loading data from data set SparkHiveDataset" + match=r"Failed while loading data from dataset SparkHiveDataset" r"|table_doesnt_exist" r"|UnresolvedRelation", ): diff --git a/kedro-datasets/tests/svmlight/test_svmlight_dataset.py b/kedro-datasets/tests/svmlight/test_svmlight_dataset.py index ec504deda..7d9176a62 100644 --- a/kedro-datasets/tests/svmlight/test_svmlight_dataset.py +++ b/kedro-datasets/tests/svmlight/test_svmlight_dataset.py @@ -39,7 +39,7 @@ def dummy_data(): class TestSVMLightDataset: def test_save_and_load(self, svm_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" svm_dataset.save(dummy_data) reloaded_features, reloaded_label = svm_dataset.load() original_features, original_label = dummy_data @@ -50,7 +50,7 @@ def test_save_and_load(self, svm_dataset, dummy_data): def test_exists(self, svm_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not svm_dataset.exists() svm_dataset.save(dummy_data) assert svm_dataset.exists() @@ -82,7 +82,7 @@ def test_open_extra_args(self, svm_dataset, fs_args): def test_load_missing_file(self, svm_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set SVMLightDataset\(.*\)" + pattern = r"Failed while loading data from dataset SVMLightDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): svm_dataset.load() @@ -135,7 +135,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_svm_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_svm_dataset.save(dummy_data) reloaded_features, reloaded_label = 
versioned_svm_dataset.load() original_features, original_label = dummy_data @@ -149,13 +149,13 @@ def test_no_versions(self, versioned_svm_dataset): versioned_svm_dataset.load() def test_exists(self, versioned_svm_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_svm_dataset.exists() versioned_svm_dataset.save(dummy_data) assert versioned_svm_dataset.exists() def test_prevent_overwrite(self, versioned_svm_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" versioned_svm_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py index ea22a3188..8855dc4f3 100644 --- a/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py +++ b/kedro-datasets/tests/tensorflow/test_tensorflow_model_dataset.py @@ -142,7 +142,7 @@ class TestTensorFlowModelDataset: """No versioning passed to creator""" def test_save_and_load(self, tf_model_dataset, dummy_tf_base_model, dummy_x_test): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" predictions = dummy_tf_base_model.predict(dummy_x_test) tf_model_dataset.save(dummy_tf_base_model) @@ -155,14 +155,12 @@ def test_save_and_load(self, tf_model_dataset, dummy_tf_base_model, dummy_x_test def test_load_missing_model(self, tf_model_dataset): """Test error message when trying to load missing model.""" - pattern = ( - r"Failed while loading data from data set TensorFlowModelDataset\(.*\)" - ) + pattern = r"Failed while loading data from dataset TensorFlowModelDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): tf_model_dataset.load() def test_exists(self, tf_model_dataset, dummy_tf_base_model): - """Test `exists` method invocation for both existing and nonexistent data set.""" + """Test `exists` method invocation for both existing and nonexistent dataset.""" assert not tf_model_dataset.exists() tf_model_dataset.save(dummy_tf_base_model) assert tf_model_dataset.exists() @@ -301,7 +299,7 @@ def test_save_and_load( load_version, save_version, ): - """Test saving and reloading the versioned data set.""" + """Test saving and reloading the versioned dataset.""" predictions = dummy_tf_base_model.predict(dummy_x_test) versioned_tf_model_dataset.save(dummy_tf_base_model) @@ -334,7 +332,7 @@ def test_hdf5_save_format( np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6) def test_prevent_overwrite(self, dummy_tf_base_model, versioned_tf_model_dataset): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding file for a given save version already exists.""" versioned_tf_model_dataset.save(dummy_tf_base_model) pattern = ( @@ -374,7 +372,7 @@ def test_http_filesystem_no_versioning(self, tensorflow_model_dataset): ) def test_exists(self, versioned_tf_model_dataset, dummy_tf_base_model): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_tf_model_dataset.exists() versioned_tf_model_dataset.save(dummy_tf_base_model) assert versioned_tf_model_dataset.exists() diff --git 
a/kedro-datasets/tests/text/test_text_dataset.py b/kedro-datasets/tests/text/test_text_dataset.py index 4109c163e..a883fa552 100644 --- a/kedro-datasets/tests/text/test_text_dataset.py +++ b/kedro-datasets/tests/text/test_text_dataset.py @@ -31,7 +31,7 @@ def versioned_txt_dataset(filepath_txt, load_version, save_version): class TestTextDataset: def test_save_and_load(self, txt_dataset): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" txt_dataset.save(STRING) reloaded = txt_dataset.load() assert STRING == reloaded @@ -40,7 +40,7 @@ def test_save_and_load(self, txt_dataset): def test_exists(self, txt_dataset): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not txt_dataset.exists() txt_dataset.save(STRING) assert txt_dataset.exists() @@ -56,7 +56,7 @@ def test_open_extra_args(self, txt_dataset, fs_args): def test_load_missing_file(self, txt_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set TextDataset\(.*\)" + pattern = r"Failed while loading data from dataset TextDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): txt_dataset.load() @@ -109,7 +109,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_txt_dataset): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_txt_dataset.save(STRING) reloaded_df = versioned_txt_dataset.load() assert STRING == reloaded_df @@ -121,13 +121,13 @@ def test_no_versions(self, versioned_txt_dataset): versioned_txt_dataset.load() def test_exists(self, versioned_txt_dataset): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_txt_dataset.exists() versioned_txt_dataset.save(STRING) assert versioned_txt_dataset.exists() def test_prevent_overwrite(self, versioned_txt_dataset): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding text file for a given save version already exists.""" versioned_txt_dataset.save(STRING) pattern = ( diff --git a/kedro-datasets/tests/tracking/test_json_dataset.py b/kedro-datasets/tests/tracking/test_json_dataset.py index 0353d4669..de24ba9b9 100644 --- a/kedro-datasets/tests/tracking/test_json_dataset.py +++ b/kedro-datasets/tests/tracking/test_json_dataset.py @@ -35,7 +35,7 @@ def dummy_data(): class TestJSONDataset: def test_save(self, filepath_json, dummy_data, tmp_path, save_version): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" json_dataset = JSONDataset( filepath=filepath_json, version=Version(None, save_version) ) @@ -68,7 +68,7 @@ def test_load_fail(self, json_dataset, dummy_data): def test_exists(self, json_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not json_dataset.exists() json_dataset.save(dummy_data) assert json_dataset.exists() @@ -143,7 +143,7 @@ def test_version_str_repr(self, load_version, save_version): assert "save_args={'indent': 2}" in str(ds_versioned) def test_prevent_overwrite(self, explicit_versioned_json_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the 
dataset if the corresponding json file for a given save version already exists.""" explicit_versioned_json_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/tracking/test_metrics_dataset.py b/kedro-datasets/tests/tracking/test_metrics_dataset.py index 0f6e07c37..b638fcdfd 100644 --- a/kedro-datasets/tests/tracking/test_metrics_dataset.py +++ b/kedro-datasets/tests/tracking/test_metrics_dataset.py @@ -41,7 +41,7 @@ def test_save_data( filepath_json, save_version, ): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" metrics_dataset = MetricsDataset( filepath=filepath_json, version=Version(None, save_version) ) @@ -74,7 +74,7 @@ def test_load_fail(self, metrics_dataset, dummy_data): def test_exists(self, metrics_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not metrics_dataset.exists() metrics_dataset.save(dummy_data) assert metrics_dataset.exists() @@ -156,7 +156,7 @@ def test_version_str_repr(self, load_version, save_version): assert "save_args={'indent': 2}" in str(ds_versioned) def test_prevent_overwrite(self, explicit_versioned_metrics_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding json file for a given save version already exists.""" explicit_versioned_metrics_dataset.save(dummy_data) pattern = ( diff --git a/kedro-datasets/tests/video/test_video_dataset.py b/kedro-datasets/tests/video/test_video_dataset.py index 357cfd001..d37fae3ca 100644 --- a/kedro-datasets/tests/video/test_video_dataset.py +++ b/kedro-datasets/tests/video/test_video_dataset.py @@ -58,7 +58,7 @@ def test_load_mp4(self, filepath_mp4, mp4_object): assert_videos_equal(loaded_video, mp4_object) def test_save_and_load_mp4(self, empty_dataset_mp4, mp4_object): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" empty_dataset_mp4.save(mp4_object) reloaded_video = empty_dataset_mp4.load() assert_videos_equal(mp4_object, reloaded_video) @@ -109,7 +109,7 @@ def test_save_generator_video( def test_exists(self, empty_dataset_mp4, mp4_object): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not empty_dataset_mp4.exists() empty_dataset_mp4.save(mp4_object) assert empty_dataset_mp4.exists() @@ -123,7 +123,7 @@ def test_convert_video(self, empty_dataset_mp4, mjpeg_object): def test_load_missing_file(self, empty_dataset_mp4): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set VideoDataset\(.*\)" + pattern = r"Failed while loading data from dataset VideoDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): empty_dataset_mp4.load() diff --git a/kedro-datasets/tests/yaml/test_yaml_dataset.py b/kedro-datasets/tests/yaml/test_yaml_dataset.py index 243f0e0d5..611baee0c 100644 --- a/kedro-datasets/tests/yaml/test_yaml_dataset.py +++ b/kedro-datasets/tests/yaml/test_yaml_dataset.py @@ -39,7 +39,7 @@ def dummy_data(): class TestYAMLDataset: def test_save_and_load(self, yaml_dataset, dummy_data): - """Test saving and reloading the data set.""" + """Test saving and reloading the dataset.""" yaml_dataset.save(dummy_data) reloaded = yaml_dataset.load() assert dummy_data == reloaded @@ -48,7 +48,7 @@ def test_save_and_load(self, yaml_dataset, dummy_data): def test_exists(self, 
yaml_dataset, dummy_data): """Test `exists` method invocation for both existing and - nonexistent data set.""" + nonexistent dataset.""" assert not yaml_dataset.exists() yaml_dataset.save(dummy_data) assert yaml_dataset.exists() @@ -72,7 +72,7 @@ def test_open_extra_args(self, yaml_dataset, fs_args): def test_load_missing_file(self, yaml_dataset): """Check the error when trying to load missing file.""" - pattern = r"Failed while loading data from data set YAMLDataset\(.*\)" + pattern = r"Failed while loading data from dataset YAMLDataset\(.*\)" with pytest.raises(DatasetError, match=pattern): yaml_dataset.load() @@ -137,7 +137,7 @@ def test_version_str_repr(self, load_version, save_version): def test_save_and_load(self, versioned_yaml_dataset, dummy_data): """Test that saved and reloaded data matches the original one for - the versioned data set.""" + the versioned dataset.""" versioned_yaml_dataset.save(dummy_data) reloaded = versioned_yaml_dataset.load() assert dummy_data == reloaded @@ -149,13 +149,13 @@ def test_no_versions(self, versioned_yaml_dataset): versioned_yaml_dataset.load() def test_exists(self, versioned_yaml_dataset, dummy_data): - """Test `exists` method invocation for versioned data set.""" + """Test `exists` method invocation for versioned dataset.""" assert not versioned_yaml_dataset.exists() versioned_yaml_dataset.save(dummy_data) assert versioned_yaml_dataset.exists() def test_prevent_overwrite(self, versioned_yaml_dataset, dummy_data): - """Check the error when attempting to override the data set if the + """Check the error when attempting to override the dataset if the corresponding yaml file for a given save version already exists.""" versioned_yaml_dataset.save(dummy_data) pattern = (