From 85faa71188fc86c3fadf10a12f4173d14ce97990 Mon Sep 17 00:00:00 2001 From: Josiah Outram Halstead Date: Tue, 2 Jan 2024 10:57:25 +0000 Subject: [PATCH 1/4] Override path formatting method for data-URIs This fixes an issue with `UPath.stat()` for data URIs, where fsspec was expecting the full URI to be passed to `fs.info` instead of just the URI path as was previously implemented. --- upath/implementations/data.py | 13 +++++++++++++ upath/registry.py | 1 + 2 files changed, 14 insertions(+) create mode 100644 upath/implementations/data.py diff --git a/upath/implementations/data.py b/upath/implementations/data.py new file mode 100644 index 00000000..69cdcc72 --- /dev/null +++ b/upath/implementations/data.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import upath.core + + +class _DataAccessor(upath.core._FSSpecAccessor): + + def _format_path(self, path): + return str(path) + +class DataPath(upath.core.UPath): + _default_accessor = _DataAccessor + diff --git a/upath/registry.py b/upath/registry.py index 085b2274..177c5ef6 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -63,6 +63,7 @@ class _Registry(MutableMapping[str, "type[upath.core.UPath]"]): "abfss": "upath.implementations.cloud.AzurePath", "adl": "upath.implementations.cloud.AzurePath", "az": "upath.implementations.cloud.AzurePath", + "data": "upath.implementations.data.DataPath", "file": "upath.implementations.local.LocalPath", "gcs": "upath.implementations.cloud.GCSPath", "gs": "upath.implementations.cloud.GCSPath", From b0171ddbf651bc7b49119b7aef0708833cc0dbcf Mon Sep 17 00:00:00 2001 From: Josiah Outram Halstead Date: Tue, 2 Jan 2024 22:18:59 +0000 Subject: [PATCH 2/4] Update registry tests --- upath/tests/test_registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/upath/tests/test_registry.py b/upath/tests/test_registry.py index 93388f11..edf0c27f 100644 --- a/upath/tests/test_registry.py +++ b/upath/tests/test_registry.py @@ -11,6 +11,7 @@ "abfss", "adl", "az", + "data", 
"file", "gcs", "gs", From cda3f1c9d9e7eee5ea167e224c5581407328a735 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 10 Feb 2024 19:00:55 +0100 Subject: [PATCH 3/4] upath.implementations.data: adjust DataPath and add tests --- upath/implementations/data.py | 22 ++- upath/tests/implementations/test_data.py | 175 +++++++++++++++++++++++ 2 files changed, 192 insertions(+), 5 deletions(-) create mode 100644 upath/tests/implementations/test_data.py diff --git a/upath/implementations/data.py b/upath/implementations/data.py index 69cdcc72..251a0683 100644 --- a/upath/implementations/data.py +++ b/upath/implementations/data.py @@ -3,11 +3,23 @@ import upath.core -class _DataAccessor(upath.core._FSSpecAccessor): +class DataPath(upath.core.UPath): - def _format_path(self, path): - return str(path) + @property + def parts(self): + return (self.path,) -class DataPath(upath.core.UPath): - _default_accessor = _DataAccessor + def __str__(self): + return self.path + + def with_segments(self, *pathsegments): + raise NotImplementedError("path operation not supported by DataPath") + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + raise FileExistsError(str(self)) + + def write_bytes(self, data): + raise NotImplementedError("DataPath does not support writing") + def write_text(self, data, **kwargs): + raise NotImplementedError("DataPath does not support writing") diff --git a/upath/tests/implementations/test_data.py b/upath/tests/implementations/test_data.py new file mode 100644 index 00000000..1ed5fd5f --- /dev/null +++ b/upath/tests/implementations/test_data.py @@ -0,0 +1,175 @@ +import stat + +import fsspec +import pytest + +from upath import UPath +from upath.implementations.data import DataPath +from upath.tests.cases import BaseTests + + +class TestUPathDataPath(BaseTests): + """ + Unit-tests for the DataPath implementation of UPath. + """ + + @pytest.fixture(autouse=True) + def path(self): + """ + Fixture for the UPath instance to be tested. 
+ """ + path = "" # noqa: E501 + self.path = UPath(path) + + def test_is_DataPath(self): + """ + Test that the path is a DataPath instance. + """ + assert isinstance(self.path, DataPath) + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_stat_dir_st_mode(self): + super().test_stat_dir_st_mode() + + def test_stat_file_st_mode(self): + assert self.path.is_file() + assert stat.S_ISREG(self.path.stat().st_mode) + + def test_stat_st_size(self): + assert self.path.stat().st_size == 69 + + def test_exists(self): + # datapath exists is always true... + path = self.path + assert path.exists() + + @pytest.mark.skip(reason="DataPath does not support joins or globs") + def test_glob(self, pathlib_base): + with pytest.raises(NotImplementedError): + pathlib_base.glob("*") + + def test_is_dir(self): + assert not self.path.is_dir() + + def test_is_file(self): + assert self.path.is_file() + + def test_iterdir(self): + with pytest.raises(NotImplementedError): + list(self.path.iterdir()) + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_iterdir2(self): + pass + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_iterdir_trailing_slash(self): + pass + + def test_mkdir(self): + with pytest.raises(FileExistsError): + self.path.mkdir() + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_mkdir_exists_ok_true(self): + pass + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_mkdir_exists_ok_false(self): + pass + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_mkdir_parents_true_exists_ok_true(self): + pass + + @pytest.mark.skip(reason="DataPath does not have directories") + def test_mkdir_parents_true_exists_ok_false(self): + pass + + def test_read_bytes(self, pathlib_base): + assert len(self.path.read_bytes()) == 69 + + def test_read_text(self, local_testdir): + assert UPath("data:base64,SGVsbG8gV29ybGQ=").read_text() == "Hello World" 
+ + def test_parents(self): + with pytest.raises(NotImplementedError): + self.path.parents[0] + + def test_rename(self): + with pytest.raises(NotImplementedError): + self.path.rename("newname") + + def test_rename2(self): + self.path.rename(self.path) + + def test_rglob(self, pathlib_base): + with pytest.raises(NotImplementedError): + list(self.path.rglob("*")) + + def test_touch_unlink(self): + with pytest.raises(NotImplementedError): + self.path.touch() + with pytest.raises(NotImplementedError): + self.path.unlink() + + def test_write_bytes(self, pathlib_base): + with pytest.raises(NotImplementedError): + self.path.write_bytes(b"test") + + def test_write_text(self, pathlib_base): + with pytest.raises(NotImplementedError): + self.path.write_text("test") + + def test_read_with_fsspec(self): + pth = self.path + fs = fsspec.filesystem(pth.protocol, **pth.storage_options) + assert fs.cat_file(pth.path) == pth.read_bytes() + + @pytest.mark.skip(reason="DataPath does not support joins") + def test_pickling_child_path(self): + pass + + @pytest.mark.skip(reason="DataPath does not support joins") + def test_child_path(self): + pass + + def test_with_name(self): + with pytest.raises(NotImplementedError): + self.path.with_name("newname") + + def test_with_suffix(self): + with pytest.raises(NotImplementedError): + self.path.with_suffix(".new") + + def test_with_stem(self): + with pytest.raises(NotImplementedError): + self.path.with_stem("newname") + + @pytest.mark.skip(reason="DataPath does not support joins") + def test_repr_after_with_suffix(self): + pass + + @pytest.mark.skip(reason="DataPath does not support joins") + def test_repr_after_with_name(self): + pass + + @pytest.mark.skip(reason="DataPath does not support directories") + def test_rmdir_no_dir(self): + pass + + @pytest.mark.skip(reason="DataPath does not support directories") + def test_iterdir_no_dir(self): + pass + + @pytest.mark.skip(reason="DataPath does not support joins") + def 
test_private_url_attr_in_sync(self): + pass + + @pytest.mark.skip(reason="DataPath does not support joins") + def test_fsspec_compat(self): + pass + + def test_rmdir_not_empty(self): + with pytest.raises(NotADirectoryError): + self.path.rmdir() From 8bfc5eeeb80ee911576877913b08b897d3c842c8 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 10 Feb 2024 19:06:21 +0100 Subject: [PATCH 4/4] tests: xfail data tests when fsspec is too old --- upath/tests/implementations/test_data.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/upath/tests/implementations/test_data.py b/upath/tests/implementations/test_data.py index 1ed5fd5f..6342cc46 100644 --- a/upath/tests/implementations/test_data.py +++ b/upath/tests/implementations/test_data.py @@ -7,6 +7,12 @@ from upath.implementations.data import DataPath from upath.tests.cases import BaseTests +from ..utils import xfail_if_version + +pytestmark = xfail_if_version( + "fsspec", lt="2023.12.2", reason="fsspec<2023.12.2 does not support data" +) + class TestUPathDataPath(BaseTests): """