Skip to content

Commit

Permalink
Merge pull request #502 from koordinates/dataset-names-2
Browse files Browse the repository at this point in the history
Loosen dataset path restrictions
  • Loading branch information
craigds authored Nov 11, 2021
2 parents 20ce4fc + c1bdb0f commit 661689d
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
- `kart create-patch` now supports `--patch-type minimal`, which creates a much-smaller patch; relying on the patch recipient having the HEAD commit in their repository [#482](https://github.com/koordinates/kart/issues/482)
- `kart apply` now applies both types of patch.
* `kart log` now accepts a `--` marker to signal that all remaining arguments are dataset names. [#498](https://github.com/koordinates/kart/issues/498)
* `import` from a Postgres or MSSQL source will no longer prepend the database schema name to the imported dataset path.
* Bugfix: Diffing between an old commit and the current working copy no longer fails when datasets have been deleted in the intervening commits.
* Bugfix: Existing auto-incrementing integer PK sequences are now overwritten properly in GPKG working copies. [#468](https://github.com/koordinates/kart/pull/468)

Expand Down
9 changes: 6 additions & 3 deletions docs/DATASETS_v3.md
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,12 @@ Datasets have names, which can actually be hierarchical paths, e.g. `hydro/sound

* Paths may contain most unicode characters
* Paths must not contain any ASCII control characters (codepoints 00 to 1F), or any of the characters `:`, `<`, `>`, `"`, `|`, `?`, or `*`
* Paths must begin with a letter or an underscore (`_`).
* No path component may end with a `.` or a ` ` (space)
* Path components may not be any of these [reserved Windows filenames](https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file?redirectedfrom=MSDN#naming-conventions): `CON`, `PRN`, `AUX`, `NUL`, `COM1`, `COM2`, `COM3`, `COM4`, `COM5`, `COM6`, `COM7`, `COM8`, `COM9`, `LPT1`, `LPT2`, `LPT3`, `LPT4`, `LPT5`, `LPT6`, `LPT7`, `LPT8`, `LPT9`.
* Paths must not start or end with a `/`
* No path component (`/`-separated) may:
- be empty
- start or end with a `.`
- end with a ` ` (space)
- be any of these [reserved Windows filenames](https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file?redirectedfrom=MSDN#naming-conventions): `CON`, `PRN`, `AUX`, `NUL`, `COM1`, `COM2`, `COM3`, `COM4`, `COM5`, `COM6`, `COM7`, `COM8`, `COM9`, `LPT1`, `LPT2`, `LPT3`, `LPT4`, `LPT5`, `LPT6`, `LPT7`, `LPT8`, `LPT9`.
* Repositories may not contain more than one dataset with names that differ only by case.

Additionally, backslashes (`\`) in dataset paths are converted to forward slashes (`/`) when imported.
Expand Down
10 changes: 4 additions & 6 deletions kart/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import functools
import logging
import time
import unicodedata

from . import crs_util
from .import_source import ImportSource
Expand Down Expand Up @@ -119,11 +118,6 @@ def _validate_dataset_path(cls, path: str) -> None:
if not path_bytes:
raise InvalidOperation(f"Dataset path {path!r} may not be empty")

if not (path[0] == "_" or unicodedata.category(path[0]).startswith("L")):
raise InvalidOperation(
f"Dataset path {path!r} must begin with a letter or an underscore"
)

if path_bytes.intersection(control_chars):
raise InvalidOperation(
f"Dataset path {path!r} may not contain ASCII control characters"
Expand All @@ -133,6 +127,10 @@ def _validate_dataset_path(cls, path: str) -> None:
raise InvalidOperation(
f"Dataset path {path!r} may not contain any of these characters: {other}"
)

if path.startswith("/"):
raise InvalidOperation(f"Dataset path {path!r} may not start with a '/'")

components = path.upper().split("/")
if any(not c for c in components):
raise InvalidOperation(
Expand Down
1 change: 0 additions & 1 deletion kart/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,6 @@ def import_(
dest_path = table

meta_overrides = table_info.get(dest_path, {})
dest_path = dest_path.replace(".", "/")
if is_windows:
dest_path = dest_path.replace("\\", "/") # git paths use / as a delimiter

Expand Down
6 changes: 2 additions & 4 deletions kart/sqlalchemy_import_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
class SqlAlchemyImportSource(ImportSource):
"""
ImportSource that uses SqlAlchemy directly to import into Kart.
Currently only GPKG is supported, but in theory should work for Postgis, SQL Server, MySQL.
Supports GPKG, Postgres (+ PostGIS), SQL Server, MySQL.
"""

CURSOR_SIZE = 10000
Expand Down Expand Up @@ -152,9 +152,7 @@ def aggregate_import_source_desc(self, import_sources):
return desc

def default_dest_path(self):
return self._normalise_dataset_path(
self.table_location_within_source or self.table
)
return self._normalise_dataset_path(self.table)

@functools.lru_cache(maxsize=1)
def get_tables(self):
Expand Down
19 changes: 11 additions & 8 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,20 +1062,23 @@ def test_import_list_formats(data_archive_readonly, cli_runner):
pytest.param(("a:b",), False, id="special-ascii-chars"),
pytest.param(("a\nb",), False, id="ascii-control-chars"),
pytest.param(("a/b/c",), True, id="slash-separated"),
pytest.param(("/a",), False, id="leading-slash"),
pytest.param(("a/b/",), False, id="trailing-slash"),
pytest.param(("a//b",), False, id="empty-component"),
pytest.param((r"a\b\c",), True, id="backslash-separated"),
pytest.param(("1a",), False, id="leading-numeral"),
pytest.param(("1a",), True, id="leading-numeral"),
pytest.param(("a1",), True, id="trailing-numeral"),
pytest.param(("_1",), True, id="leading-underscore"),
pytest.param((".a",), False, id="leading-dot"),
pytest.param(("a.",), False, id="trailing-dot"),
pytest.param(("a.b",), True, id="contains-dot"),
pytest.param((" b",), True, id="leading-space"),
pytest.param(("a ",), False, id="trailing-space"),
pytest.param(("a b",), True, id="contains-space"),
pytest.param(("a/ b",), True, id="component-leading-space"),
pytest.param(("a /b",), False, id="component-trailing-space"),
pytest.param(("COM1",), False, id="windows-reserved-filename"),
pytest.param(("com1",), False, id="windows-reserved-filename-ignoring-case"),
pytest.param(("ⅶ",), False, id="leading-numeral-unicode"),
pytest.param(("ⅶ",), True, id="leading-numeral-unicode"),
pytest.param(("Ἢⅶ",), True, id="trailing-numeral-unicode"),
pytest.param(("a", "a"), False, id="identical-names"),
pytest.param(("a", "A"), False, id="identical-names-ignoring-case"),
Expand All @@ -1095,13 +1098,13 @@ def test_validate_dataset_paths(names, is_okay):

def test_import_bad_dataset_path(data_archive, data_archive_readonly, cli_runner):
with data_archive_readonly("gpkg-polygons") as data:
with data_archive("points") as repo_path:
# importing as '1a' isn't allowed. See test_validate_dataset_paths for full coverage of these cases.
with data_archive("points"):
# importing as 'a.' isn't allowed. See test_validate_dataset_paths for full coverage of these cases.
r = cli_runner.invoke(
[
"import",
str(data / "nz-waca-adjustments.gpkg"),
f"{H.POLYGONS.LAYER}:1a",
f"{H.POLYGONS.LAYER}:a.",
]
)
assert r.exit_code == 20, r.stderr
Expand All @@ -1121,8 +1124,8 @@ def test_import_backslash_in_dataset_path(
data_archive, data_archive_readonly, cli_runner
):
with data_archive_readonly("gpkg-polygons") as data:
with data_archive("points") as repo_path:
# importing as '1a' isn't allowed. See test_validate_dataset_paths for full coverage of these cases.
with data_archive("points"):
# See test_validate_dataset_paths for full coverage of these cases.
r = cli_runner.invoke(
[
"import",
Expand Down

0 comments on commit 661689d

Please sign in to comment.