From b0878084dff79a8be5bd8d5fef5ef01e28ec40cc Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Wed, 6 Oct 2021 14:41:50 +1300 Subject: [PATCH 1/2] Fix GeoJSON diff regressions in v0.10.1 * Revert `kart diff --output-format=geojson` output handling to the pre-0.10.1 behaviour. That is: - by default, output goes to stdout - if output goes to stdout and there are more than one dataset, a usage error occurs (exit code 2) with an error message. - `--output=mydirectory` can be used to set a directory into which to write output files; one file per dataset. - if an existing directory is given, any `*.geojson` files in the directory are deleted before creating output files - if a path to an existing regular file is given, a usage error occurs. * Remove `kart show --output-format=geojson`. This doesn't work well with multiple-dataset commits and was added rather coincidentally between 0.10.0 and 0.10.1. We'll remove it for simplicity as it isn't very usable; if multiple datasets end up in a single commit, then this shows all features together in one FeatureCollection, even if their feature schemas are different. --- CHANGELOG.md | 2 + kart/json_diff_writers.py | 54 +++++++++++----------- kart/show.py | 7 ++- tests/test_diff.py | 94 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 122 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08dbc76ff..d1aa64d56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ _When adding new entries to the changelog, please include issue/PR numbers where ## 0.10.5 (UNRELEASED) + * Fixed regressions in `diff -o geojson` since Kart 0.10.1 [#487](https://github.com/koordinates/kart/issues/487) + * Removed `kart show -o geojson` [#487](https://github.com/koordinates/kart/issues/487#issuecomment-933561924) * Fix for [#478](https://github.com/koordinates/kart/issues/478) `merge --dry-run` raises error * Fix for [#483](https://github.com/koordinates/kart/issues/483) `diff` error with Z/M geometries diff --git a/kart/json_diff_writers.py b/kart/json_diff_writers.py index 82ac28e56..202ca229b 100644 --- a/kart/json_diff_writers.py +++ b/kart/json_diff_writers.py @@ -262,26 +262,29 @@ class GeojsonDiffWriter(BaseDiffWriter): @classmethod def _check_output_path(cls, repo, output_path): + if isinstance(output_path, Path): + if output_path.is_file(): + raise click.BadParameter( + "Output path should be a directory for geojson format.", + param_hint="--output", + ) + if not output_path.exists(): + output_path.mkdir() + else: + for p in output_path.glob("*.geojson"): + p.unlink() + else: + output_path = "-" return output_path def write_diff(self): - if isinstance(self.output_path, Path) and self.output_path.is_dir(): - self.write_file_per_dataset() - else: - output_obj = { - "type": "FeatureCollection", - "features": self.filtered_repo_feature_deltas_as_geojson(), - } + has_changes = False - dump_json_output( - output_obj, - self.output_path, - json_style=self.json_style, + if len(self.all_ds_paths) > 1 and not isinstance(self.output_path, Path): + raise click.BadParameter( + "Need to specify a directory for geojson with >1 dataset", + param_hint="--output", ) - self.write_warnings_footer() - - def write_file_per_dataset(self): - has_changes = False for ds_path in self.all_ds_paths: ds_diff = self.get_dataset_diff(ds_path) if not ds_diff: @@ -291,26 +294,23 @@ def write_file_per_dataset(self): has_changes = True output_obj = { "type": "FeatureCollection", - "features": self.filtered_ds_feature_deltas(ds_path, ds_diff), + "features": self.filtered_ds_feature_deltas_as_geojson( + ds_path, ds_diff + ), } - ds_output_filename = str(ds_path).replace("/", "__") + ".geojson" - ds_output_path = self.output_path / ds_output_filename + if self.output_path == "-": + ds_output_path = "-" + else: + ds_output_filename = str(ds_path).replace("/", "__") + ".geojson" + ds_output_path = self.output_path / ds_output_filename dump_json_output( output_obj, ds_output_path, json_style=self.json_style, ) self.has_changes = has_changes - - def filtered_repo_feature_deltas_as_geojson(self): - has_changes = False - for ds_path in self.all_ds_paths: - ds_diff = self.get_dataset_diff(ds_path) - has_changes |= bool(ds_diff) - self._warn_about_any_meta_diffs(ds_path, ds_diff) - yield from self.filtered_ds_feature_deltas_as_geojson(ds_path, ds_diff) - self.has_changes = has_changes + self.write_warnings_footer() def _warn_about_any_meta_diffs(self, ds_path, ds_diff): if "meta" in ds_diff: diff --git a/kart/show.py b/kart/show.py index 0873ae9dc..983c28e0b 100644 --- a/kart/show.py +++ b/kart/show.py @@ -10,9 +10,8 @@ @click.option( "--output-format", "-o", - type=click.Choice( - ["text", "json", "geojson", "quiet", "feature-count", "html", "json-lines"] - ), + # note: geojson was removed from here because it doesn't work with multiple datasets. + type=click.Choice(["text", "json", "quiet", "feature-count", "html", "json-lines"]), default="text", help=( "Output format. 'quiet' disables all output and implies --exit-code.\n" @@ -39,7 +38,7 @@ "--json-style", type=click.Choice(["extracompact", "compact", "pretty"]), default="pretty", - help="How to format the output. Only used with -o json or -o geojson", + help="How to format the output. Only used with -o json", ) @click.option( "--only-feature-count", diff --git a/tests/test_diff.py b/tests/test_diff.py index fce6f220c..83f2ae0d5 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -14,6 +14,7 @@ H = pytest.helpers.helpers() DIFF_OUTPUT_FORMATS = ["text", "geojson", "json", "json-lines", "quiet", "html"] +SHOW_OUTPUT_FORMATS = ["text", "json", "json-lines", "quiet", "html"] def _check_html_output(s): @@ -1428,7 +1429,7 @@ def test_diff_3way(data_working_copy, cli_runner, insert, request): assert len(features) == 1 -@pytest.mark.parametrize("output_format", DIFF_OUTPUT_FORMATS) +@pytest.mark.parametrize("output_format", SHOW_OUTPUT_FORMATS) def test_show_points_HEAD(output_format, data_archive_readonly, cli_runner): """ Show a patch; ref defaults to HEAD @@ -1529,7 +1530,7 @@ def test_diff_filtered(data_archive_readonly, cli_runner): ] -@pytest.mark.parametrize("output_format", DIFF_OUTPUT_FORMATS) +@pytest.mark.parametrize("output_format", SHOW_OUTPUT_FORMATS) def test_show_polygons_initial(output_format, data_archive_readonly, cli_runner): """Make sure we can show the initial commit""" with data_archive_readonly("polygons"): @@ -1684,12 +1685,97 @@ def override_get_feature(self, *args, **kwargs): @pytest.mark.parametrize( - "output_format", [o for o in DIFF_OUTPUT_FORMATS if o not in {"html", "quiet"}] + "output_format", [o for o in SHOW_OUTPUT_FORMATS if o not in {"html", "quiet"}] ) -def test_diff_output_to_file(output_format, data_archive, cli_runner): +def test_show_output_to_file(output_format, data_archive, cli_runner): with data_archive("points") as repo_path: r = cli_runner.invoke( ["show", f"--output-format={output_format}", "--output=out"] ) assert r.exit_code == 0, r assert (repo_path / "out").exists() + + +def test_diff_geojson_usage(data_archive, cli_runner, tmp_path): + with data_archive("points") as repo_path: + # output to stdout + r = cli_runner.invoke( + ["diff", "--output-format=geojson", "--output=-", "HEAD^..."] + ) + assert r.exit_code == 0, r.stderr + # output to stdout (by default) + r = cli_runner.invoke(["diff", "--output-format=geojson", "HEAD^..."]) + assert r.exit_code == 0, r.stderr + + # output to a directory that doesn't yet exist + r = cli_runner.invoke( + [ + "diff", + "--output-format=geojson", + f"--output={tmp_path / 'abc'}", + "HEAD^...", + ] + ) + assert r.exit_code == 0, r.stderr + assert {p.name for p in (tmp_path / "abc").iterdir()} == { + "nz_pa_points_topo_150k.geojson" + } + + # output to a directory that does exist + d = tmp_path / "def" + d.mkdir() + # this gets left alone + (d / "empty.file").write_bytes(b"") + # this gets deleted. + (d / "some.geojson").write_bytes(b"{}") + r = cli_runner.invoke( + [ + "diff", + "--output-format=geojson", + f"--output={d}", + "HEAD^...", + ] + ) + assert r.exit_code == 0, r.stderr + assert {p.name for p in d.iterdir()} == { + "nz_pa_points_topo_150k.geojson", + "empty.file", + } + + # (add another dataset) + with data_archive("gpkg-3d-points") as src: + src_gpkg_path = src / "points-3d.gpkg" + r = cli_runner.invoke(["-C", repo_path, "import", src_gpkg_path]) + assert r.exit_code == 0, r.stderr + + # file/stdout output isn't allowed when there are multiple datasets + r = cli_runner.invoke( + [ + "diff", + "--output-format=geojson", + "HEAD^...", + ] + ) + assert r.exit_code == 2, r.stderr + assert ( + r.stderr.splitlines()[-1] + == "Error: Invalid value for --output: Need to specify a directory for geojson with >1 dataset" + ) + + # Can't specify an (existing) regular file either + myfile = tmp_path / "ghi" + myfile.write_bytes(b"") + assert myfile.exists() + r = cli_runner.invoke( + [ + "diff", + "--output-format=geojson", + f"--output={myfile}", + "HEAD^...", + ] + ) + assert r.exit_code == 2, r.stderr + assert ( + r.stderr.splitlines()[-1] + == "Error: Invalid value for --output: Output path should be a directory for geojson format." + ) From 5c8de2f3d521465354b0514e0bac135caef9eea5 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Thu, 7 Oct 2021 09:26:19 +1300 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Robert Coup --- kart/json_diff_writers.py | 18 ++++++++++++++---- kart/show.py | 2 +- tests/test_diff.py | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/kart/json_diff_writers.py b/kart/json_diff_writers.py index 202ca229b..b56972cbf 100644 --- a/kart/json_diff_writers.py +++ b/kart/json_diff_writers.py @@ -1,5 +1,6 @@ from datetime import datetime, timezone, timedelta import json +import logging from pathlib import Path import click @@ -11,6 +12,8 @@ from .output_util import dump_json_output, resolve_output_path from .timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz +L = logging.getLogger(__name__) + class JsonDiffWriter(BaseDiffWriter): """ @@ -265,13 +268,20 @@ def _check_output_path(cls, repo, output_path): if isinstance(output_path, Path): if output_path.is_file(): raise click.BadParameter( - "Output path should be a directory for geojson format.", - param_hint="--output", + "Output path should be a directory for GeoJSON format.", + param_hint="--output-path", ) if not output_path.exists(): output_path.mkdir() else: - for p in output_path.glob("*.geojson"): + geojson_paths = list(output_path.glob("*.geojson")) + if geojson_paths: + L.debug( + "Cleaning %d *.geojson files from %s ...", + len(geojson_paths), + output_path, + ) + for p in geojson_paths: p.unlink() else: output_path = "-" @@ -282,7 +292,7 @@ def write_diff(self): if len(self.all_ds_paths) > 1 and not isinstance(self.output_path, Path): raise click.BadParameter( - "Need to specify a directory for geojson with >1 dataset", + "Need to specify a directory via --output-path for GeoJSON with more than one dataset", param_hint="--output", ) for ds_path in self.all_ds_paths: diff --git a/kart/show.py b/kart/show.py index 983c28e0b..516a39148 100644 --- a/kart/show.py +++ b/kart/show.py @@ -10,7 +10,7 @@ @click.option( "--output-format", "-o", - # note: geojson was removed from here because it doesn't work with multiple datasets. + # note: geojson isn't here because it doesn't work cleanly with multiple datasets. type=click.Choice(["text", "json", "quiet", "feature-count", "html", "json-lines"]), default="text", help=( diff --git a/tests/test_diff.py b/tests/test_diff.py index 83f2ae0d5..fdb941944 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -1759,7 +1759,7 @@ def test_diff_geojson_usage(data_archive, cli_runner, tmp_path): assert r.exit_code == 2, r.stderr assert ( r.stderr.splitlines()[-1] - == "Error: Invalid value for --output: Need to specify a directory for geojson with >1 dataset" + == "Error: Invalid value for --output: Need to specify a directory via --output-path for GeoJSON with more than one dataset" ) # Can't specify an (existing) regular file either @@ -1777,5 +1777,5 @@ def test_diff_geojson_usage(data_archive, cli_runner, tmp_path): assert r.exit_code == 2, r.stderr assert ( r.stderr.splitlines()[-1] - == "Error: Invalid value for --output: Output path should be a directory for geojson format." + == "Error: Invalid value for --output-path: Output path should be a directory for GeoJSON format." )