Skip to content

Commit

Permalink
Merge pull request #489 from koordinates/geojson-diff-fix
Browse files Browse the repository at this point in the history
Fix GeoJSON diff regressions in v0.10.1
  • Loading branch information
craigds authored Oct 6, 2021
2 parents 5cd24c8 + 5c8de2f commit 8b4b35d
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ _When adding new entries to the changelog, please include issue/PR numbers where

## 0.10.5 (UNRELEASED)

* Fixed regressions in `diff -o geojson` since Kart 0.10.1 [#487](https://github.com/koordinates/kart/issues/487)
* Removed `kart show -o geojson` [#487](https://github.com/koordinates/kart/issues/487#issuecomment-933561924)
* Fix for [#478](https://github.com/koordinates/kart/issues/478) `merge --dry-run` raises error
* Fix for [#483](https://github.com/koordinates/kart/issues/483) `diff` error with Z/M geometries

Expand Down
64 changes: 37 additions & 27 deletions kart/json_diff_writers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime, timezone, timedelta
import json
import logging
from pathlib import Path

import click
Expand All @@ -11,6 +12,8 @@
from .output_util import dump_json_output, resolve_output_path
from .timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

L = logging.getLogger(__name__)


class JsonDiffWriter(BaseDiffWriter):
"""
Expand Down Expand Up @@ -262,26 +265,36 @@ class GeojsonDiffWriter(BaseDiffWriter):

@classmethod
def _check_output_path(cls, repo, output_path):
if isinstance(output_path, Path):
if output_path.is_file():
raise click.BadParameter(
"Output path should be a directory for GeoJSON format.",
param_hint="--output-path",
)
if not output_path.exists():
output_path.mkdir()
else:
geojson_paths = list(output_path.glob("*.geojson"))
if geojson_paths:
L.debug(
"Cleaning %d *.geojson files from %s ...",
len(geojson_paths),
output_path,
)
for p in geojson_paths:
p.unlink()
else:
output_path = "-"
return output_path

def write_diff(self):
if isinstance(self.output_path, Path) and self.output_path.is_dir():
self.write_file_per_dataset()
else:
output_obj = {
"type": "FeatureCollection",
"features": self.filtered_repo_feature_deltas_as_geojson(),
}
has_changes = False

dump_json_output(
output_obj,
self.output_path,
json_style=self.json_style,
if len(self.all_ds_paths) > 1 and not isinstance(self.output_path, Path):
raise click.BadParameter(
"Need to specify a directory via --output-path for GeoJSON with more than one dataset",
param_hint="--output",
)
self.write_warnings_footer()

def write_file_per_dataset(self):
has_changes = False
for ds_path in self.all_ds_paths:
ds_diff = self.get_dataset_diff(ds_path)
if not ds_diff:
Expand All @@ -291,26 +304,23 @@ def write_file_per_dataset(self):
has_changes = True
output_obj = {
"type": "FeatureCollection",
"features": self.filtered_ds_feature_deltas(ds_path, ds_diff),
"features": self.filtered_ds_feature_deltas_as_geojson(
ds_path, ds_diff
),
}

ds_output_filename = str(ds_path).replace("/", "__") + ".geojson"
ds_output_path = self.output_path / ds_output_filename
if self.output_path == "-":
ds_output_path = "-"
else:
ds_output_filename = str(ds_path).replace("/", "__") + ".geojson"
ds_output_path = self.output_path / ds_output_filename
dump_json_output(
output_obj,
ds_output_path,
json_style=self.json_style,
)
self.has_changes = has_changes

def filtered_repo_feature_deltas_as_geojson(self):
has_changes = False
for ds_path in self.all_ds_paths:
ds_diff = self.get_dataset_diff(ds_path)
has_changes |= bool(ds_diff)
self._warn_about_any_meta_diffs(ds_path, ds_diff)
yield from self.filtered_ds_feature_deltas_as_geojson(ds_path, ds_diff)
self.has_changes = has_changes
self.write_warnings_footer()

def _warn_about_any_meta_diffs(self, ds_path, ds_diff):
if "meta" in ds_diff:
Expand Down
7 changes: 3 additions & 4 deletions kart/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
@click.option(
"--output-format",
"-o",
type=click.Choice(
["text", "json", "geojson", "quiet", "feature-count", "html", "json-lines"]
),
# note: geojson isn't here because it doesn't work cleanly with multiple datasets.
type=click.Choice(["text", "json", "quiet", "feature-count", "html", "json-lines"]),
default="text",
help=(
"Output format. 'quiet' disables all output and implies --exit-code.\n"
Expand All @@ -39,7 +38,7 @@
"--json-style",
type=click.Choice(["extracompact", "compact", "pretty"]),
default="pretty",
help="How to format the output. Only used with -o json or -o geojson",
help="How to format the output. Only used with -o json",
)
@click.option(
"--only-feature-count",
Expand Down
94 changes: 90 additions & 4 deletions tests/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
H = pytest.helpers.helpers()

DIFF_OUTPUT_FORMATS = ["text", "geojson", "json", "json-lines", "quiet", "html"]
SHOW_OUTPUT_FORMATS = ["text", "json", "json-lines", "quiet", "html"]


def _check_html_output(s):
Expand Down Expand Up @@ -1428,7 +1429,7 @@ def test_diff_3way(data_working_copy, cli_runner, insert, request):
assert len(features) == 1


@pytest.mark.parametrize("output_format", DIFF_OUTPUT_FORMATS)
@pytest.mark.parametrize("output_format", SHOW_OUTPUT_FORMATS)
def test_show_points_HEAD(output_format, data_archive_readonly, cli_runner):
"""
Show a patch; ref defaults to HEAD
Expand Down Expand Up @@ -1529,7 +1530,7 @@ def test_diff_filtered(data_archive_readonly, cli_runner):
]


@pytest.mark.parametrize("output_format", DIFF_OUTPUT_FORMATS)
@pytest.mark.parametrize("output_format", SHOW_OUTPUT_FORMATS)
def test_show_polygons_initial(output_format, data_archive_readonly, cli_runner):
"""Make sure we can show the initial commit"""
with data_archive_readonly("polygons"):
Expand Down Expand Up @@ -1684,12 +1685,97 @@ def override_get_feature(self, *args, **kwargs):


@pytest.mark.parametrize(
"output_format", [o for o in DIFF_OUTPUT_FORMATS if o not in {"html", "quiet"}]
"output_format", [o for o in SHOW_OUTPUT_FORMATS if o not in {"html", "quiet"}]
)
def test_diff_output_to_file(output_format, data_archive, cli_runner):
def test_show_output_to_file(output_format, data_archive, cli_runner):
with data_archive("points") as repo_path:
r = cli_runner.invoke(
["show", f"--output-format={output_format}", "--output=out"]
)
assert r.exit_code == 0, r
assert (repo_path / "out").exists()


def test_diff_geojson_usage(data_archive, cli_runner, tmp_path):
with data_archive("points") as repo_path:
# output to stdout
r = cli_runner.invoke(
["diff", "--output-format=geojson", "--output=-", "HEAD^..."]
)
assert r.exit_code == 0, r.stderr
# output to stdout (by default)
r = cli_runner.invoke(["diff", "--output-format=geojson", "HEAD^..."])
assert r.exit_code == 0, r.stderr

# output to a directory that doesn't yet exist
r = cli_runner.invoke(
[
"diff",
"--output-format=geojson",
f"--output={tmp_path / 'abc'}",
"HEAD^...",
]
)
assert r.exit_code == 0, r.stderr
assert {p.name for p in (tmp_path / "abc").iterdir()} == {
"nz_pa_points_topo_150k.geojson"
}

# output to a directory that does exist
d = tmp_path / "def"
d.mkdir()
# this gets left alone
(d / "empty.file").write_bytes(b"")
# this gets deleted.
(d / "some.geojson").write_bytes(b"{}")
r = cli_runner.invoke(
[
"diff",
"--output-format=geojson",
f"--output={d}",
"HEAD^...",
]
)
assert r.exit_code == 0, r.stderr
assert {p.name for p in d.iterdir()} == {
"nz_pa_points_topo_150k.geojson",
"empty.file",
}

# (add another dataset)
with data_archive("gpkg-3d-points") as src:
src_gpkg_path = src / "points-3d.gpkg"
r = cli_runner.invoke(["-C", repo_path, "import", src_gpkg_path])
assert r.exit_code == 0, r.stderr

# file/stdout output isn't allowed when there are multiple datasets
r = cli_runner.invoke(
[
"diff",
"--output-format=geojson",
"HEAD^...",
]
)
assert r.exit_code == 2, r.stderr
assert (
r.stderr.splitlines()[-1]
== "Error: Invalid value for --output: Need to specify a directory via --output-path for GeoJSON with more than one dataset"
)

# Can't specify an (existing) regular file either
myfile = tmp_path / "ghi"
myfile.write_bytes(b"")
assert myfile.exists()
r = cli_runner.invoke(
[
"diff",
"--output-format=geojson",
f"--output={myfile}",
"HEAD^...",
]
)
assert r.exit_code == 2, r.stderr
assert (
r.stderr.splitlines()[-1]
== "Error: Invalid value for --output-path: Output path should be a directory for GeoJSON format."
)

0 comments on commit 8b4b35d

Please sign in to comment.