From d5049bf86bfa0a27bc9e2b71acc84f48f2ef0eeb Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:45:37 -0400 Subject: [PATCH] Jsonschema 4.18+ is now required. https://github.com/python-jsonschema/jsonschema/issues/994 --- CHANGELOG.md | 5 ++-- libcove/lib/common.py | 54 ++++++++++++++++------------------------ setup.py | 3 ++- tests/lib/test_common.py | 11 +++----- 4 files changed, 30 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3c8739..2d8c3b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,17 +10,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Removed - Dropped support for Python 3.6 & 3.7, as these are now end of life. -- Drop jsonschema 3 support +- Jsonschema 4.18+ is now required. Support for 3 and older versions of 4 is removed. ### Changed - Restore jsonschema's type validator, as its performance has improved in recent Python versions https://github.com/OpenDataServices/lib-cove/pull/127 -- Allow `SchemaJsonMixin` classes to define a `validator` method, that accepts lib-cove's JSON Schema draft 4 validator class and its format checker, and returns a validator instance. https://github.com/OpenDataServices/lib-cove/pull/128 +- Allow `SchemaJsonMixin` classes to define a `registry` value (a `referencing.Registry`), which is passed to lib-cove's JSON Schema draft 4 validator instead of the default registry built from `CustomRefResolver`. ### Fixed - Calculate additional codelist values for schema using `anyOf` or `oneOf`, like OCDS record packages https://github.com/open-contracting/lib-cove-ocds/issues/106 - Descend into nullable objects and arrays. (For example, OCDS `parties/details` is nullable, and additional codes for `parties/details/scale` were unreported.) https://github.com/OpenDataServices/lib-cove/pull/131 +- Process subschemas with our custom validator. Fixes an issue in later versions of jsonschema. 
## [0.31.0] - 2023-07-06 diff --git a/libcove/lib/common.py b/libcove/lib/common.py index f336479..73ffbea 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -16,6 +16,7 @@ import jsonref import jsonschema.validators import requests +from referencing import Registry, Resource try: from functools import cached_property @@ -23,8 +24,8 @@ from cached_property import cached_property from flattentool import unflatten -from jsonschema import FormatChecker, RefResolver -from jsonschema._utils import extras_msg, find_additional_properties, uniq +from jsonschema import FormatChecker +from jsonschema._utils import ensure_list, extras_msg, find_additional_properties, uniq from jsonschema.exceptions import UndefinedTypeCheck, ValidationError from .exceptions import cove_spreadsheet_conversion_error @@ -813,14 +814,8 @@ def get_schema_validation_errors( if extra_checkers: format_checker.checkers.update(extra_checkers) - # Force jsonschema to use our validator. - # https://github.com/python-jsonschema/jsonschema/issues/994 - jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( - validator - ) - - if hasattr(schema_obj, "validator"): - our_validator = schema_obj.validator(validator, format_checker) + if hasattr(schema_obj, "registry"): + registry = schema_obj.registry else: if getattr(schema_obj, "extended", None): resolver = CustomRefResolver( @@ -839,9 +834,17 @@ def get_schema_validation_errors( schema_url=schema_obj.schema_host, ) - our_validator = validator( - pkg_schema_obj, format_checker=format_checker, resolver=resolver - ) + registry = Registry(retrieve=resolver.retrieve) + + # Force jsonschema to use our validator. 
+ # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + validator + ) + + our_validator = validator( + pkg_schema_obj, format_checker=format_checker, registry=registry + ) for e in our_validator.iter_errors(json_data): message = e.message @@ -1165,7 +1168,7 @@ def get_fields_present(*args, **kwargs): } -class CustomRefResolver(RefResolver): +class CustomRefResolver: """This RefResolver is only for use with the jsonschema library""" def __init__(self, *args, **kw): @@ -1178,44 +1181,29 @@ def __init__(self, *args, **kw): # this is ignored when you supply a file self.schema_url = kw.pop("schema_url", "") self.config = kw.pop("config", "") - super().__init__(*args, **kw) - def resolve_remote(self, uri): + def retrieve(self, uri): schema_name = uri.split("/")[-1] if self.schema_file and self.file_schema_name == schema_name: uri = self.schema_file else: uri = urljoin(self.schema_url, schema_name) - document = self.store.get(uri) - - if document: - return document if uri.startswith("http"): # This branch of the if-statement in-lines `RefResolver.resolve_remote()`, but using `get_request()`. 
+ # https://github.com/python-jsonschema/jsonschema/blob/dbc398245a583cb2366795dc529ae042d10c1577/jsonschema/validators.py#L1008-L1023 scheme = urlsplit(uri).scheme - - if scheme in self.handlers: - result = self.handlers[scheme](uri) - elif scheme in ["http", "https"]: - # Requests has support for detecting the correct encoding of - # json over http + if scheme in ("http", "https"): result = get_request(uri, config=self.config).json() else: - # Otherwise, pass off to urllib and assume utf-8 with urlopen(uri) as url: result = json.loads(url.read().decode("utf-8")) - - if self.cache_remote: - self.store[uri] = result - return result else: with open(uri) as schema_file: result = json.load(schema_file) add_is_codelist(result) - self.store[uri] = result - return result + return Resource.from_contents(result) def _get_schema_deprecated_paths( diff --git a/setup.py b/setup.py index 0ffbd03..b02546c 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,8 @@ long_description="A data review library", install_requires=[ "jsonref", - "jsonschema>=4", + "jsonschema>=4.18", + "referencing", "requests", "cached-property;python_version<'3.8'", "flattentool>=0.11.0", diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index f9c51ac..2a45fd6 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -7,6 +7,7 @@ import jsonschema import pytest from freezegun import freeze_time +from referencing.exceptions import CannotDetermineSpecification from libcove.lib.common import ( SchemaJsonMixin, @@ -766,7 +767,7 @@ def get_pkg_schema_obj(self): assert "[Decimal('3.1')] is too short" in validation_error_json -def test_property_that_is_not_json_schema_doesnt_raise_exception(caplog, tmpdir): +def test_property_that_is_not_json_schema_does_raise_exception(tmpdir): tmpdir.join("test.json").write( json.dumps({"properties": {"bad_property": "not_a_json_schema"}}) ) @@ -778,12 +779,8 @@ class DummySchemaObj: def get_pkg_schema_obj(self): return {"$ref": "test.json"} - 
validation_errors = get_schema_validation_errors({}, DummySchemaObj(), "", {}, {}) - assert validation_errors == {} - assert ( - "A 'properties' object contains a 'bad_property' value that is not a JSON Schema: 'not_a_json_schema'" - in caplog.text - ) + with pytest.raises(CannotDetermineSpecification): + get_schema_validation_errors({}, DummySchemaObj(), "", {}, {}) @pytest.mark.parametrize(