# Schema Update #262
Changes from all commits: 4e7ae47, 97ccd1c, a591c5d, 4886b50, 1601ad2, b0957bc, c62aff3, 1b46fc4, 321ee01, 4427d8f, 3233556
pyproject.toml:

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicsnovault"
-version = "9.1.1"
+version = "10.0.0"
 description = "Storage support for 4DN Data Portals."
 authors = ["4DN-DCIC Team <[email protected]>"]
 license = "MIT"
```
```diff
@@ -46,7 +46,6 @@ dcicutils = "^7.5.0"
 future = ">=0.15.2,<1"
 html5lib = ">=1.1"  # experimental, should be OK now that we're not using moto server
 humanfriendly = "^1.44.9"
-jsonschema_serialize_fork = "^2.1.1"
 netaddr = ">=0.8.0,<1"
 passlib = "^1.7.4"
 pillow = "^9.5.0"
```
```diff
@@ -83,6 +82,7 @@ xlrd = "^1.0.0"
 "zope.deprecation" = "^4.4.0"
 "zope.interface" = ">=4.7.2,<6"
 "zope.sqlalchemy" = "1.6"
+jsonschema = "^4.18.4"

 [tool.poetry.dev-dependencies]
 botocore-stubs = ">=1.29.119"  # no particular version required, but this speeds up search
```

> **Review comment** (on the added `jsonschema` dependency): Yay. I hope this works and the others (cgap-portal, encoded-core, smaht-portal, but apparently not fourfront?) can drop dependency on the fork, too.
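With the fork removed, validation runs on stock jsonschema. As a reminder of the stock API that replaces the fork's entry points, a minimal sketch (the schema here is illustrative, not from this PR):

```python
from jsonschema import Draft4Validator

schema = {"type": "object", "properties": {"accession": {"type": "string"}}}
Draft4Validator.check_schema(schema)  # raises SchemaError if the schema itself is malformed
validator = Draft4Validator(schema)
for error in validator.iter_errors({"accession": 123}):
    print(error.message)  # -> "123 is not of type 'string'"
```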
Elasticsearch mapping module:
```diff
@@ -44,6 +44,7 @@
     get_uuids_for_types,
     SCAN_PAGE_SIZE,
 )
+from ..schema_utils import load_schema
 from .interfaces import ELASTIC_SEARCH, INDEXER_QUEUE
 from ..settings import Settings
```
```diff
@@ -104,6 +105,8 @@ def schema_mapping(field, schema, top_level=False, from_array=False):
     TODO: rename 'lower_case_sort' to 'lowercase' and adjust search code
     """
     ignored(top_level)  # TODO: maybe wants to be used below, but isn't yet?
+    if '$merge' in schema:
+        schema = load_schema(schema)
     type_ = schema['type']

     # Elasticsearch handles multiple values for a field
```

> **Review comment** (on the `$merge` check): Is there a risk of circularity here? Is something checking that already?
```diff
@@ -715,6 +718,8 @@ def type_mapping(types, item_type, embed=True):
     # to relevant fields so that they are not mapped into full_text, for example.
     properties = schema['properties']
     for _, sub_mapping in properties.items():
+        if '$merge' in sub_mapping:
+            sub_mapping = load_schema(sub_mapping)
         if sub_mapping['type'] == 'text':
             sub_mapping['copy_to'] = ['full_text']
     return mapping
```
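For context, a `$merge` value is a reference string pointing at another schema fragment, so a property carrying one has no usable `type` key until it is resolved. A hypothetical example of the kind of property these checks now handle (the ref target is made up):

```python
# A property as it appears in a raw schema file, before resolution:
sub_mapping = {"$merge": "encoded_core:schemas/file.json#/properties/status"}  # hypothetical ref

# schema_mapping/type_mapping cannot read sub_mapping['type'] from this, so they
# resolve it first, after which it looks like an ordinary property, e.g.:
# {"title": "Status", "type": "string", "enum": ["released", "deleted"]}
```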
Schema utilities module:
```diff
@@ -6,12 +6,10 @@

 from datetime import datetime
 from dcicutils.misc_utils import ignored
-from jsonschema_serialize_fork import (
-    Draft4Validator,
-    FormatChecker,
-    RefResolver,
-)
-from jsonschema_serialize_fork.exceptions import ValidationError
+from snovault.schema_validation import SerializingSchemaValidator
+from jsonschema import FormatChecker
+from jsonschema import RefResolver
+from jsonschema.exceptions import ValidationError
 import os
 from pyramid.path import AssetResolver, caller_package
 from pyramid.threadlocal import get_current_request
```
```diff
@@ -42,7 +40,13 @@ def server_default(func):


 class NoRemoteResolver(RefResolver):
     def resolve_remote(self, uri):
-        raise ValueError('Resolution disallowed for: %s' % uri)
+        """ Resolves remote uri for files so we can cross reference across our own
+            repos, which now contain base schemas we may want to use
+        """
+        if any(s in uri for s in ['http', 'https', 'ftp', 'sftp']):
+            raise ValueError(f'Resolution disallowed for: {uri}')
+        else:
+            return load_schema(uri)


 def favor_app_specific_schema(schema: str) -> str:
```
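The resolver now rejects anything that looks like a network URI but forwards other refs to `load_schema`, so schemas can cross-reference files in sibling repos. (The test is a substring match, so listing 'https' and 'sftp' alongside 'http' and 'ftp' is belt-and-braces.) A hypothetical illustration of the two paths:

```python
resolver = NoRemoteResolver.from_schema(schema)  # schema and ref strings here are illustrative
resolver.resolve_remote('https://example.com/thing.json')  # raises ValueError
resolver.resolve_remote('snovault:schemas/mixins.json')    # delegated to load_schema
```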
```diff
@@ -82,13 +86,13 @@ def favor_app_specific_schema_ref(schema_ref: str) -> str:
 def json_file_contains_element(json_filename: str, json_element: str) -> bool:
     """
     If the given JSON file exists and contains the given JSON element name then
-    returns True, otherwise returnes False. The given JSON element may or may
+    returns True; otherwise returns False. The given JSON element may or may
     not begin with a slash. Currently only looks at one single top-level element.
     """
     if json_filename and json_element:
         try:
             with io.open(json_filename, "r") as json_f:
-                    json_content = json.load(json_f)
+                json_content = json.load(json_f)
             json_element = json_element.strip("/")
             if json_element:
                 if json_content.get(json_element):
```
```diff
@@ -111,6 +115,64 @@
     return schema_ref


+def resolve_merge_ref(ref, resolver):
+    with resolver.resolving(ref) as resolved:
+        if not isinstance(resolved, dict):
+            raise ValueError(
+                f'Schema ref {ref} must resolve dict, not {type(resolved)}'
+            )
+        return resolved
+
+
+def _update_resolved_data(resolved_data, value, resolver):
+    # Assumes resolved value is dictionary.
+    resolved_data.update(
+        # Recurse here in case the resolved value has refs.
+        resolve_merge_refs(
+            # Actually get the ref value.
+            resolve_merge_ref(value, resolver),
+            resolver
+        )
+    )
+
+
+def _handle_list_or_string_value(resolved_data, value, resolver):
+    if isinstance(value, list):
+        for v in value:
+            _update_resolved_data(resolved_data, v, resolver)
+    else:
+        _update_resolved_data(resolved_data, value, resolver)
+
+
+def resolve_merge_refs(data, resolver):
+    if isinstance(data, dict):
+        # Return copy.
+        resolved_data = {}
+        for k, v in data.items():
+            if k == '$merge':
+                _handle_list_or_string_value(resolved_data, v, resolver)
+            else:
+                resolved_data[k] = resolve_merge_refs(v, resolver)
+    elif isinstance(data, list):
+        # Return copy.
+        resolved_data = [
+            resolve_merge_refs(v, resolver)
+            for v in data
+        ]
+    else:
+        # Assumes we're only dealing with other JSON types
+        # like string, number, boolean, null, not other
+        # types like tuples, sets, functions, classes, etc.,
+        # which would require a deep copy.
+        resolved_data = data
+    return resolved_data
+
+
+def fill_in_schema_merge_refs(schema, resolver):
+    """ Resolves $merge properties, custom $ref implementation from IGVF SNO2-6 """
+    return resolve_merge_refs(schema, resolver)


 def mixinSchemas(schema, resolver, key_name='properties'):
     mixinKeyName = 'mixin' + key_name.capitalize()
     mixins = schema.get(mixinKeyName)
```

> **Review comment** (on lines +163 to +167): Regarding the comment, is any kind of warning warranted? Should there be a debugging mode? Is it ever desirable for this to happen?
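To see the merge semantics concretely, here is a small runnable sketch using the functions above with a stub resolver (the stub class, store contents, and ref string are all illustrative; in the real code the resolver is a jsonschema RefResolver):

```python
from contextlib import contextmanager

class StubResolver:
    """Illustrative stand-in for RefResolver: resolving() yields the referenced fragment."""
    def __init__(self, store):
        self.store = store

    @contextmanager
    def resolving(self, ref):
        yield self.store[ref]

resolver = StubResolver({
    'mixins.json#/shared_status': {'type': 'string', 'enum': ['current', 'deleted']},
})
schema = {
    'properties': {
        'status': {'$merge': 'mixins.json#/shared_status', 'default': 'current'},
    },
}
print(resolve_merge_refs(schema, resolver))
# {'properties': {'status': {'type': 'string', 'enum': ['current', 'deleted'], 'default': 'current'}}}
```

Note that keys listed after `$merge` in the source dict override the merged-in values, since they are assigned after the `update`; keys listed before it can be overwritten by the merged fragment.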
```diff
@@ -210,10 +272,6 @@ def linkTo(validator, linkTo, instance, schema):
             yield ValidationError(error)
         return

-    # And normalize the value to a uuid
-    if validator._serialize:
-        validator._validated[-1] = str(item.uuid)
-

 class IgnoreUnchanged(ValidationError):
     pass
```
```diff
@@ -287,8 +345,8 @@ def schema_is_array_of_objects(schema):
         yield ValidationError('submission of calculatedProperty disallowed')


-class SchemaValidator(Draft4Validator):
-    VALIDATORS = Draft4Validator.VALIDATORS.copy()
+class SchemaValidator(SerializingSchemaValidator):
+    VALIDATORS = SerializingSchemaValidator.VALIDATORS.copy()
     VALIDATORS['calculatedProperty'] = calculatedProperty
     VALIDATORS['linkTo'] = linkTo
     VALIDATORS['permission'] = permission
```
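For readers unfamiliar with the pattern: jsonschema validator classes carry a VALIDATORS table mapping keyword names to checker functions, which is why copying the table and adding entries yields custom keywords like `linkTo` and `permission`. The stock jsonschema equivalent, as a generic sketch (not code from this PR):

```python
from jsonschema import Draft4Validator
from jsonschema.exceptions import ValidationError
from jsonschema.validators import extend

def even_only(validator, keyword_value, instance, schema):
    # Custom keyword: when {"evenOnly": true}, reject odd integers.
    if keyword_value and isinstance(instance, int) and instance % 2:
        yield ValidationError(f'{instance} is not even')

EvenValidator = extend(Draft4Validator, {'evenOnly': even_only})
errors = list(EvenValidator({'evenOnly': True}).iter_errors(3))
assert errors and 'not even' in errors[0].message
```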
```diff
@@ -320,9 +378,10 @@ def load_schema(filename):
         ),
         resolver, 'columns'
     )
+    schema = fill_in_schema_merge_refs(schema, resolver)

     # SchemaValidator is not thread safe for now
-    SchemaValidator(schema, resolver=resolver, serialize=True)
+    SchemaValidator(schema, resolver=resolver)
     return schema
```
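Call sites do not change: `load_schema` still takes a filename and returns the processed schema, now with `$merge` fragments inlined alongside the existing mixin handling. Hypothetical usage (the schema path is illustrative):

```python
schema = load_schema('snovault:schemas/access_key.json')  # illustrative asset spec
# The returned dict already has mixinProperties, mixinColumns, and $merge refs resolved.
```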
```diff
@@ -344,7 +403,7 @@ def validate(schema, data, current=None, validate_current=False):
     dict validated contents, list of errors
     """
     resolver = NoRemoteResolver.from_schema(schema)
-    sv = SchemaValidator(schema, resolver=resolver, serialize=True, format_checker=format_checker)
+    sv = SchemaValidator(schema, resolver=resolver, format_checker=format_checker)
     validated, errors = sv.serialize(data)
     # validate against current contents if validate_current is set
     if current and validate_current:
```
```diff
@@ -381,6 +440,18 @@ def validate(schema, data, current=None, validate_current=False):
             # Right now those other arms set seemingly-unused variables. -kmp 7-Aug-2022
             if validated_value == current_value:
                 continue  # value is unchanged between data/current; ignore
+            # Also ignore requestMethod and permission errors from defaults.
+            if isinstance(error, IgnoreUnchanged):
+                current_value = data
+                try:
+                    for key in error.path:
+                        # If it's in original data then either user passed it in
+                        # or it's from PATCH object with unchanged data. If it's
+                        # unchanged then it's already been skipped above.
+                        current_value = current_value[key]
+                except KeyError:
+                    # If it's not in original data then it's filled in by defaults.
+                    continue
             filtered_errors.append(error)

     return validated, filtered_errors
```
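The new branch walks `error.path` (the sequence of keys leading to the offending value) into the original request body: if the walk succeeds, the value was user-supplied and the error stands; if it raises KeyError, the value was filled in by server defaults and the error is dropped. A schematic illustration (the helper and data shapes are made up, not from this PR):

```python
def _from_defaults(path, data):
    """Illustrative only (not a helper in this PR): True if path is absent from data."""
    value = data
    try:
        for key in path:
            value = value[key]
    except KeyError:
        return True
    return False

assert _from_defaults(['lab', 'award'], {'title': 'renamed item'})  # default-filled, error dropped
assert not _from_defaults(['title'], {'title': 'renamed item'})     # user-supplied, error kept
```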
```diff
@@ -452,7 +523,6 @@ def combine_schemas(a, b):


 # for integrated tests
 def utc_now_str():
-    # from jsonschema_serialize_fork date-time format requires a timezone
     return datetime.utcnow().isoformat() + '+00:00'
```
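The deleted comment explained the trailing offset: the fork's date-time format check required an explicit timezone. The helper still appends one, which should also satisfy stock jsonschema's RFC 3339 date-time check. For reference (illustrative value):

```python
print(utc_now_str())  # e.g. '2023-07-25T14:30:00.123456+00:00'
```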
> **Review comment:** Because this change is something obscure and technical, it would be really helpful to people trying to figure out how to make downstream changes for you to summarize the things you had to change: the "dependencies" to "dependenciesRequired" change, the change from "POST" to ["POST"], etc. Not sure which things need documenting and which don't, but someone trying to debug another body of code in another repo might find such a summary helpful. In general, one visits a changelog for two reasons: one is just for fun, to see what's new, and the other is because something broke and you're looking for clues.

> **Review comment:** Oh, documentation of breaking changes. Good. :)