From e610a3c6212a2044222591178dfd5dfba162c928 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Sat, 31 Aug 2024 17:12:28 -0400 Subject: [PATCH] Centralize annotation containment at least a bit So we can more easily create other compiler rules without duplicating all of the currently messy logic. Signed-off-by: Juan Cruz Viotti --- src/jsonschema/compile_evaluate.cc | 220 ++++++++++++++++------------- 1 file changed, 123 insertions(+), 97 deletions(-) diff --git a/src/jsonschema/compile_evaluate.cc b/src/jsonschema/compile_evaluate.cc index d2c2c0794..8826414b4 100644 --- a/src/jsonschema/compile_evaluate.cc +++ b/src/jsonschema/compile_evaluate.cc @@ -39,6 +39,7 @@ class EvaluationContext { return {*(result.first), result.second}; } +private: auto annotations(const Pointer ¤t_instance_location, const Pointer &schema_location) const -> const Annotations & { @@ -60,6 +61,7 @@ class EvaluationContext { return schema_location_result->second; } + // TODO: This should be a private method auto annotations(const Pointer ¤t_instance_location) const -> const InstanceAnnotations & { static const InstanceAnnotations placeholder; @@ -74,6 +76,99 @@ class EvaluationContext { return instance_location_result->second; } +public: + auto defines_any_adjacent_annotation( + const Pointer &expected_instance_location, + const Pointer &base_evaluate_path, + const std::set &keywords) const -> bool { + for (const auto &keyword : keywords) { + // TODO: How can we avoid this expensive pointer manipulation? + auto expected_evaluate_path{base_evaluate_path}; + expected_evaluate_path.push_back({keyword}); + if (!this->annotations(expected_instance_location, expected_evaluate_path) + .empty()) { + return true; + } + } + + return false; + } + + auto defines_adjacent_annotation(const Pointer &expected_instance_location, + const Pointer &base_evaluate_path, + const std::set &keywords, + const JSON &value) const -> bool { + for (const auto &keyword : keywords) { + auto expected_evaluate_path{base_evaluate_path}; + expected_evaluate_path.push_back({keyword}); + if (this->annotations(expected_instance_location, expected_evaluate_path) + .contains(value)) { + return true; + } + } + + return false; + } + + auto defines_annotation(const Pointer &expected_instance_location, + const Pointer &base_evaluate_path, + const std::set &keywords, + const JSON &value) const -> bool { + if (keywords.empty()) { + return false; + } + + const auto instance_annotations{ + this->annotations(expected_instance_location)}; + for (const auto &[schema_location, schema_annotations] : + instance_annotations) { + assert(!schema_location.empty()); + const auto &keyword{schema_location.back()}; + if (keyword.is_property() && keywords.contains(keyword.to_property()) && + schema_annotations.contains(value) && + schema_location.initial().starts_with(base_evaluate_path)) { + bool blacklisted = false; + for (const auto &masked : this->annotation_blacklist) { + if (schema_location.starts_with(masked) && + !this->evaluate_path_.starts_with(masked)) { + blacklisted = true; + break; + } + } + + if (!blacklisted) { + return true; + } + } + } + + return false; + } + + auto largest_annotation_index(const Pointer &expected_instance_location, + const std::set &keywords, + const std::uint64_t default_value) const + -> std::uint64_t { + std::uint64_t result{default_value}; + for (const auto &[schema_location, schema_annotations] : + this->annotations(expected_instance_location)) { + assert(!schema_location.empty()); + const auto &keyword{schema_location.back()}; + if (!keyword.is_property() || !keywords.contains(keyword.to_property())) { + continue; + } + + for (const auto &annotation : schema_annotations) { + if (annotation.is_integer() && annotation.is_positive()) { + result = std::max( + result, static_cast(annotation.to_integer()) + 1); + } + } + } + + return result; + } + template auto push(const T &step, const Pointer &relative_evaluate_path, const Pointer &relative_instance_location) -> void { @@ -201,17 +296,6 @@ class EvaluationContext { this->annotation_blacklist.insert(this->evaluate_path_); } - auto masked(const Pointer &path) const -> bool { - for (const auto &masked : this->annotation_blacklist) { - if (path.starts_with(masked) && - !this->evaluate_path_.starts_with(masked)) { - return true; - } - } - - return false; - } - auto find_dynamic_anchor(const std::string &anchor) const -> std::optional { for (const auto &resource : this->resources()) { @@ -615,33 +699,15 @@ auto evaluate_step( instance); CALLBACK_PRE(assertion, context.instance_location()); const auto &value{context.resolve_value(assertion.value, instance)}; - const auto &target{ - context.resolve_target( - assertion.target, instance)}; - result = true; - if (!assertion.data.empty()) { - for (const auto &[schema_location, annotations] : target) { - assert(!schema_location.empty()); - const auto &keyword{schema_location.back()}; - if (keyword.is_property() && - assertion.data.contains(keyword.to_property()) && - annotations.contains(value) && - // Make sure its not a cousin annotation, which can - // never be seen - // TODO: Have a better function at Pointer to check - // for these "initial starts with" cases in a way - // that we don't have to copy pointers, which `.initial()` - // does. - schema_location.initial().starts_with( - context.evaluate_path().initial()) && - // We want to ignore certain annotations, like the ones - // inside "not" - !context.masked(schema_location)) { - result = false; - break; - } - } + if (assertion.target.first == SchemaCompilerTargetType::ParentAnnotations) { + result = !context.defines_annotation( + context.instance_location().initial(), + context.evaluate_path().initial(), assertion.data, value); + } else { + result = !context.defines_annotation(context.instance_location(), + context.evaluate_path().initial(), + assertion.data, value); } CALLBACK_POST("SchemaCompilerAssertionNoAnnotation", assertion); @@ -728,16 +794,11 @@ auto evaluate_step( EVALUATE_CONDITION_GUARD("SchemaCompilerLogicalWhenUnmarked", logical, instance); const auto &value{context.resolve_value(logical.value, instance)}; - - // TODO: How can we avoid this expensive pointer manipulation? - auto expected_evaluate_path{context.evaluate_path()}; - expected_evaluate_path.pop_back(); - expected_evaluate_path.push_back({value}); - const auto ¤t_annotations{context.annotations( - context.instance_location(), expected_evaluate_path)}; EVALUATE_IMPLICIT_PRECONDITION("SchemaCompilerLogicalWhenUnmarked", logical, - current_annotations.empty()); - + !context.defines_any_adjacent_annotation( + context.instance_location(), + context.evaluate_path().initial(), + {value})); CALLBACK_PRE(logical, context.instance_location()); result = true; for (const auto &child : logical.children) { @@ -755,16 +816,11 @@ auto evaluate_step( EVALUATE_CONDITION_GUARD("SchemaCompilerLogicalWhenMarked", logical, instance); const auto &value{context.resolve_value(logical.value, instance)}; - - // TODO: How can we avoid this expensive pointer manipulation? - auto expected_evaluate_path{context.evaluate_path()}; - expected_evaluate_path.pop_back(); - expected_evaluate_path.push_back({value}); - const auto ¤t_annotations{context.annotations( - context.instance_location(), expected_evaluate_path)}; EVALUATE_IMPLICIT_PRECONDITION("SchemaCompilerLogicalWhenMarked", logical, - !current_annotations.empty()); - + context.defines_any_adjacent_annotation( + context.instance_location(), + context.evaluate_path().initial(), + {value})); CALLBACK_PRE(logical, context.instance_location()); result = true; for (const auto &child : logical.children) { @@ -1077,30 +1133,16 @@ auto evaluate_step( const auto &value{context.resolve_value(loop.value, instance)}; assert(!value.empty()); - // TODO: Find a way to be more efficient with this - std::vector> - current_annotations; - for (const auto &keyword : value) { - assert(!context.evaluate_path().empty()); - // TODO: Can we avoid this expensive pointer manipulation? - auto expected_evaluate_path{context.evaluate_path()}; - expected_evaluate_path.pop_back(); - expected_evaluate_path.push_back({keyword}); - current_annotations.emplace_back(context.annotations( - context.instance_location(), expected_evaluate_path)); - } - for (const auto &entry : target.as_object()) { - bool apply_children{true}; - for (const auto &annotations : current_annotations) { - // TODO: Can we avoid this JSON conversion? - if (annotations.get().contains(JSON{entry.first})) { - apply_children = false; - break; - } - } - - if (!apply_children) { + // TODO: It might be more efficient to get all the annotations we + // potentially care about as a set first, and the make the loop + // check for O(1) containment in that set? + if (context.defines_adjacent_annotation( + context.instance_location(), + // TODO: Can we avoid doing this expensive operation on a loop? + context.evaluate_path().initial(), value, + // TODO: This conversion implies a string copy + JSON{entry.first})) { continue; } @@ -1203,25 +1245,9 @@ auto evaluate_step( auto iterator{array.cbegin()}; // Determine the proper start based on integer annotations collected for the - // current instance location by the keyword requested by the user. We will - // exhaustively check the matching annotations and end up with the largest - // index or zero - std::uint64_t start{0}; - for (const auto &[schema_location, annotations] : - context.annotations(context.instance_location())) { - assert(!schema_location.empty()); - const auto &keyword{schema_location.back()}; - if (!keyword.is_property() || keyword.to_property() != value) { - continue; - } - - for (const auto &annotation : annotations) { - if (annotation.is_integer() && annotation.is_positive()) { - start = std::max( - start, static_cast(annotation.to_integer()) + 1); - } - } - } + // current instance location by the keyword requested by the user. + const std::uint64_t start{context.largest_annotation_index( + context.instance_location(), {value}, 0)}; // We need this check, as advancing an iterator past its bounds // is considered undefined behavior