Revise EvaluationContext annotation interface (#1295)

Signed-off-by: Juan Cruz Viotti <[email protected]>
sourcemeta · Oct 13, 2024 · 325d8d5 · 325d8d5 · github-actions · Oct 13, 2024
1 parent 3a4fe47
commit 325d8d5
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 63 deletions.
diff --git a/src/evaluator/context.cc b/src/evaluator/context.cc
@@ -13,10 +13,10 @@ auto EvaluationContext::prepare(const JSON &instance) -> void {
   assert(this->resources_.empty());
   this->instances_.clear();
   this->instances_.emplace_back(instance);
-  this->annotation_blacklist.clear();
-  this->annotations_.clear();
   this->labels.clear();
   this->property_as_instance = false;
+  this->annotation_blacklist.clear();
+  this->annotations_.clear();
 }
 
 auto EvaluationContext::push_without_traverse(
@@ -93,6 +93,14 @@ auto EvaluationContext::pop(const bool dynamic) -> void {
   }
 }
 
+// TODO: At least currently, we only need to mask if a schema
+// makes use of `unevaluatedProperties` or `unevaluatedItems`
+// Detect if a schema does need this so if not, we avoid
+// an unnecessary copy
+auto EvaluationContext::mask() -> void {
+  this->annotation_blacklist.push_back(this->evaluate_path_);
+}
+
 auto EvaluationContext::annotate(const WeakPointer &current_instance_location,
                                  const JSON &value)
     -> std::pair<std::reference_wrapper<const JSON>, bool> {
@@ -102,16 +110,48 @@ auto EvaluationContext::annotate(const WeakPointer &current_instance_location,
   return {*(result.first), result.second};
 }
 
-auto EvaluationContext::defines_annotation(
-    const WeakPointer &expected_instance_location,
-    const WeakPointer &base_evaluate_path,
+auto EvaluationContext::defines_any_annotation(
+    const std::string &expected_keyword) const -> bool {
+  const auto instance_location_result{
+      this->annotations_.find(this->instance_location_)};
+  if (instance_location_result == this->annotations_.end()) {
+    return false;
+  }
+
+  for (const auto &[schema_location, schema_annotations] :
+       instance_location_result->second) {
+    assert(!schema_location.empty());
+    const auto &keyword{schema_location.back()};
+
+    if (keyword.is_property() && expected_keyword == keyword.to_property() &&
+        !schema_annotations.empty() &&
+        schema_location.initial().starts_with(this->evaluate_path_)) {
+      bool blacklisted = false;
+      for (const auto &masked : this->annotation_blacklist) {
+        if (schema_location.starts_with(masked) &&
+            !this->evaluate_path_.starts_with(masked)) {
+          blacklisted = true;
+          break;
+        }
+      }
+
+      if (!blacklisted) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+auto EvaluationContext::defines_sibling_annotation(
     const std::vector<std::string> &keywords, const JSON &value) const -> bool {
   if (keywords.empty()) {
     return false;
   }
 
   const auto instance_location_result{
-      this->annotations_.find(expected_instance_location)};
+      this->annotations_.find(this->instance_location_)};
   if (instance_location_result == this->annotations_.end()) {
     return false;
   }
@@ -125,7 +165,7 @@ auto EvaluationContext::defines_annotation(
         std::find(keywords.cbegin(), keywords.cend(), keyword.to_property()) !=
             keywords.cend() &&
         schema_annotations.contains(value) &&
-        schema_location.initial().starts_with(base_evaluate_path)) {
+        schema_location.initial().starts_with(this->evaluate_path_.initial())) {
       bool blacklisted = false;
       for (const auto &masked : this->annotation_blacklist) {
         if (schema_location.starts_with(masked) &&
@@ -145,15 +185,12 @@ auto EvaluationContext::defines_annotation(
 }
 
 auto EvaluationContext::largest_annotation_index(
-    const WeakPointer &expected_instance_location,
-    const std::vector<std::string> &keywords,
-    const std::uint64_t default_value) const -> std::uint64_t {
+    const std::string &expected_keyword) const -> std::uint64_t {
   // TODO: We should be taking masks into account
-
-  std::uint64_t result{default_value};
+  std::uint64_t result{0};
 
   const auto instance_location_result{
-      this->annotations_.find(expected_instance_location)};
+      this->annotations_.find(this->instance_location_)};
   if (instance_location_result == this->annotations_.end()) {
     return result;
   }
@@ -162,12 +199,7 @@ auto EvaluationContext::largest_annotation_index(
        instance_location_result->second) {
     assert(!schema_location.empty());
     const auto &keyword{schema_location.back()};
-    if (!keyword.is_property()) {
-      continue;
-    }
-
-    if (std::find(keywords.cbegin(), keywords.cend(), keyword.to_property()) ==
-        keywords.cend()) {
+    if (!keyword.is_property() || expected_keyword != keyword.to_property()) {
       continue;
     }
 
@@ -255,14 +287,6 @@ auto EvaluationContext::mark(const std::size_t id,
   this->labels.try_emplace(id, children);
 }
 
-// TODO: At least currently, we only need to mask if a schema
-// makes use of `unevaluatedProperties` or `unevaluatedItems`
-// Detect if a schema does need this so if not, we avoid
-// an unnecessary copy
-auto EvaluationContext::mask() -> void {
-  this->annotation_blacklist.push_back(this->evaluate_path_);
-}
-
 auto EvaluationContext::jump(const std::size_t id) const noexcept
     -> const SchemaCompilerTemplate & {
   assert(this->labels.contains(id));

diff --git a/src/evaluator/evaluator.cc b/src/evaluator/evaluator.cc
@@ -749,10 +749,8 @@ auto evaluate_step(
         // TODO: It might be more efficient to get all the annotations we
         // potentially care about as a set first, and the make the loop
         // check for O(1) containment in that set?
-        if (context.defines_annotation(
-                context.instance_location(),
-                // TODO: Can we avoid doing this expensive operation on a loop?
-                context.evaluate_path().initial(), loop.value,
+        if (context.defines_sibling_annotation(
+                loop.value,
                 // TODO: This conversion implies a string copy
                 JSON{entry.first})) {
           continue;
@@ -778,9 +776,7 @@ auto evaluate_step(
     case IS_STEP(SchemaCompilerAnnotationLoopItemsUnmarked): {
       EVALUATE_BEGIN(loop, SchemaCompilerAnnotationLoopItemsUnmarked,
                      target.is_array() &&
-                         !context.defines_annotation(
-                             context.instance_location(),
-                             context.evaluate_path(), loop.value, JSON{true}));
+                         !context.defines_any_annotation(loop.value));
       // Otherwise you shouldn't be using this step?
       assert(!loop.value.empty());
       const auto &array{target.as_array()};
@@ -808,18 +804,16 @@ auto evaluate_step(
     case IS_STEP(SchemaCompilerAnnotationLoopItemsUnevaluated): {
       // TODO: This precondition is very expensive due to pointer manipulation
       EVALUATE_BEGIN(loop, SchemaCompilerAnnotationLoopItemsUnevaluated,
-                     target.is_array() && !context.defines_annotation(
-                                              context.instance_location(),
-                                              context.evaluate_path().initial(),
+                     target.is_array() && !context.defines_sibling_annotation(
                                               loop.value.mask, JSON{true}));
       const auto &array{target.as_array()};
       result = true;
       auto iterator{array.cbegin()};
 
       // Determine the proper start based on integer annotations collected for
       // the current instance location by the keyword requested by the user.
-      const std::uint64_t start{context.largest_annotation_index(
-          context.instance_location(), {loop.value.index}, 0)};
+      const std::uint64_t start{
+          context.largest_annotation_index(loop.value.index)};
 
       // We need this check, as advancing an iterator past its bounds
       // is considered undefined behavior
@@ -831,11 +825,9 @@ auto evaluate_step(
       for (; iterator != array.cend(); ++iterator) {
         const auto index{std::distance(array.cbegin(), iterator)};
 
-        if (context.defines_annotation(
-                context.instance_location(),
-                // TODO: Can we avoid doing this expensive operation on a loop?
-                context.evaluate_path().initial(), loop.value.filter,
-                JSON{static_cast<std::size_t>(index)})) {
+        // TODO: Can we avoid doing this expensive operation on a loop?
+        if (context.defines_sibling_annotation(
+                loop.value.filter, JSON{static_cast<std::size_t>(index)})) {
           continue;
         }
 

diff --git a/src/evaluator/include/sourcemeta/jsontoolkit/evaluator_context.h b/src/evaluator/include/sourcemeta/jsontoolkit/evaluator_context.h
@@ -77,11 +77,6 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
   auto resources() const noexcept -> const std::vector<std::string> &;
   auto mark(const std::size_t id, const SchemaCompilerTemplate &children)
       -> void;
-  // TODO: At least currently, we only need to mask if a schema
-  // makes use of `unevaluatedProperties` or `unevaluatedItems`
-  // Detect if a schema does need this so if not, we avoid
-  // an unnecessary copy
-  auto mask() -> void;
   auto jump(const std::size_t id) const noexcept
       -> const SchemaCompilerTemplate &;
   auto find_dynamic_anchor(const std::string &anchor) const
@@ -91,15 +86,17 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
   // Annotations
   ///////////////////////////////////////////////
 
+  // TODO: At least currently, we only need to mask if a schema
+  // makes use of `unevaluatedProperties` or `unevaluatedItems`
+  // Detect if a schema does need this so if not, we avoid
+  // an unnecessary copy
+  auto mask() -> void;
   auto annotate(const WeakPointer &current_instance_location, const JSON &value)
       -> std::pair<std::reference_wrapper<const JSON>, bool>;
-  auto defines_annotation(const WeakPointer &expected_instance_location,
-                          const WeakPointer &base_evaluate_path,
-                          const std::vector<std::string> &keywords,
-                          const JSON &value) const -> bool;
-  auto largest_annotation_index(const WeakPointer &expected_instance_location,
-                                const std::vector<std::string> &keywords,
-                                const std::uint64_t default_value) const
+  auto defines_any_annotation(const std::string &keyword) const -> bool;
+  auto defines_sibling_annotation(const std::vector<std::string> &keywords,
+                                  const JSON &value) const -> bool;
+  auto largest_annotation_index(const std::string &keyword) const
       -> std::uint64_t;
 
 public:
@@ -119,14 +116,15 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
   std::vector<std::pair<std::size_t, std::size_t>> frame_sizes;
   // TODO: Keep hashes of schema resources URI instead for performance reasons
   std::vector<std::string> resources_;
-  std::vector<WeakPointer> annotation_blacklist;
-  // We don't use a pair for holding the two pointers for runtime
-  // efficiency when resolving keywords like `unevaluatedProperties`
-  std::map<WeakPointer, std::map<WeakPointer, std::set<JSON>>> annotations_;
+  // TODO: Try unordered_map
   std::map<std::size_t,
            const std::reference_wrapper<const SchemaCompilerTemplate>>
       labels;
   bool property_as_instance{false};
+
+  // For annotations
+  std::vector<WeakPointer> annotation_blacklist;
+  std::map<WeakPointer, std::map<WeakPointer, std::set<JSON>>> annotations_;
 #if defined(_MSC_VER)
 #pragma warning(default : 4251 4275)
 #endif

diff --git a/src/evaluator/include/sourcemeta/jsontoolkit/evaluator_template.h b/src/evaluator/include/sourcemeta/jsontoolkit/evaluator_template.h
@@ -398,8 +398,7 @@ DEFINE_STEP_APPLICATOR(Annotation, LoopPropertiesUnevaluated,
 /// @ingroup evaluator_instructions
 /// @brief Represents a compiler step that loops over array items when the array
 /// is considered unmarked
-DEFINE_STEP_APPLICATOR(Annotation, LoopItemsUnmarked,
-                       SchemaCompilerValueStrings)
+DEFINE_STEP_APPLICATOR(Annotation, LoopItemsUnmarked, SchemaCompilerValueString)
 
 /// @ingroup evaluator_instructions
 /// @brief Represents a compiler step that loops over unevaluated array items

diff --git a/src/jsonschema/default_compiler_2019_09.h b/src/jsonschema/default_compiler_2019_09.h
@@ -254,7 +254,7 @@ auto compiler_2019_09_applicator_unevaluateditems(
   } else {
     return {make<SchemaCompilerAnnotationLoopItemsUnmarked>(
         true, context, schema_context, dynamic_context,
-        SchemaCompilerValueStrings{"unevaluatedItems"}, std::move(children))};
+        SchemaCompilerValueString{"unevaluatedItems"}, std::move(children))};
   }
 }
Benchmark suite	Current: `325d8d5`	Previous: `3a4fe47`	Ratio
`JSON_Array_Of_Objects_Unique`	`3601.20531728524` ns/iter	`3549.393265882946` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Meta_1_No_Callback`	`759.0822256651695` ns/iter	`734.1725795083421` ns/iter	`1.03`
`JSONSchema_Validate_Draft4_Required_Properties`	`937.3719000425016` ns/iter	`945.2266017646925` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match`	`155.73805638945015` ns/iter	`153.6907047478036` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match`	`105.68416360676466` ns/iter	`103.0077705579607` ns/iter	`1.03`
`JSONSchema_Validate_Draft4_Items_Schema`	`2637.8901797582394` ns/iter	`2659.5580418166933` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Nested_Object`	`22.856555156030144` ns/iter	`22.783026745988277` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Properties_Triad_Optional`	`1251.3728065119212` ns/iter	`1265.7504878534703` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Properties_Triad_Closed`	`947.204010681582` ns/iter	`946.1034076987258` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Properties_Triad_Required`	`1290.1556189434955` ns/iter	`1285.5867607536527` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Non_Recursive_Ref`	`212.21779049455017` ns/iter	`217.4742164170237` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Pattern_Properties_True`	`1336.24399196807` ns/iter	`1375.888668120108` ns/iter	`0.97`
`JSONSchema_Validate_Draft4_Ref_To_Single_Property`	`105.36712688851483` ns/iter	`106.05604820684901` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Additional_Properties_Type`	`404.58275075378754` ns/iter	`405.4761293396963` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Nested_Oneof`	`367.01093976465825` ns/iter	`362.43916855723046` ns/iter	`1.01`
`JSONSchema_Validate_Draft6_Property_Names`	`772.3649948488365` ns/iter	`774.568959249853` ns/iter	`1.00`
`JSONSchema_Validate_Draft7_If_Then_Else`	`170.1775544856989` ns/iter	`169.54774506262194` ns/iter	`1.00`
`JSONSchema_Compiler_Draft6_AdaptiveCard`	`2877265915.9999423` ns/iter	`2858944000.000065` ns/iter	`1.01`
Benchmark suite	Current: `325d8d5`	Previous: `3a4fe47`	Ratio
`JSON_Array_Of_Objects_Unique`	`2161.4760046110646` ns/iter	`2137.640880041647` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Meta_1_No_Callback`	`1046.9972514545368` ns/iter	`1011.2791365068838` ns/iter	`1.04`
`JSONSchema_Validate_Draft4_Required_Properties`	`1587.7048612219821` ns/iter	`1587.6124219236085` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match`	`185.68398964542652` ns/iter	`188.5442385411892` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match`	`132.86315295071216` ns/iter	`132.334791888428` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Items_Schema`	`3675.679766452237` ns/iter	`3619.1100703314773` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Nested_Object`	`33.459430347148256` ns/iter	`33.12390363981007` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Properties_Triad_Optional`	`1826.0754071809838` ns/iter	`1821.922172815473` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Properties_Triad_Closed`	`1501.9867296727937` ns/iter	`1497.8387577872168` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Properties_Triad_Required`	`1894.5364671280304` ns/iter	`1905.5558402039896` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Non_Recursive_Ref`	`484.593667820038` ns/iter	`492.04189321307945` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Pattern_Properties_True`	`2544.0580979472275` ns/iter	`2441.788125416647` ns/iter	`1.04`
`JSONSchema_Validate_Draft4_Ref_To_Single_Property`	`137.0662373200657` ns/iter	`139.95139848421047` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Additional_Properties_Type`	`604.8456599569215` ns/iter	`622.0221165113974` ns/iter	`0.97`
`JSONSchema_Validate_Draft4_Nested_Oneof`	`530.0078453367903` ns/iter	`482.70570428259407` ns/iter	`1.10`
`JSONSchema_Validate_Draft6_Property_Names`	`1290.9646134583027` ns/iter	`1231.2305365190764` ns/iter	`1.05`
`JSONSchema_Validate_Draft7_If_Then_Else`	`215.67793123181485` ns/iter	`211.09648734766827` ns/iter	`1.02`
`JSONSchema_Compiler_Draft6_AdaptiveCard`	`5486346679.000008` ns/iter	`5673982712.999987` ns/iter	`0.97`
Benchmark suite	Current: `325d8d5`	Previous: `3a4fe47`	Ratio
`JSONSchema_Compiler_Draft6_AdaptiveCard`	`6096565805.000069` ns/iter	`6252062793.000051` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Meta_1_No_Callback`	`1068.7569706779334` ns/iter	`1078.9147022041338` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Required_Properties`	`2228.087821262491` ns/iter	`2291.8973900288606` ns/iter	`0.97`
`JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match`	`187.8614577336374` ns/iter	`195.82817690354526` ns/iter	`0.96`
`JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match`	`131.84615226402724` ns/iter	`137.2107533278727` ns/iter	`0.96`
`JSONSchema_Validate_Draft4_Items_Schema`	`3169.8085298163483` ns/iter	`3078.5310473437903` ns/iter	`1.03`
`JSONSchema_Validate_Draft4_Nested_Object`	`22.395024477255976` ns/iter	`22.686073076710056` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Properties_Triad_Optional`	`1714.0403844673463` ns/iter	`1726.2935193364506` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Properties_Triad_Closed`	`1423.2336896693096` ns/iter	`1393.4087155077277` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Properties_Triad_Required`	`1821.2695044419013` ns/iter	`1779.8570804707992` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Non_Recursive_Ref`	`454.7486075398781` ns/iter	`472.326041552563` ns/iter	`0.96`
`JSONSchema_Validate_Draft4_Pattern_Properties_True`	`2259.6506273785826` ns/iter	`2325.710832059663` ns/iter	`0.97`
`JSONSchema_Validate_Draft4_Ref_To_Single_Property`	`144.73176980318598` ns/iter	`140.86203494581542` ns/iter	`1.03`
`JSONSchema_Validate_Draft4_Additional_Properties_Type`	`1069.8795104903218` ns/iter	`1115.7069373575357` ns/iter	`0.96`
`JSONSchema_Validate_Draft4_Nested_Oneof`	`426.12291734646726` ns/iter	`433.2267651895623` ns/iter	`0.98`
`JSONSchema_Validate_Draft6_Property_Names`	`1583.9339249665736` ns/iter	`1620.868124300633` ns/iter	`0.98`
`JSONSchema_Validate_Draft7_If_Then_Else`	`209.88699317788087` ns/iter	`196.12464334596655` ns/iter	`1.07`
`JSON_Array_Of_Objects_Unique`	`3224.1391836145463` ns/iter	`3267.846664787346` ns/iter	`0.99`
Benchmark suite	Current: `325d8d5`	Previous: `3a4fe47`	Ratio
`JSON_Array_Of_Objects_Unique`	`5109.583000000839` ns/iter	`5140.114999999241` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Meta_1_No_Callback`	`2357.2960714285987` ns/iter	`2336.7004054683894` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Required_Properties`	`2032.681398082235` ns/iter	`2057.8084375003414` ns/iter	`0.99`
`JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match`	`562.9867000000104` ns/iter	`553.3603571428055` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match`	`409.98590334601863` ns/iter	`409.5960308216625` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Items_Schema`	`6249.211607141093` ns/iter	`6445.889285714656` ns/iter	`0.97`
`JSONSchema_Validate_Draft4_Nested_Object`	`156.89470982143763` ns/iter	`160.14011160712423` ns/iter	`0.98`
`JSONSchema_Validate_Draft4_Properties_Triad_Optional`	`5502.149107142275` ns/iter	`5426.927999999406` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Properties_Triad_Closed`	`4505.31967918685` ns/iter	`4443.189865551247` ns/iter	`1.01`
`JSONSchema_Validate_Draft4_Properties_Triad_Required`	`5615.056999999979` ns/iter	`5493.878571428469` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Non_Recursive_Ref`	`555.5636999999933` ns/iter	`546.8381250001439` ns/iter	`1.02`
`JSONSchema_Validate_Draft4_Pattern_Properties_True`	`8455.160914460374` ns/iter	`8123.293526787835` ns/iter	`1.04`
`JSONSchema_Validate_Draft4_Ref_To_Single_Property`	`410.99135216376925` ns/iter	`411.0367673643791` ns/iter	`1.00`
`JSONSchema_Validate_Draft4_Additional_Properties_Type`	`789.9059151785793` ns/iter	`766.9361607141363` ns/iter	`1.03`
`JSONSchema_Validate_Draft4_Nested_Oneof`	`1097.370468750114` ns/iter	`1102.560624999782` ns/iter	`1.00`
`JSONSchema_Validate_Draft6_Property_Names`	`1858.4620164839218` ns/iter	`1903.14893138271` ns/iter	`0.98`
`JSONSchema_Validate_Draft7_If_Then_Else`	`595.73649999993` ns/iter	`560.327857142795` ns/iter	`1.06`
`JSONSchema_Compiler_Draft6_AdaptiveCard`	`9984234000.000015` ns/iter	`10156487200.000128` ns/iter	`0.98`