Skip to content

Commit

Permalink
Revise EvaluationContext annotation interface (#1295)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti authored Oct 13, 2024
1 parent 3a4fe47 commit 325d8d5
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 63 deletions.
78 changes: 51 additions & 27 deletions src/evaluator/context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ auto EvaluationContext::prepare(const JSON &instance) -> void {
assert(this->resources_.empty());
this->instances_.clear();
this->instances_.emplace_back(instance);
this->annotation_blacklist.clear();
this->annotations_.clear();
this->labels.clear();
this->property_as_instance = false;
this->annotation_blacklist.clear();
this->annotations_.clear();
}

auto EvaluationContext::push_without_traverse(
Expand Down Expand Up @@ -93,6 +93,14 @@ auto EvaluationContext::pop(const bool dynamic) -> void {
}
}

// TODO: At least currently, we only need to mask if a schema
// makes use of `unevaluatedProperties` or `unevaluatedItems`
// Detect if a schema does need this so if not, we avoid
// an unnecessary copy
auto EvaluationContext::mask() -> void {
this->annotation_blacklist.push_back(this->evaluate_path_);
}

auto EvaluationContext::annotate(const WeakPointer &current_instance_location,
const JSON &value)
-> std::pair<std::reference_wrapper<const JSON>, bool> {
Expand All @@ -102,16 +110,48 @@ auto EvaluationContext::annotate(const WeakPointer &current_instance_location,
return {*(result.first), result.second};
}

auto EvaluationContext::defines_annotation(
const WeakPointer &expected_instance_location,
const WeakPointer &base_evaluate_path,
auto EvaluationContext::defines_any_annotation(
const std::string &expected_keyword) const -> bool {
const auto instance_location_result{
this->annotations_.find(this->instance_location_)};
if (instance_location_result == this->annotations_.end()) {
return false;
}

for (const auto &[schema_location, schema_annotations] :
instance_location_result->second) {
assert(!schema_location.empty());
const auto &keyword{schema_location.back()};

if (keyword.is_property() && expected_keyword == keyword.to_property() &&
!schema_annotations.empty() &&
schema_location.initial().starts_with(this->evaluate_path_)) {
bool blacklisted = false;
for (const auto &masked : this->annotation_blacklist) {
if (schema_location.starts_with(masked) &&
!this->evaluate_path_.starts_with(masked)) {
blacklisted = true;
break;
}
}

if (!blacklisted) {
return true;
}
}
}

return false;
}

auto EvaluationContext::defines_sibling_annotation(
const std::vector<std::string> &keywords, const JSON &value) const -> bool {
if (keywords.empty()) {
return false;
}

const auto instance_location_result{
this->annotations_.find(expected_instance_location)};
this->annotations_.find(this->instance_location_)};
if (instance_location_result == this->annotations_.end()) {
return false;
}
Expand All @@ -125,7 +165,7 @@ auto EvaluationContext::defines_annotation(
std::find(keywords.cbegin(), keywords.cend(), keyword.to_property()) !=
keywords.cend() &&
schema_annotations.contains(value) &&
schema_location.initial().starts_with(base_evaluate_path)) {
schema_location.initial().starts_with(this->evaluate_path_.initial())) {
bool blacklisted = false;
for (const auto &masked : this->annotation_blacklist) {
if (schema_location.starts_with(masked) &&
Expand All @@ -145,15 +185,12 @@ auto EvaluationContext::defines_annotation(
}

auto EvaluationContext::largest_annotation_index(
const WeakPointer &expected_instance_location,
const std::vector<std::string> &keywords,
const std::uint64_t default_value) const -> std::uint64_t {
const std::string &expected_keyword) const -> std::uint64_t {
// TODO: We should be taking masks into account

std::uint64_t result{default_value};
std::uint64_t result{0};

const auto instance_location_result{
this->annotations_.find(expected_instance_location)};
this->annotations_.find(this->instance_location_)};
if (instance_location_result == this->annotations_.end()) {
return result;
}
Expand All @@ -162,12 +199,7 @@ auto EvaluationContext::largest_annotation_index(
instance_location_result->second) {
assert(!schema_location.empty());
const auto &keyword{schema_location.back()};
if (!keyword.is_property()) {
continue;
}

if (std::find(keywords.cbegin(), keywords.cend(), keyword.to_property()) ==
keywords.cend()) {
if (!keyword.is_property() || expected_keyword != keyword.to_property()) {
continue;
}

Expand Down Expand Up @@ -255,14 +287,6 @@ auto EvaluationContext::mark(const std::size_t id,
this->labels.try_emplace(id, children);
}

// TODO: At least currently, we only need to mask if a schema
// makes use of `unevaluatedProperties` or `unevaluatedItems`
// Detect if a schema does need this so if not, we avoid
// an unnecessary copy
auto EvaluationContext::mask() -> void {
this->annotation_blacklist.push_back(this->evaluate_path_);
}

auto EvaluationContext::jump(const std::size_t id) const noexcept
-> const SchemaCompilerTemplate & {
assert(this->labels.contains(id));
Expand Down
26 changes: 9 additions & 17 deletions src/evaluator/evaluator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -749,10 +749,8 @@ auto evaluate_step(
// TODO: It might be more efficient to get all the annotations we
// potentially care about as a set first, and the make the loop
// check for O(1) containment in that set?
if (context.defines_annotation(
context.instance_location(),
// TODO: Can we avoid doing this expensive operation on a loop?
context.evaluate_path().initial(), loop.value,
if (context.defines_sibling_annotation(
loop.value,
// TODO: This conversion implies a string copy
JSON{entry.first})) {
continue;
Expand All @@ -778,9 +776,7 @@ auto evaluate_step(
case IS_STEP(SchemaCompilerAnnotationLoopItemsUnmarked): {
EVALUATE_BEGIN(loop, SchemaCompilerAnnotationLoopItemsUnmarked,
target.is_array() &&
!context.defines_annotation(
context.instance_location(),
context.evaluate_path(), loop.value, JSON{true}));
!context.defines_any_annotation(loop.value));
// Otherwise you shouldn't be using this step?
assert(!loop.value.empty());
const auto &array{target.as_array()};
Expand Down Expand Up @@ -808,18 +804,16 @@ auto evaluate_step(
case IS_STEP(SchemaCompilerAnnotationLoopItemsUnevaluated): {
// TODO: This precondition is very expensive due to pointer manipulation
EVALUATE_BEGIN(loop, SchemaCompilerAnnotationLoopItemsUnevaluated,
target.is_array() && !context.defines_annotation(
context.instance_location(),
context.evaluate_path().initial(),
target.is_array() && !context.defines_sibling_annotation(
loop.value.mask, JSON{true}));
const auto &array{target.as_array()};
result = true;
auto iterator{array.cbegin()};

// Determine the proper start based on integer annotations collected for
// the current instance location by the keyword requested by the user.
const std::uint64_t start{context.largest_annotation_index(
context.instance_location(), {loop.value.index}, 0)};
const std::uint64_t start{
context.largest_annotation_index(loop.value.index)};

// We need this check, as advancing an iterator past its bounds
// is considered undefined behavior
Expand All @@ -831,11 +825,9 @@ auto evaluate_step(
for (; iterator != array.cend(); ++iterator) {
const auto index{std::distance(array.cbegin(), iterator)};

if (context.defines_annotation(
context.instance_location(),
// TODO: Can we avoid doing this expensive operation on a loop?
context.evaluate_path().initial(), loop.value.filter,
JSON{static_cast<std::size_t>(index)})) {
// TODO: Can we avoid doing this expensive operation on a loop?
if (context.defines_sibling_annotation(
loop.value.filter, JSON{static_cast<std::size_t>(index)})) {
continue;
}

Expand Down
30 changes: 14 additions & 16 deletions src/evaluator/include/sourcemeta/jsontoolkit/evaluator_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,6 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
auto resources() const noexcept -> const std::vector<std::string> &;
auto mark(const std::size_t id, const SchemaCompilerTemplate &children)
-> void;
// TODO: At least currently, we only need to mask if a schema
// makes use of `unevaluatedProperties` or `unevaluatedItems`
// Detect if a schema does need this so if not, we avoid
// an unnecessary copy
auto mask() -> void;
auto jump(const std::size_t id) const noexcept
-> const SchemaCompilerTemplate &;
auto find_dynamic_anchor(const std::string &anchor) const
Expand All @@ -91,15 +86,17 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
// Annotations
///////////////////////////////////////////////

// TODO: At least currently, we only need to mask if a schema
// makes use of `unevaluatedProperties` or `unevaluatedItems`
// Detect if a schema does need this so if not, we avoid
// an unnecessary copy
auto mask() -> void;
auto annotate(const WeakPointer &current_instance_location, const JSON &value)
-> std::pair<std::reference_wrapper<const JSON>, bool>;
auto defines_annotation(const WeakPointer &expected_instance_location,
const WeakPointer &base_evaluate_path,
const std::vector<std::string> &keywords,
const JSON &value) const -> bool;
auto largest_annotation_index(const WeakPointer &expected_instance_location,
const std::vector<std::string> &keywords,
const std::uint64_t default_value) const
auto defines_any_annotation(const std::string &keyword) const -> bool;
auto defines_sibling_annotation(const std::vector<std::string> &keywords,
const JSON &value) const -> bool;
auto largest_annotation_index(const std::string &keyword) const
-> std::uint64_t;

public:
Expand All @@ -119,14 +116,15 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
std::vector<std::pair<std::size_t, std::size_t>> frame_sizes;
// TODO: Keep hashes of schema resources URI instead for performance reasons
std::vector<std::string> resources_;
std::vector<WeakPointer> annotation_blacklist;
// We don't use a pair for holding the two pointers for runtime
// efficiency when resolving keywords like `unevaluatedProperties`
std::map<WeakPointer, std::map<WeakPointer, std::set<JSON>>> annotations_;
// TODO: Try unordered_map
std::map<std::size_t,
const std::reference_wrapper<const SchemaCompilerTemplate>>
labels;
bool property_as_instance{false};

// For annotations
std::vector<WeakPointer> annotation_blacklist;
std::map<WeakPointer, std::map<WeakPointer, std::set<JSON>>> annotations_;
#if defined(_MSC_VER)
#pragma warning(default : 4251 4275)
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,7 @@ DEFINE_STEP_APPLICATOR(Annotation, LoopPropertiesUnevaluated,
/// @ingroup evaluator_instructions
/// @brief Represents a compiler step that loops over array items when the array
/// is considered unmarked
DEFINE_STEP_APPLICATOR(Annotation, LoopItemsUnmarked,
SchemaCompilerValueStrings)
DEFINE_STEP_APPLICATOR(Annotation, LoopItemsUnmarked, SchemaCompilerValueString)

/// @ingroup evaluator_instructions
/// @brief Represents a compiler step that loops over unevaluated array items
Expand Down
2 changes: 1 addition & 1 deletion src/jsonschema/default_compiler_2019_09.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ auto compiler_2019_09_applicator_unevaluateditems(
} else {
return {make<SchemaCompilerAnnotationLoopItemsUnmarked>(
true, context, schema_context, dynamic_context,
SchemaCompilerValueStrings{"unevaluatedItems"}, std::move(children))};
SchemaCompilerValueString{"unevaluatedItems"}, std::move(children))};
}
}

Expand Down

4 comments on commit 325d8d5

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: 325d8d5 Previous: 3a4fe47 Ratio
JSON_Array_Of_Objects_Unique 3601.20531728524 ns/iter 3549.393265882946 ns/iter 1.01
JSONSchema_Validate_Draft4_Meta_1_No_Callback 759.0822256651695 ns/iter 734.1725795083421 ns/iter 1.03
JSONSchema_Validate_Draft4_Required_Properties 937.3719000425016 ns/iter 945.2266017646925 ns/iter 0.99
JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match 155.73805638945015 ns/iter 153.6907047478036 ns/iter 1.01
JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match 105.68416360676466 ns/iter 103.0077705579607 ns/iter 1.03
JSONSchema_Validate_Draft4_Items_Schema 2637.8901797582394 ns/iter 2659.5580418166933 ns/iter 0.99
JSONSchema_Validate_Draft4_Nested_Object 22.856555156030144 ns/iter 22.783026745988277 ns/iter 1.00
JSONSchema_Validate_Draft4_Properties_Triad_Optional 1251.3728065119212 ns/iter 1265.7504878534703 ns/iter 0.99
JSONSchema_Validate_Draft4_Properties_Triad_Closed 947.204010681582 ns/iter 946.1034076987258 ns/iter 1.00
JSONSchema_Validate_Draft4_Properties_Triad_Required 1290.1556189434955 ns/iter 1285.5867607536527 ns/iter 1.00
JSONSchema_Validate_Draft4_Non_Recursive_Ref 212.21779049455017 ns/iter 217.4742164170237 ns/iter 0.98
JSONSchema_Validate_Draft4_Pattern_Properties_True 1336.24399196807 ns/iter 1375.888668120108 ns/iter 0.97
JSONSchema_Validate_Draft4_Ref_To_Single_Property 105.36712688851483 ns/iter 106.05604820684901 ns/iter 0.99
JSONSchema_Validate_Draft4_Additional_Properties_Type 404.58275075378754 ns/iter 405.4761293396963 ns/iter 1.00
JSONSchema_Validate_Draft4_Nested_Oneof 367.01093976465825 ns/iter 362.43916855723046 ns/iter 1.01
JSONSchema_Validate_Draft6_Property_Names 772.3649948488365 ns/iter 774.568959249853 ns/iter 1.00
JSONSchema_Validate_Draft7_If_Then_Else 170.1775544856989 ns/iter 169.54774506262194 ns/iter 1.00
JSONSchema_Compiler_Draft6_AdaptiveCard 2877265915.9999423 ns/iter 2858944000.000065 ns/iter 1.01

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: 325d8d5 Previous: 3a4fe47 Ratio
JSON_Array_Of_Objects_Unique 2161.4760046110646 ns/iter 2137.640880041647 ns/iter 1.01
JSONSchema_Validate_Draft4_Meta_1_No_Callback 1046.9972514545368 ns/iter 1011.2791365068838 ns/iter 1.04
JSONSchema_Validate_Draft4_Required_Properties 1587.7048612219821 ns/iter 1587.6124219236085 ns/iter 1.00
JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match 185.68398964542652 ns/iter 188.5442385411892 ns/iter 0.98
JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match 132.86315295071216 ns/iter 132.334791888428 ns/iter 1.00
JSONSchema_Validate_Draft4_Items_Schema 3675.679766452237 ns/iter 3619.1100703314773 ns/iter 1.02
JSONSchema_Validate_Draft4_Nested_Object 33.459430347148256 ns/iter 33.12390363981007 ns/iter 1.01
JSONSchema_Validate_Draft4_Properties_Triad_Optional 1826.0754071809838 ns/iter 1821.922172815473 ns/iter 1.00
JSONSchema_Validate_Draft4_Properties_Triad_Closed 1501.9867296727937 ns/iter 1497.8387577872168 ns/iter 1.00
JSONSchema_Validate_Draft4_Properties_Triad_Required 1894.5364671280304 ns/iter 1905.5558402039896 ns/iter 0.99
JSONSchema_Validate_Draft4_Non_Recursive_Ref 484.593667820038 ns/iter 492.04189321307945 ns/iter 0.98
JSONSchema_Validate_Draft4_Pattern_Properties_True 2544.0580979472275 ns/iter 2441.788125416647 ns/iter 1.04
JSONSchema_Validate_Draft4_Ref_To_Single_Property 137.0662373200657 ns/iter 139.95139848421047 ns/iter 0.98
JSONSchema_Validate_Draft4_Additional_Properties_Type 604.8456599569215 ns/iter 622.0221165113974 ns/iter 0.97
JSONSchema_Validate_Draft4_Nested_Oneof 530.0078453367903 ns/iter 482.70570428259407 ns/iter 1.10
JSONSchema_Validate_Draft6_Property_Names 1290.9646134583027 ns/iter 1231.2305365190764 ns/iter 1.05
JSONSchema_Validate_Draft7_If_Then_Else 215.67793123181485 ns/iter 211.09648734766827 ns/iter 1.02
JSONSchema_Compiler_Draft6_AdaptiveCard 5486346679.000008 ns/iter 5673982712.999987 ns/iter 0.97

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: 325d8d5 Previous: 3a4fe47 Ratio
JSONSchema_Compiler_Draft6_AdaptiveCard 6096565805.000069 ns/iter 6252062793.000051 ns/iter 0.98
JSONSchema_Validate_Draft4_Meta_1_No_Callback 1068.7569706779334 ns/iter 1078.9147022041338 ns/iter 0.99
JSONSchema_Validate_Draft4_Required_Properties 2228.087821262491 ns/iter 2291.8973900288606 ns/iter 0.97
JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match 187.8614577336374 ns/iter 195.82817690354526 ns/iter 0.96
JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match 131.84615226402724 ns/iter 137.2107533278727 ns/iter 0.96
JSONSchema_Validate_Draft4_Items_Schema 3169.8085298163483 ns/iter 3078.5310473437903 ns/iter 1.03
JSONSchema_Validate_Draft4_Nested_Object 22.395024477255976 ns/iter 22.686073076710056 ns/iter 0.99
JSONSchema_Validate_Draft4_Properties_Triad_Optional 1714.0403844673463 ns/iter 1726.2935193364506 ns/iter 0.99
JSONSchema_Validate_Draft4_Properties_Triad_Closed 1423.2336896693096 ns/iter 1393.4087155077277 ns/iter 1.02
JSONSchema_Validate_Draft4_Properties_Triad_Required 1821.2695044419013 ns/iter 1779.8570804707992 ns/iter 1.02
JSONSchema_Validate_Draft4_Non_Recursive_Ref 454.7486075398781 ns/iter 472.326041552563 ns/iter 0.96
JSONSchema_Validate_Draft4_Pattern_Properties_True 2259.6506273785826 ns/iter 2325.710832059663 ns/iter 0.97
JSONSchema_Validate_Draft4_Ref_To_Single_Property 144.73176980318598 ns/iter 140.86203494581542 ns/iter 1.03
JSONSchema_Validate_Draft4_Additional_Properties_Type 1069.8795104903218 ns/iter 1115.7069373575357 ns/iter 0.96
JSONSchema_Validate_Draft4_Nested_Oneof 426.12291734646726 ns/iter 433.2267651895623 ns/iter 0.98
JSONSchema_Validate_Draft6_Property_Names 1583.9339249665736 ns/iter 1620.868124300633 ns/iter 0.98
JSONSchema_Validate_Draft7_If_Then_Else 209.88699317788087 ns/iter 196.12464334596655 ns/iter 1.07
JSON_Array_Of_Objects_Unique 3224.1391836145463 ns/iter 3267.846664787346 ns/iter 0.99

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: 325d8d5 Previous: 3a4fe47 Ratio
JSON_Array_Of_Objects_Unique 5109.583000000839 ns/iter 5140.114999999241 ns/iter 0.99
JSONSchema_Validate_Draft4_Meta_1_No_Callback 2357.2960714285987 ns/iter 2336.7004054683894 ns/iter 1.01
JSONSchema_Validate_Draft4_Required_Properties 2032.681398082235 ns/iter 2057.8084375003414 ns/iter 0.99
JSONSchema_Validate_Draft4_Many_Optional_Properties_Minimal_Match 562.9867000000104 ns/iter 553.3603571428055 ns/iter 1.02
JSONSchema_Validate_Draft4_Few_Optional_Properties_Minimal_Match 409.98590334601863 ns/iter 409.5960308216625 ns/iter 1.00
JSONSchema_Validate_Draft4_Items_Schema 6249.211607141093 ns/iter 6445.889285714656 ns/iter 0.97
JSONSchema_Validate_Draft4_Nested_Object 156.89470982143763 ns/iter 160.14011160712423 ns/iter 0.98
JSONSchema_Validate_Draft4_Properties_Triad_Optional 5502.149107142275 ns/iter 5426.927999999406 ns/iter 1.01
JSONSchema_Validate_Draft4_Properties_Triad_Closed 4505.31967918685 ns/iter 4443.189865551247 ns/iter 1.01
JSONSchema_Validate_Draft4_Properties_Triad_Required 5615.056999999979 ns/iter 5493.878571428469 ns/iter 1.02
JSONSchema_Validate_Draft4_Non_Recursive_Ref 555.5636999999933 ns/iter 546.8381250001439 ns/iter 1.02
JSONSchema_Validate_Draft4_Pattern_Properties_True 8455.160914460374 ns/iter 8123.293526787835 ns/iter 1.04
JSONSchema_Validate_Draft4_Ref_To_Single_Property 410.99135216376925 ns/iter 411.0367673643791 ns/iter 1.00
JSONSchema_Validate_Draft4_Additional_Properties_Type 789.9059151785793 ns/iter 766.9361607141363 ns/iter 1.03
JSONSchema_Validate_Draft4_Nested_Oneof 1097.370468750114 ns/iter 1102.560624999782 ns/iter 1.00
JSONSchema_Validate_Draft6_Property_Names 1858.4620164839218 ns/iter 1903.14893138271 ns/iter 0.98
JSONSchema_Validate_Draft7_If_Then_Else 595.73649999993 ns/iter 560.327857142795 ns/iter 1.06
JSONSchema_Compiler_Draft6_AdaptiveCard 9984234000.000015 ns/iter 10156487200.000128 ns/iter 0.98

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.