Skip to content

Commit

Permalink
Centralize annotation containment at least a bit
Browse files Browse the repository at this point in the history
So we can more easily create other compiler rules without duplicating
all of the currently messy logic.

Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Aug 31, 2024
1 parent a4a0cbd commit e610a3c
Showing 1 changed file with 123 additions and 97 deletions.
220 changes: 123 additions & 97 deletions src/jsonschema/compile_evaluate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class EvaluationContext {
return {*(result.first), result.second};
}

private:
auto
annotations(const Pointer &current_instance_location,
const Pointer &schema_location) const -> const Annotations & {
Expand All @@ -60,6 +61,7 @@ class EvaluationContext {
return schema_location_result->second;
}

// TODO: This should be a private method
auto annotations(const Pointer &current_instance_location) const
-> const InstanceAnnotations & {
static const InstanceAnnotations placeholder;
Expand All @@ -74,6 +76,99 @@ class EvaluationContext {
return instance_location_result->second;
}

public:
auto defines_any_adjacent_annotation(
const Pointer &expected_instance_location,
const Pointer &base_evaluate_path,
const std::set<std::string> &keywords) const -> bool {
for (const auto &keyword : keywords) {
// TODO: How can we avoid this expensive pointer manipulation?
auto expected_evaluate_path{base_evaluate_path};
expected_evaluate_path.push_back({keyword});
if (!this->annotations(expected_instance_location, expected_evaluate_path)
.empty()) {
return true;
}
}

return false;
}

auto defines_adjacent_annotation(const Pointer &expected_instance_location,
const Pointer &base_evaluate_path,
const std::set<std::string> &keywords,
const JSON &value) const -> bool {
for (const auto &keyword : keywords) {
auto expected_evaluate_path{base_evaluate_path};
expected_evaluate_path.push_back({keyword});
if (this->annotations(expected_instance_location, expected_evaluate_path)
.contains(value)) {
return true;
}
}

return false;
}

// Reports whether `value` was annotated at `expected_instance_location` by
// one of the given keywords. An entry only counts when:
//
// - The last token of its schema location is a property listed in `keywords`
// - Its annotations contain `value`
// - Its schema location, without the last token, starts with
//   `base_evaluate_path`
// - It is not hidden by the annotation blacklist, i.e. there is no
//   blacklisted prefix that covers the schema location without also
//   covering the current evaluate path
//
// Returns false if `keywords` is empty.
auto defines_annotation(const Pointer &expected_instance_location,
                        const Pointer &base_evaluate_path,
                        const std::set<std::string> &keywords,
                        const JSON &value) const -> bool {
  if (keywords.empty()) {
    return false;
  }

  // Bind by reference: the lookup returns a const reference, so taking it by
  // value would copy every annotation of this instance location on each call
  const auto &instance_annotations{
      this->annotations(expected_instance_location)};
  for (const auto &[schema_location, schema_annotations] :
       instance_annotations) {
    assert(!schema_location.empty());
    const auto &keyword{schema_location.back()};
    if (keyword.is_property() && keywords.contains(keyword.to_property()) &&
        schema_annotations.contains(value) &&
        schema_location.initial().starts_with(base_evaluate_path)) {
      bool blacklisted{false};
      for (const auto &masked : this->annotation_blacklist) {
        if (schema_location.starts_with(masked) &&
            !this->evaluate_path_.starts_with(masked)) {
          blacklisted = true;
          break;
        }
      }

      if (!blacklisted) {
        return true;
      }
    }
  }

  return false;
}

// Scans the annotations collected for `expected_instance_location` whose
// schema location ends in a property listed in `keywords`. For every such
// annotation that is a positive integer, the candidate result is that
// integer plus one. Returns the largest candidate found, or `default_value`
// if it is larger than every candidate (or there are none).
auto largest_annotation_index(const Pointer &expected_instance_location,
                              const std::set<std::string> &keywords,
                              const std::uint64_t default_value) const
    -> std::uint64_t {
  std::uint64_t largest{default_value};
  const auto &entries{this->annotations(expected_instance_location)};
  for (const auto &[location, values] : entries) {
    assert(!location.empty());
    const auto &last_token{location.back()};
    const bool relevant{last_token.is_property() &&
                        keywords.contains(last_token.to_property())};
    if (relevant) {
      for (const auto &candidate : values) {
        // Only positive integer annotations take part in the result
        if (!candidate.is_integer() || !candidate.is_positive()) {
          continue;
        }

        const std::uint64_t next_index{
            static_cast<std::uint64_t>(candidate.to_integer()) + 1};
        largest = std::max(largest, next_index);
      }
    }
  }

  return largest;
}

template <typename T>
auto push(const T &step, const Pointer &relative_evaluate_path,
const Pointer &relative_instance_location) -> void {
Expand Down Expand Up @@ -201,17 +296,6 @@ class EvaluationContext {
this->annotation_blacklist.insert(this->evaluate_path_);
}

// Reports whether `path` is hidden by the annotation blacklist: true when
// some blacklisted prefix covers `path` but does not cover the current
// evaluate path.
auto masked(const Pointer &path) const -> bool {
  for (const auto &blacklisted_prefix : this->annotation_blacklist) {
    // Prefixes that do not cover the given path are irrelevant
    if (!path.starts_with(blacklisted_prefix)) {
      continue;
    }

    // A prefix that also covers the current evaluate path does not hide it
    if (this->evaluate_path_.starts_with(blacklisted_prefix)) {
      continue;
    }

    return true;
  }

  return false;
}

auto find_dynamic_anchor(const std::string &anchor) const
-> std::optional<std::size_t> {
for (const auto &resource : this->resources()) {
Expand Down Expand Up @@ -615,33 +699,15 @@ auto evaluate_step(
instance);
CALLBACK_PRE(assertion, context.instance_location());
const auto &value{context.resolve_value(assertion.value, instance)};
const auto &target{
context.resolve_target<EvaluationContext::InstanceAnnotations>(
assertion.target, instance)};
result = true;

if (!assertion.data.empty()) {
for (const auto &[schema_location, annotations] : target) {
assert(!schema_location.empty());
const auto &keyword{schema_location.back()};
if (keyword.is_property() &&
assertion.data.contains(keyword.to_property()) &&
annotations.contains(value) &&
// Make sure it's not a cousin annotation, which can
// never be seen
// TODO: Have a better function at Pointer to check
// for these "initial starts with" cases in a way
// that we don't have to copy pointers, which `.initial()`
// does.
schema_location.initial().starts_with(
context.evaluate_path().initial()) &&
// We want to ignore certain annotations, like the ones
// inside "not"
!context.masked(schema_location)) {
result = false;
break;
}
}
if (assertion.target.first == SchemaCompilerTargetType::ParentAnnotations) {
result = !context.defines_annotation(
context.instance_location().initial(),
context.evaluate_path().initial(), assertion.data, value);
} else {
result = !context.defines_annotation(context.instance_location(),
context.evaluate_path().initial(),
assertion.data, value);
}

CALLBACK_POST("SchemaCompilerAssertionNoAnnotation", assertion);
Expand Down Expand Up @@ -728,16 +794,11 @@ auto evaluate_step(
EVALUATE_CONDITION_GUARD("SchemaCompilerLogicalWhenUnmarked", logical,
instance);
const auto &value{context.resolve_value(logical.value, instance)};

// TODO: How can we avoid this expensive pointer manipulation?
auto expected_evaluate_path{context.evaluate_path()};
expected_evaluate_path.pop_back();
expected_evaluate_path.push_back({value});
const auto &current_annotations{context.annotations(
context.instance_location(), expected_evaluate_path)};
EVALUATE_IMPLICIT_PRECONDITION("SchemaCompilerLogicalWhenUnmarked", logical,
current_annotations.empty());

!context.defines_any_adjacent_annotation(
context.instance_location(),
context.evaluate_path().initial(),
{value}));
CALLBACK_PRE(logical, context.instance_location());
result = true;
for (const auto &child : logical.children) {
Expand All @@ -755,16 +816,11 @@ auto evaluate_step(
EVALUATE_CONDITION_GUARD("SchemaCompilerLogicalWhenMarked", logical,
instance);
const auto &value{context.resolve_value(logical.value, instance)};

// TODO: How can we avoid this expensive pointer manipulation?
auto expected_evaluate_path{context.evaluate_path()};
expected_evaluate_path.pop_back();
expected_evaluate_path.push_back({value});
const auto &current_annotations{context.annotations(
context.instance_location(), expected_evaluate_path)};
EVALUATE_IMPLICIT_PRECONDITION("SchemaCompilerLogicalWhenMarked", logical,
!current_annotations.empty());

context.defines_any_adjacent_annotation(
context.instance_location(),
context.evaluate_path().initial(),
{value}));
CALLBACK_PRE(logical, context.instance_location());
result = true;
for (const auto &child : logical.children) {
Expand Down Expand Up @@ -1077,30 +1133,16 @@ auto evaluate_step(
const auto &value{context.resolve_value(loop.value, instance)};
assert(!value.empty());

// TODO: Find a way to be more efficient with this
std::vector<std::reference_wrapper<const EvaluationContext::Annotations>>
current_annotations;
for (const auto &keyword : value) {
assert(!context.evaluate_path().empty());
// TODO: Can we avoid this expensive pointer manipulation?
auto expected_evaluate_path{context.evaluate_path()};
expected_evaluate_path.pop_back();
expected_evaluate_path.push_back({keyword});
current_annotations.emplace_back(context.annotations(
context.instance_location(), expected_evaluate_path));
}

for (const auto &entry : target.as_object()) {
bool apply_children{true};
for (const auto &annotations : current_annotations) {
// TODO: Can we avoid this JSON conversion?
if (annotations.get().contains(JSON{entry.first})) {
apply_children = false;
break;
}
}

if (!apply_children) {
// TODO: It might be more efficient to get all the annotations we
// potentially care about as a set first, and then make the loop
// check for O(1) containment in that set?
if (context.defines_adjacent_annotation(
context.instance_location(),
// TODO: Can we avoid doing this expensive operation on a loop?
context.evaluate_path().initial(), value,
// TODO: This conversion implies a string copy
JSON{entry.first})) {
continue;
}

Expand Down Expand Up @@ -1203,25 +1245,9 @@ auto evaluate_step(
auto iterator{array.cbegin()};

// Determine the proper start based on integer annotations collected for the
// current instance location by the keyword requested by the user. We will
// exhaustively check the matching annotations and end up with the largest
// index or zero
std::uint64_t start{0};
for (const auto &[schema_location, annotations] :
context.annotations(context.instance_location())) {
assert(!schema_location.empty());
const auto &keyword{schema_location.back()};
if (!keyword.is_property() || keyword.to_property() != value) {
continue;
}

for (const auto &annotation : annotations) {
if (annotation.is_integer() && annotation.is_positive()) {
start = std::max(
start, static_cast<std::uint64_t>(annotation.to_integer()) + 1);
}
}
}
// current instance location by the keyword requested by the user.
const std::uint64_t start{context.largest_annotation_index(
context.instance_location(), {value}, 0)};

// We need this check, as advancing an iterator past its bounds
// is considered undefined behavior
Expand Down

0 comments on commit e610a3c

Please sign in to comment.