Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit step schema resources as hashes #1306

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions src/evaluator/context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ auto EvaluationContext::prepare(const JSON &instance) -> void {
auto EvaluationContext::push_without_traverse(
const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic) -> void {
const std::size_t &schema_resource, const bool dynamic) -> void {
// Guard against infinite recursion in a cheap manner, as
// infinite recursion will manifest itself through huge
// ever-growing evaluate paths
Expand All @@ -49,7 +49,7 @@ auto EvaluationContext::push_without_traverse(

auto EvaluationContext::push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic) -> void {
this->push_without_traverse(relative_schema_location,
relative_instance_location, schema_resource,
Expand All @@ -63,7 +63,7 @@ auto EvaluationContext::push(const Pointer &relative_schema_location,

auto EvaluationContext::push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic,
std::reference_wrapper<const JSON> &&new_instance)
-> void {
Expand All @@ -85,8 +85,6 @@ auto EvaluationContext::pop(const bool dynamic) -> void {

this->frame_sizes.pop_back();

// TODO: Do schema resource management using hashes to avoid
// expensive string comparisons
if (dynamic) {
assert(!this->resources_.empty());
this->resources_.pop_back();
Expand Down Expand Up @@ -235,19 +233,14 @@ auto EvaluationContext::instances() const noexcept
return this->instances_;
}

auto EvaluationContext::hash(const std::string &base,
auto EvaluationContext::hash(const std::size_t &resource,
const std::string &fragment) const noexcept
-> std::size_t {
// TODO: Avoid these string copies
std::ostringstream result;
result << base;
result << '#';
result << fragment;
return this->hasher_(result.str());
return resource + this->hasher_(fragment);
}

auto EvaluationContext::resources() const noexcept
-> const std::vector<std::string> & {
-> const std::vector<std::size_t> & {
return this->resources_;
}

Expand Down
15 changes: 7 additions & 8 deletions src/evaluator/include/sourcemeta/jsontoolkit/evaluator_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
auto instance_location() const noexcept -> const WeakPointer &;
auto push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic) -> void;
const std::size_t &schema_resource, const bool dynamic) -> void;
// A performance shortcut for pushing without re-traversing the target
// if we already know what the destination target will be
auto push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic,
const std::size_t &schema_resource, const bool dynamic,
std::reference_wrapper<const JSON> &&new_instance) -> void;
auto pop(const bool dynamic) -> void;
auto enter(const WeakPointer::Token::Property &property) -> void;
Expand All @@ -57,7 +57,7 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
private:
auto push_without_traverse(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic) -> void;

public:
Expand All @@ -77,9 +77,9 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
// References and anchors
///////////////////////////////////////////////

auto hash(const std::string &base, const std::string &fragment) const noexcept
-> std::size_t;
auto resources() const noexcept -> const std::vector<std::string> &;
auto hash(const std::size_t &resource,
const std::string &fragment) const noexcept -> std::size_t;
auto resources() const noexcept -> const std::vector<std::size_t> &;
auto mark(const std::size_t id, const SchemaCompilerTemplate &children)
-> void;
auto jump(const std::size_t id) const noexcept
Expand Down Expand Up @@ -120,8 +120,7 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
WeakPointer instance_location_;
std::vector<std::pair<std::size_t, std::size_t>> frame_sizes;
const std::hash<std::string> hasher_{};
// TODO: Keep hashes of schema resources URI instead for performance reasons
std::vector<std::string> resources_;
std::vector<std::size_t> resources_;
// TODO: Try unordered_map
std::map<std::size_t,
const std::reference_wrapper<const SchemaCompilerTemplate>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ enum class SchemaCompilerTemplateIndex : std::uint8_t {
const Pointer relative_schema_location; \
const Pointer relative_instance_location; \
const std::string keyword_location; \
const std::string schema_resource; \
const std::size_t schema_resource; \
const bool dynamic; \
const bool report; \
const type value; \
Expand All @@ -198,7 +198,7 @@ enum class SchemaCompilerTemplateIndex : std::uint8_t {
const Pointer relative_schema_location; \
const Pointer relative_instance_location; \
const std::string keyword_location; \
const std::string schema_resource; \
const std::size_t schema_resource; \
const bool dynamic; \
const bool report; \
const type value; \
Expand Down
10 changes: 7 additions & 3 deletions src/jsonschema/compile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ auto compile_subschema(
context.resolver, default_dialect}) {
assert(entry.pointer.back().is_property());
const auto &keyword{entry.pointer.back().to_property()};
// Bases must not contain fragments
assert(!schema_context.base.fragment().has_value());
for (auto &&step : context.compiler(
context,
{schema_context.relative_pointer.concat({keyword}),
Expand Down Expand Up @@ -108,7 +110,7 @@ auto compile(const JSON &schema, const SchemaWalker &walker,
empty_pointer,
result,
vocabularies(schema, resolver, root_frame_entry.dialect),
root_frame_entry.base,
URI{root_frame_entry.base}.canonicalize().recompose(),
{},
{}};

Expand Down Expand Up @@ -193,7 +195,8 @@ auto compile(const JSON &schema, const SchemaWalker &walker,

const URI anchor_uri{entry.first.second};
const auto label{EvaluationContext{}.hash(
anchor_uri.recompose_without_fragment().value_or(""),
schema_resource_id(
context, anchor_uri.recompose_without_fragment().value_or("")),
std::string{anchor_uri.fragment().value_or("")})};
schema_context.labels.insert(label);

Expand Down Expand Up @@ -270,7 +273,8 @@ auto compile(const SchemaCompilerContext &context,
return compile_subschema(
context,
{entry.relative_pointer, new_schema,
vocabularies(new_schema, context.resolver, entry.dialect), entry.base,
vocabularies(new_schema, context.resolver, entry.dialect),
URI{entry.base}.recompose_without_fragment().value_or(""),
// TODO: This represents a copy
schema_context.labels, schema_context.references},
{dynamic_context.keyword, destination_pointer,
Expand Down
25 changes: 21 additions & 4 deletions src/jsonschema/compile_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,32 @@
#define SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_COMPILE_HELPERS_H_

#include <sourcemeta/jsontoolkit/jsonschema_compile.h>
#include <sourcemeta/jsontoolkit/uri.h>

#include <cassert> // assert
#include <utility> // std::declval, std::move
#include <algorithm> // std::find
#include <cassert> // assert
#include <iterator> // std::distance
#include <utility> // std::declval, std::move

namespace sourcemeta::jsontoolkit {

static const SchemaCompilerDynamicContext relative_dynamic_context{
"", empty_pointer, empty_pointer};

inline auto schema_resource_id(const SchemaCompilerContext &context,
const std::string &resource) -> std::size_t {
const auto iterator{std::find(context.resources.cbegin(),
context.resources.cend(),
URI{resource}.canonicalize().recompose())};
if (iterator == context.resources.cend()) {
assert(resource.empty());
return 0;
}

return 1 + static_cast<std::size_t>(
std::distance(context.resources.cbegin(), iterator));
}

// Instantiate a value-oriented step
template <typename Step>
auto make(const bool report, const SchemaCompilerContext &context,
Expand All @@ -25,7 +42,7 @@ auto make(const bool report, const SchemaCompilerContext &context,
{dynamic_context.keyword}),
dynamic_context.base_instance_location,
to_uri(schema_context.relative_pointer, schema_context.base).recompose(),
schema_context.base.recompose(),
schema_resource_id(context, schema_context.base.recompose()),
context.uses_dynamic_scopes,
report,
value};
Expand All @@ -46,7 +63,7 @@ auto make(const bool report, const SchemaCompilerContext &context,
{dynamic_context.keyword}),
dynamic_context.base_instance_location,
to_uri(schema_context.relative_pointer, schema_context.base).recompose(),
schema_context.base.recompose(),
schema_resource_id(context, schema_context.base.recompose()),
context.uses_dynamic_scopes,
report,
std::move(value),
Expand Down
5 changes: 3 additions & 2 deletions src/jsonschema/default_compiler_draft4.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ auto compiler_draft4_core_ref(
}

const auto &reference{context.references.at({type, entry.pointer})};
const auto label{EvaluationContext{}.hash(reference.base.value_or(""),
reference.fragment.value_or(""))};
const auto label{EvaluationContext{}.hash(
schema_resource_id(context, reference.base.value_or("")),
reference.fragment.value_or(""))};

// The label is already registered, so just jump to it
if (schema_context.labels.contains(label)) {
Expand Down
Loading