Skip to content

Commit

Permalink
Emit step schema resources as hashes
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <[email protected]>
  • Loading branch information
jviotti committed Oct 14, 2024
1 parent 7d89944 commit bc1182f
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 74 deletions.
19 changes: 6 additions & 13 deletions src/evaluator/context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ auto EvaluationContext::prepare(const JSON &instance) -> void {
auto EvaluationContext::push_without_traverse(
const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic) -> void {
const std::size_t &schema_resource, const bool dynamic) -> void {
// Guard against infinite recursion in a cheap manner, as
// infinite recursion will manifest itself through huge
// ever-growing evaluate paths
Expand All @@ -49,7 +49,7 @@ auto EvaluationContext::push_without_traverse(

auto EvaluationContext::push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic) -> void {
this->push_without_traverse(relative_schema_location,
relative_instance_location, schema_resource,
Expand All @@ -63,7 +63,7 @@ auto EvaluationContext::push(const Pointer &relative_schema_location,

auto EvaluationContext::push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic,
std::reference_wrapper<const JSON> &&new_instance)
-> void {
Expand All @@ -85,8 +85,6 @@ auto EvaluationContext::pop(const bool dynamic) -> void {

this->frame_sizes.pop_back();

// TODO: Do schema resource management using hashes to avoid
// expensive string comparisons
if (dynamic) {
assert(!this->resources_.empty());
this->resources_.pop_back();
Expand Down Expand Up @@ -235,19 +233,14 @@ auto EvaluationContext::instances() const noexcept
return this->instances_;
}

auto EvaluationContext::hash(const std::string &base,
auto EvaluationContext::hash(const std::size_t &resource,
const std::string &fragment) const noexcept
-> std::size_t {
// TODO: Avoid these string copies
std::ostringstream result;
result << base;
result << '#';
result << fragment;
return this->hasher_(result.str());
return resource + this->hasher_(fragment);
}

auto EvaluationContext::resources() const noexcept
-> const std::vector<std::string> & {
-> const std::vector<std::size_t> & {
return this->resources_;
}

Expand Down
15 changes: 7 additions & 8 deletions src/evaluator/include/sourcemeta/jsontoolkit/evaluator_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
auto instance_location() const noexcept -> const WeakPointer &;
auto push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic) -> void;
const std::size_t &schema_resource, const bool dynamic) -> void;
// A performance shortcut for pushing without re-traversing the target
// if we already know what the destination target will be
auto push(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource, const bool dynamic,
const std::size_t &schema_resource, const bool dynamic,
std::reference_wrapper<const JSON> &&new_instance) -> void;
auto pop(const bool dynamic) -> void;
auto enter(const WeakPointer::Token::Property &property) -> void;
Expand All @@ -57,7 +57,7 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
private:
auto push_without_traverse(const Pointer &relative_schema_location,
const Pointer &relative_instance_location,
const std::string &schema_resource,
const std::size_t &schema_resource,
const bool dynamic) -> void;

public:
Expand All @@ -77,9 +77,9 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
// References and anchors
///////////////////////////////////////////////

auto hash(const std::string &base, const std::string &fragment) const noexcept
-> std::size_t;
auto resources() const noexcept -> const std::vector<std::string> &;
auto hash(const std::size_t &resource,
const std::string &fragment) const noexcept -> std::size_t;
auto resources() const noexcept -> const std::vector<std::size_t> &;
auto mark(const std::size_t id, const SchemaCompilerTemplate &children)
-> void;
auto jump(const std::size_t id) const noexcept
Expand Down Expand Up @@ -120,8 +120,7 @@ class SOURCEMETA_JSONTOOLKIT_EVALUATOR_EXPORT EvaluationContext {
WeakPointer instance_location_;
std::vector<std::pair<std::size_t, std::size_t>> frame_sizes;
const std::hash<std::string> hasher_{};
// TODO: Keep hashes of schema resources URI instead for performance reasons
std::vector<std::string> resources_;
std::vector<std::size_t> resources_;
// TODO: Try unordered_map
std::map<std::size_t,
const std::reference_wrapper<const SchemaCompilerTemplate>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ enum class SchemaCompilerTemplateIndex : std::uint8_t {
const Pointer relative_schema_location; \
const Pointer relative_instance_location; \
const std::string keyword_location; \
const std::string schema_resource; \
const std::size_t schema_resource; \
const bool dynamic; \
const bool report; \
const type value; \
Expand All @@ -198,7 +198,7 @@ enum class SchemaCompilerTemplateIndex : std::uint8_t {
const Pointer relative_schema_location; \
const Pointer relative_instance_location; \
const std::string keyword_location; \
const std::string schema_resource; \
const std::size_t schema_resource; \
const bool dynamic; \
const bool report; \
const type value; \
Expand Down
11 changes: 8 additions & 3 deletions src/jsonschema/compile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ auto compile_subschema(
context.resolver, default_dialect}) {
assert(entry.pointer.back().is_property());
const auto &keyword{entry.pointer.back().to_property()};
// Bases must not contain fragments
assert(!schema_context.base.fragment().has_value());
for (auto &&step : context.compiler(
context,
{schema_context.relative_pointer.concat({keyword}),
Expand Down Expand Up @@ -108,7 +110,8 @@ auto compile(const JSON &schema, const SchemaWalker &walker,
empty_pointer,
result,
vocabularies(schema, resolver, root_frame_entry.dialect),
root_frame_entry.base,
// TODO: We shouldn't need to canonicalize if framing did the right thing?
URI{root_frame_entry.base}.canonicalize().recompose(),
{},
{}};

Expand Down Expand Up @@ -193,7 +196,8 @@ auto compile(const JSON &schema, const SchemaWalker &walker,

const URI anchor_uri{entry.first.second};
const auto label{EvaluationContext{}.hash(
anchor_uri.recompose_without_fragment().value_or(""),
schema_resource_id(
context, anchor_uri.recompose_without_fragment().value_or("")),
std::string{anchor_uri.fragment().value_or("")})};
schema_context.labels.insert(label);

Expand Down Expand Up @@ -270,7 +274,8 @@ auto compile(const SchemaCompilerContext &context,
return compile_subschema(
context,
{entry.relative_pointer, new_schema,
vocabularies(new_schema, context.resolver, entry.dialect), entry.base,
vocabularies(new_schema, context.resolver, entry.dialect),
URI{entry.base}.recompose_without_fragment().value_or(""),
// TODO: This represents a copy
schema_context.labels, schema_context.references},
{dynamic_context.keyword, destination_pointer,
Expand Down
27 changes: 23 additions & 4 deletions src/jsonschema/compile_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,34 @@
#define SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_COMPILE_HELPERS_H_

#include <sourcemeta/jsontoolkit/jsonschema_compile.h>
#include <sourcemeta/jsontoolkit/uri.h>

#include <cassert> // assert
#include <utility> // std::declval, std::move
#include <algorithm> // std::find
#include <cassert> // assert
#include <iterator> // std::distance
#include <utility> // std::declval, std::move

namespace sourcemeta::jsontoolkit {

static const SchemaCompilerDynamicContext relative_dynamic_context{
"", empty_pointer, empty_pointer};

inline auto schema_resource_id(const SchemaCompilerContext &context,
const std::string &resource) -> std::size_t {
const auto iterator{std::find(context.resources.cbegin(),
context.resources.cend(),
// TODO: We shouldn't need to canonicalize if
// framing did the right thing?
URI{resource}.canonicalize().recompose())};
if (iterator == context.resources.cend()) {
assert(resource.empty());
return 0;
}

return 1 + static_cast<std::size_t>(
std::distance(context.resources.cbegin(), iterator));
}

// Instantiate a value-oriented step
template <typename Step>
auto make(const bool report, const SchemaCompilerContext &context,
Expand All @@ -25,7 +44,7 @@ auto make(const bool report, const SchemaCompilerContext &context,
{dynamic_context.keyword}),
dynamic_context.base_instance_location,
to_uri(schema_context.relative_pointer, schema_context.base).recompose(),
schema_context.base.recompose(),
schema_resource_id(context, schema_context.base.recompose()),
context.uses_dynamic_scopes,
report,
value};
Expand All @@ -46,7 +65,7 @@ auto make(const bool report, const SchemaCompilerContext &context,
{dynamic_context.keyword}),
dynamic_context.base_instance_location,
to_uri(schema_context.relative_pointer, schema_context.base).recompose(),
schema_context.base.recompose(),
schema_resource_id(context, schema_context.base.recompose()),
context.uses_dynamic_scopes,
report,
std::move(value),
Expand Down
5 changes: 3 additions & 2 deletions src/jsonschema/default_compiler_draft4.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ auto compiler_draft4_core_ref(
}

const auto &reference{context.references.at({type, entry.pointer})};
const auto label{EvaluationContext{}.hash(reference.base.value_or(""),
reference.fragment.value_or(""))};
const auto label{EvaluationContext{}.hash(
schema_resource_id(context, reference.base.value_or("")),
reference.fragment.value_or(""))};

// The label is already registered, so just jump to it
if (schema_context.labels.contains(label)) {
Expand Down
Loading

0 comments on commit bc1182f

Please sign in to comment.