From 48f3f421f58611e0d19147bb610e30dc4497488a Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 9 Oct 2024 15:54:10 -0400 Subject: [PATCH] Implement `encode` and `decode` schema-less commands (#174) Signed-off-by: Juan Cruz Viotti --- README.markdown | 2 ++ cmake/FindJSONBinPack.cmake | 1 - docs/decode.markdown | 35 +++++++++++++++++++++ docs/encode.markdown | 35 +++++++++++++++++++++ src/CMakeLists.txt | 5 ++- src/command.h | 2 ++ src/command_decode.cc | 55 +++++++++++++++++++++++++++++++++ src/command_encode.cc | 50 ++++++++++++++++++++++++++++++ src/main.cc | 12 +++++++ test/CMakeLists.txt | 10 ++++++ test/decode/fail_no_document.sh | 23 ++++++++++++++ test/decode/fail_no_output.sh | 24 ++++++++++++++ test/decode/pass_schema_less.sh | 23 ++++++++++++++ test/encode/fail_no_document.sh | 23 ++++++++++++++ test/encode/fail_no_output.sh | 23 ++++++++++++++ test/encode/pass_schema_less.sh | 26 ++++++++++++++++ 16 files changed, 347 insertions(+), 2 deletions(-) create mode 100644 docs/decode.markdown create mode 100644 docs/encode.markdown create mode 100644 src/command_decode.cc create mode 100644 src/command_encode.cc create mode 100755 test/decode/fail_no_document.sh create mode 100755 test/decode/fail_no_output.sh create mode 100755 test/decode/pass_schema_less.sh create mode 100755 test/encode/fail_no_document.sh create mode 100755 test/encode/fail_no_output.sh create mode 100755 test/encode/pass_schema_less.sh diff --git a/README.markdown b/README.markdown index 2420a90..92ae33f 100644 --- a/README.markdown +++ b/README.markdown @@ -66,6 +66,8 @@ documentation: - [`jsonschema compile`](./docs/compile.markdown) (for internal debugging) - [`jsonschema identify`](./docs/identify.markdown) - [`jsonschema canonicalize`](./docs/canonicalize.markdown) (for static analysis) +- [`jsonschema encode`](./docs/encode.markdown) (for binary compression) +- [`jsonschema decode`](./docs/decode.markdown) Installation ------------ diff --git a/cmake/FindJSONBinPack.cmake 
b/cmake/FindJSONBinPack.cmake index 671718e..95c58f3 100644 --- a/cmake/FindJSONBinPack.cmake +++ b/cmake/FindJSONBinPack.cmake @@ -1,6 +1,5 @@ if(NOT JSONBinPack_FOUND) set(JSONBINPACK_INSTALL OFF CACHE BOOL "disable installation") - set(JSONBINPACK_RUNTIME OFF CACHE BOOL "disable the JSON BinPack runtime module") add_subdirectory("${PROJECT_SOURCE_DIR}/vendor/jsonbinpack") set(JSONBinPack_FOUND ON) endif() diff --git a/docs/decode.markdown b/docs/decode.markdown new file mode 100644 index 0000000..a137adf --- /dev/null +++ b/docs/decode.markdown @@ -0,0 +1,35 @@ +Decode +====== + +```sh +jsonschema decode <output.binpack> <output.json> +``` + +This command decodes a binary file back into a JSON document using [JSON +BinPack](https://jsonbinpack.sourcemeta.com) schema-less mode. **Note this +command is considered experimental and might not decode binary files produced +by other versions of this CLI**. + +Examples +-------- + +For example, consider the following encoded file: + +``` +$ xxd output.binpack +00000000: 1308 7665 7273 696f 6e37 02 ..version7. +``` + +Decoding this file using JSON BinPack will result in the following document: + +```json +{ + "version": 2.0 +} +``` + +### Decode a binary file + +```sh +jsonschema decode path/to/output.binpack path/to/my/output.json +``` diff --git a/docs/encode.markdown b/docs/encode.markdown new file mode 100644 index 0000000..90b9073 --- /dev/null +++ b/docs/encode.markdown @@ -0,0 +1,35 @@ +Encode +====== + +```sh +jsonschema encode <document.json> <output.binpack> +``` + +This command encodes a JSON document using [JSON +BinPack](https://jsonbinpack.sourcemeta.com) schema-less mode. **Note this +command is considered experimental and its output might not be decodable across +versions of this CLI**. + +Examples +-------- + +For example, consider the following simple document: + +```json +{ + "version": 2.0 +} +``` + +The JSON BinPack schema-less encoding will result in something like this: + +``` +$ xxd output.binpack +00000000: 1308 7665 7273 696f 6e37 02 ..version7.
+``` + +### Encode a JSON document + +```sh +jsonschema encode path/to/my/document.json path/to/output.binpack +``` diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 491a457..dd00bc9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,7 +10,9 @@ add_executable(jsonschema_cli command_metaschema.cc command_validate.cc command_identify.cc - command_canonicalize.cc) + command_canonicalize.cc + command_encode.cc + command_decode.cc) noa_add_default_options(PRIVATE jsonschema_cli) set_target_properties(jsonschema_cli PROPERTIES OUTPUT_NAME jsonschema) @@ -22,6 +24,7 @@ target_link_libraries(jsonschema_cli PRIVATE sourcemeta::alterschema::engine) target_link_libraries(jsonschema_cli PRIVATE sourcemeta::alterschema::linter) target_link_libraries(jsonschema_cli PRIVATE sourcemeta::hydra::httpclient) target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsonbinpack::compiler) +target_link_libraries(jsonschema_cli PRIVATE sourcemeta::jsonbinpack::runtime) configure_file(configure.h.in configure.h @ONLY) target_include_directories(jsonschema_cli PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") diff --git a/src/command.h b/src/command.h index f5641bc..62074f6 100644 --- a/src/command.h +++ b/src/command.h @@ -15,6 +15,8 @@ auto validate(const std::span &arguments) -> int; auto metaschema(const std::span &arguments) -> int; auto identify(const std::span &arguments) -> int; auto canonicalize(const std::span &arguments) -> int; +auto encode(const std::span &arguments) -> int; +auto decode(const std::span &arguments) -> int; } // namespace sourcemeta::jsonschema::cli #endif diff --git a/src/command_decode.cc b/src/command_decode.cc new file mode 100644 index 0000000..f627daa --- /dev/null +++ b/src/command_decode.cc @@ -0,0 +1,55 @@ +#include +#include +#include +#include + +#include // assert +#include // EXIT_SUCCESS +#include // std::filesystem +#include // std::ifstream +#include // std::cout, std::endl + +#include "command.h" +#include "utils.h" + +auto 
sourcemeta::jsonschema::cli::decode( + const std::span &arguments) -> int { + const auto options{parse_options(arguments, {})}; + + if (options.at("").size() < 2) { + std::cerr + << "error: This command expects a path to a binary file and an " + "output path. For example:\n\n" + << " jsonschema decode path/to/output.binpack path/to/document.json\n"; + return EXIT_FAILURE; + } + + // TODO: Take a real schema as argument + auto schema{sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema" + })JSON")}; + + sourcemeta::jsonbinpack::compile( + schema, sourcemeta::jsontoolkit::default_schema_walker, + resolver(options, options.contains("h") || options.contains("http"))); + const auto encoding{sourcemeta::jsonbinpack::load(schema)}; + + std::ifstream input_stream{std::filesystem::canonical(options.at("").front()), + std::ios::binary}; + input_stream.exceptions(std::ifstream::failbit | std::ifstream::badbit); + assert(!input_stream.fail()); + assert(input_stream.is_open()); + sourcemeta::jsonbinpack::Decoder decoder{input_stream}; + const auto document{decoder.read(encoding)}; + + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsontoolkit::prettify( + document, output_stream, sourcemeta::jsontoolkit::schema_format_compare); + output_stream << "\n"; + output_stream.flush(); + output_stream.close(); + return EXIT_SUCCESS; +} diff --git a/src/command_encode.cc b/src/command_encode.cc new file mode 100644 index 0000000..e0554fc --- /dev/null +++ b/src/command_encode.cc @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +#include // EXIT_SUCCESS +#include // std::filesystem +#include // std::ofstream +#include // std::cout, std::endl + +#include "command.h" +#include "utils.h" + +auto sourcemeta::jsonschema::cli::encode( + const std::span &arguments) -> int { + const auto 
options{parse_options(arguments, {})}; + + if (options.at("").size() < 2) { + std::cerr + << "error: This command expects a path to a JSON document and an " + "output path. For example:\n\n" + << " jsonschema encode path/to/document.json path/to/output.binpack\n"; + return EXIT_FAILURE; + } + + // TODO: Take a real schema as argument + auto schema{sourcemeta::jsontoolkit::parse(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema" + })JSON")}; + + sourcemeta::jsonbinpack::compile( + schema, sourcemeta::jsontoolkit::default_schema_walker, + resolver(options, options.contains("h") || options.contains("http"))); + const auto encoding{sourcemeta::jsonbinpack::load(schema)}; + + const auto document{ + sourcemeta::jsontoolkit::from_file(options.at("").front())}; + + std::ofstream output_stream( + std::filesystem::weakly_canonical(options.at("").at(1)), + std::ios::binary); + output_stream.exceptions(std::ios_base::badbit); + sourcemeta::jsonbinpack::Encoder encoder{output_stream}; + encoder.write(document, encoding); + output_stream.flush(); + const auto size{output_stream.tellp()}; + output_stream.close(); + std::cerr << "size: " << size << " bytes\n"; + return EXIT_SUCCESS; +} diff --git a/src/main.cc b/src/main.cc index 928e6bf..c562dce 100644 --- a/src/main.cc +++ b/src/main.cc @@ -74,6 +74,14 @@ Global Options: Pre-process a JSON Schema into JSON BinPack's canonical form for static analysis. + encode + + Encode a JSON document or JSONL dataset using JSON BinPack. + + decode + + Decode a JSON document or JSONL dataset using JSON BinPack. 
+ For more documentation, visit https://github.com/sourcemeta/jsonschema )EOF"}; @@ -99,6 +107,10 @@ auto jsonschema_main(const std::string &program, const std::string &command, return sourcemeta::jsonschema::cli::identify(arguments); } else if (command == "canonicalize") { return sourcemeta::jsonschema::cli::canonicalize(arguments); + } else if (command == "encode") { + return sourcemeta::jsonschema::cli::encode(arguments); + } else if (command == "decode") { + return sourcemeta::jsonschema::cli::decode(arguments); } else { std::cout << "JSON Schema CLI - v" << sourcemeta::jsonschema::cli::PROJECT_VERSION << "\n"; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 325a7be..416a02d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -172,6 +172,16 @@ add_jsonschema_test_unix(canonicalize/fail_no_schema) add_jsonschema_test_unix(canonicalize/fail_schema_invalid_json) add_jsonschema_test_unix(canonicalize/fail_unknown_metaschema) +# Encode +add_jsonschema_test_unix(encode/pass_schema_less) +add_jsonschema_test_unix(encode/fail_no_document) +add_jsonschema_test_unix(encode/fail_no_output) + +# Decode +add_jsonschema_test_unix(decode/pass_schema_less) +add_jsonschema_test_unix(decode/fail_no_document) +add_jsonschema_test_unix(decode/fail_no_output) + # CI specific tests add_jsonschema_test_unix_ci(pass_bundle_http) add_jsonschema_test_unix_ci(fail_bundle_http_non_200) diff --git a/test/decode/fail_no_document.sh b/test/decode/fail_no_document.sh new file mode 100755 index 0000000..c19d764 --- /dev/null +++ b/test/decode/fail_no_document.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" decode 2>"$TMP/stderr.txt" && CODE="$?" || CODE="$?" +test "$CODE" = "1" || exit 1 + +cat << 'EOF' > "$TMP/expected.txt" +error: This command expects a path to a binary file and an output path. 
For example: + + jsonschema decode path/to/output.binpack path/to/document.json +EOF + +diff "$TMP/stderr.txt" "$TMP/expected.txt" diff --git a/test/decode/fail_no_output.sh b/test/decode/fail_no_output.sh new file mode 100755 index 0000000..970d15f --- /dev/null +++ b/test/decode/fail_no_output.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" encode "$TMP/document.json" "$TMP/output.binpack" +"$1" decode "$TMP/output.binpack" 2>"$TMP/stderr.txt" && CODE="$?" || CODE="$?" +test "$CODE" = "1" || exit 1 + +cat << 'EOF' > "$TMP/expected.txt" +error: This command expects a path to a binary file and an output path. For example: + + jsonschema decode path/to/output.binpack path/to/document.json +EOF + +diff "$TMP/stderr.txt" "$TMP/expected.txt" diff --git a/test/decode/pass_schema_less.sh b/test/decode/pass_schema_less.sh new file mode 100755 index 0000000..10f2508 --- /dev/null +++ b/test/decode/pass_schema_less.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" encode "$TMP/document.json" "$TMP/output.binpack" +"$1" decode "$TMP/output.binpack" "$TMP/decode.json" + +cat << 'EOF' > "$TMP/expected.json" +{ + "version": 2.0 +} +EOF + +diff "$TMP/decode.json" "$TMP/expected.json" diff --git a/test/encode/fail_no_document.sh b/test/encode/fail_no_document.sh new file mode 100755 index 0000000..a64b2ed --- /dev/null +++ b/test/encode/fail_no_document.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" encode 2> "$TMP/stderr.txt" && CODE="$?" || CODE="$?" 
+test "$CODE" = "1" || exit 1 + +cat << 'EOF' > "$TMP/expected.txt" +error: This command expects a path to a JSON document and an output path. For example: + + jsonschema encode path/to/document.json path/to/output.binpack +EOF + +diff "$TMP/stderr.txt" "$TMP/expected.txt" diff --git a/test/encode/fail_no_output.sh b/test/encode/fail_no_output.sh new file mode 100755 index 0000000..6fb72d6 --- /dev/null +++ b/test/encode/fail_no_output.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" encode "$TMP/document.json" 2>"$TMP/stderr.txt" && CODE="$?" || CODE="$?" +test "$CODE" = "1" || exit 1 + +cat << 'EOF' > "$TMP/expected.txt" +error: This command expects a path to a JSON document and an output path. For example: + + jsonschema encode path/to/document.json path/to/output.binpack +EOF + +diff "$TMP/stderr.txt" "$TMP/expected.txt" diff --git a/test/encode/pass_schema_less.sh b/test/encode/pass_schema_less.sh new file mode 100755 index 0000000..61116e8 --- /dev/null +++ b/test/encode/pass_schema_less.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/document.json" +{ "version": 2.0 } +EOF + +"$1" encode "$TMP/document.json" "$TMP/output.binpack" > "$TMP/output.txt" 2>&1 +xxd "$TMP/output.binpack" > "$TMP/output.hex" + +cat << 'EOF' > "$TMP/expected.txt" +00000000: 1308 7665 7273 696f 6e37 02 ..version7. +EOF + +cat << 'EOF' > "$TMP/expected-output.txt" +size: 11 bytes +EOF + +diff "$TMP/expected.txt" "$TMP/output.hex" +diff "$TMP/output.txt" "$TMP/expected-output.txt"