From 353024540ba60ef5d1e11050b8ae5809dd34a40e Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 12:03:36 -0500 Subject: [PATCH 1/6] skip_space issue --- include/jsoncons/json_parser.hpp | 20 +++++++++++++++----- include/jsoncons/json_reader.hpp | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index 7b70de4ba..c7dfc24d7 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -297,6 +297,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< void skip_space(std::error_code& ec) { + bool got_cr = false; const char_type* local_input_end = input_end_; while (true) { @@ -316,17 +317,25 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++position_; break; case '\r': - push_state(state_); ++input_ptr_; + ++line_; ++position_; - state_ = json_parse_state::cr; - return; + mark_position_ = position_; + got_cr = true; + break; case '\n': ++input_ptr_; - ++line_; + if (got_cr) + { + got_cr = false; + } + else + { + ++line_; + } ++position_; mark_position_ = position_; - return; + break; default: return; } @@ -335,6 +344,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< void skip_whitespace(std::error_code& ec) { + bool got_cr = false; const char_type* local_input_end = input_end_; while (true) diff --git a/include/jsoncons/json_reader.hpp b/include/jsoncons/json_reader.hpp index 868e5eab7..1e947880e 100644 --- a/include/jsoncons/json_reader.hpp +++ b/include/jsoncons/json_reader.hpp @@ -321,7 +321,7 @@ namespace jsoncons { while (!source_.eof()) { - parser_.skip_whitespace(ec); + parser_.skip_space(ec); if (parser_.source_exhausted()) { auto s1 = source_.read_buffer(ec); From 9fb6faca7fa05ec23a731ed75acdd0e20fd02d5c Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 12:21:25 -0500 Subject: [PATCH 2/6] Improve skip_space --- include/jsoncons/json_parser.hpp | 62 ++++---------------------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index c7dfc24d7..5a37844af 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -297,7 +297,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< void skip_space(std::error_code& ec) { - bool got_cr = false; + bool prev_char_is_cr = false; const char_type* local_input_end = input_end_; while (true) { @@ -313,24 +313,25 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< { case ' ': case '\t': + prev_char_is_cr = false; ++input_ptr_; ++position_; break; case '\r': + prev_char_is_cr = true; ++input_ptr_; ++line_; ++position_; mark_position_ = position_; - got_cr = true; break; case '\n': ++input_ptr_; - if (got_cr) + if (prev_char_is_cr) { - got_cr = false; + prev_char_is_cr = false; } else - { + { ++line_; } ++position_; @@ -342,57 +343,6 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } } - void skip_whitespace(std::error_code& ec) - { - bool got_cr = false; - const char_type* local_input_end = input_end_; - - while (true) - { - if (input_ptr_ == local_input_end) - { - if (!chunk_rdr_->read_chunk(*this, ec)) - { - break; - } - local_input_end = input_end_; - } - switch (state_) - { - case json_parse_state::cr: - ++line_; - ++position_; - mark_position_ = position_; - switch (*input_ptr_) - { - case '\n': - ++input_ptr_; - ++position_; - state_ = pop_state(); - break; - default: - state_ = pop_state(); - break; - } - break; - - default: - switch (*input_ptr_) - { - case ' ': - case '\t': - case '\n': - case '\r': - skip_space(ec); - break; - default: - return; - } - break; - } - } - } - void begin_object(basic_json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) From efb3b00eec086958dca88f43dea198935ba0fcff Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 14:06:31 -0500 Subject: [PATCH 3/6] Improve skip_space --- include/jsoncons/json_parser.hpp | 3 +-- test/corelib/src/json_reader_tests.cpp | 12 +++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index 5a37844af..4acabfd53 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -500,8 +500,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< { for (; input_ptr_ != input_end_; ++input_ptr_) { - char_type curr_char_ = *input_ptr_; - switch (curr_char_) + switch (*input_ptr_) { case '\n': case '\r': diff --git a/test/corelib/src/json_reader_tests.cpp b/test/corelib/src/json_reader_tests.cpp index 8da23c2ac..3d8d5b846 100644 --- a/test/corelib/src/json_reader_tests.cpp +++ b/test/corelib/src/json_reader_tests.cpp @@ -340,16 +340,18 @@ TEST_CASE("json_reader json lines") json_decoder decoder; json_stream_reader reader(is, decoder); - CHECK(!reader.eof()); + REQUIRE(!reader.eof()); reader.read_next(); - CHECK(!reader.eof()); + CHECK(decoder.get_result() == json::parse(R"(["Name", "Session", "Score", "Completed"])")); + REQUIRE(!reader.eof()); reader.read_next(); - CHECK(!reader.eof()); + REQUIRE(!reader.eof()); reader.read_next(); - CHECK(!reader.eof()); + REQUIRE(!reader.eof()); reader.read_next(); - CHECK(!reader.eof()); + REQUIRE(!reader.eof()); reader.read_next(); + CHECK(decoder.get_result() == json::parse(R"(["Deloise", "2012A", 19, true])")); CHECK(!reader.eof()); reader.read_next(); CHECK(reader.eof()); From 91f385ec21502d2b4ea9a75c280bedc1656e0557 Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 14:42:15 -0500 Subject: [PATCH 4/6] Improve check_done --- include/jsoncons/json_parser.hpp | 12 ++++- include/jsoncons/json_reader.hpp | 45 +------------------ test/CMakeLists.txt | 2 +- ..._tests.cpp => json_parser_error_tests.cpp} | 1 - test/corelib/src/json_reader_tests.cpp | 2 - 5 files changed, 14 insertions(+), 48 deletions(-) rename test/corelib/src/{json_parse_error_tests.cpp => json_parser_error_tests.cpp} (99%) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index 4acabfd53..d9a4574ff 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -498,8 +498,17 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< void check_done(std::error_code& ec) { - for (; input_ptr_ != input_end_; ++input_ptr_) + const char_type* local_input_end = input_end_; + while (true) { + if (input_ptr_ == local_input_end) + { + if (!chunk_rdr_->read_chunk(*this, ec)) + { + break; + } + local_input_end = input_end_; + } switch (*input_ptr_) { case '\n': @@ -516,6 +525,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } break; } + ++input_ptr_; } } diff --git a/include/jsoncons/json_reader.hpp b/include/jsoncons/json_reader.hpp index 1e947880e..2bb60d192 100644 --- a/include/jsoncons/json_reader.hpp +++ b/include/jsoncons/json_reader.hpp @@ -319,23 +319,7 @@ namespace jsoncons { return; } - while (!source_.eof()) - { - parser_.skip_space(ec); - if (parser_.source_exhausted()) - { - auto s1 = source_.read_buffer(ec); - if (ec) return; - if (s1.size() > 0) - { - parser_.set_buffer(s1.data(),s1.size()); - } - } - else - { - break; - } - } + parser_.skip_space(ec); } void check_done() @@ -365,32 +349,7 @@ namespace jsoncons { ec = json_errc::source_error; return; } - if (source_.eof()) - { - parser_.check_done(ec); - if (ec) return; - } - else - { - do - { - if (parser_.source_exhausted()) - { - auto s = source_.read_buffer(ec); - if (ec) return; - if (s.size() > 0) - { - parser_.set_buffer(s.data(),s.size()); - } - } - if (!parser_.source_exhausted()) - { - parser_.check_done(ec); - if (ec) return; - } - } - while (!eof()); - } + parser_.check_done(ec); } bool eof() const diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c29297c20..de2760485 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -154,7 +154,7 @@ add_executable(unit_tests corelib/src/json_object_tests.cpp corelib/src/ojson_object_tests.cpp corelib/src/json_options_tests.cpp - corelib/src/json_parse_error_tests.cpp + corelib/src/json_parser_error_tests.cpp corelib/src/json_parser_position_tests.cpp corelib/src/json_parser_tests.cpp corelib/src/json_push_back_tests.cpp diff --git a/test/corelib/src/json_parse_error_tests.cpp b/test/corelib/src/json_parser_error_tests.cpp similarity index 99% rename from test/corelib/src/json_parse_error_tests.cpp rename to test/corelib/src/json_parser_error_tests.cpp index c0b53d0db..e02f01df0 100644 --- a/test/corelib/src/json_parse_error_tests.cpp +++ b/test/corelib/src/json_parser_error_tests.cpp @@ -233,4 +233,3 @@ TEST_CASE("test_positive_integer_overflow") CHECK(j2.is()); CHECK(s2 == j2.as()); } - diff --git a/test/corelib/src/json_reader_tests.cpp b/test/corelib/src/json_reader_tests.cpp index 3d8d5b846..3d5d2cabf 100644 --- a/test/corelib/src/json_reader_tests.cpp +++ b/test/corelib/src/json_reader_tests.cpp @@ -352,8 +352,6 @@ TEST_CASE("json_reader json lines") REQUIRE(!reader.eof()); reader.read_next(); CHECK(decoder.get_result() == json::parse(R"(["Deloise", "2012A", 19, true])")); - CHECK(!reader.eof()); - reader.read_next(); CHECK(reader.eof()); } } From 226110f90e54caa344951d79d040fac1aa49d51c Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 14:59:50 -0500 Subject: [PATCH 5/6] Simplify lf handling --- include/jsoncons/json_parser.hpp | 159 +++---------------------------- 1 file changed, 11 insertions(+), 148 deletions(-) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index d9a4574ff..b38bd4c5c 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -677,18 +677,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< return; } break; - case '\r': - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -786,18 +775,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -866,18 +844,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -940,18 +907,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -1020,18 +976,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - push_state(state_); - state_ = json_parse_state::cr; - ++input_ptr_; - ++position_; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -1074,18 +1019,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -1212,18 +1146,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< ++input_ptr_; ++position_; break; - case '\r': - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - break; + case '\r': case '\n': case ' ':case '\t': skip_space(ec); break; @@ -1695,22 +1618,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } switch (*input_ptr_) { - case '\r': - end_integer_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - return; - case '\n': - end_integer_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - return; + case '\r':case '\n': case ' ':case '\t': end_integer_value(visitor, ec); if (ec) return; @@ -1775,22 +1683,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } switch (*input_ptr_) { - case '\r': - end_integer_value(visitor, ec); - if (ec) return; - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - return; - case '\n': - end_integer_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - return; + case '\r':case '\n': case ' ':case '\t': end_integer_value(visitor, ec); if (ec) return; @@ -1877,22 +1770,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } switch (*input_ptr_) { - case '\r': - end_fraction_value(visitor, ec); - if (ec) return; - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - return; - case '\n': - end_fraction_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - return; + case '\r':case '\n': case ' ':case '\t': end_fraction_value(visitor, ec); if (ec) return; @@ -2010,22 +1888,7 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< } switch (*input_ptr_) { - case '\r': - end_fraction_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++position_; - push_state(state_); - state_ = json_parse_state::cr; - return; - case '\n': - end_fraction_value(visitor, ec); - if (ec) return; - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; - return; + case '\r':case '\n': case ' ':case '\t': end_fraction_value(visitor, ec); if (ec) return; From 26d6cf702281acf8ee9ee76c59eb0511dde884ec Mon Sep 17 00:00:00 2001 From: Daniel Parker Date: Fri, 15 Nov 2024 15:08:37 -0500 Subject: [PATCH 6/6] Remove json_parser cr state --- include/jsoncons/json_parser.hpp | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/include/jsoncons/json_parser.hpp b/include/jsoncons/json_parser.hpp index b38bd4c5c..b439c5203 100644 --- a/include/jsoncons/json_parser.hpp +++ b/include/jsoncons/json_parser.hpp @@ -48,7 +48,6 @@ enum class json_parse_state : uint8_t expect_value, array, member_name, - cr, done }; @@ -620,9 +619,6 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< case json_parse_state::done: more_ = false; break; - case json_parse_state::cr: - state_ = pop_state(); - break; default: err_handler_(json_errc::unexpected_eof, *this); ec = json_errc::unexpected_eof; @@ -650,21 +646,6 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< state_ = json_parse_state::done; more_ = false; break; - case json_parse_state::cr: - ++line_; - mark_position_ = position_; - switch (*input_ptr_) - { - case '\n': - ++input_ptr_; - ++position_; - state_ = pop_state(); - break; - default: - state_ = pop_state(); - break; - } - break; case json_parse_state::start: { switch (*input_ptr_) @@ -1280,17 +1261,8 @@ class basic_json_parser : public ser_context, public virtual basic_parser_input< { switch (*input_ptr_) { - case '\r': - push_state(state_); - ++input_ptr_; - ++position_; - state_ = json_parse_state::cr; - break; - case '\n': - ++input_ptr_; - ++line_; - ++position_; - mark_position_ = position_; + case '\r': case '\n': + skip_space(ec); break; case '*': ++input_ptr_;