From 74a7da83a5f3bde33ecd550232bce8eaa5693c30 Mon Sep 17 00:00:00 2001 From: Christian Berger Date: Thu, 7 Feb 2019 22:44:36 +0100 Subject: [PATCH] * Updated livefeed Signed-off-by: Christian Berger --- Dockerfile.aarch64 | 4 +- Dockerfile.amd64 | 4 +- Dockerfile.armhf | 4 +- README.md | 8 +- ...0.0.104.hpp => cluon-complete-v0.0.121.hpp | 4934 ++++++++++------- 5 files changed, 2813 insertions(+), 2141 deletions(-) rename cluon-complete-v0.0.104.hpp => cluon-complete-v0.0.121.hpp (82%) diff --git a/Dockerfile.aarch64 b/Dockerfile.aarch64 index 625e093..3cca1a6 100644 --- a/Dockerfile.aarch64 +++ b/Dockerfile.aarch64 @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Christian Berger +# Copyright (C) 2019 Christian Berger # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ RUN apk update && \ g++ ADD . /opt/sources WORKDIR /opt/sources -RUN ln -sf cluon-complete-v0.0.104.hpp cluon-complete.cpp && \ +RUN ln -sf cluon-complete-v0.0.121.hpp cluon-complete.cpp && \ g++ -std=c++14 -Wall -D HAVE_CLUON_LIVEFEED -pthread -s -static -static-libgcc -static-libstdc++ -o /tmp/cluon-livefeed cluon-complete.cpp RUN [ "cross-build-end" ] diff --git a/Dockerfile.amd64 b/Dockerfile.amd64 index ac00e0e..627da13 100644 --- a/Dockerfile.amd64 +++ b/Dockerfile.amd64 @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Christian Berger +# Copyright (C) 2019 Christian Berger # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ RUN apk update && \ g++ ADD . /opt/sources WORKDIR /opt/sources -RUN ln -sf cluon-complete-v0.0.104.hpp cluon-complete.cpp && \ +RUN ln -sf cluon-complete-v0.0.121.hpp cluon-complete.cpp && \ g++ -std=c++14 -Wall -D HAVE_CLUON_LIVEFEED -pthread -s -static -static-libgcc -static-libstdc++ -o /tmp/cluon-livefeed cluon-complete.cpp diff --git a/Dockerfile.armhf b/Dockerfile.armhf index 4a0d177..6fefcd9 100644 --- a/Dockerfile.armhf +++ b/Dockerfile.armhf @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Christian Berger +# Copyright (C) 2019 Christian Berger # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ RUN apk update && \ g++ ADD . /opt/sources WORKDIR /opt/sources -RUN ln -sf cluon-complete-v0.0.104.hpp cluon-complete.cpp && \ +RUN ln -sf cluon-complete-v0.0.121.hpp cluon-complete.cpp && \ g++ -std=c++14 -Wall -D HAVE_CLUON_LIVEFEED -pthread -s -static -static-libgcc -static-libstdc++ -o /tmp/cluon-livefeed cluon-complete.cpp RUN [ "cross-build-end" ] diff --git a/README.md b/README.md index 7abde7b..7691a8e 100644 --- a/README.md +++ b/README.md @@ -32,19 +32,15 @@ This microservice is created automatically on changes to this repository via Doc This microservice is supposed to be used in parallel with a running [OD4Sessions](https://github.com/chalmers-revere/opendlv) with other microservices that exchange messages in [`Envelope`](https://github.com/chrberger/libcluon/blob/master/libcluon/resources/cluonDataStructures.odvd#L23-L30) data format. The purpose of this microservice to display the type and timestamps of the currently exchanged messages on console. It can be used as shown in the following: ``` -docker run --rm -ti --init --net=host chrberger/cluon-livefeed-multi:v0.0.104 --cid=111 +docker run --rm -ti --init --net=host chrberger/cluon-livefeed-multi:v0.0.121 --cid=111 ``` Additionally, you can supply a message specification in `.odvd`-file like, for example, the [OpenDLV Standard Message Set](https://github.com/chalmers-revere/opendlv.standard-message-set/blob/master/opendlv.odvd) to dynamically resolve the data types of the exchanged messages. In the following, it is assumed that you have the `.odvd`-file named `example.odvd` residing in the current working directory: ``` -docker run --rm -ti --init --net=host -v $PWD:/opt chrberger/cluon-livefeed-multi:v0.0.104 --cid=111 --odvd=/opt/example.odvd +docker run --rm -ti --init --net=host -v $PWD:/opt chrberger/cluon-livefeed-multi:v0.0.121 --cid=111 --odvd=/opt/example.odvd ``` -You can watch the usage of this microservice here: - -[![asciicast](https://asciinema.org/a/zT1Mr5aKUGx3k43ax8a9eapBb.png)](https://asciinema.org/a/zT1Mr5aKUGx3k43ax8a9eapBb?autoplay=1) - ## License * This project is released under the terms of the GNU GPLv3 License diff --git a/cluon-complete-v0.0.104.hpp b/cluon-complete-v0.0.121.hpp similarity index 82% rename from cluon-complete-v0.0.104.hpp rename to cluon-complete-v0.0.121.hpp index 0e55820..0d1ead7 100644 --- a/cluon-complete-v0.0.104.hpp +++ b/cluon-complete-v0.0.121.hpp @@ -1,6 +1,6 @@ // This is an auto-generated header-only single-file distribution of libcluon. -// Date: Mon, 16 Jul 2018 21:38:22 +0200 -// Version: 0.0.104 +// Date: Thu, 07 Feb 2019 22:42:31 +0100 +// Version: 0.0.120 // // // Implementation of N4562 std::experimental::any (merged into C++17) for C++11 compilers. @@ -506,8 +506,8 @@ namespace std namespace peg { -#if __clang__ == 1 && __clang_major__ == 5 && __clang_minor__ == 0 && __clang_patchlevel__ == 0 -static void* enabler = nullptr; // workaround for Clang 5.0.0 +#if __clang__ == 1 && __clang_major__ <= 5 +static void* enabler = nullptr; // workaround for Clang version <= 5.0.0 #else extern void* enabler; #endif @@ -670,6 +670,213 @@ auto make_scope_exit(EF&& exit_function) -> scope_exit { return scope_exit::type>(std::forward(exit_function)); } +/*----------------------------------------------------------------------------- + * UTF8 functions + *---------------------------------------------------------------------------*/ + +inline size_t codepoint_length(const char *s8, size_t l) { + if (l) { + auto b = static_cast(s8[0]); + if ((b & 0x80) == 0) { + return 1; + } else if ((b & 0xE0) == 0xC0) { + return 2; + } else if ((b & 0xF0) == 0xE0) { + return 3; + } else if ((b & 0xF8) == 0xF0) { + return 4; + } + } + return 0; +} + +inline size_t encode_codepoint(char32_t cp, char *buff) { + if (cp < 0x0080) { + buff[0] = static_cast(cp & 0x7F); + return 1; + } else if (cp < 0x0800) { + buff[0] = static_cast(0xC0 | ((cp >> 6) & 0x1F)); + buff[1] = static_cast(0x80 | (cp & 0x3F)); + return 2; + } else if (cp < 0xD800) { + buff[0] = static_cast(0xE0 | ((cp >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (cp & 0x3F)); + return 3; + } else if (cp < 0xE000) { + // D800 - DFFF is invalid... + return 0; + } else if (cp < 0x10000) { + buff[0] = static_cast(0xE0 | ((cp >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (cp & 0x3F)); + return 3; + } else if (cp < 0x110000) { + buff[0] = static_cast(0xF0 | ((cp >> 18) & 0x7)); + buff[1] = static_cast(0x80 | ((cp >> 12) & 0x3F)); + buff[2] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[3] = static_cast(0x80 | (cp & 0x3F)); + return 4; + } + return 0; +} + +inline std::string encode_codepoint(char32_t cp) { + char buff[4]; + auto l = encode_codepoint(cp, buff); + return std::string(buff, l); +} + +inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes, + char32_t &cp) { + if (l) { + auto b = static_cast(s8[0]); + if ((b & 0x80) == 0) { + bytes = 1; + cp = b; + return true; + } else if ((b & 0xE0) == 0xC0) { + if (l >= 2) { + bytes = 2; + cp = ((static_cast(s8[0] & 0x1F)) << 6) | + (static_cast(s8[1] & 0x3F)); + return true; + } + } else if ((b & 0xF0) == 0xE0) { + if (l >= 3) { + bytes = 3; + cp = ((static_cast(s8[0] & 0x0F)) << 12) | + ((static_cast(s8[1] & 0x3F)) << 6) | + (static_cast(s8[2] & 0x3F)); + return true; + } + } else if ((b & 0xF8) == 0xF0) { + if (l >= 4) { + bytes = 4; + cp = ((static_cast(s8[0] & 0x07)) << 18) | + ((static_cast(s8[1] & 0x3F)) << 12) | + ((static_cast(s8[2] & 0x3F)) << 6) | + (static_cast(s8[3] & 0x3F)); + return true; + } + } + } + return false; +} + +inline size_t decode_codepoint(const char *s8, size_t l, char32_t &out) { + size_t bytes; + if (decode_codepoint(s8, l, bytes, out)) { + return bytes; + } + return 0; +} + +inline char32_t decode_codepoint(const char *s8, size_t l) { + char32_t out = 0; + decode_codepoint(s8, l, out); + return out; +} + +inline std::u32string decode(const char *s8, size_t l) { + std::u32string out; + size_t i = 0; + while (i < l) { + auto beg = i++; + while (i < l && (s8[i] & 0xc0) == 0x80) { + i++; + } + out += decode_codepoint(&s8[beg], (i - beg)); + } + return out; +} + +/*----------------------------------------------------------------------------- + * resolve_escape_sequence + *---------------------------------------------------------------------------*/ + +inline bool is_hex(char c, int& v) { + if ('0' <= c && c <= '9') { + v = c - '0'; + return true; + } else if ('a' <= c && c <= 'f') { + v = c - 'a' + 10; + return true; + } else if ('A' <= c && c <= 'F') { + v = c - 'A' + 10; + return true; + } + return false; +} + +inline bool is_digit(char c, int& v) { + if ('0' <= c && c <= '9') { + v = c - '0'; + return true; + } + return false; +} + +inline std::pair parse_hex_number(const char* s, size_t n, size_t i) { + int ret = 0; + int val; + while (i < n && is_hex(s[i], val)) { + ret = static_cast(ret * 16 + val); + i++; + } + return std::make_pair(ret, i); +} + +inline std::pair parse_octal_number(const char* s, size_t n, size_t i) { + int ret = 0; + int val; + while (i < n && is_digit(s[i], val)) { + ret = static_cast(ret * 8 + val); + i++; + } + return std::make_pair(ret, i); +} + +inline std::string resolve_escape_sequence(const char* s, size_t n) { + std::string r; + r.reserve(n); + + size_t i = 0; + while (i < n) { + auto ch = s[i]; + if (ch == '\\') { + i++; + switch (s[i]) { + case 'n': r += '\n'; i++; break; + case 'r': r += '\r'; i++; break; + case 't': r += '\t'; i++; break; + case '\'': r += '\''; i++; break; + case '"': r += '"'; i++; break; + case '[': r += '['; i++; break; + case ']': r += ']'; i++; break; + case '\\': r += '\\'; i++; break; + case 'x': + case 'u': { + char32_t cp; + std::tie(cp, i) = parse_hex_number(s, n, i + 1); + r += encode_codepoint(cp); + break; + } + default: { + char32_t cp; + std::tie(cp, i) = parse_octal_number(s, n, i); + r += encode_codepoint(cp); + break; + } + } + } else { + r += ch; + i++; + } + } + return r; +} + /*----------------------------------------------------------------------------- * PEG *---------------------------------------------------------------------------*/ @@ -717,6 +924,9 @@ struct SemanticValues : protected std::vector return peg::line_info(ss, s_); } + // Choice count + size_t choice_count() const { return choice_count_; } + // Choice number (0 based index) size_t choice() const { return choice_; } @@ -738,7 +948,7 @@ struct SemanticValues : protected std::vector return this->transform(beg, end, [](const any& v) { return v.get(); }); } - SemanticValues() : s_(nullptr), n_(0), choice_(0) {} + SemanticValues() : s_(nullptr), n_(0), choice_count_(0), choice_(0) {} using std::vector::iterator; using std::vector::const_iterator; @@ -765,11 +975,13 @@ struct SemanticValues : protected std::vector private: friend class Context; + friend class Sequence; friend class PrioritizedChoice; friend class Holder; const char* s_; size_t n_; + size_t choice_count_; size_t choice_; template @@ -790,6 +1002,17 @@ struct SemanticValues : protected std::vector } return r; } + + void reset() { + path = nullptr; + ss = nullptr; + tokens.clear(); + + s_ = nullptr; + n_ = 0; + choice_count_ = 0; + choice_ = 0; + } }; /* @@ -857,61 +1080,111 @@ class Action return bool(fn_); } - any operator()(const SemanticValues& sv, any& dt) const { + any operator()(SemanticValues& sv, any& dt) const { return fn_(sv, dt); } private: template - struct TypeAdaptor { - TypeAdaptor(std::function fn) + struct TypeAdaptor_sv { + TypeAdaptor_sv(std::function fn) + : fn_(fn) {} + any operator()(SemanticValues& sv, any& /*dt*/) { + return call(fn_, sv); + } + std::function fn_; + }; + + template + struct TypeAdaptor_csv { + TypeAdaptor_csv(std::function fn) : fn_(fn) {} - any operator()(const SemanticValues& sv, any& /*dt*/) { + any operator()(SemanticValues& sv, any& /*dt*/) { return call(fn_, sv); } std::function fn_; }; template - struct TypeAdaptor_c { - TypeAdaptor_c(std::function fn) + struct TypeAdaptor_sv_dt { + TypeAdaptor_sv_dt(std::function fn) : fn_(fn) {} - any operator()(const SemanticValues& sv, any& dt) { + any operator()(SemanticValues& sv, any& dt) { + return call(fn_, sv, dt); + } + std::function fn_; + }; + + template + struct TypeAdaptor_csv_dt { + TypeAdaptor_csv_dt(std::function fn) + : fn_(fn) {} + any operator()(SemanticValues& sv, any& dt) { return call(fn_, sv, dt); } std::function fn_; }; - typedef std::function Fty; + typedef std::function Fty; + + template + Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues& sv) const) { + return TypeAdaptor_sv(fn); + } template Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv) const) { - return TypeAdaptor(fn); + return TypeAdaptor_csv(fn); + } + + template + Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues& sv)) { + return TypeAdaptor_sv(fn); } template Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv)) { - return TypeAdaptor(fn); + return TypeAdaptor_csv(fn); + } + + template + Fty make_adaptor(F fn, R (* /*mf*/)(SemanticValues& sv)) { + return TypeAdaptor_sv(fn); } template Fty make_adaptor(F fn, R (* /*mf*/)(const SemanticValues& sv)) { - return TypeAdaptor(fn); + return TypeAdaptor_csv(fn); + } + + template + Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues& sv, any& dt) const) { + return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv, any& dt) const) { - return TypeAdaptor_c(fn); + return TypeAdaptor_csv_dt(fn); + } + + template + Fty make_adaptor(F fn, R (F::* /*mf*/)(SemanticValues& sv, any& dt)) { + return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R (F::* /*mf*/)(const SemanticValues& sv, any& dt)) { - return TypeAdaptor_c(fn); + return TypeAdaptor_csv_dt(fn); + } + + template + Fty make_adaptor(F fn, R(* /*mf*/)(SemanticValues& sv, any& dt)) { + return TypeAdaptor_sv_dt(fn); } template Fty make_adaptor(F fn, R(* /*mf*/)(const SemanticValues& sv, any& dt)) { - return TypeAdaptor_c(fn); + return TypeAdaptor_csv_dt(fn); } Fty fn_; @@ -962,6 +1235,7 @@ class Context std::vector> value_stack; size_t value_stack_size; + std::vector>> args_stack; size_t nest_level; @@ -972,7 +1246,7 @@ class Context std::shared_ptr wordOpe; - std::unordered_map captures; + std::vector> capture_scope_stack; const size_t def_count; const bool enablePackratParsing; @@ -1009,6 +1283,8 @@ class Context , cache_success(enablePackratParsing ? def_count * (l + 1) : 0) , tracer(a_tracer) { + args_stack.resize(1); + capture_scope_stack.resize(1); } template @@ -1051,11 +1327,9 @@ class Context if (!sv.empty()) { sv.clear(); } + sv.reset(); sv.path = path; sv.ss = s; - sv.s_ = nullptr; - sv.n_ = 0; - sv.tokens.clear(); return sv; } @@ -1063,6 +1337,35 @@ class Context value_stack_size--; } + void push_args(const std::vector>& args) { + args_stack.push_back(args); + } + + void pop_args() { + args_stack.pop_back(); + } + + const std::vector>& top_args() const { + return args_stack[args_stack.size() - 1]; + } + + void push_capture_scope() { + capture_scope_stack.resize(capture_scope_stack.size() + 1); + } + + void pop_capture_scope() { + capture_scope_stack.pop_back(); + } + + void shift_capture_values() { + assert(capture_scope_stack.size() >= 2); + auto it = capture_scope_stack.rbegin(); + auto it_prev = it + 1; + for (const auto& kv: *it) { + (*it_prev)[kv.first] = kv.second; + } + } + void set_error_pos(const char* a_s) { if (error_pos < a_s) error_pos = a_s; } @@ -1111,17 +1414,22 @@ class Sequence : public Ope size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("Sequence", s, n, sv, dt); + auto& chldsv = c.push(); size_t i = 0; for (const auto& ope : opes_) { c.nest_level++; auto se = make_scope_exit([&]() { c.nest_level--; }); const auto& rule = *ope; - auto len = rule.parse(s + i, n - i, sv, c, dt); + auto len = rule.parse(s + i, n - i, chldsv, c, dt); if (fail(len)) { return static_cast(-1); } i += len; } + sv.insert(sv.end(), chldsv.begin(), chldsv.end()); + sv.s_ = chldsv.c_str(); + sv.n_ = chldsv.length(); + sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end()); return i; } @@ -1158,20 +1466,23 @@ class PrioritizedChoice : public Ope for (const auto& ope : opes_) { c.nest_level++; auto& chldsv = c.push(); + c.push_capture_scope(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); + c.pop_capture_scope(); }); const auto& rule = *ope; auto len = rule.parse(s, n, chldsv, c, dt); if (success(len)) { - if (!chldsv.empty()) { - sv.insert(sv.end(), chldsv.begin(), chldsv.end()); - } + sv.insert(sv.end(), chldsv.begin(), chldsv.end()); sv.s_ = chldsv.c_str(); sv.n_ = chldsv.length(); + sv.choice_count_ = opes_.size(); sv.choice_ = id; sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end()); + + c.shift_capture_values(); return len; } id++; @@ -1197,12 +1508,18 @@ class ZeroOrMore : public Ope size_t i = 0; while (n - i > 0) { c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); + c.push_capture_scope(); + auto se = make_scope_exit([&]() { + c.nest_level--; + c.pop_capture_scope(); + }); auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); const auto& rule = *ope_; auto len = rule.parse(s + i, n - i, sv, c, dt); - if (fail(len)) { + if (success(len)) { + c.shift_capture_values(); + } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + static_cast(save_sv_size)); } @@ -1232,10 +1549,16 @@ class OneOrMore : public Ope size_t len = 0; { c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); + c.push_capture_scope(); + auto se = make_scope_exit([&]() { + c.nest_level--; + c.pop_capture_scope(); + }); const auto& rule = *ope_; len = rule.parse(s, n, sv, c, dt); - if (fail(len)) { + if (success(len)) { + c.shift_capture_values(); + } else { return static_cast(-1); } } @@ -1243,12 +1566,18 @@ class OneOrMore : public Ope auto i = len; while (n - i > 0) { c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); + c.push_capture_scope(); + auto se = make_scope_exit([&]() { + c.nest_level--; + c.pop_capture_scope(); + }); auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); const auto& rule = *ope_; len = rule.parse(s + i, n - i, sv, c, dt); - if (fail(len)) { + if (success(len)) { + c.shift_capture_values(); + } else { if (sv.size() != save_sv_size) { sv.erase(sv.begin() + static_cast(save_sv_size)); } @@ -1279,10 +1608,15 @@ class Option : public Ope c.nest_level++; auto save_sv_size = sv.size(); auto save_tok_size = sv.tokens.size(); - auto se = make_scope_exit([&]() { c.nest_level--; }); + c.push_capture_scope(); + auto se = make_scope_exit([&]() { + c.nest_level--; + c.pop_capture_scope(); + }); const auto& rule = *ope_; auto len = rule.parse(s, n, sv, c, dt); if (success(len)) { + c.shift_capture_values(); return len; } else { if (sv.size() != save_sv_size) { @@ -1310,9 +1644,11 @@ class AndPredicate : public Ope c.trace("AndPredicate", s, n, sv, dt); c.nest_level++; auto& chldsv = c.push(); + c.push_capture_scope(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); + c.pop_capture_scope(); }); const auto& rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); @@ -1338,9 +1674,11 @@ class NotPredicate : public Ope auto save_error_pos = c.error_pos; c.nest_level++; auto& chldsv = c.push(); + c.push_capture_scope(); auto se = make_scope_exit([&]() { c.nest_level--; c.pop(); + c.pop_capture_scope(); }); const auto& rule = *ope_; auto len = rule.parse(s, n, chldsv, c, dt); @@ -1359,6 +1697,7 @@ class NotPredicate : public Ope }; class LiteralString : public Ope + , public std::enable_shared_from_this { public: LiteralString(const std::string& s) @@ -1372,54 +1711,69 @@ class LiteralString : public Ope void accept(Visitor& v) override; std::string lit_; - mutable bool init_is_word_; - mutable bool is_word_; + mutable bool init_is_word_; + mutable bool is_word_; }; class CharacterClass : public Ope + , public std::enable_shared_from_this { public: - CharacterClass(const std::string& chars) : chars_(chars) {} + CharacterClass(const std::string& s) { + auto chars = decode(s.c_str(), s.length()); + auto i = 0u; + while (i < chars.size()) { + if (i + 2 < chars.size() && chars[i + 1] == '-') { + auto cp1 = chars[i]; + auto cp2 = chars[i + 2]; + ranges_.emplace_back(std::make_pair(cp1, cp2)); + i += 3; + } else { + auto cp = chars[i]; + ranges_.emplace_back(std::make_pair(cp, cp)); + i += 1; + } + } + } + + CharacterClass(const std::vector>& ranges) : ranges_(ranges) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("CharacterClass", s, n, sv, dt); - // TODO: UTF8 support + if (n < 1) { c.set_error_pos(s); return static_cast(-1); } - auto ch = s[0]; - auto i = 0u; - while (i < chars_.size()) { - if (i + 2 < chars_.size() && chars_[i + 1] == '-') { - if (chars_[i] <= ch && ch <= chars_[i + 2]) { - return 1; - } - i += 3; - } else { - if (chars_[i] == ch) { - return 1; + + char32_t cp; + auto len = decode_codepoint(s, n, cp); + + if (!ranges_.empty()) { + for (const auto& range: ranges_) { + if (range.first <= cp && cp <= range.second) { + return len; } - i += 1; } } + c.set_error_pos(s); return static_cast(-1); } void accept(Visitor& v) override; - std::string chars_; + std::vector> ranges_; }; class Character : public Ope + , public std::enable_shared_from_this { public: Character(char ch) : ch_(ch) {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("Character", s, n, sv, dt); - // TODO: UTF8 support if (n < 1 || s[0] != ch_) { c.set_error_pos(s); return static_cast(-1); @@ -1433,19 +1787,41 @@ class Character : public Ope }; class AnyCharacter : public Ope + , public std::enable_shared_from_this { public: size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { c.trace("AnyCharacter", s, n, sv, dt); - // TODO: UTF8 support - if (n < 1) { + auto len = codepoint_length(s, n); + if (len < 1) { c.set_error_pos(s); return static_cast(-1); } - return 1; + return len; + } + + void accept(Visitor& v) override; +}; + +class CaptureScope : public Ope +{ +public: + CaptureScope(const std::shared_ptr& ope) + : ope_(ope) {} + + size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { + c.push_capture_scope(); + auto se = make_scope_exit([&]() { + c.pop_capture_scope(); + }); + const auto& rule = *ope_; + auto len = rule.parse(s, n, sv, c, dt); + return len; } void accept(Visitor& v) override; + + std::shared_ptr ope_; }; class Capture : public Ope @@ -1468,9 +1844,7 @@ class Capture : public Ope void accept(Visitor& v) override; std::shared_ptr ope_; - -private: - MatchAction match_action_; + MatchAction match_action_; }; class TokenBoundary : public Ope @@ -1506,6 +1880,19 @@ class Ignore : public Ope typedef std::function Parser; +class User : public Ope +{ +public: + User(Parser fn) : fn_(fn) {} + size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { + c.trace("User", s, n, sv, dt); + assert(fn_); + return fn_(s, n, sv, dt); + } + void accept(Visitor& v) override; + std::function fn_; +}; + class WeakHolder : public Ope { public: @@ -1533,7 +1920,7 @@ class Holder : public Ope void accept(Visitor& v) override; - any reduce(const SemanticValues& sv, any& dt) const; + any reduce(SemanticValues& sv, any& dt) const; std::shared_ptr ope_; Definition* outer_; @@ -1541,28 +1928,42 @@ class Holder : public Ope friend class Definition; }; -class DefinitionReference : public Ope +typedef std::unordered_map Grammar; + +class Reference : public Ope + , public std::enable_shared_from_this { public: - DefinitionReference( - const std::unordered_map& grammar, const std::string& name, const char* s) + Reference( + const Grammar& grammar, + const std::string& name, + const char* s, + bool is_macro, + const std::vector>& args) : grammar_(grammar) , name_(name) - , s_(s) {} + , s_(s) + , is_macro_(is_macro) + , args_(args) + , rule_(nullptr) + , iarg_(0) + {} size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; void accept(Visitor& v) override; - std::shared_ptr get_rule() const; + std::shared_ptr get_core_operator() const; - const std::unordered_map& grammar_; - const std::string name_; - const char* s_; + const Grammar& grammar_; + const std::string name_; + const char* s_; -private: - mutable std::once_flag init_; - mutable std::shared_ptr rule_; + const bool is_macro_; + const std::vector> args_; + + Definition* rule_; + size_t iarg_; }; class Whitespace : public Ope @@ -1598,68 +1999,124 @@ class BackReference : public Ope }; /* - * Visitor + * Factories */ -struct Ope::Visitor -{ - virtual ~Visitor() {} - virtual void visit(Sequence& /*ope*/) {} - virtual void visit(PrioritizedChoice& /*ope*/) {} - virtual void visit(ZeroOrMore& /*ope*/) {} - virtual void visit(OneOrMore& /*ope*/) {} - virtual void visit(Option& /*ope*/) {} - virtual void visit(AndPredicate& /*ope*/) {} - virtual void visit(NotPredicate& /*ope*/) {} - virtual void visit(LiteralString& /*ope*/) {} - virtual void visit(CharacterClass& /*ope*/) {} - virtual void visit(Character& /*ope*/) {} - virtual void visit(AnyCharacter& /*ope*/) {} - virtual void visit(Capture& /*ope*/) {} - virtual void visit(TokenBoundary& /*ope*/) {} - virtual void visit(Ignore& /*ope*/) {} - virtual void visit(WeakHolder& /*ope*/) {} - virtual void visit(Holder& /*ope*/) {} - virtual void visit(DefinitionReference& /*ope*/) {} - virtual void visit(Whitespace& /*ope*/) {} - virtual void visit(BackReference& /*ope*/) {} -}; +template +std::shared_ptr seq(Args&& ...args) { + return std::make_shared(static_cast>(args)...); +} -struct AssignIDToDefinition : public Ope::Visitor -{ - using Ope::Visitor::visit; +template +std::shared_ptr cho(Args&& ...args) { + return std::make_shared(static_cast>(args)...); +} - void visit(Sequence& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } - } - void visit(PrioritizedChoice& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } - } - void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } - void visit(OneOrMore& ope) override { ope.ope_->accept(*this); } - void visit(Option& ope) override { ope.ope_->accept(*this); } - void visit(AndPredicate& ope) override { ope.ope_->accept(*this); } - void visit(NotPredicate& ope) override { ope.ope_->accept(*this); } - void visit(Capture& ope) override { ope.ope_->accept(*this); } - void visit(TokenBoundary& ope) override { ope.ope_->accept(*this); } - void visit(Ignore& ope) override { ope.ope_->accept(*this); } - void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } - void visit(Holder& ope) override; - void visit(DefinitionReference& ope) override { ope.get_rule()->accept(*this); } +inline std::shared_ptr zom(const std::shared_ptr& ope) { + return std::make_shared(ope); +} - std::unordered_map ids; -}; +inline std::shared_ptr oom(const std::shared_ptr& ope) { + return std::make_shared(ope); +} -struct IsToken : public Ope::Visitor -{ - IsToken() : has_token_boundary(false), has_rule(false) {} +inline std::shared_ptr opt(const std::shared_ptr& ope) { + return std::make_shared