Skip to content

Commit

Permalink
NEW Macros (#15)
Browse files Browse the repository at this point in the history
* JSON Macros

* Add Compare Macros

* Fix Parameter syntax

* Update JSON Functions

* dictGet emulation with Variables

```
D SET VARIABLE my_map = MAP {'k1': 10, 'k2': 20};
D SELECT dictGet('my_map', 'k1');
```

* ch_scan extension
  • Loading branch information
lmangani authored Nov 7, 2024
1 parent a000d4f commit 40f740f
Showing 1 changed file with 58 additions and 21 deletions.
79 changes: 58 additions & 21 deletions chsql/src/chsql_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,30 @@ static DefaultMacro chsql_macros[] = {
{DEFAULT_SCHEMA, "toFloatOrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS DOUBLE))"},
{DEFAULT_SCHEMA, "toFloatOrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS DOUBLE) IS NOT NULL THEN CAST(x AS DOUBLE) ELSE 0 END)"},
// -- Arithmetic macros
{DEFAULT_SCHEMA, "intDiv", {"a", "b"}, {{nullptr, nullptr}}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"},
{DEFAULT_SCHEMA, "intDivOrNull", {"a", "b"}, {{nullptr, nullptr}}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"},
{DEFAULT_SCHEMA, "intDiv", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"},
{DEFAULT_SCHEMA, "intDivOrNull", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"},
{DEFAULT_SCHEMA, "intDivOZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(COALESCE((TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT)),0))"},
{DEFAULT_SCHEMA, "plus", {"a", "b"}, {{nullptr, nullptr}}, R"(add(a, b))"},
{DEFAULT_SCHEMA, "minus", {"a", "b"}, {{nullptr, nullptr}}, R"(subtract(a, b))"},
{DEFAULT_SCHEMA, "modulo", {"a", "b"}, {{nullptr, nullptr}}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"},
{DEFAULT_SCHEMA, "moduloOrZero", {"a", "b"}, {{nullptr, nullptr}}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"},
{DEFAULT_SCHEMA, "plus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(add(a, b))"},
{DEFAULT_SCHEMA, "minus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(subtract(a, b))"},
{DEFAULT_SCHEMA, "modulo", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"},
{DEFAULT_SCHEMA, "moduloOrZero", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"},
// -- Tuple macros
{DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivide", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiply", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMinus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tuplePlus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModulo", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleConcat", {"a", "b"}, {{nullptr, nullptr}}, R"(list_concat(a, b))"},
{DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivide", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiply", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMinus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tuplePlus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModulo", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleConcat", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(list_concat(a, b))"},
// -- String matching macros
{DEFAULT_SCHEMA, "match", {"string", "token"}, {{nullptr, nullptr}}, R"(string LIKE token)"},
// -- Array macros
{DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack"}, {{nullptr, nullptr}}, R"(haystack @> ARRAY[needle])"},
{DEFAULT_SCHEMA, "arrayMap", {"e", "arr"}, {{nullptr, nullptr}}, R"(array_transform(arr, e -> (e * e)))"},
{DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack", nullptr}, {{nullptr, nullptr}}, R"(haystack @> ARRAY[needle])"},
{DEFAULT_SCHEMA, "arrayMap", {"e", "arr", nullptr}, {{nullptr, nullptr}}, R"(array_transform(arr, e -> (e * e)))"},
// Date and Time Functions
{DEFAULT_SCHEMA, "toYear", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(YEAR FROM date_expression))"},
{DEFAULT_SCHEMA, "toMonth", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(MONTH FROM date_expression))"},
Expand Down Expand Up @@ -119,10 +119,32 @@ static DefaultMacro chsql_macros[] = {
// IP Address Functions
{DEFAULT_SCHEMA, "IPv4NumToString", {"num", nullptr}, {{nullptr, nullptr}}, R"(CONCAT(CAST((num >> 24) & 255 AS VARCHAR), '.', CAST((num >> 16) & 255 AS VARCHAR), '.', CAST((num >> 8) & 255 AS VARCHAR), '.', CAST(num & 255 AS VARCHAR)))"},
{DEFAULT_SCHEMA, "IPv4StringToNum", {"ip", nullptr}, {{nullptr, nullptr}}, R"(CAST(SPLIT_PART(ip, '.', 1) AS INTEGER) * 256 * 256 * 256 + CAST(SPLIT_PART(ip, '.', 2) AS INTEGER) * 256 * 256 + CAST(SPLIT_PART(ip, '.', 3) AS INTEGER) * 256 + CAST(SPLIT_PART(ip, '.', 4) AS INTEGER))"},
// -- JSON Macros
{DEFAULT_SCHEMA, "JSONExtract", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key))"},
{DEFAULT_SCHEMA, "JSONExtractString", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS VARCHAR))"},
{DEFAULT_SCHEMA, "JSONExtractUInt", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS UINTEGER))"},
{DEFAULT_SCHEMA, "JSONExtractInt", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS INT32))"},
{DEFAULT_SCHEMA, "JSONExtractFloat", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS DOUBLE))"},
{DEFAULT_SCHEMA, "JSONExtractRaw", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key))"},
{DEFAULT_SCHEMA, "JSONHas", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key) IS NOT NULL)"},
{DEFAULT_SCHEMA, "JSONLength", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_array_length(json))"},
{DEFAULT_SCHEMA, "JSONType", {"json", "path", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN path IS NULL THEN json_each(json) ELSE json_each(json,path) END)"},
{DEFAULT_SCHEMA, "JSONExtractKeys", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_object_keys(json))"},
{DEFAULT_SCHEMA, "JSONExtractValues", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_each_text(json))"},
// -- Compare Macros
{DEFAULT_SCHEMA, "equals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a = b))"},
{DEFAULT_SCHEMA, "notEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a <> b))"},
{DEFAULT_SCHEMA, "less", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a < b))"},
{DEFAULT_SCHEMA, "greater", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a > b))"},
{DEFAULT_SCHEMA, "lessOrEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a <= b))"},
{DEFAULT_SCHEMA, "greaterOrEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a >= b))"},
// -- Misc macros
{DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, {{nullptr, nullptr}}, R"(toString(uuid()))"},
{DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, {{nullptr, nullptr}}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"},
{DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, {{nullptr, nullptr}}, R"(BIT_COUNT(num))"},
// Dictionary Emulation using MAP VARIABLES
{DEFAULT_SCHEMA, "dictGet", {"dict","attr", nullptr}, {{nullptr, nullptr}}, R"(getvariable(dict)[attr][1])"},
// -- End Macro
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}};

// To add a new table SQL macro, add a new macro to this array!
Expand All @@ -142,8 +164,23 @@ static DefaultMacro chsql_macros[] = {
// clang-format off
static const DefaultTableMacro chsql_table_macros[] = {
{DEFAULT_SCHEMA, "numbers", {"x", nullptr}, {{"z", "0"}, {nullptr, nullptr}}, R"(SELECT * as number FROM generate_series(z,x-1);)"},
{DEFAULT_SCHEMA, "ch_scan", {"query", "server"}, {{"format", "JSONEachRow"}, {"user", "play"}, {nullptr, nullptr}}, R"(SELECT * FROM read_json_auto(concat(server, '/?default_format=', format, '&user=', user, '&query=', query)))"},
{DEFAULT_SCHEMA, "url", {"url", "format"}, {{nullptr, nullptr}}, R"(WITH "JSON" as (SELECT * FROM read_json_auto(url)), "PARQUET" as (SELECT * FROM read_parquet(url)), "CSV" as (SELECT * FROM read_csv_auto(url)), "BLOB" as (SELECT * FROM read_blob(url)), "TEXT" as (SELECT * FROM read_text(url)) FROM query_table(format))"},
{DEFAULT_SCHEMA, "url", {"url", "format", nullptr}, {{nullptr, nullptr}}, R"(WITH "JSON" as (SELECT * FROM read_json_auto(url)), "PARQUET" as (SELECT * FROM read_parquet(url)), "CSV" as (SELECT * FROM read_csv_auto(url)), "BLOB" as (SELECT * FROM read_blob(url)), "TEXT" as (SELECT * FROM read_text(url)) FROM query_table(format))"},
{DEFAULT_SCHEMA, "ch_scan", {"query", "server", nullptr}, {{"format", "JSONEachRow"}, {"user", "play"}, {nullptr, nullptr}}, R"(
WITH
"json" AS (SELECT * FROM read_json_auto(concat(server, '/?default_format=jsoneachrow&user=', user, '&query=', query))),
"parquet" AS (SELECT * FROM read_parquet(concat(server, '/?default_format=parquet&user=', user, '&query=', query))),
"csv" AS (SELECT * FROM read_csv_auto(concat(server, '/?default_format=csv&user=', user, '&query=', query))),
"blob" AS (SELECT * FROM read_blob(concat(server, '/?default_format=blob&user=', user, '&query=', query))),
"text" AS (SELECT * FROM read_text(concat(server, '/?default_format=text&user=', user, '&query=', query)))
FROM query_table(CASE
WHEN LOWER(format) LIKE 'json%' THEN 'json'
WHEN LOWER(format) LIKE 'csv%' THEN 'csv'
WHEN LOWER(format) = 'parquet' THEN 'parquet'
WHEN LOWER(format) = 'blob' THEN 'blob'
WHEN LOWER(format) = 'text' THEN 'text'
ELSE 'json'
END)
)"},
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}
};
// clang-format on
Expand Down

0 comments on commit 40f740f

Please sign in to comment.