From 2a2e24c3a660f789767be64852939157c97ab0f3 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Sat, 2 Oct 2021 14:24:21 +0900 Subject: [PATCH] SQL: allow identifier with placeholder such as '${a_1}' Close #3169. Some(?) dialects support shell-like variable substitution. HiveQL is one such dialect. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution With this change, the SQL parser accepts '${var}' as a part of an identifier. TODO: `var` itself can be extracted as a reference tag. Signed-off-by: Masatake YAMATO --- Units/parser-sql.r/sql-var-subst.d/args.ctags | 1 + .../sql-var-subst.d/expected.tags | 18 ++++ Units/parser-sql.r/sql-var-subst.d/input.sql | 19 ++++ parsers/sql.c | 88 +++++++++++++++++-- 4 files changed, 118 insertions(+), 8 deletions(-) create mode 100644 Units/parser-sql.r/sql-var-subst.d/args.ctags create mode 100644 Units/parser-sql.r/sql-var-subst.d/expected.tags create mode 100644 Units/parser-sql.r/sql-var-subst.d/input.sql diff --git a/Units/parser-sql.r/sql-var-subst.d/args.ctags b/Units/parser-sql.r/sql-var-subst.d/args.ctags new file mode 100644 index 0000000000..5ee5f79f70 --- /dev/null +++ b/Units/parser-sql.r/sql-var-subst.d/args.ctags @@ -0,0 +1 @@ +--sort=no diff --git a/Units/parser-sql.r/sql-var-subst.d/expected.tags b/Units/parser-sql.r/sql-var-subst.d/expected.tags new file mode 100644 index 0000000000..b558914942 --- /dev/null +++ b/Units/parser-sql.r/sql-var-subst.d/expected.tags @@ -0,0 +1,18 @@ +tb_name${dt} input.sql /^create table database.tb_name${dt} as$/;" t +col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt} +col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt} +tb_name${dt}${dt0} input.sql /^create table database.tb_name${dt}${dt0} as$/;" t +col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0} +col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E 
table:tb_name${dt}${dt0} +${dt1}tb_name${dt}${dt0} input.sql /^create table database.${dt1}tb_name${dt}${dt0} as$/;" t +col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0} +col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0} +${dt1}tb_name${dt}${dt0}Z input.sql /^create table database.${dt1}tb_name${dt}${dt0}Z as$/;" t +col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z +col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z +tb_${dt2}_name input.sql /^create table database.tb_${dt2}_name as$/;" t +col_${key0} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name +col_${key1} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name +tb_${${d}${t:h}${i}}_name input.sql /^create table database.tb_${${d}${t:h}${i}}_name as$/;" t +col_${key${n}${m}a} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name +col_${key${m}${n}b} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name diff --git a/Units/parser-sql.r/sql-var-subst.d/input.sql b/Units/parser-sql.r/sql-var-subst.d/input.sql new file mode 100644 index 0000000000..120272ddbb --- /dev/null +++ b/Units/parser-sql.r/sql-var-subst.d/input.sql @@ -0,0 +1,19 @@ +-- Based on issue #3169 opened by @Appalled + +create table database.tb_name${dt} as +select col_a, col_b from database.tb_name; + +create table database.tb_name${dt}${dt0} as +select col_a, col_b from database.tb_name; + +create table database.${dt1}tb_name${dt}${dt0} as +select col_a, col_b from database.tb_name; + +create table database.${dt1}tb_name${dt}${dt0}Z as +select col_a, col_b from database.tb_name; + +create table database.tb_${dt2}_name as +select col_${key0}, 
col_${key1} from database.tb_name; + +create table database.tb_${${d}${t:h}${i}}_name as +select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name; diff --git a/parsers/sql.c b/parsers/sql.c index 2101d9eea2..869c858d4c 100644 --- a/parsers/sql.c +++ b/parsers/sql.c @@ -684,16 +684,68 @@ static void parseString (vString *const string, const int delimiter, int *promis } } +/* Parse a ${foo} variable substitution and append its text to STRING. + * parseVarSubst() reads one ${...} (nesting allowed), parseVarSubstSequence() a run of them; both return the character read next. + * HiveQL is one of the implementations that have the variable substitution feature. + * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution + */ +static int parseVarSubstSequence (vString *const string, const int firstChar); +static int parseVarSubst (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (c == '$'); + vStringPut (string, c); + + c = getcFromInputFile (); + if (c != '{') + return c; /* a bare $: only the $ was appended, the following character goes back to the caller */ + vStringPut (string, c); + + while ((c = getcFromInputFile ())!= EOF) + { + if (c == '}') + { + vStringPut (string, c); + c = getcFromInputFile (); + return c; + } + else if (c == '$') + { + c = parseVarSubstSequence (string, c); + ungetcToInputFile (c); /* the loop condition re-reads the character that ended the nested sequence */ + } + else + vStringPut (string, c); + } + + return c; /* EOF was reached before the closing brace */ +} + +static int parseVarSubstSequence (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + c = parseVarSubst (string, c); + while (c == '$'); + + return c; +} + /* Read a C identifier beginning with "firstChar" and places it into "name". */ static void parseIdentifier (vString *const string, const int firstChar) { int c = firstChar; - Assert (isIdentChar1 (c)); + Assert (vStringLength (string) > 0 || isIdentChar1 (c)); do { vStringPut (string, c); c = getcFromInputFile (); + + /* Handle ${var} in HiveQL. 
*/ + if (c == '$') + c = parseVarSubstSequence (string, c); } while (isIdentChar (c)); if (!isspace (c)) ungetcToInputFile (c); /* unget non-identifier character */ @@ -937,15 +989,23 @@ static void readToken (tokenInfo *const token) } case '$': - token->type = parseDollarQuote (token->string, c, &token->promise); - token->lineNumber = getInputLineNumber (); - token->filePosition = getInputFilePosition (); - break; + { + int c0 = getcFromInputFile (); + ungetcToInputFile (c0); + if (c0 != '{') + { + token->type = parseDollarQuote (token->string, c, &token->promise); + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + break; + } + c = parseVarSubstSequence (token->string, c); + /* FALL THROUGH */ + } default: - if (! isIdentChar1 (c)) - token->type = TOKEN_UNDEFINED; - else + if ( isIdentChar1 (c) + || (vStringLength (token->string) > 0 && isIdentChar (c))) { parseIdentifier (token->string, c); token->lineNumber = getInputLineNumber (); @@ -962,6 +1022,18 @@ static void readToken (tokenInfo *const token) else token->type = TOKEN_KEYWORD; } + else if (vStringLength (token->string) > 0) + { + ungetcToInputFile (c); + + /* token->string may be ${var}. + * We regard ${var} as an identifier. */ + token->type = TOKEN_IDENTIFIER; + token->lineNumber = getInputLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + token->type = TOKEN_UNDEFINED; break; } }