Skip to content

Commit

Permalink
SQL: allow identifier with placeholder such as '${a_1}'
Browse files Browse the repository at this point in the history
Close #3169.

Some SQL dialects support shell-like variable substitution.
HiveQL is one such dialect.
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution

With this change, the SQL parser accepts '${var}' as a part of an identifier.

TODO: `var` itself can be extracted as a reference tag.
Signed-off-by: Masatake YAMATO <[email protected]>
  • Loading branch information
masatake committed Oct 3, 2021
1 parent c31d572 commit 1c9086b
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 8 deletions.
1 change: 1 addition & 0 deletions Units/parser-sql.r/sql-var-subst.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--sort=no
18 changes: 18 additions & 0 deletions Units/parser-sql.r/sql-var-subst.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
tb_name${dt} input.sql /^create table database.tb_name${dt} as$/;" t
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
tb_name${dt}${dt0} input.sql /^create table database.tb_name${dt}${dt0} as$/;" t
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
${dt1}tb_name${dt}${dt0} input.sql /^create table database.${dt1}tb_name${dt}${dt0} as$/;" t
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
${dt1}tb_name${dt}${dt0}Z input.sql /^create table database.${dt1}tb_name${dt}${dt0}Z as$/;" t
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
tb_${dt2}_name input.sql /^create table database.tb_${dt2}_name as$/;" t
col_${key0} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
col_${key1} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
tb_${${d}${t:h}${i}}_name input.sql /^create table database.tb_${${d}${t:h}${i}}_name as$/;" t
col_${key${n}${m}a} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
col_${key${m}${n}b} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
19 changes: 19 additions & 0 deletions Units/parser-sql.r/sql-var-subst.d/input.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Based on issue #3169 opened by @Appalled

create table database.tb_name${dt} as
select col_a, col_b from database.tb_name;

create table database.tb_name${dt}${dt0} as
select col_a, col_b from database.tb_name;

create table database.${dt1}tb_name${dt}${dt0} as
select col_a, col_b from database.tb_name;

create table database.${dt1}tb_name${dt}${dt0}Z as
select col_a, col_b from database.tb_name;

create table database.tb_${dt2}_name as
select col_${key0}, col_${key1} from database.tb_name;

create table database.tb_${${d}${t:h}${i}}_name as
select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;
88 changes: 80 additions & 8 deletions parsers/sql.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,16 +684,68 @@ static void parseString (vString *const string, const int delimiter, int *promis
}
}

/* Parses a single variable substitution of the form ${foo}.
 *
 * HiveQL is one implementation that provides the variable substitution
 * feature:
 * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution
 *
 * "firstChar" must be '$'; it is appended to "string". If the next input
 * character is not '{', that character is returned as-is (it is neither
 * appended nor pushed back). Otherwise everything up to and including the
 * closing '}' is appended to "string", with nested substitutions delegated to
 * parseVarSubstSequence(), and the character following '}' is returned.
 * EOF is returned when the input ends before the closing '}'.
 */
static int parseVarSubstSequence (vString *const string, const int firstChar);
static int parseVarSubst (vString *const string, const int firstChar)
{
	int ch;

	Assert (firstChar == '$');
	vStringPut (string, firstChar);

	ch = getcFromInputFile ();
	if (ch != '{')
		return ch;
	vStringPut (string, ch);

	for (;;)
	{
		ch = getcFromInputFile ();
		if (ch == EOF)
			return ch;

		switch (ch)
		{
		case '}':
			/* End of this substitution: hand back the look-ahead. */
			vStringPut (string, ch);
			return getcFromInputFile ();

		case '$':
			/* Nested ${...}: consume it, then push back the character
			 * that terminated it so this loop examines it next. */
			ch = parseVarSubstSequence (string, ch);
			ungetcToInputFile (ch);
			break;

		default:
			vStringPut (string, ch);
			break;
		}
	}
}

/* Parses a run of one or more adjacent variable substitutions, such as
 * "${a}${b}", appending the consumed text to "string".
 *
 * "firstChar" is the character the caller has already read; it is handed to
 * parseVarSubst(), which asserts that it is '$'. Parsing repeats for as long
 * as parseVarSubst() returns another '$'.
 *
 * Returns the first non-'$' character returned by parseVarSubst(); this may
 * be EOF. The character is not pushed back: the caller decides what to do
 * with it.
 */
static int parseVarSubstSequence (vString *const string, const int firstChar)
{
	/* Seed from the caller's character. Leaving "c" uninitialized would pass
	 * an indeterminate value to parseVarSubst() on the first iteration. */
	int c = firstChar;

	do
		c = parseVarSubst (string, c);
	while (c == '$');

	return c;
}

/* Reads a C identifier beginning with "firstChar" and places it into "string".
*/
static void parseIdentifier (vString *const string, const int firstChar)
{
int c = firstChar;
Assert (isIdentChar1 (c));
Assert (vStringLength (string) > 0 || isIdentChar1 (c));
do
{
vStringPut (string, c);
c = getcFromInputFile ();

/* Handle ${var} in HiveQL. */
if (c == '$')
c = parseVarSubstSequence (string, c);
} while (isIdentChar (c));
if (!isspace (c))
ungetcToInputFile (c); /* unget non-identifier character */
Expand Down Expand Up @@ -937,15 +989,23 @@ static void readToken (tokenInfo *const token)
}

case '$':
token->type = parseDollarQuote (token->string, c, &token->promise);
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
break;
{
int c0 = getcFromInputFile ();
ungetcToInputFile (c0);
if (c0 != '{')
{
token->type = parseDollarQuote (token->string, c, &token->promise);
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
break;
}
c = parseVarSubstSequence (token->string, c);
/* FALL THROUGH */
}

default:
if (! isIdentChar1 (c))
token->type = TOKEN_UNDEFINED;
else
if ( isIdentChar1 (c)
|| (vStringLength (token->string) > 0 && isIdentChar (c)))
{
parseIdentifier (token->string, c);
token->lineNumber = getInputLineNumber ();
Expand All @@ -962,6 +1022,18 @@ static void readToken (tokenInfo *const token)
else
token->type = TOKEN_KEYWORD;
}
else if (vStringLength (token->string) > 0)
{
ungetcToInputFile (c);

/* token->string may be ${var}.
* We regard ${var} as an identifier. */
token->type = TOKEN_IDENTIFIER;
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
}
else
token->type = TOKEN_UNDEFINED;
break;
}
}
Expand Down

0 comments on commit 1c9086b

Please sign in to comment.