Merge pull request #233 from dmaresma/feature/fix_snowflake
Feature/fix snowflake
xnuinside authored Jan 20, 2024
2 parents f5fe0f6 + 63fd597 commit f6ad6ec
Showing 6 changed files with 525 additions and 351 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,11 @@
**v1.0.3**
### Improvements
1. Fixed bug with `CREATE OR REPLACE SCHEMA`.

### Snowflake
1. Fixed bug with the Snowflake (stage_)file_format option when its value is a single quoted string, e.g. `FIELD_OPTIONALLY_ENCLOSED_BY = '\"'`, `FIELD_DELIMITER = '|'`.
2. Improved Snowflake file_format parsing: key=value options are now collected into a dict (see the sketch after this diff).

**v1.0.2**
### Improvements
1. Fixed bug that placed the first table property value in the 'authorization' key. Now the real property name is used.
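A minimal sketch of the new dict output, mirroring test_external_table_with_field_delimiter added below (the DDL is trimmed from that test; the import path is the package's documented entry point):

    from simple_ddl_parser import DDLParser

    ddl = """create or replace external table if not exists ${database_name}.MySchemaName.MyTableName(
        "Filename" VARCHAR(16777216) AS (METADATA$FILENAME))
        partition by ("Filename")
        location = @ADL_DH_DL_PTS/
        auto_refresh = false
        file_format = (TYPE=CSV FIELD_DELIMITER='|' TRIM_SPACE=TRUE)
        ;"""

    result = DDLParser(ddl, normalize_names=True).run(output_mode="snowflake")
    # file_format now parses to a dict instead of a flat token list:
    # {'TYPE': 'CSV', 'FIELD_DELIMITER': "'|'", 'TRIM_SPACE': 'TRUE'}
    print(result[0]["table_properties"]["file_format"])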
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.0.2"
version = "1.0.3"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
30 changes: 19 additions & 11 deletions simple_ddl_parser/dialects/snowflake.py
@@ -1,9 +1,11 @@
import re
from typing import List

from simple_ddl_parser.utils import remove_par


class Snowflake:

def p_clone(self, p: List) -> None:
"""clone : CLONE id"""
p_list = list(p)
@@ -17,20 +19,26 @@ def p_expression_cluster_by(self, p: List) -> None:
p_list = remove_par(list(p))
p[0]["cluster_by"] = p_list[-1]

def p_multiple_format_equals(self, p: List) -> None:
"""multiple_format_equals : fmt_equals
| multiple_format_equals fmt_equals
def p_multi_id_or_string(self, p: List) -> None:
"""multi_id_or_string : id_or_string
| multi_id_or_string id_or_string
| f_call
| multi_id_or_string f_call
"""
# Handles multiple format options in the same format statement
p[0] = p[1]
p_list = list(p)
if isinstance(p[1], list):
p[0] = p[1]
p[0].append(p_list[-1])
else:
value = " ".join(p_list[1:])
p[0] = value

def p_fmt_equals(self, p: List) -> None:
"""fmt_equals : id LP RP
| id LP fmt_equals RP
| id LP multi_id RP
"""fmt_equals : id LP multi_id_or_string RP
"""
# Match KEY = value pairs: bare words, short quoted strings ('x', 'xx'),
# quoted parenthesized lists, and empty () values
fmt_split = re.compile(r"\w+\s*=\s*\w+|\w+\s*=\s*'.'|\w+\s*=\s*'..'|\w+\s*=\s*\('.+'\)|\w+\s*=\(\)")
p_list = list(p)
p[0] = p_list[2:][1].split(" ")
p[0] = {f.split('=')[0].strip(): f.split('=')[1].strip() for f in fmt_split.findall(p_list[3]) if '=' in f}

def p_table_property_equals(self, p: List) -> None:
"""table_property_equals : id id id_or_string
@@ -164,13 +172,13 @@ def p_expression_catalog(self, p: List) -> None:
p[0]["catalog"] = p_list[-1]

def p_expression_file_format(self, p: List) -> None:
"""expr : expr FILE_FORMAT multiple_format_equals"""
"""expr : expr FILE_FORMAT fmt_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["file_format"] = p_list[-1]

def p_expression_stage_file_format(self, p: List) -> None:
"""expr : expr STAGE_FILE_FORMAT multiple_format_equals"""
"""expr : expr STAGE_FILE_FORMAT fmt_equals"""
p[0] = p[1]
p_list = remove_par(list(p))
p[0]["stage_file_format"] = p_list[-1] if len(p_list[-1]) > 1 else p_list[-1][0]
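To see how the new p_fmt_equals turns a format clause into a dict, the regex can be exercised in isolation; the raw string below is an assumed example of the token text the grammar hands in as p_list[3]:

    import re

    # Same pattern as in p_fmt_equals above: KEY = bare word, KEY = 'x' / 'xx'
    # (short quoted strings), KEY = ('quoted') lists, and empty KEY =() values.
    fmt_split = re.compile(r"\w+\s*=\s*\w+|\w+\s*=\s*'.'|\w+\s*=\s*'..'|\w+\s*=\s*\('.+'\)|\w+\s*=\(\)")

    raw = "TYPE = CSV FIELD_DELIMITER = '|' TRIM_SPACE = TRUE NULL_IF =('field')"
    pairs = {f.split("=")[0].strip(): f.split("=")[1].strip()
             for f in fmt_split.findall(raw) if "=" in f}
    print(pairs)
    # {'TYPE': 'CSV', 'FIELD_DELIMITER': "'|'", 'TRIM_SPACE': 'TRUE', 'NULL_IF': "('field')"}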
4 changes: 3 additions & 1 deletion simple_ddl_parser/dialects/sql.py
@@ -516,7 +516,8 @@ def set_auth_property_in_schema(self, p: List, p_list: List) -> None:

def p_c_schema(self, p: List) -> None:
"""c_schema : CREATE SCHEMA
| CREATE ID SCHEMA"""
| CREATE ID SCHEMA
| CREATE OR REPLACE SCHEMA"""
if len(p) == 4:
p[0] = {"remote": True}

@@ -539,6 +540,7 @@ def p_create_schema(self, p: List) -> None:
del p_list[-1]

self.add_if_not_exists(p[0], p_list)

if isinstance(p_list[1], dict):
p[0] = p_list[1]
self.set_properties_for_schema_and_database(p, p_list)
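With the new c_schema alternative, CREATE OR REPLACE SCHEMA statements now parse instead of failing; a minimal sketch mirroring test_schema_create_or_replace below:

    from simple_ddl_parser import DDLParser

    result = DDLParser("create or replace schema myschema;", normalize_names=True).run(
        output_mode="snowflake"
    )
    print(result)  # [{'schema_name': 'myschema'}]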
663 changes: 332 additions & 331 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

169 changes: 162 additions & 7 deletions tests/dialects/test_snowflake.py
@@ -967,15 +967,170 @@ def test_virtual_column_table():
"location": "@ADL_Azure_Storage_Account_Container_Name/",
"table_properties": {
"auto_refresh": False,
"file_format": [
"TYPE","=","JSON",
"NULL_IF","=('field')",
"DATE_FORMAT","=","AUTO",
"TRIM_SPACE","=","TRUE",
],
"stage_file_format": ["TYPE","=","JSON", "NULL_IF","=()"],
"file_format": {'TYPE' : 'JSON', 'NULL_IF' : "('field')", 'DATE_FORMAT' : 'AUTO', 'TRIM_SPACE' : 'TRUE'},
"stage_file_format": {'TYPE' : 'JSON', 'NULL_IF' :'()'},
},
}
]

assert result_ext_table == expected_ext_table

def test_schema_create():
ddl = """
create schema myschema;
"""
result = DDLParser(ddl).run(output_mode="snowflake")
expected = [{"schema_name": 'myschema'}]

assert expected == result

def test_schema_create_if_not_exists():
ddl = """
create schema if not exists myschema;
"""
result = DDLParser(ddl).run(output_mode="snowflake")
expected = [{"schema_name": 'myschema', 'if_not_exists' : True}]

assert expected == result

def test_schema_create_or_replace():
# https://docs.snowflake.com/en/sql-reference/sql/create-schema
ddl = """
create or replace schema myschema;
"""
result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
expected = [{"schema_name": 'myschema'}]

assert result == expected

def test_external_table_with_nullif():
ddl = """create or replace external table if not exists ${database_name}.MySchemaName.MyTableName(
"Filename" VARCHAR(16777216) AS (METADATA$FILENAME))
partition by ("Filename")
location = @ADL_DH_DL_PTS/
auto_refresh = false
file_format = (TYPE=JSON NULLIF=())
;"""

result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
expected = [{'table_name': 'MyTableName',
'schema': 'MySchemaName',
'primary_key': [],
'columns': [{
'name': 'Filename',
'type': 'VARCHAR',
'size': 16777216,
'references': None,
'unique': False,
'nullable': True,
'default': None,
'check': None,
'generated': {'as' : 'METADATA$FILENAME'}
}],
'alter': {},
'checks': [],
'index': [],
'partitioned_by': [],
'partition_by': {'columns': ['Filename'], 'type': None},
'tablespace': None,
'if_not_exists': True,
'table_properties': {'project': '${database_name}',
'auto_refresh': False,
'file_format': {'TYPE': 'JSON', 'NULLIF': '()'},
},
'replace': True,
'location': '@ADL_DH_DL_PTS/',
'external' : True,
'primary_key_enforced' : None,
'clone' : None
}]

assert result == expected

def test_external_table_with_field_delimiter():
ddl = """create or replace external table if not exists ${database_name}.MySchemaName.MyTableName(
"Filename" VARCHAR(16777216) AS (METADATA$FILENAME))
partition by ("Filename")
location = @ADL_DH_DL_PTS/
auto_refresh = false
file_format = (TYPE=CSV FIELD_DELIMITER='|' TRIM_SPACE=TRUE ERROR_ON_COLUMN_COUNT_MISMATCH=FALSE REPLACE_INVALID_CHARACTERS=TRUE)
;"""

result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
expected = [{'table_name': 'MyTableName',
'schema': 'MySchemaName',
'primary_key': [],
'columns': [{
'name': 'Filename',
'type': 'VARCHAR',
'size': 16777216,
'references': None,
'unique': False,
'nullable': True,
'default': None,
'check': None,
'generated': {'as' : 'METADATA$FILENAME'}
}],
'alter': {},
'checks': [],
'index': [],
'partitioned_by': [],
'partition_by': {'columns': ['Filename'], 'type': None},
'tablespace': None,
'if_not_exists': True,
'table_properties': {'project': '${database_name}',
'auto_refresh': False,
'file_format': {'TYPE': 'CSV',
'FIELD_DELIMITER': "'|'",
'TRIM_SPACE': 'TRUE',
'ERROR_ON_COLUMN_COUNT_MISMATCH': 'FALSE',
'REPLACE_INVALID_CHARACTERS': 'TRUE'}},
'replace': True,
'location': '@ADL_DH_DL_PTS/',
'external' : True,
'primary_key_enforced' : None,
'clone' : None
}]

assert result == expected

def test_table_column_def_clusterby():
ddl = """CREATE TABLE ${database_name}.MySchemaName."MyTableName" (ID NUMBER(38,0) NOT NULL, "DocProv" VARCHAR(2)) cluster by ("DocProv");"""

result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
expected = [{'table_name': 'MyTableName',
'schema': 'MySchemaName',
'primary_key': [],
'columns': [{
'name': 'ID',
'size' : (38,0),
'type': 'NUMBER',
'references': None,
'unique': False,
'nullable': False,
'default': None,
'check': None,
},
{
'name': 'DocProv',
'size' : 2,
'type': 'VARCHAR',
'references': None,
'unique': False,
'nullable': True,
'default': None,
'check': None,
}],
'alter': {},
'checks': [],
'index': [],
'partitioned_by': [],
'cluster_by' : ['DocProv'],
'tablespace': None,
'external' : False,
'primary_key_enforced' : None,
'table_properties': {'project': '${database_name}'},
'clone' : None
}]

assert result == expected
