small bigquery updates in 0.22.1
xnuinside committed Nov 16, 2021
1 parent aa9d46e commit 46c5f37
Showing 7 changed files with 129 additions and 17 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.txt
@@ -1,4 +1,4 @@
**v0.23.0**
**v0.22.1**
### New Features:

## BigQuery:
14 changes: 12 additions & 2 deletions README.md
@@ -14,8 +14,7 @@ However, in process of adding support for new statements & features I see that o


### How does it work?
Parser tested on different DDLs mostly for PostgreSQL & Hive. But idea to support as much as possible DDL dialects (AWS
Redshift, Oracle, Hive, MsSQL, etc.). You can check dialects sections after `Supported Statements` section to get more information that statements from dialects already supported by parser.
The parser is tested on different DDLs, mostly for PostgreSQL & Hive, but the idea is to support as many DDL dialects as possible (AWS Redshift, Oracle, Hive, MsSQL, BigQuery, etc.). You can check the dialect sections after the `Supported Statements` section to see which statements from those dialects are already supported by the parser.

### Feel free to open Issue with DDL sample
**If you need a statement that is not supported by the parser yet**: please provide a DDL example & information about which SQL dialect or DB it comes from.
@@ -386,6 +385,17 @@ Big thanks for the involving & contribution with test cases with DDL samples & o


## Changelog
**v0.22.1**
### New Features:

## BigQuery:

1. Added support for OPTION in the full CREATE TABLE statement & in column definitions

## Improvements:
1. CLUSTER BY can be used without ()


**v0.22.0**
### New Features:

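As an illustration of the first changelog item above (column-level OPTIONS), here is a minimal sketch of how the new support is exercised through the public API. The DDL sample is trimmed from the test added in this commit; the output shape in the comment is what that test expects.

```python
from simple_ddl_parser import DDLParser

# Column-level OPTIONS(...) on a BigQuery column (sample trimmed from
# tests/test_bigquery.py in this commit).
ddl = """
CREATE TABLE mydataset.newtable
(
    x INT64 OPTIONS(description="An optional INTEGER field")
)
"""

result = DDLParser(ddl).run(group_by_type=True)

# Each key=value pair inside OPTIONS(...) is expected to surface as a
# single-key dict in the column's "options" list, with quotes preserved:
# [{'description': '"An optional INTEGER field"'}]
print(result["tables"][0]["columns"][0]["options"])
```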
22 changes: 20 additions & 2 deletions docs/README.rst
@@ -34,8 +34,7 @@ However, in process of adding support for new statements & features I see that o
How does it work?
^^^^^^^^^^^^^^^^^

Parser tested on different DDLs mostly for PostgreSQL & Hive. But idea to support as much as possible DDL dialects (AWS
Redshift, Oracle, Hive, MsSQL, etc.). You can check dialects sections after ``Supported Statements`` section to get more information that statements from dialects already supported by parser.
The parser is tested on different DDLs, mostly for PostgreSQL & Hive, but the idea is to support as many DDL dialects as possible (AWS Redshift, Oracle, Hive, MsSQL, BigQuery, etc.). You can check the dialect sections after the ``Supported Statements`` section to see which statements from those dialects are already supported by the parser.

Feel free to open Issue with DDL sample
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -406,6 +405,8 @@ BigQuery


* OPTION in CREATE SCHEMA statement
* OPTION in CREATE TABLE statement
* OPTION in column definition statement

TODO in next Releases (if you don't see feature that you need - open the issue)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -442,6 +443,23 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
Changelog
---------

**v0.22.1**

New Features:
^^^^^^^^^^^^^

BigQuery:
---------


#. Added support for OPTION in the full CREATE TABLE statement & in column definitions

Improvements:
-------------


#. CLUSTER BY can be used without ()

**v0.22.0**

New Features:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "0.22.0"
version = "0.22.1"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
10 changes: 8 additions & 2 deletions simple_ddl_parser/dialects/bigquery.py
@@ -5,5 +5,11 @@ def p_expression_options(self, p):
p[1].update(p[2])

def p_options(self, p):
"""options : OPTIONS LP id_equals RP """
p[0] = {"options": p[3]}
"""options : OPTIONS LP id_equals RP"""
p_list = list(p)
if not isinstance(p[1], dict):
p[0] = {"options": p[3]}
else:
p[0] = p[1]
if len(p) == 4:
p[0]["options"].append(p_list[-1][0])
43 changes: 34 additions & 9 deletions simple_ddl_parser/dialects/sql.py
@@ -504,29 +504,54 @@ def p_domain_name(self, p: List) -> None:
class BaseSQL(
Database, Table, Drop, Domain, Column, AfterColumns, Type, Schema, TableSpaces
):
def clean_up_id_list_in_equal(self, p_list: List) -> List:
if isinstance(p_list[1], str) and p_list[1].endswith("="):
p_list[1] = p_list[1][:-1]
elif "," in p_list:
if len(p_list) == 4:
p_list = p_list[-1].split("=")
elif len(p_list) == 5 and p_list[-2].endswith("="):
p_list[-2] = p_list[-2][:-1]
elif "=" == p_list[-2]:
p_list.pop(-2)
return p_list

def get_property(self, p_list: List) -> Dict:
_property = None
if not isinstance(p_list[-2], list):
if not p_list[-2] == "=":
if "=" in p_list[-2]:
p_list[-2] = p_list[-2].split("=")
p_list[-1] = f"{p_list[-2][1]} {p_list[-1]}"
p_list[-2] = p_list[-2][0]
key = p_list[-2]
else:
key = p_list[-3]
_property = {key: p_list[-1]}
else:
_property = p_list[-2][0]
return _property

def p_id_equals(self, p: List) -> None:
"""id_equals : id id id
| id id
| id_equals COMMA
| id_equals COMMA id id id
| id
| id_equals COMMA id id
| id_equals COMMA id
"""
p_list = list(p)
_property = None
if isinstance(p_list[1], str) and p_list[1].endswith("="):
p_list[1] = p_list[1][:-1]
elif "," in p_list and len(p_list) == 4:
p_list = p_list[-1].split("=")
elif "=" == p_list[-2]:
p_list.pop(-2)
_property = {p_list[-2]: p_list[-1]}
p_list = self.clean_up_id_list_in_equal(p_list)
_property = self.get_property(p_list)

if _property:
if not isinstance(p[1], list):
p[0] = [_property]
else:
p[0] = p[1]
p[0].append(_property)
if not p_list[-1] == ",":
p[0].append(_property)

def p_expression_index(self, p: List) -> None:
"""expr : index_table_name LP index_pid RP"""
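The other v0.22.1 item, CLUSTER BY without parentheses, is covered by `test_cluster_by_without_brackets` (visible as hunk context at the top of the test diff below). A minimal usage sketch follows; the DDL sample is illustrative and the comment describes the expected behaviour rather than a verified output.

```python
from simple_ddl_parser import DDLParser

# Illustrative BigQuery DDL: CLUSTER BY given as a bare column list,
# with no surrounding parentheses.
ddl = """
CREATE TABLE mydataset.newtable
(
    x INT64,
    name STRING
)
CLUSTER BY x, name
"""

result = DDLParser(ddl).run(group_by_type=True)

# The clustering columns are expected to show up in the parsed table
# dict produced for mydataset.newtable.
print(result["tables"][0])
```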
53 changes: 53 additions & 0 deletions tests/test_bigquery.py
@@ -341,3 +341,56 @@ def test_cluster_by_without_brackets():
"types": [],
}
assert expected == result


def test_two_options_in_create_table():

ddl = """
CREATE TABLE mydataset.newtable
(
x INT64 OPTIONS(description="An optional INTEGER field")
)
OPTIONS(
expiration_timestamp="2023-01-01 00:00:00 UTC",
description="a table that expires in 2023",
)
"""
result = DDLParser(ddl).run(group_by_type=True)
expected = {
"ddl_properties": [],
"domains": [],
"schemas": [],
"sequences": [],
"tables": [
{
"alter": {},
"checks": [],
"columns": [
{
"check": None,
"default": None,
"name": "x",
"nullable": True,
"options": [{"description": '"An optional INTEGER ' 'field"'}],
"references": None,
"size": None,
"type": "INT64",
"unique": False,
}
],
"index": [],
"options": [
{"expiration_timestamp": '"2023-01-01 00:00:00 UTC"'},
{"description": '"a table that expires in 2023"'},
],
"partitioned_by": [],
"primary_key": [],
"schema": "mydataset",
"table_name": "newtable",
"tablespace": None,
}
],
"types": [],
}
assert expected == result
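Assuming a local checkout with pytest installed, the new BigQuery cases can be checked on their own with `pytest tests/test_bigquery.py`.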
