databricks · benc-db · Oct 11, 2024 · Oct 9, 2024 · Oct 10, 2024 · Oct 11, 2024
@@ -112,6 +112,7 @@ class DatabricksConfig(AdapterConfig):
     file_format: str = "delta"
     table_format: TableFormat = TableFormat.DEFAULT
     location_root: Optional[str] = None
+    include_full_name_in_path: bool = False
     partition_by: Optional[Union[List[str], str]] = None
     clustered_by: Optional[Union[List[str], str]] = None
     liquid_clustered_by: Optional[Union[List[str], str]] = None
@@ -206,6 +207,25 @@ def update_tblproperties_for_iceberg(
             result["delta.universalFormat.enabledFormats"] = "iceberg"
         return result
 
+    @available.parse(lambda *a, **k: 0)
+    def compute_external_path(
+        self, config: BaseConfig, model: BaseConfig, is_incremental: bool = False
+    ) -> str:
+        location_root = config.get("location_root")
+        database = model.get("database", "hive_metastore")
+        schema = model.get("schema", "default")
+        identifier = model.get("alias")
+        if location_root is None:
+            raise DbtConfigError("location_root is required for external tables.")
+        include_full_name_in_path = config.get("include_full_name_in_path", False)
+        if include_full_name_in_path:
+            path = os.path.join(location_root, database, schema, identifier)
+        else:
+            path = os.path.join(location_root, identifier)
+        if is_incremental:
+            path = path + "_tmp"
+        return path
+
     # override/overload
     def acquire_connection(
         self, name: Optional[str] = None, query_header_context: Any = None

@@ -60,11 +60,8 @@ writer.saveAsTable("{{ target_relation }}")
 {%- set buckets = config.get('buckets', validator=validation.any[int]) -%}
 .format("{{ file_format }}")
 {%- if location_root is not none %}
-{%- set identifier = model['alias'] %}
-{%- if is_incremental() %}
-{%- set identifier = identifier + '__dbt_tmp' %}
-{%- endif %}
-.option("path", "{{ location_root }}/{{ identifier }}")
+{%- set model_path = adapter.compute_external_path(config, model, is_incremental()) %}
+.option("path", "{{ model_path }}")
 {%- endif -%}
 {%- if partition_by is not none -%}
     {%- if partition_by is string -%}

@@ -3,7 +3,8 @@
   {%- set file_format = config.get('file_format', default='delta') -%}
   {%- set identifier = model['alias'] -%}
   {%- if location_root is not none %}
-    location '{{ location_root }}/{{ identifier }}'
+  {%- set model_path = adapter.compute_external_path(config, model, is_incremental()) %}
+    location '{{ model_path }}'
   {%- elif (not relation.is_hive_metastore()) and file_format != 'delta' -%}
     {{ exceptions.raise_compiler_error(
         'Incompatible configuration: `location_root` must be set when using a non-delta file format with Unity Catalog'

@@ -34,6 +34,7 @@ def project_config_update(self):
             "models": {
                 "+file_format": "parquet",
                 "+location_root": f"{location_root}/parquet",
+                "+include_full_name_in_path": "true",
                 "+incremental_strategy": "append",
             },
         }
@@ -61,6 +62,7 @@ def project_config_update(self):
             "models": {
                 "+file_format": "csv",
                 "+location_root": f"{location_root}/csv",
+                "+include_full_name_in_path": "true",
                 "+incremental_strategy": "append",
             },
         }

@@ -52,6 +52,7 @@ def project_config_update(self):
             "models": {
                 "+file_format": "parquet",
                 "+location_root": f"{location_root}/parquet_append",
+                "+include_full_name_in_path": "true",
                 "+incremental_strategy": "append",
             },
         }
@@ -129,6 +130,7 @@ def project_config_update(self):
             "models": {
                 "+file_format": "parquet",
                 "+location_root": f"{location_root}/parquet_insert_overwrite",
+                "+include_full_name_in_path": "true",
                 "+incremental_strategy": "insert_overwrite",
             },
         }
@@ -144,6 +146,7 @@ def project_config_update(self):
             "models": {
                 "+file_format": "parquet",
                 "+location_root": f"{location_root}/parquet_insert_overwrite_partitions",
+                "+include_full_name_in_path": "true",
                 "+incremental_strategy": "insert_overwrite",
                 "+partition_by": "id",
             },

@@ -5,6 +5,7 @@
     description: 'A seed description'
     config:
       location_root: '{{ env_var("DBT_DATABRICKS_LOCATION_ROOT") }}'
+      include_full_name_in_path: true
       persist_docs:
         relation: True
         columns: True
@@ -22,6 +23,7 @@
     description: 'A seed description'
     config:
       location_root: '/mnt/dbt_databricks/seeds'
+      include_full_name_in_path: true
       persist_docs:
         relation: True
         columns: True

@@ -102,7 +102,8 @@ def model(dbt, spark):
       marterialized: table
       tags: ["python"]
       create_notebook: true
-      location_root: "{root}/{schema}"
+      include_full_name_in_path: true
+      location_root: "{{ env_var('DBT_DATABRICKS_LOCATION_ROOT') }}"
     columns:
       - name: date
         tests:

@@ -119,15 +119,6 @@ def project_config_update(self):
         }
 
     def test_expected_handling_of_complex_config(self, project):
-        unformatted_schema_yml = util.read_file("models", "schema.yml")
-        util.write_file(
-            unformatted_schema_yml.replace(
-                "root", os.environ["DBT_DATABRICKS_LOCATION_ROOT"]
-            ).replace("{schema}", project.test_schema),
-            "models",
-            "schema.yml",
-        )
-
         util.run_dbt(["seed"])
         util.run_dbt(["build", "-s", "complex_config"])
         util.run_dbt(["build", "-s", "complex_config"])