Added a template for Streamlit Databricks data app #56

Open · wants to merge 9 commits into base: main (showing changes from 5 commits)
10 changes: 10 additions & 0 deletions contrib/templates/app-template/README.md
@@ -0,0 +1,10 @@
# app-template

This template contains a bundle structure to create and manage a Databricks App that reads data from a SQL warehouse and visualizes it.
The app is written in Python using the Streamlit framework.

Install the template using the following command:

```
databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/app-template
```
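After initializing, a typical next step is to validate and deploy the generated bundle with the standard Databricks CLI bundle commands (a sketch: `my_data_app` is the template's default project name, and this assumes your CLI is already authenticated to a workspace):

```shell
# Enter the project directory created by 'bundle init'
cd my_data_app

# Check the bundle configuration for errors
databricks bundle validate

# Deploy to the default 'dev' target
databricks bundle deploy -t dev
```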
21 changes: 21 additions & 0 deletions contrib/templates/app-template/databricks_template_schema.json
@@ -0,0 +1,21 @@
{
"welcome_message": "\nWelcome to the app-template template for Databricks Asset Bundles!",
"properties": {
"project_name": {
"type": "string",
"default": "my_data_app",
"description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name",
"order": 1,
"pattern": "^[A-Za-z0-9_]+$",
"pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
},
"sql_warehouse_id": {
"type": "string",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid SQL warehouse ID.",
"description": "SQL Warehouse ID to be used with the app\nsql_warehouse_id",
"order": 2
}
},
"success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!"
}
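The `pattern` fields in the schema above are ordinary regular expressions applied to the user's input. A quick Python sketch (with hypothetical sample values) of the validation they imply:

```python
import re

# Patterns copied from databricks_template_schema.json
PROJECT_NAME_PATTERN = r"^[A-Za-z0-9_]+$"
WAREHOUSE_ID_PATTERN = r"^\w*$"


def is_valid_project_name(name: str) -> bool:
    # Letters, numbers, and underscores only
    return re.fullmatch(PROJECT_NAME_PATTERN, name) is not None


def is_valid_warehouse_id(wid: str) -> bool:
    # Word characters only; note the empty string is also accepted by '\w*'
    return re.fullmatch(WAREHOUSE_ID_PATTERN, wid) is not None


print(is_valid_project_name("my_data_app"))  # True
print(is_valid_project_name("my-data-app"))  # False (hyphen not allowed)
```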
@@ -0,0 +1,6 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
@@ -0,0 +1,51 @@
import os
from databricks import sql
from databricks.sdk.core import Config
import streamlit as st
import pandas as pd

# Ensure environment variable is set correctly
assert os.getenv(
"DATABRICKS_WAREHOUSE_ID"
), "DATABRICKS_WAREHOUSE_ID must be set in app.yaml."


def sqlQuery(query: str) -> pd.DataFrame:
cfg = Config() # Pull environment variables for auth
with sql.connect(
server_hostname=cfg.host,
http_path=f"/sql/1.0/warehouses/{os.getenv('DATABRICKS_WAREHOUSE_ID')}",
credentials_provider=lambda: cfg.authenticate,
) as connection:
with connection.cursor() as cursor:
cursor.execute(query)
return cursor.fetchall_arrow().to_pandas()


st.set_page_config(layout="wide")


@st.cache_data(ttl=30) # only re-query if it's been 30 seconds
def getData():
# This example query depends on the nyctaxi data set in Unity Catalog, see https://docs.databricks.com/en/discover/databricks-datasets.html for details
return sqlQuery("select * from samples.nyctaxi.trips limit 5000")


data = getData()

st.header("Taxi fare distribution !!! :)")
col1, col2 = st.columns([3, 1])
with col1:
st.scatter_chart(
data=data, height=400, width=700, y="fare_amount", x="trip_distance"
)
with col2:
st.subheader("Predict fare")
pickup = st.text_input("From (zipcode)", value="10003")
dropoff = st.text_input("To (zipcode)", value="11238")
d = data[
(data["pickup_zip"] == int(pickup)) & (data["dropoff_zip"] == int(dropoff))
]
    # Show the mean fare for the selected zip pair, or a $99.00 placeholder when no trips match
    st.write(f"# **${d['fare_amount'].mean() if len(d) > 0 else 99:.2f}**")

st.dataframe(data=data, height=600, use_container_width=True)
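The fare "prediction" above is just a filtered average over the cached trips. A standalone sketch of that logic with made-up sample data (the real app pulls `samples.nyctaxi.trips` from the warehouse):

```python
import pandas as pd

# Hypothetical sample in the same shape as samples.nyctaxi.trips
data = pd.DataFrame(
    {
        "pickup_zip": [10003, 10003, 11238],
        "dropoff_zip": [11238, 11238, 10003],
        "fare_amount": [20.0, 30.0, 15.0],
    }
)


def mean_fare(pickup: int, dropoff: int, fallback: float = 99.0) -> float:
    # Average fare for the zip pair, or a placeholder when no trips match
    d = data[(data["pickup_zip"] == pickup) & (data["dropoff_zip"] == dropoff)]
    return d["fare_amount"].mean() if len(d) > 0 else fallback


print(mean_fare(10003, 11238))  # 25.0 (mean of 20.0 and 30.0)
print(mean_fare(10003, 10003))  # 99.0 (no matching trips)
```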
@@ -0,0 +1,27 @@
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: {{.project_name}}

include:
- resources/*.yml

targets:
dev:
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: {{workspace_host}}
prod:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
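With the two targets defined above, the deployment target is selected via the CLI's `-t` flag (a sketch, assuming workspace auth is configured):

```shell
# Production deployment lands under /Workspace/Users/<user>/.bundle/<bundle name>/prod
databricks bundle deploy -t prod

# Tear the deployment down when it is no longer needed
databricks bundle destroy -t prod
```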
@@ -0,0 +1,24 @@
resources:
apps:
data_app:
name: "data-app"
source_code_path: ../app
description: "A Streamlit app that uses a SQL warehouse"
config:
command:
- "streamlit"
- "run"
- "app.py"
env:
- name: "DATABRICKS_WAREHOUSE_ID"
value: "{{.sql_warehouse_id}}"
- name: STREAMLIT_BROWSER_GATHER_USAGE_STATS
value: "false"

resources:
- name: "sql-warehouse"
description: "A SQL warehouse the app uses to run queries"
sql_warehouse:
id: "{{.sql_warehouse_id}}"
permission: "CAN_USE"
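The `env` entries above become process environment variables inside the running app. A minimal sketch of how the app-side code consumes them, mirroring the assertion in `app.py` (the ID value here is hypothetical):

```python
import os

# Simulate the environment the app resource config would provide
os.environ["DATABRICKS_WAREHOUSE_ID"] = "abc123def456"  # hypothetical ID

warehouse_id = os.getenv("DATABRICKS_WAREHOUSE_ID")
assert warehouse_id, "DATABRICKS_WAREHOUSE_ID must be set in app.yaml."

# The ID is spliced into the SQL endpoint's HTTP path
http_path = f"/sql/1.0/warehouses/{warehouse_id}"
print(http_path)  # /sql/1.0/warehouses/abc123def456
```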