From 4651058293483cb4b62bf8a36f4917ee73d99f6b Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 24 Dec 2024 13:47:09 +0100 Subject: [PATCH 1/9] Added a template for Streamlit Databricks data app --- contrib/templates/app-template/README.md | 10 ++++ .../databricks_template_schema.json | 21 ++++++++ .../template/{{.project_name}}/.gitignore | 6 +++ .../template/{{.project_name}}/app/app.py | 51 +++++++++++++++++++ .../{{.project_name}}/app/requirements.txt | 0 .../{{.project_name}}/databricks.yml.tmpl | 27 ++++++++++ .../resources/data.app.yml.tmpl | 22 ++++++++ 7 files changed, 137 insertions(+) create mode 100644 contrib/templates/app-template/README.md create mode 100644 contrib/templates/app-template/databricks_template_schema.json create mode 100644 contrib/templates/app-template/template/{{.project_name}}/.gitignore create mode 100644 contrib/templates/app-template/template/{{.project_name}}/app/app.py create mode 100644 contrib/templates/app-template/template/{{.project_name}}/app/requirements.txt create mode 100644 contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl diff --git a/contrib/templates/app-template/README.md b/contrib/templates/app-template/README.md new file mode 100644 index 0000000..6fe8b41 --- /dev/null +++ b/contrib/templates/app-template/README.md @@ -0,0 +1,10 @@ +# app-template template + +This template contains a bundle structure to create and manage Databricks App which reads data from a SQL warehouse and visualizes it. +The app is written in Python using Streamlit framework. 
+ +Install the template using the following command: + +``` +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/app-template +``` \ No newline at end of file diff --git a/contrib/templates/app-template/databricks_template_schema.json b/contrib/templates/app-template/databricks_template_schema.json new file mode 100644 index 0000000..092c10e --- /dev/null +++ b/contrib/templates/app-template/databricks_template_schema.json @@ -0,0 +1,21 @@ +{ + "welcome_message": "\nWelcome to the app-template template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "my_data_app", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "sql_warehouse_id": { + "type": "string", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid sql warehouse id.", + "description": "SQL Warehouse ID to be used with the app\nsql_warehouse_id", + "order": 2 + } + }, + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!" 
+}
\ No newline at end of file
diff --git a/contrib/templates/app-template/template/{{.project_name}}/.gitignore b/contrib/templates/app-template/template/{{.project_name}}/.gitignore
new file mode 100644
index 0000000..fd93f83
--- /dev/null
+++ b/contrib/templates/app-template/template/{{.project_name}}/.gitignore
@@ -0,0 +1,6 @@
+.databricks/
+build/
+dist/
+__pycache__/
+*.egg-info
+.venv/
diff --git a/contrib/templates/app-template/template/{{.project_name}}/app/app.py b/contrib/templates/app-template/template/{{.project_name}}/app/app.py
new file mode 100644
index 0000000..8cec20a
--- /dev/null
+++ b/contrib/templates/app-template/template/{{.project_name}}/app/app.py
@@ -0,0 +1,64 @@
+"""Streamlit data app: NYC taxi fares read from a Databricks SQL warehouse."""
+
+import os
+
+from databricks import sql
+from databricks.sdk.core import Config
+import streamlit as st
+import pandas as pd
+
+# Read the warehouse ID once. An explicit check is used instead of `assert`
+# because assertions are stripped when Python runs with -O.
+WAREHOUSE_ID = os.getenv("DATABRICKS_WAREHOUSE_ID")
+if not WAREHOUSE_ID:
+    raise RuntimeError("DATABRICKS_WAREHOUSE_ID must be set in app.yaml.")
+
+
+def sqlQuery(query: str) -> pd.DataFrame:
+    """Run `query` on the configured SQL warehouse and return a DataFrame."""
+    cfg = Config()  # Pull environment variables for auth
+    with sql.connect(
+        server_hostname=cfg.host,
+        http_path=f"/sql/1.0/warehouses/{WAREHOUSE_ID}",
+        credentials_provider=lambda: cfg.authenticate,
+    ) as connection:
+        with connection.cursor() as cursor:
+            cursor.execute(query)
+            return cursor.fetchall_arrow().to_pandas()
+
+
+st.set_page_config(layout="wide")
+
+
+@st.cache_data(ttl=30)  # only re-query if it's been 30 seconds
+def getData() -> pd.DataFrame:
+    """Fetch the sample trips displayed by the app."""
+    # This example query depends on the nyctaxi data set in Unity Catalog, see https://docs.databricks.com/en/discover/databricks-datasets.html for details
+    return sqlQuery("select * from samples.nyctaxi.trips limit 5000")
+
+
+data = getData()
+
+st.header("Taxi fare distribution !!! :)")
+col1, col2 = st.columns([3, 1])
+with col1:
+    st.scatter_chart(
+        data=data, height=400, width=700, y="fare_amount", x="trip_distance"
+    )
+with col2:
+    st.subheader("Predict fare")
+    pickup = st.text_input("From (zipcode)", value="10003").strip()
+    dropoff = st.text_input("To (zipcode)", value="11238").strip()
+    # text_input returns free-form text; int() on non-numeric input would raise
+    # ValueError and replace the page with a traceback, so validate first.
+    if pickup.isdecimal() and dropoff.isdecimal():
+        d = data[
+            (data["pickup_zip"] == int(pickup))
+            & (data["dropoff_zip"] == int(dropoff))
+        ]
+        # Falls back to 99 when no trip matches both zip codes.
+        st.write(f"# **${d['fare_amount'].mean() if len(d) > 0 else 99:.2f}**")
+    else:
+        st.warning("Zip codes must contain digits only.")
+
+st.dataframe(data=data, height=600, use_container_width=True)
diff --git a/contrib/templates/app-template/template/{{.project_name}}/app/requirements.txt b/contrib/templates/app-template/template/{{.project_name}}/app/requirements.txt
new file mode 100644
index 0000000..e69de29
diff --git a/contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl
new file mode 100644
index 0000000..8b572a7
--- /dev/null
+++ b/contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl
@@ -0,0 +1,27 @@
+# This is a Databricks asset bundle definition for {{.project_name}}.
+# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
+bundle:
+  name: {{.project_name}}
+
+include:
+  - resources/*.yml
+
+targets:
+  dev:
+    # The default target uses 'mode: development' to create a development copy.
+    # - Deployed resources get prefixed with '[dev my_user_name]'
+    # - Any job schedules and triggers are paused by default.
+    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
+    mode: development
+    default: true
+    workspace:
+      host: {{workspace_host}}
+  prod:
+    mode: production
+    workspace:
+      host: {{workspace_host}}
+      # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
+ root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE \ No newline at end of file diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl new file mode 100644 index 0000000..bd6c2ce --- /dev/null +++ b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -0,0 +1,22 @@ +resources: + data_app: + name: "data-app" + source_code_path: ../app + config: + command: + - "streamlit" + - "run" + - "app.py" + env: + - name: "DATABRICKS_WAREHOUSE_ID" + value: "{{.sql_warehouse_id}}" + - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS + value: "false" + + resources: + - name: "sql-warehouse" + description: "A SQL warehouse for app to be able to work with" + job: + id: "{{.sql_warehouse_id}}" + permission: "CAN_USE" + From 263e948866af75bda64af0eaa35eac0d2e20343c Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 24 Dec 2024 13:51:16 +0100 Subject: [PATCH 2/9] fixes --- .../{{.project_name}}/resources/data.app.yml.tmpl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl index bd6c2ce..45e2ede 100644 --- a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl +++ b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -15,8 +15,8 @@ resources: resources: - name: "sql-warehouse" - description: "A SQL warehouse for app to be able to work with" - job: - id: "{{.sql_warehouse_id}}" - permission: "CAN_USE" + description: "A SQL warehouse for app to be able to work with" + sql_warehouse: + id: 
"{{.sql_warehouse_id}}" + permission: "CAN_USE" From e4c4dc7c80bd7ce467158d5a42952c8d8fa3a94f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 24 Dec 2024 13:52:25 +0100 Subject: [PATCH 3/9] fix --- .../resources/data.app.yml.tmpl | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl index 45e2ede..f272c4a 100644 --- a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl +++ b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -1,17 +1,18 @@ resources: - data_app: - name: "data-app" - source_code_path: ../app - config: - command: - - "streamlit" - - "run" - - "app.py" - env: - - name: "DATABRICKS_WAREHOUSE_ID" - value: "{{.sql_warehouse_id}}" - - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS - value: "false" + apps: + data_app: + name: "data-app" + source_code_path: ../app + config: + command: + - "streamlit" + - "run" + - "app.py" + env: + - name: "DATABRICKS_WAREHOUSE_ID" + value: "{{.sql_warehouse_id}}" + - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS + value: "false" resources: - name: "sql-warehouse" From 83490b1e5a6fff6d771c7addef68a6e67f496cd1 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 24 Dec 2024 13:56:54 +0100 Subject: [PATCH 4/9] - --- .../{{.project_name}}/resources/data.app.yml.tmpl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl index f272c4a..8209dfb 100644 --- a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl +++ b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -14,10 +14,10 @@ 
resources: - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS value: "false" - resources: - - name: "sql-warehouse" - description: "A SQL warehouse for app to be able to work with" - sql_warehouse: - id: "{{.sql_warehouse_id}}" - permission: "CAN_USE" + resources: + - name: "sql-warehouse" + description: "A SQL warehouse for app to be able to work with" + sql_warehouse: + id: "{{.sql_warehouse_id}}" + permission: "CAN_USE" From 99ad7c70c83e2327917db0dac892b241ca2f9df8 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 24 Dec 2024 14:03:43 +0100 Subject: [PATCH 5/9] added a description --- .../template/{{.project_name}}/resources/data.app.yml.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl index 8209dfb..7608b55 100644 --- a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl +++ b/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -3,6 +3,7 @@ resources: data_app: name: "data-app" source_code_path: ../app + description: "A Streamlit app that uses a SQL warehouse" config: command: - "streamlit" From 5d7e43558cba8ee036b32f19cfd1c07120c72e63 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 30 Dec 2024 12:48:31 +0100 Subject: [PATCH 6/9] Update contrib/templates/app-template/README.md Co-authored-by: Pieter Noordhuis --- contrib/templates/app-template/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/templates/app-template/README.md b/contrib/templates/app-template/README.md index 6fe8b41..a9ad421 100644 --- a/contrib/templates/app-template/README.md +++ b/contrib/templates/app-template/README.md @@ -1,6 +1,6 @@ # app-template template -This template contains a bundle structure to create and manage Databricks App which reads data from a SQL warehouse and visualizes it. 
+This template contains a bundle structure for creating and managing a Databricks app that reads data from a SQL warehouse and visualizes it. The app is written in Python using Streamlit framework. Install the template using the following command: From b185b1da6448b3e2c7744efa955c3a2106ade766 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 30 Dec 2024 12:48:51 +0100 Subject: [PATCH 7/9] Update contrib/templates/app-template/README.md Co-authored-by: Pieter Noordhuis --- contrib/templates/app-template/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/templates/app-template/README.md b/contrib/templates/app-template/README.md index a9ad421..892be1a 100644 --- a/contrib/templates/app-template/README.md +++ b/contrib/templates/app-template/README.md @@ -3,7 +3,7 @@ This template contains a bundle structure for creating and managing a Databricks app that reads data from a SQL warehouse and visualizes it. The app is written in Python using Streamlit framework. -Install the template using the following command: +Initialize the template using the following command: ``` databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/app-template From b10d3dee77cb449cdd86b5cb71b7fa949e97240a Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 30 Dec 2024 12:48:56 +0100 Subject: [PATCH 8/9] Update contrib/templates/app-template/README.md Co-authored-by: Pieter Noordhuis --- contrib/templates/app-template/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/templates/app-template/README.md b/contrib/templates/app-template/README.md index 892be1a..2906138 100644 --- a/contrib/templates/app-template/README.md +++ b/contrib/templates/app-template/README.md @@ -1,7 +1,7 @@ # app-template template This template contains a bundle structure for creating and managing a Databricks app that reads data from a SQL warehouse and visualizes it. 
-The app is written in Python using Streamlit framework. +The app is written in Python using the Streamlit framework. Initialize the template using the following command: From d51b53b40d321827f542f064276c73bb73135738 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 30 Dec 2024 12:50:26 +0100 Subject: [PATCH 9/9] rename --- contrib/templates/{app-template => streamlit-app}/README.md | 4 ++-- .../databricks_template_schema.json | 2 +- .../template/{{.project_name}}/.gitignore | 0 .../template/{{.project_name}}/app/app.py | 0 .../template/{{.project_name}}/app/requirements.txt | 0 .../template/{{.project_name}}/databricks.yml.tmpl | 0 .../template/{{.project_name}}/resources/data.app.yml.tmpl | 0 7 files changed, 3 insertions(+), 3 deletions(-) rename contrib/templates/{app-template => streamlit-app}/README.md (80%) rename contrib/templates/{app-template => streamlit-app}/databricks_template_schema.json (89%) rename contrib/templates/{app-template => streamlit-app}/template/{{.project_name}}/.gitignore (100%) rename contrib/templates/{app-template => streamlit-app}/template/{{.project_name}}/app/app.py (100%) rename contrib/templates/{app-template => streamlit-app}/template/{{.project_name}}/app/requirements.txt (100%) rename contrib/templates/{app-template => streamlit-app}/template/{{.project_name}}/databricks.yml.tmpl (100%) rename contrib/templates/{app-template => streamlit-app}/template/{{.project_name}}/resources/data.app.yml.tmpl (100%) diff --git a/contrib/templates/app-template/README.md b/contrib/templates/streamlit-app/README.md similarity index 80% rename from contrib/templates/app-template/README.md rename to contrib/templates/streamlit-app/README.md index 2906138..4e68c0c 100644 --- a/contrib/templates/app-template/README.md +++ b/contrib/templates/streamlit-app/README.md @@ -1,4 +1,4 @@ -# app-template template +# streamlit-app template This template contains a bundle structure for creating and managing a Databricks app that reads data from a 
SQL warehouse and visualizes it. The app is written in Python using the Streamlit framework. @@ -6,5 +6,5 @@ The app is written in Python using the Streamlit framework. Initialize the template using the following command: ``` -databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/app-template +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/streamlit-app ``` \ No newline at end of file diff --git a/contrib/templates/app-template/databricks_template_schema.json b/contrib/templates/streamlit-app/databricks_template_schema.json similarity index 89% rename from contrib/templates/app-template/databricks_template_schema.json rename to contrib/templates/streamlit-app/databricks_template_schema.json index 092c10e..9faa1a4 100644 --- a/contrib/templates/app-template/databricks_template_schema.json +++ b/contrib/templates/streamlit-app/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the app-template template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the streamlit-app template for Databricks Asset Bundles!", "properties": { "project_name": { "type": "string", diff --git a/contrib/templates/app-template/template/{{.project_name}}/.gitignore b/contrib/templates/streamlit-app/template/{{.project_name}}/.gitignore similarity index 100% rename from contrib/templates/app-template/template/{{.project_name}}/.gitignore rename to contrib/templates/streamlit-app/template/{{.project_name}}/.gitignore diff --git a/contrib/templates/app-template/template/{{.project_name}}/app/app.py b/contrib/templates/streamlit-app/template/{{.project_name}}/app/app.py similarity index 100% rename from contrib/templates/app-template/template/{{.project_name}}/app/app.py rename to contrib/templates/streamlit-app/template/{{.project_name}}/app/app.py diff --git a/contrib/templates/app-template/template/{{.project_name}}/app/requirements.txt 
b/contrib/templates/streamlit-app/template/{{.project_name}}/app/requirements.txt similarity index 100% rename from contrib/templates/app-template/template/{{.project_name}}/app/requirements.txt rename to contrib/templates/streamlit-app/template/{{.project_name}}/app/requirements.txt diff --git a/contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/streamlit-app/template/{{.project_name}}/databricks.yml.tmpl similarity index 100% rename from contrib/templates/app-template/template/{{.project_name}}/databricks.yml.tmpl rename to contrib/templates/streamlit-app/template/{{.project_name}}/databricks.yml.tmpl diff --git a/contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/streamlit-app/template/{{.project_name}}/resources/data.app.yml.tmpl similarity index 100% rename from contrib/templates/app-template/template/{{.project_name}}/resources/data.app.yml.tmpl rename to contrib/templates/streamlit-app/template/{{.project_name}}/resources/data.app.yml.tmpl