diff --git a/contrib/templates/streamlit-app/README.md b/contrib/templates/streamlit-app/README.md new file mode 100644 index 0000000..4e68c0c --- /dev/null +++ b/contrib/templates/streamlit-app/README.md @@ -0,0 +1,10 @@ +# streamlit-app template + +This template contains a bundle structure for creating and managing a Databricks app that reads data from a SQL warehouse and visualizes it. +The app is written in Python using the Streamlit framework. + +Initialize the template using the following command: + +``` +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/streamlit-app +``` \ No newline at end of file diff --git a/contrib/templates/streamlit-app/databricks_template_schema.json b/contrib/templates/streamlit-app/databricks_template_schema.json new file mode 100644 index 0000000..9faa1a4 --- /dev/null +++ b/contrib/templates/streamlit-app/databricks_template_schema.json @@ -0,0 +1,21 @@ +{ + "welcome_message": "\nWelcome to the streamlit-app template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "my_data_app", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "sql_warehouse_id": { + "type": "string", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid sql warehouse id.", + "description": "SQL Warehouse ID to be used with the app\nsql_warehouse_id", + "order": 2 + } + }, + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!" 
"""Streamlit app that charts NYC taxi trips read from a Databricks SQL warehouse.

The warehouse to query is taken from the ``DATABRICKS_WAREHOUSE_ID``
environment variable. The page shows a fare-vs-distance scatter chart, a small
"predict fare" widget that averages fares for a pickup/dropoff zip-code pair,
and the raw data table.
"""

import os
from typing import Optional

from databricks import sql
from databricks.sdk.core import Config
import streamlit as st
import pandas as pd

# Validate with an explicit raise rather than ``assert``: assertions are
# stripped when Python runs with -O, which would let the app start without a
# warehouse and fail later with a far less helpful error.
WAREHOUSE_ID = os.getenv("DATABRICKS_WAREHOUSE_ID")
if not WAREHOUSE_ID:
    raise RuntimeError("DATABRICKS_WAREHOUSE_ID must be set in app.yaml.")

# Fare displayed when no trip in the sample matches the selected zip codes.
FALLBACK_FARE = 99


def sqlQuery(query: str) -> pd.DataFrame:
    """Run *query* against the configured SQL warehouse.

    Args:
        query: SQL text to execute.

    Returns:
        The full result set converted to a pandas DataFrame.
    """
    cfg = Config()  # Pull environment variables for auth
    with sql.connect(
        server_hostname=cfg.host,
        http_path=f"/sql/1.0/warehouses/{WAREHOUSE_ID}",
        credentials_provider=lambda: cfg.authenticate,
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall_arrow().to_pandas()


def _parse_zip(text: str) -> Optional[int]:
    """Return *text* as an integer zip code, or ``None`` if it is not numeric."""
    try:
        return int(text.strip())
    except ValueError:
        return None


st.set_page_config(layout="wide")


@st.cache_data(ttl=30)  # only re-query if it's been 30 seconds
def getData():
    """Fetch the sample of taxi trips that backs every widget on the page."""
    # This example query depends on the nyctaxi data set in Unity Catalog, see
    # https://docs.databricks.com/en/discover/databricks-datasets.html for details
    return sqlQuery("select * from samples.nyctaxi.trips limit 5000")


data = getData()

st.header("Taxi fare distribution !!! :)")
col1, col2 = st.columns([3, 1])
with col1:
    st.scatter_chart(
        data=data, height=400, width=700, y="fare_amount", x="trip_distance"
    )
with col2:
    st.subheader("Predict fare")
    pickup = _parse_zip(st.text_input("From (zipcode)", value="10003"))
    dropoff = _parse_zip(st.text_input("To (zipcode)", value="11238"))
    if pickup is None or dropoff is None:
        # Free-text input: a non-numeric value used to crash the whole app
        # with ValueError from int(); tell the user instead.
        st.warning("Please enter numeric zip codes.")
    else:
        matches = data[
            (data["pickup_zip"] == pickup) & (data["dropoff_zip"] == dropoff)
        ]
        fare = matches["fare_amount"].mean() if len(matches) > 0 else FALLBACK_FARE
        st.write(f"# **${fare:.2f}**")

st.dataframe(data=data, height=600, use_container_width=True)
+ root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE \ No newline at end of file diff --git a/contrib/templates/streamlit-app/template/{{.project_name}}/resources/data.app.yml.tmpl b/contrib/templates/streamlit-app/template/{{.project_name}}/resources/data.app.yml.tmpl new file mode 100644 index 0000000..7608b55 --- /dev/null +++ b/contrib/templates/streamlit-app/template/{{.project_name}}/resources/data.app.yml.tmpl @@ -0,0 +1,24 @@ +resources: + apps: + data_app: + name: "data-app" + source_code_path: ../app + description: "A Streamlit app that uses a SQL warehouse" + config: + command: + - "streamlit" + - "run" + - "app.py" + env: + - name: "DATABRICKS_WAREHOUSE_ID" + value: "{{.sql_warehouse_id}}" + - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS + value: "false" + + resources: + - name: "sql-warehouse" + description: "A SQL warehouse for app to be able to work with" + sql_warehouse: + id: "{{.sql_warehouse_id}}" + permission: "CAN_USE" +