# NOTE: This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review it, open the file in
# an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Manually triggered workflow that submits the xbeam_virtualizarr recipe to
# Google Cloud Dataflow as an Apache Beam pipeline.
name: Deploy Recipes to Google Dataflow

env:
  # Dataflow job name: <recipe_id>-<run_id>-<run_attempt>, so every run and
  # every re-run attempt gets a distinct name.
  JOB_NAME: "${{ github.event.inputs.recipe_id }}-${{ github.run_id }}-${{ github.run_attempt }}"

on:
  workflow_dispatch:
    inputs:
      recipe_id:
        description: 'The id of a single recipe to submit to Dataflow'
        required: true
        default: 'all'

jobs:
  deploy-recipes:
    runs-on: ubuntu-latest
    steps:
      # `name` and `uses` must live in the same list item; a step that has only
      # a `name` (no `uses`/`run`) is rejected by GitHub Actions.
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.12'

      - name: "Authenticate to Google Cloud"
        id: "auth"
        uses: "google-github-actions/auth@v2"
        with:
          credentials_json: "${{ secrets.LEAP_BAKERY_SERVICE_ACCOUNT }}"

      - name: "Install deps"
        run: |
          python -m pip install --upgrade pip
          # Extras are quoted so the shell never treats `[...]` as a glob.
          python -m pip install -e ".[test]"
          python -m pip install "apache-beam[gcp]"

      # The recipe is run as a script path (`python -m` expects a dotted module
      # name, not a file path). Every option line except the last must end with
      # a backslash so the whole invocation is a single shell command.
      # `--machine_type` uses the underscore spelling, like the other Beam options.
      # TODO(review): the `--service_account_email` value below is a redacted
      # placeholder; replace it with the real service-account address.
      - name: "Deploy Beam pipeline"
        run: |
          python xbeam_virtualizarr/recipe.py \
            --runner DataflowRunner \
            --setup_file=./setup.py \
            --project leap-pangeo \
            --job_name "$JOB_NAME" \
            --region us-central1 \
            --machine_type n1-highmem-2 \
            --max_num_workers 10 \
            --temp_location gs://leap-scratch/norlandrhagen/dataflow_temp/temp/ \
            --staging_location gs://leap-scratch/norlandrhagen/dataflow_temp/staging/ \
            --input gs://leap-scratch/norlandrhagen/dataflow_temp/input/ \
            --output gs://leap-scratch/norlandrhagen/dataflow_temp/output/ \
            --service_account_email [email protected]
      # Disabled step: poll Dataflow until no active job matches this run's name.
      # NOTE: an attempt to factor this into a reusable step was abandoned because
      # env.JOB_NAME was not accepted as an input there, so it is kept inline.
      # - name: Wait for Dataflow jobs to finish
      #   run: |
      #     jobname="${{ env.JOB_NAME }}"
      #     while true; do
      #       count=$(gcloud dataflow jobs list --status=active --filter="name:${jobname}" --format="value(id)" | wc -l)
      #       echo "Active Dataflow jobs: $count"
      #       if [ "$count" -eq "0" ]; then
      #         echo "No active Dataflow jobs found."
      #         break
      #       fi
      #       echo "Waiting for Dataflow jobs to finish..."
      #       sleep 20
      #     done