# Deploy Recipes to Google Dataflow (workflow run #25)

name: Deploy Recipes to Google Dataflow

# Manually triggered; the caller chooses which recipe to submit.
on:
  workflow_dispatch:
    inputs:
      recipe_id:
        description: "The id of a single recipe to submit to Dataflow"
        required: true
        default: "all"

env:
  # Per-run job name: <recipe_id>-<run_id>-<run_attempt>.
  # TODO: recipe_id is used verbatim, so special characters end up in the job
  # name. Sanitizing it here is probably not possible (workflow expressions do
  # not appear to offer a string-replace function -- confirm); it would have
  # to be done in a step instead.
  JOB_NAME: "${{ github.event.inputs.recipe_id }}-${{ github.run_id }}-${{ github.run_attempt }}"
jobs:
  # Submits the selected recipe(s) to Dataflow via pangeo-forge-runner, then
  # blocks until no Dataflow job whose name matches this run's JOB_NAME is
  # still active.
  deploy-recipes:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: "Authenticate to Google Cloud"
        id: "auth"
        uses: "google-github-actions/auth@v2"
        with:
          credentials_json: "${{ secrets.LEAP_BAKERY_SERVICE_ACCOUNT }}"

      - name: "Install dependencies"
        run: |
          python -m pip install --upgrade pip
          pip install pangeo-forge-runner

      - name: "Deploy recipes"
        env:
          GOOGLE_APPLICATION_CREDENTIALS: "${{ steps.auth.outputs.credentials_file_path }}"
          # The user-supplied input reaches the shell through the environment
          # instead of being expanded into the script text, so its content
          # cannot be interpreted as shell syntax.
          RECIPE_ID: "${{ github.event.inputs.recipe_id }}"
        # JOB_NAME comes from the workflow-level `env` block and is read here
        # as an ordinary environment variable for the same reason.
        run: |
          pangeo-forge-runner bake \
            --repo="${{ github.server_url }}/${{ github.repository }}.git" \
            --ref="${{ github.sha }}" \
            --feedstock-subdir='feedstock' \
            --Bake.job_name="${JOB_NAME}" \
            --Bake.recipe_id="${RECIPE_ID}" \
            -f configs/config_dataflow.py

      - name: Wait for Dataflow jobs to finish
        # Kept inline rather than factored into something reusable: an earlier
        # attempt could not pass env.JOB_NAME to it as an input.
        run: |
          # Polls every 20 seconds until no active job matches JOB_NAME.
          # A failing `gcloud` call must not be mistaken for "zero active
          # jobs": it is retried, and the step fails once max_failures
          # consecutive calls have failed.
          max_failures=5
          failures=0
          while true; do
            if ! active_ids="$(gcloud dataflow jobs list --status=active --filter="name:${JOB_NAME}" --format="value(id)")"; then
              failures=$((failures + 1))
              echo "gcloud failed to list Dataflow jobs (${failures}/${max_failures})."
              if [ "${failures}" -ge "${max_failures}" ]; then
                echo "::error::Could not query Dataflow job status; giving up."
                exit 1
              fi
              sleep 20
              continue
            fi
            failures=0
            # Count non-empty lines; grep exits 1 when the count is zero.
            count="$(grep -c . <<< "${active_ids}" || true)"
            echo "Active Dataflow jobs: $count"
            if [ "$count" -eq "0" ]; then
              echo "No active Dataflow jobs found."
              break
            fi
            echo "Waiting for Dataflow jobs to finish..."
            sleep 20
          done