# GCS Insert
#
# Workflow file for run #1090 of "GCS Insert".
name: GCS Insert

# Collects the previous day's GitHub Actions workflow runs and their jobs for
# ibis-project/ibis and uploads them as JSON to a GCS bucket.
on:
  schedule:
    # every day at 01:00 UTC
    - cron: "0 1 * * *"
  workflow_dispatch:

jobs:
  gcs_insert:
    # only run when the repository is owned by ibis-project (e.g. not on forks)
    if: github.repository_owner == 'ibis-project'
    runs-on: ubuntu-latest
    permissions:
      # GITHUB_TOKEN is limited to read-only access to repository contents
      contents: "read"
      # required for GCP workload identity federation
      id-token: "write"
    steps:
      # Publish yesterday's date (YYYY-MM-DD) as a step output so that the
      # later steps all agree on the same day.
      - name: set date
        id: set_date
        run: |
          set -euo pipefail
          echo "yesterday=$(date --date=yesterday +%Y-%m-%d)" >> "$GITHUB_OUTPUT"

      # Write workflows.json (one workflow run per entry) and jobs.json (the
      # jobs of each of those runs) into the working directory.
      - name: generate data
        id: generate_data
        env:
          GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
          # passed through the environment instead of being spliced into the
          # script text
          YESTERDAY: "${{ steps.set_date.outputs.yesterday }}"
        run: |
          set -euo pipefail

          yesterday="${YESTERDAY}"
          [ -n "$yesterday" ] || exit 1

          seconds_per_hour=3600
          requests_per_hour=1000
          # whole seconds to wait between requests, rounded UP so the request
          # budget is never exceeded (3600 / 1000 -> 4)
          rate_limit_sleep="$(( (seconds_per_hour + requests_per_hour - 1) / requests_per_hour ))"

          # NOTE(review): GitHub documents a 1,000-result cap for run listings
          # filtered with `created=`; very busy days may be truncated -- confirm.
          gh api --paginate \
            "repos/ibis-project/ibis/actions/runs?per_page=100&created=${yesterday}" \
            --jq '.workflow_runs[]' > workflows.json

          # create/truncate the file up front so it exists for the upload step
          # even when there were no workflow runs to iterate over
          : > jobs.json

          jq -rcM '.id' < workflows.json | while read -r run_id; do
            gh api --paginate \
              "repos/ibis-project/ibis/actions/runs/${run_id}/jobs?per_page=100" \
              --jq '.jobs[]' >> jobs.json
            # sleep to avoid rate limiting
            sleep "$rate_limit_sleep"
          done

      - uses: google-github-actions/auth@v2
        with:
          project_id: "ibis-gbq"
          workload_identity_provider: "${{ vars.WIF_PROVIDER_NAME }}"

      - uses: google-github-actions/setup-gcloud@v2

      - run: gcloud info

      # Upload both files, gzip-compressed in transit/storage (-Z), under a
      # per-day prefix in the bucket.
      - name: copy to gcs
        env:
          YESTERDAY: "${{ steps.set_date.outputs.yesterday }}"
        run: |
          set -euo pipefail
          yesterday="${YESTERDAY}"
          bucket="gs://ibis-workflow-data/${yesterday}"
          gsutil cp -Z workflows.json jobs.json "${bucket}"