From e064e00cca1a40238610842e541c74b57dbad0d8 Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 18 Sep 2024 16:07:24 -0400 Subject: [PATCH] workflow readme, transform existence verification, disable build-library, and tools tests Signed-off-by: David Wood --- .github/workflows/README.md | 22 ++++++++++++++++--- ...uild-library.yml => build-library.yml.old} | 0 .github/workflows/test-code-code2parquet.yml | 10 ++++++++- .github/workflows/test-code-code_quality.yml | 10 ++++++++- .../workflows/test-code-header_cleanser.yml | 10 ++++++++- .github/workflows/test-code-malware.yml | 10 ++++++++- .../workflows/test-code-proglang_select.yml | 10 ++++++++- .../test-code-repo_level_ordering.yml | 10 ++++++++- .github/workflows/test-kfp.yml | 2 ++ .github/workflows/test-language-doc_chunk.yml | 10 ++++++++- .../workflows/test-language-doc_quality.yml | 10 ++++++++- .github/workflows/test-language-lang_id.yml | 10 ++++++++- .../workflows/test-language-pdf2parquet.yml | 10 ++++++++- .../workflows/test-language-pii_redactor.yml | 10 ++++++++- .../workflows/test-language-text_encoder.yml | 10 ++++++++- .github/workflows/test-misc.yml | 18 +++++---------- .github/workflows/test-transform.template | 10 ++++++++- .github/workflows/test-universal-doc_id.yml | 10 ++++++++- .github/workflows/test-universal-ededup.yml | 10 ++++++++- .github/workflows/test-universal-fdedup.yml | 10 ++++++++- .github/workflows/test-universal-filter.yml | 10 ++++++++- .../workflows/test-universal-html2parquet.yml | 10 ++++++++- .github/workflows/test-universal-noop.yml | 10 ++++++++- .github/workflows/test-universal-profiler.yml | 10 ++++++++- .github/workflows/test-universal-resize.yml | 10 ++++++++- .../workflows/test-universal-tokenization.yml | 10 ++++++++- scripts/check-workflows.sh | 20 +++++++++++++++++ 27 files changed, 244 insertions(+), 38 deletions(-) rename .github/workflows/{build-library.yml => build-library.yml.old} (100%) create mode 100755 scripts/check-workflows.sh diff --git 
a/.github/workflows/README.md b/.github/workflows/README.md index 843ca5cf0..c357ff33a 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -3,12 +3,25 @@ Here we have the start of a system to automatically generated github workflows (currently only for transforms). In general, the design is to use templates and `make` to generate/update the workflows. +Goals: +1. Run only tests for a given transform when only the transform changes. +Includes python, ray, spark and kfp_ray as available. +2. When the core dpk lib component files change, test all transforms. +3. When the shared kfp components change, test one randomly selected transform + (We would like to avoid running all transform kfp tests in one PR) +4. Extra credit: If .md or other non-code changes are made, run no tests. + +Assumptions: +1. All transforms will have test workflows. A transform can disable its tests locally +(temporarily?) by renaming its Makefile, e.g., to transforms/universal/noop/Makefile.disabled. ``` -make +git clone .... +... +git checkout -b new-branch +make # Creates new test*.yml workflows git commit -a -s -m "update workflows" -git push +git push --set-upstream origin new-branch ``` - should be sufficient. ## Transforms @@ -24,3 +37,6 @@ When a new transform is added to the repository, 1. Run `make` in this directory to create the new test .yml for all transforms found in transforms/{universal,code,language} directories 1. commit and push the change to your branch with the new transform. 
+## KFP + +## DPK libraries \ No newline at end of file diff --git a/.github/workflows/build-library.yml b/.github/workflows/build-library.yml.old similarity index 100% rename from .github/workflows/build-library.yml rename to .github/workflows/build-library.yml.old diff --git a/.github/workflows/test-code-code2parquet.yml b/.github/workflows/test-code-code2parquet.yml index a9e701631..5bddc137f 100644 --- a/.github/workflows/test-code-code2parquet.yml +++ b/.github/workflows/test-code-code2parquet.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/code2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/code2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/code2parquet run: | if [ -e "transforms/code/code2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/code2parquet DOCKER=docker test-image else echo "transforms/code/code2parquet/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-code-code_quality.yml b/.github/workflows/test-code-code_quality.yml index 4847e292a..faccf10d0 100644 --- a/.github/workflows/test-code-code_quality.yml +++ b/.github/workflows/test-code-code_quality.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/code_quality/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/code_quality/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/code_quality run: | if [ -e "transforms/code/code_quality/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/code_quality DOCKER=docker test-image else echo "transforms/code/code_quality/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-code-header_cleanser.yml b/.github/workflows/test-code-header_cleanser.yml index b8b0d751f..3adce163d 100644 --- a/.github/workflows/test-code-header_cleanser.yml +++ b/.github/workflows/test-code-header_cleanser.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/header_cleanser/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/header_cleanser/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/header_cleanser run: | if [ -e "transforms/code/header_cleanser/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/header_cleanser DOCKER=docker test-image else echo "transforms/code/header_cleanser/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-code-malware.yml b/.github/workflows/test-code-malware.yml index dd13cbe74..90074b77f 100644 --- a/.github/workflows/test-code-malware.yml +++ b/.github/workflows/test-code-malware.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/malware/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/malware/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/malware run: | if [ -e "transforms/code/malware/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/malware DOCKER=docker test-image else echo "transforms/code/malware/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-proglang_select.yml b/.github/workflows/test-code-proglang_select.yml index 16e1ba330..5f3146d93 100644 --- a/.github/workflows/test-code-proglang_select.yml +++ b/.github/workflows/test-code-proglang_select.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/proglang_select/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/proglang_select/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/proglang_select run: | if [ -e "transforms/code/proglang_select/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/proglang_select DOCKER=docker test-image else echo 
"transforms/code/proglang_select/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-code-repo_level_ordering.yml b/.github/workflows/test-code-repo_level_ordering.yml index 26144e57b..50eefc1d3 100644 --- a/.github/workflows/test-code-repo_level_ordering.yml +++ b/.github/workflows/test-code-repo_level_ordering.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/code/repo_level_ordering/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/code/repo_level_ordering/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/code/repo_level_ordering run: | if [ -e "transforms/code/repo_level_ordering/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/code/repo_level_ordering DOCKER=docker test-image else echo "transforms/code/repo_level_ordering/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-kfp.yml b/.github/workflows/test-kfp.yml index 1f96cc6da..f0984c21b 100644 --- a/.github/workflows/test-kfp.yml +++ b/.github/workflows/test-kfp.yml @@ -12,6 +12,7 @@ on: - "kfp/**" - "!**.md" - "!**/doc/**" + - "!**/images/**" - "!**/.gitignore" pull_request: branches: @@ -21,6 +22,7 @@ on: - "kfp/**" - "!**.md" - "!**/doc/**" + - "!**/images/**" - "!**/.gitignore" env: diff --git a/.github/workflows/test-language-doc_chunk.yml b/.github/workflows/test-language-doc_chunk.yml index cc969c66a..a6b203032 100644 --- a/.github/workflows/test-language-doc_chunk.yml +++ b/.github/workflows/test-language-doc_chunk.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/doc_chunk/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/doc_chunk/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/doc_chunk run: | if [ -e "transforms/language/doc_chunk/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/doc_chunk DOCKER=docker test-image else echo "transforms/language/doc_chunk/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-language-doc_quality.yml b/.github/workflows/test-language-doc_quality.yml index c3da25938..0c6972e7b 100644 --- a/.github/workflows/test-language-doc_quality.yml +++ b/.github/workflows/test-language-doc_quality.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/doc_quality/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/doc_quality/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/doc_quality run: | if [ -e "transforms/language/doc_quality/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/doc_quality DOCKER=docker test-image else echo "transforms/language/doc_quality/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-language-lang_id.yml b/.github/workflows/test-language-lang_id.yml index d07f2e0a6..78b755738 100644 --- a/.github/workflows/test-language-lang_id.yml +++ b/.github/workflows/test-language-lang_id.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/lang_id/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/lang_id/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/lang_id run: | if [ -e "transforms/language/lang_id/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/lang_id DOCKER=docker test-image else echo "transforms/language/lang_id/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-language-pdf2parquet.yml b/.github/workflows/test-language-pdf2parquet.yml index e5284b4a5..0d4ca89e0 100644 --- a/.github/workflows/test-language-pdf2parquet.yml +++ b/.github/workflows/test-language-pdf2parquet.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/pdf2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/pdf2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/pdf2parquet run: | if [ -e "transforms/language/pdf2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/pdf2parquet DOCKER=docker test-image else echo "transforms/language/pdf2parquet/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-language-pii_redactor.yml b/.github/workflows/test-language-pii_redactor.yml index 3eded38f5..90508be12 100644 --- a/.github/workflows/test-language-pii_redactor.yml +++ b/.github/workflows/test-language-pii_redactor.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/pii_redactor/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/pii_redactor/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/pii_redactor run: | if [ -e "transforms/language/pii_redactor/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/pii_redactor DOCKER=docker test-image else echo "transforms/language/pii_redactor/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-language-text_encoder.yml b/.github/workflows/test-language-text_encoder.yml index cf9a0604a..b971199bb 100644 --- a/.github/workflows/test-language-text_encoder.yml +++ b/.github/workflows/test-language-text_encoder.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/language/text_encoder/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/language/text_encoder/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/language/text_encoder run: | if [ -e "transforms/language/text_encoder/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/language/text_encoder DOCKER=docker test-image else echo "transforms/language/text_encoder/Makefile not found - testing disabled for this transform." diff --git a/.github/workflows/test-misc.yml b/.github/workflows/test-misc.yml index 50f3edbc1..2c601bbd5 100644 --- a/.github/workflows/test-misc.yml +++ b/.github/workflows/test-misc.yml @@ -12,6 +12,7 @@ on: - "**.md" - "examples/**" - "**/doc/**" + - "**/images/**" - "**/.gitignore" - "**/.dockerignore" pull_request: @@ -22,6 +23,7 @@ on: - "**.md" - "examples/**" - "**/doc/**" + - "**/images/**" - "**/.gitignore" - "**/.dockerignore" @@ -34,20 +36,10 @@ jobs: - name: Test top-level recursive make targets. 
run: | make -n clean test build publish set-versions - test-tools: + check-transform-test-workflows: runs-on: ubuntu-22.04 steps: - name: Checkout uses: actions/checkout@v4 - - name: Test tools - run: | - make -C tools DOCKER=docker venv test - test-tool-images: - runs-on: ubuntu-22.04 - timeout-minutes: 30 - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build and Test Tool images - run: | - make -C tools/ingest2parquet DOCKER=docker test-image + - name: Make sure all transforms have a test workflow + run: bash scripts/check-workflows.sh diff --git a/.github/workflows/test-transform.template b/.github/workflows/test-transform.template index 1ed7fc89b..57750a032 100644 --- a/.github/workflows/test-transform.template +++ b/.github/workflows/test-transform.template @@ -14,6 +14,10 @@ on: paths: - "@TARGET_TRANSFORM_DIR@/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "@TARGET_TRANSFORM_DIR@/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in @TARGET_TRANSFORM_DIR@ run: | if [ -e "@TARGET_TRANSFORM_DIR@/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C @TARGET_TRANSFORM_DIR@ DOCKER=docker test-image else echo "@TARGET_TRANSFORM_DIR@/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-doc_id.yml b/.github/workflows/test-universal-doc_id.yml index 3110ba0eb..0f75c469c 100644 --- a/.github/workflows/test-universal-doc_id.yml +++ b/.github/workflows/test-universal-doc_id.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/doc_id/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/doc_id/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/doc_id run: | if [ -e "transforms/universal/doc_id/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/doc_id DOCKER=docker test-image else echo "transforms/universal/doc_id/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-ededup.yml b/.github/workflows/test-universal-ededup.yml index 7da3e6569..ce3988434 100644 --- a/.github/workflows/test-universal-ededup.yml +++ b/.github/workflows/test-universal-ededup.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/ededup/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/ededup/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/ededup run: | if [ -e "transforms/universal/ededup/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/ededup DOCKER=docker test-image else echo "transforms/universal/ededup/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-fdedup.yml b/.github/workflows/test-universal-fdedup.yml index 1fb9ead05..7262a25fb 100644 --- a/.github/workflows/test-universal-fdedup.yml +++ b/.github/workflows/test-universal-fdedup.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/fdedup/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/fdedup/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/fdedup run: | if [ -e "transforms/universal/fdedup/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/fdedup DOCKER=docker test-image else echo "transforms/universal/fdedup/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-filter.yml b/.github/workflows/test-universal-filter.yml index 6eefa1c05..40f6e2f96 100644 --- a/.github/workflows/test-universal-filter.yml +++ b/.github/workflows/test-universal-filter.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/filter/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/filter/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/filter run: | if [ -e "transforms/universal/filter/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/filter DOCKER=docker test-image else echo "transforms/universal/filter/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-html2parquet.yml b/.github/workflows/test-universal-html2parquet.yml index 3a4763682..466cdb0ed 100644 --- a/.github/workflows/test-universal-html2parquet.yml +++ b/.github/workflows/test-universal-html2parquet.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/html2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/html2parquet/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/html2parquet run: | if [ -e "transforms/universal/html2parquet/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/html2parquet DOCKER=docker test-image else echo "transforms/universal/html2parquet/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-noop.yml b/.github/workflows/test-universal-noop.yml index 703f70baf..0a46d249e 100644 --- a/.github/workflows/test-universal-noop.yml +++ b/.github/workflows/test-universal-noop.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/noop/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/noop/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/noop run: | if [ -e "transforms/universal/noop/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/noop DOCKER=docker test-image else echo "transforms/universal/noop/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-profiler.yml b/.github/workflows/test-universal-profiler.yml index 6fcd60642..2d49adca6 100644 --- a/.github/workflows/test-universal-profiler.yml +++ b/.github/workflows/test-universal-profiler.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/profiler/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/profiler/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/profiler run: | if [ -e "transforms/universal/profiler/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/profiler DOCKER=docker test-image else echo "transforms/universal/profiler/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-resize.yml b/.github/workflows/test-universal-resize.yml index b27f0bf9c..2a69e323c 100644 --- a/.github/workflows/test-universal-resize.yml +++ b/.github/workflows/test-universal-resize.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/resize/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/resize/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/resize run: | if [ -e "transforms/universal/resize/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/resize DOCKER=docker test-image else echo "transforms/universal/resize/Makefile not found - testing disabled for this transform." 
diff --git a/.github/workflows/test-universal-tokenization.yml b/.github/workflows/test-universal-tokenization.yml index 1d4af4eaa..0641b7e2b 100644 --- a/.github/workflows/test-universal-tokenization.yml +++ b/.github/workflows/test-universal-tokenization.yml @@ -14,6 +14,10 @@ on: paths: - "transforms/universal/tokenization/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" pull_request: branches: - "dev" @@ -21,6 +25,10 @@ on: paths: - "transforms/universal/tokenization/**" - "data-processing-lib/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" jobs: check_if_push_image: @@ -84,7 +92,7 @@ jobs: - name: Test transform image in transforms/universal/tokenization run: | if [ -e "transforms/universal/tokenization/Makefile" ]; then - make -C data-processing-lib/spark DOCKER=docker image + make -C data-processing-lib/spark DOCKER=docker image make -C transforms/universal/tokenization DOCKER=docker test-image else echo "transforms/universal/tokenization/Makefile not found - testing disabled for this transform."
diff --git a/scripts/check-workflows.sh b/scripts/check-workflows.sh
new file mode 100755
index 000000000..40f4e2615
--- /dev/null
+++ b/scripts/check-workflows.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+#
+# Verify that every transform directory transforms/<category>/<transform>
+# has a matching .github/workflows/test-<category>-<transform>.yml file.
+#
+# Usage:   bash scripts/check-workflows.sh  (from the top of the repository)
+# Outputs: "Verified existence of ..." per workflow found (stdout);
+#          "Missing ..." per absent workflow (stderr). All transforms are
+#          checked before failing, so every gap is reported in one run.
+# Returns: 0 when every workflow exists; 1 otherwise, or when not run from
+#          the top of the repository.
+set -u
+
+if [[ ! -d transforms ]]; then
+  echo "Please run this script from the top of the repository" >&2
+  exit 1
+fi
+
+missing=0
+# find emits NUL-delimited paths so names containing whitespace or glob
+# characters stay intact. Any path containing "venv" is skipped.
+while IFS= read -r -d '' dir; do
+  transform=${dir##*/}     # <transform>
+  category=${dir%/*}       # transforms/<category>
+  category=${category##*/} # <category>
+  workflow=".github/workflows/test-${category}-${transform}.yml"
+  if [[ -e "$workflow" ]]; then
+    echo "Verified existence of $workflow"
+  else
+    echo "Missing $workflow for transform $category/$transform" >&2
+    missing=1
+  fi
+done < <(find transforms -maxdepth 2 -mindepth 2 -type d ! -path '*venv*' -print0)
+
+if ((missing)); then
+  echo "Fix this by running make in the .github/workflows directory" >&2
+  exit 1
+fi