From f2be3fb3c799432c12f7a104523ce57580757975 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 14 Nov 2024 11:33:48 +0000 Subject: [PATCH 1/4] remove cuda from CI and ofed from packer config --- .github/workflows/fatimage.yml | 10 ++-------- .github/workflows/nightlybuild.yml | 8 -------- .github/workflows/s3-image-sync.yml | 2 -- .github/workflows/trivyscan.yml | 2 +- packer/openstack.pkr.hcl | 4 ++-- 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 947f9410..37d402c7 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -26,10 +26,6 @@ jobs: - RL9 build: - openstack.openhpc - - openstack.openhpc-cuda - exclude: - - os_version: RL8 - build: openstack.openhpc-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -37,12 +33,10 @@ jobs: SOURCE_IMAGES_MAP: | { "RL8": { - "openstack.openhpc": "rocky-latest-RL8", - "openstack.openhpc-cuda": "rocky-latest-cuda-RL8" + "openstack.openhpc": "rocky-latest-RL8" }, "RL9": { - "openstack.openhpc": "rocky-latest-RL9", - "openstack.openhpc-cuda": "rocky-latest-cuda-RL9" + "openstack.openhpc": "rocky-latest-RL9" } } diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index 5e06a314..45b0e142 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -28,11 +28,6 @@ jobs: - RL9 build: - openstack.rocky-latest - - openstack.rocky-latest-cuda - exclude: - - os_version: RL8 - build: openstack.rocky-latest-cuda - env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -144,10 +139,7 @@ jobs: - RL9 image: - rocky-latest - - rocky-latest-cuda exclude: - - os_version: RL8 - image: rocky-latest-cuda - target_cloud: LEAFCLOUD env: OS_CLOUD: openstack diff --git a/.github/workflows/s3-image-sync.yml b/.github/workflows/s3-image-sync.yml index 0ffaae95..85e0c2fa 100644 --- a/.github/workflows/s3-image-sync.yml +++ b/.github/workflows/s3-image-sync.yml @@ -42,7 +42,6 @@ jobs: build: - RL8 - RL9 - - RL9-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -112,7 +111,6 @@ jobs: build: - RL8 - RL9 - - RL9-cuda exclude: - cloud: ${{ needs.image_upload.outputs.ci_cloud }} diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index d1c789a1..625a4746 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - build: ["RL8", "RL9", "RL9-cuda"] + build: ["RL8", "RL9"] env: JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json OS_CLOUD: openstack diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index fae0bf7b..00b40d24 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -153,8 +153,8 @@ variable "groups" { description = "Additional inventory groups (other than 'builder') to add build VM to, keyed by source name" default = { # fat image builds: - rocky-latest = ["update", "ofed"] - rocky-latest-cuda = ["update", "ofed", "cuda"] + rocky-latest = ["update"] + rocky-latest-cuda = ["update", "cuda"] openhpc = ["control", "compute", "login"] openhpc-cuda = ["control", "compute", "login"] } From 4a3f1958f5dea077160f82734c5870c740bbe7e9 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 15 Nov 2024 09:57:11 +0000 Subject: [PATCH 2/4] remove all reference to cuda/ofed in CI and packer config --- .github/workflows/fatimage.yml | 13 ++++++------- .github/workflows/nightlybuild.yml | 9 ++++----- .github/workflows/trivyscan.yml | 4 ++-- packer/openstack.pkr.hcl | 20 ++------------------ 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 37d402c7..893a8f7e 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -15,17 +15,15 @@ jobs: openstack: name: openstack-imagebuild concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }} # to branch/PR + OS cancel-in-progress: true runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + matrix: # build RL8, RL9 os_version: - RL8 - RL9 - build: - - openstack.openhpc env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -39,6 +37,7 @@ jobs: "openstack.openhpc": "rocky-latest-RL9" } } + BUILD: openstack.openhpc steps: - uses: actions/checkout@v2 @@ -84,13 +83,13 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ matrix.build }} \ + -only=${{ env.BUILD }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} - SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][env.BUILD] }} - name: Get created image names from manifest id: manifest @@ -107,7 +106,7 @@ jobs: - name: Upload manifest artifact uses: actions/upload-artifact@v4 with: - name: image-details-${{ matrix.build }}-${{ matrix.os_version }} + name: image-details-${{ env.BUILD }}-${{ matrix.os_version }} path: | ./image-id.txt ./image-name.txt diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index 45b0e142..4d51aa2f 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -17,17 +17,15 @@ jobs: openstack: name: openstack-imagebuild concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }} # to branch/PR + OS cancel-in-progress: true runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8, RL9, RL9+CUDA versions + matrix: # build RL8, RL9 os_version: - RL8 - RL9 - build: - - openstack.rocky-latest env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -37,6 +35,7 @@ jobs: "RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2", "RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" } + BUILD: openstack.openhpc steps: - uses: actions/checkout@v2 @@ -82,7 +81,7 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ matrix.build }} \ + -only=${{ env.BUILD }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ openstack.pkr.hcl diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 625a4746..4c090b85 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -10,7 +10,7 @@ on: jobs: scan: concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build }} # to branch/PR + OS + build + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build }} # to branch/PR + build cancel-in-progress: true runs-on: ubuntu-latest strategy: @@ -100,7 +100,7 @@ jobs: uses: github/codeql-action/upload-sarif@v3 with: sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - category: "${{ matrix.os_version }}-${{ matrix.build }}" + category: "${{ matrix.build }}" - name: Fail if scan has CRITICAL vulnerabilities uses: aquasecurity/trivy-action@0.24.0 diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 00b40d24..52202ead 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -127,15 +127,13 @@ variable "volume_size" { default = { # fat image builds, GB: rocky-latest = 15 - rocky-latest-cuda = 30 openhpc = 15 - openhpc-cuda = 30 } } variable "extra_build_volume_size" { type = number - default = 15 # same as default non-CUDA build + default = 15 } variable "image_disk_format" { @@ -154,9 +152,7 @@ variable "groups" { default = { # fat image builds: rocky-latest = ["update"] - rocky-latest-cuda = ["update", "cuda"] openhpc = ["control", "compute", "login"] - openhpc-cuda = ["control", "compute", "login"] } } @@ -210,24 +206,12 @@ build { image_name = "${source.name}-${var.os_version}" } - # latest nightly cuda image: - source "source.openstack.openhpc" { - name = "rocky-latest-cuda" - image_name = "${source.name}-${var.os_version}" - } - - # OFED fat image: + # fat image: source "source.openstack.openhpc" { name = "openhpc" image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } - # CUDA fat image: - source "source.openstack.openhpc" { - name = "openhpc-cuda" - image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" - } - # Extended site-specific image, built on fat image: source "source.openstack.openhpc" { name = "openhpc-extra" From 097cdae12b9ffabd9fcc7965adeb5038a64619ab Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 15 Nov 2024 11:37:24 +0000 Subject: [PATCH 3/4] fix nightly building fatimage in ci, revert to matrix.build --- .github/workflows/fatimage.yml | 11 ++++++----- .github/workflows/nightlybuild.yml | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 893a8f7e..a8d3dbe2 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -15,7 +15,7 @@ jobs: openstack: name: openstack-imagebuild concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }} # to branch/PR + OS + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build cancel-in-progress: true runs-on: ubuntu-22.04 strategy: @@ -24,6 +24,8 @@ jobs: os_version: - RL8 - RL9 + build: + - openstack.openhpc env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -37,7 +39,6 @@ jobs: "openstack.openhpc": "rocky-latest-RL9" } } - BUILD: openstack.openhpc steps: - uses: actions/checkout@v2 @@ -83,13 +84,13 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ env.BUILD }} \ + -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} - SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][env.BUILD] }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} - name: Get created image names from manifest id: manifest @@ -106,7 +107,7 @@ jobs: - name: Upload manifest artifact uses: actions/upload-artifact@v4 with: - name: image-details-${{ env.BUILD }}-${{ matrix.os_version }} + name: image-details-${{ matrix.build }}-${{ matrix.os_version }} path: | ./image-id.txt ./image-name.txt diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index 4d51aa2f..da3de4ea 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -17,7 +17,7 @@ jobs: openstack: name: openstack-imagebuild concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }} # to branch/PR + OS + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build cancel-in-progress: true runs-on: ubuntu-22.04 strategy: @@ -26,6 +26,8 @@ jobs: os_version: - RL8 - RL9 + build: + - openstack.rocky-latest env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -35,7 +37,6 @@ jobs: "RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2", "RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" } - BUILD: openstack.openhpc steps: - uses: actions/checkout@v2 @@ -81,7 +82,7 @@ jobs: PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ env.BUILD }} \ + -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ openstack.pkr.hcl From eaa40f6d9ef69f2187d223f6be69156544f22eff Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 15 Nov 2024 13:02:08 +0000 Subject: [PATCH 4/4] bump images --- .../.stackhpc/terraform/cluster_image.auto.tfvars.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json index cca77908..87f5c46c 100644 --- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -1,7 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-241114-1531-6f0a3a02", - "RL9": "openhpc-RL9-241114-1531-6f0a3a02", - "RL9-cuda": "openhpc-cuda-RL9-241114-1531-6f0a3a02" + "RL8": "openhpc-RL8-241115-1209-097cdae1", + "RL9": "openhpc-RL9-241115-1209-097cdae1" } }