Skip to content

nightly-build-and-test #107

nightly-build-and-test

nightly-build-and-test #107

# Nightly build-and-test workflow for the PTI SDK and the PTI tools.
name: nightly-build-and-test

# Triggers: a manual run from the Actions UI, or the nightly schedule.
on:
  schedule:
    # Daily at 05:30 UTC (00:30 EST).
    - cron: "30 5 * * *"
  workflow_dispatch:

# Every `run:` step in this workflow is executed with bash.
defaults:
  run:
    shell: bash

# The workflow token only gets read access to the repository contents.
permissions:
  contents: read

jobs:
sdk-build-and-test:
  # Builds, tests, sanitizer-checks, and packages the SDK inside each
  # container image listed in the matrix, one image at a time.
  name: pti-sdk
  # The whole job is gated on the PTI_RUN_TESTS configuration variable.
  if: vars.PTI_RUN_TESTS == 1
  #
  # pti-2gpu is a selector for machines with Intel Ponte Vecchio,
  # officially Intel(R) Data Center GPU Max 1100.
  #
  runs-on: [self-hosted, Linux, pti-2gpu]
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    # Matrix entries run sequentially.
    max-parallel: 1
    matrix:
      include:
        - container: ${{ vars.PTI_DOCKER_RHEL_9_ONEAPI_24_2_1 }}
          uploadname: "rhel-9.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_SLES_15_3_ONEAPI_24_2_1 }}
          uploadname: "sles-15.3.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_UBUNTU_24_4_ONEAPI_24_2_1 }}
          uploadname: "ubuntu-24.04.2024.2.1"
          preset: linux-icpx-release
        - container: ${{ vars.PTI_DOCKER_ROCKY_8_ONEAPI_24_2_1 }}
          uploadname: "rocky-8.2024.2.1"
          preset: linux-icpx-release
  container:
    image: ${{ matrix.container }}
    # Expose the GPU device nodes and grant CAP_PERFMON to the container;
    # PTI_DOCKER_PROXIES appends extra docker options from configuration.
    options: --device=/dev/dri --cap-add CAP_PERFMON ${{ vars.PTI_DOCKER_PROXIES }}
  steps:
    - name: Clean-up
      run: rm -rf *
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset ${{ matrix.preset }}
        cmake --build --preset ${{ matrix.preset }} -j $(($(nproc)/2))
    - name: Test
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        ctest --output-on-failure --preset ${{ matrix.preset }}
    - name: Build AddressSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-asan
        cmake --build --preset linux-asan --parallel $(($(nproc)/2))
    - name: Build ThreadSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-tsan
        cmake --build --preset linux-tsan --parallel $(($(nproc)/2))
    - name: Build libFuzzer
      working-directory: sdk
      run: |
        # To ensure it still builds, run build for fuzz targets until we have
        # proper fuzz testing infrastructure in place.
        source /opt/intel/oneapi/setvars.sh
        cmake --preset linux-fuzz
        cmake --build --preset linux-fuzz --parallel $(($(nproc)/2))
    #
    # Rocky 8 and Red Hat 9 have a problem with this test.
    #
    - name: Test AddressSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        #
        # Skip on hosts with more than one GPU, on Rocky 8, and on
        # Red Hat 9 because of an unexpected failure as documented
        # in PTI-74.
        #
        # Read the release file once, and only when it exists, so that
        # non-Red-Hat images (SLES, Ubuntu) do not print `cat` errors.
        release_info=""
        if [ -f /etc/redhat-release ]; then
          release_info=$(cat /etc/redhat-release)
        fi
        # `grep -c` exits non-zero when it counts 0 matches; `|| true`
        # keeps the step alive under `bash -e -o pipefail`.
        gpu_count=$(sycl-ls | grep -c 'ext_oneapi_level_zero:gpu:' || true)
        if [ "${gpu_count:-0}" -gt 1 ] || \
           [[ ${release_info} =~ Rocky.*8 ]] || \
           [[ ${release_info} =~ Red\ Hat.*9 ]]; then
          [ -n "${release_info}" ] && echo "${release_info}"
          echo "Skipping Test AddressSanitizer"
          exit 0
        fi
        ctest --preset linux-asan --output-on-failure -L samples
    - name: Test ThreadSanitizer
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        ctest --preset linux-tsan --output-on-failure -L samples
    - name: Package SDK
      working-directory: sdk
      run: |
        source /opt/intel/oneapi/setvars.sh
        cpack -G TGZ --config build-${{ matrix.preset }}/CPackConfig.cmake
    - name: Upload SDK
      uses: actions/upload-artifact@v4
      with:
        name: "PTI-SDK-PKG-${{ matrix.preset }}-${{ matrix.uploadname }}"
        path: |
          sdk/pti*.tar.gz
          sdk/pti*.tar.gz*
pti-tools-build-and-test:
  # Builds and tests each PTI tool via tests/run.py inside every container
  # image listed in the matrix, one image at a time.
  name: Test pti library Ubuntu-22-04
  # Run even when the SDK job did not succeed, but only when tests are
  # enabled. The whole condition must sit inside ONE expression: text
  # placed after `${{ ... }}` turns the value into a non-empty string,
  # which is always truthy and would disable the PTI_RUN_TESTS gate.
  if: ${{ always() && vars.PTI_RUN_TESTS == 1 }}
  needs: sdk-build-and-test
  runs-on: [Linux, pti-2gpu]
  # A failure in this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    # Matrix entries run sequentially.
    max-parallel: 1
    matrix:
      include:
        - container: ${{ vars.PTI_DOCKER_IMAGE_2024_1_1 }}
        - container: ${{ vars.PTI_DOCKER_IMAGE_2024_1_0 }}
  container:
    image: ${{ matrix.container }}
    # Expose the GPU device nodes and grant CAP_PERFMON to the container.
    options: --device=/dev/dri --cap-add CAP_PERFMON
  steps:
    - name: Clean-up
      run: rm -rf *
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build-and-test-unitrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s unitrace
    - name: Build-and-test-onetrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s onetrace
    - name: Build-and-test-oneprof
      run: |
        source /opt/intel/oneapi/setvars.sh
        #
        # Must skip on hosts with more than one GPU because of
        # random failures as documented in PTI-75.
        #
        if [ $(sycl-ls|grep 'ext_oneapi_level_zero:gpu:' -c) -gt 1 ]; then
          echo "Skipping Build-and-test-oneprof"
          exit 0
        fi
        python ./tests/run.py -s oneprof
    - name: Build-and-test-sysmon
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s sysmon
    - name: Build-and-test-cl_gpu_metrics
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s cl_gpu_metrics
    - name: Build-and-test-instcount
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s instcount
    - name: Build-and-test-ze_debug_info
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s ze_debug_info
    - name: Build-and-test-cl_debug_info
      run: |
        source /opt/intel/oneapi/setvars.sh
        python ./tests/run.py -s cl_debug_info
pti-tools-build-and-test-rocky:
  # Builds and tests each PTI tool via tests/run.py inside the Rocky 8
  # image, using gcc-toolset-13 and python3.11.
  name: Test pti library for Rocky8
  # Run even when the previous job did not succeed, but only when tests
  # are enabled. The whole condition must sit inside ONE expression: text
  # placed after `${{ ... }}` turns the value into a non-empty string,
  # which is always truthy and would disable the PTI_RUN_TESTS gate.
  if: ${{ always() && vars.PTI_RUN_TESTS == 1 }}
  needs: pti-tools-build-and-test
  runs-on: [Linux, pti-2gpu]
  strategy:
    matrix:
      include:
        - container: ${{ vars.PTI_DOCKER_IMAGE_ROCKY8_2024_1_1 }}
  container:
    image: ${{ matrix.container }}
    # Expose the GPU device nodes and grant CAP_PERFMON to the container.
    options: --device=/dev/dri --cap-add CAP_PERFMON
  steps:
    - name: Clean-up
      run: rm -rf *
    - name: Checkout
      uses: actions/checkout@v4
    - name: Build-and-test-unitrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s unitrace
    - name: Build-and-test-onetrace
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s onetrace
    - name: Build-and-test-oneprof
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s oneprof
    - name: Build-and-test-sysmon
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s sysmon
    - name: Build-and-test-cl_gpu_metrics
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s cl_gpu_metrics
    #
    # Fix!!
    # A problem with GLIBCXX_3.4.26 as noted in VASP-30704.
    # The condition below is false for the only image in the matrix, so
    # this step is currently skipped on it.
    #
    - name: Build-and-test-instcount
      if: vars.PTI_DOCKER_IMAGE_ROCKY8_2024_1_1 != matrix.container
      run: |
        source /opt/intel/oneapi/setvars.sh
        source /opt/rh/gcc-toolset-13/enable
        python3.11 ./tests/run.py -s instcount