From e3513b569929b9557f0f76b92a37923bb08780d0 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Fri, 23 Jun 2023 10:10:10 -0700 Subject: [PATCH 01/10] Check for Optix install Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 103d92e..02b5cfd 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -26,3 +26,5 @@ jobs: sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub time sudo apt-get update sudo apt -y install libnpp-11-8 + dpkg -l | grep -i nvidia + ls -l /usr/lib/x86_64-linux-gnu/libnvoptix.so.* From b556b4ff8d4b46dcc514d811def26b7d3eae0df4 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Fri, 23 Jun 2023 10:20:27 -0700 Subject: [PATCH 02/10] Only look at what we need Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 02b5cfd..08c8967 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -19,12 +19,12 @@ jobs: echo '#### nvidia-docker.list ####' cat /etc/apt/sources.list.d/nvidia-docker.list echo '### Get rid of cuda.list.save ###' - rm /etc/apt/sources.list.d/cuda.list.save + #sudo rm /etc/apt/sources.list.d/cuda.list.save #sudo apt-key del 7fa2af80 #wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb #sudo dpkg -i cuda-keyring_1.0-1_all.deb - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - time sudo apt-get update - sudo apt -y install libnpp-11-8 + #sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub + #sudo apt-get update + #sudo apt -y install libnpp-11-8 dpkg -l | grep -i nvidia ls -l /usr/lib/x86_64-linux-gnu/libnvoptix.so.* From 3beaa1b2544c1792b2d030a7917d4a695583685b Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Fri, 23 Jun 2023 10:57:46 -0700 Subject: [PATCH 03/10] Print out driver version Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 08c8967..1e230ae 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -26,5 +26,6 @@ jobs: #sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub #sudo apt-get update #sudo apt -y install libnpp-11-8 + cat /proc/driver/nvidia/version dpkg -l | grep -i nvidia ls -l /usr/lib/x86_64-linux-gnu/libnvoptix.so.* From 32aec6811436031293122ea473eb7d4a88262616 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Wed, 19 Jul 2023 18:22:19 -0700 Subject: [PATCH 04/10] What GPU do we have? Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 54 +++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 1e230ae..e7e4b8e 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -7,25 +7,37 @@ on: jobs: ubuntu-gpu: runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr + container: + image: aswf/ci-common:3 + env: + - DISPLAY: $DISPLAY + - QT_X11_NO_MITSHM: 1 + volumes: + - /tmp/.X11-unix:/tmp/.X11-unix + options: --gpus all steps: - - name: Update APT repo for CUDA - run: | - cat /etc/apt/sources.list - ls -l /etc/apt/sources.list.d/ - echo '#### cuda.list ####' - cat /etc/apt/sources.list.d/cuda.list - echo '#### cuda.list.save ####' - cat /etc/apt/sources.list.d/cuda.list.save - echo '#### nvidia-docker.list ####' - cat /etc/apt/sources.list.d/nvidia-docker.list - echo '### Get rid of cuda.list.save ###' - #sudo rm /etc/apt/sources.list.d/cuda.list.save - #sudo apt-key del 7fa2af80 - #wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb - #sudo dpkg -i cuda-keyring_1.0-1_all.deb - #sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub - #sudo apt-get update - #sudo apt -y install libnpp-11-8 - cat /proc/driver/nvidia/version - dpkg -l | grep -i nvidia - ls -l /usr/lib/x86_64-linux-gnu/libnvoptix.so.* + - name: run glxinfo inside container + run: | + nvidia-smi + glxinfo +# - name: Update APT repo for CUDA +# run: | +# cat /etc/apt/sources.list +# ls -l /etc/apt/sources.list.d/ +# echo '#### cuda.list ####' +# cat /etc/apt/sources.list.d/cuda.list +# echo '#### cuda.list.save ####' +# cat /etc/apt/sources.list.d/cuda.list.save +# echo '#### nvidia-docker.list ####' +# cat /etc/apt/sources.list.d/nvidia-docker.list +# echo '### Get rid of cuda.list.save ###' +# #sudo rm /etc/apt/sources.list.d/cuda.list.save +# #sudo apt-key del 7fa2af80 +# #wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb +# #sudo dpkg -i cuda-keyring_1.0-1_all.deb +# #sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub +# #sudo apt-get update +# #sudo apt -y install libnpp-11-8 +# cat /proc/driver/nvidia/version +# dpkg -l | grep -i nvidia +# ls -l /usr/lib/x86_64-linux-gnu/libnvoptix.so.* From b5e7733d60db97c05029c1fb07d4fa0593ba0ab6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Wed, 19 Jul 2023 18:28:57 -0700 Subject: [PATCH 05/10] multi env var syntax Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index e7e4b8e..6461924 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -10,8 +10,8 @@ jobs: container: image: aswf/ci-common:3 env: - - DISPLAY: $DISPLAY - - QT_X11_NO_MITSHM: 1 + DISPLAY: $DISPLAY + QT_X11_NO_MITSHM: 1 volumes: - /tmp/.X11-unix:/tmp/.X11-unix options: --gpus all From 12a07b5d2b524473196c4f07baeba2eb93b33749 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Thu, 20 Jul 2023 12:55:35 -0700 Subject: [PATCH 06/10] Are GPU runners now running Ubuntu 22.04? Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 6461924..332a6f9 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -6,7 +6,7 @@ on: pull_request: jobs: ubuntu-gpu: - runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr + runs-on: ubuntu-22.04-gpu-6c-112g-336h-16vr container: image: aswf/ci-common:3 env: From 541b6939536f3da66e38730281e55c6582769a44 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Sat, 22 Jul 2023 09:39:00 -0700 Subject: [PATCH 07/10] Ubuntu 22.04 is correct, use the right container Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 332a6f9..6031d2e 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -6,9 +6,9 @@ on: pull_request: jobs: ubuntu-gpu: - runs-on: ubuntu-22.04-gpu-6c-112g-336h-16vr + runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr container: - image: aswf/ci-common:3 + image: aswf/ci-common:3-clang15 env: DISPLAY: $DISPLAY QT_X11_NO_MITSHM: 1 From 2020daacf53b201cee8de90e175ccd517796134c Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Sat, 22 Jul 2023 09:44:17 -0700 Subject: [PATCH 08/10] Try larger instance, no container Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 6031d2e..7616a5c 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -6,20 +6,21 @@ on: pull_request: jobs: ubuntu-gpu: - runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr - container: - image: aswf/ci-common:3-clang15 - env: - DISPLAY: $DISPLAY - QT_X11_NO_MITSHM: 1 - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix - options: --gpus all +# runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr + runs-on: ubuntu-20.04-gpu-12c-224g-672h-32vr +# container: +# image: aswf/ci-common:3-clang15 +# env: +# DISPLAY: $DISPLAY +# QT_X11_NO_MITSHM: 1 +# volumes: +# - /tmp/.X11-unix:/tmp/.X11-unix +# options: --gpus all steps: - name: run glxinfo inside container run: | nvidia-smi - glxinfo +# glxinfo # - name: Update APT repo for CUDA # run: | # cat /etc/apt/sources.list From 163c315b3d1b3190524df01f1130280d45023213 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Tue, 28 May 2024 23:23:14 -0700 Subject: [PATCH 09/10] Pick up to date instance Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 7616a5c..7c46e00 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -7,7 +7,7 @@ on: jobs: ubuntu-gpu: # runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr - runs-on: ubuntu-20.04-gpu-12c-224g-672h-32vr + runs-on: ubuntu-20.04-gpu-8c-32g-300h # container: # image: aswf/ci-common:3-clang15 # env: From 7092717df8862ec9397915d78cc0eb377cc7f919 Mon Sep 17 00:00:00 2001 From: Jean-Francois Panisset Date: Tue, 28 May 2024 23:25:34 -0700 Subject: [PATCH 10/10] Pick the right GPU runner Signed-off-by: Jean-Francois Panisset --- .github/workflows/gpu_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gpu_test.yml b/.github/workflows/gpu_test.yml index 7c46e00..6e72858 100644 --- a/.github/workflows/gpu_test.yml +++ b/.github/workflows/gpu_test.yml @@ -7,7 +7,7 @@ on: jobs: ubuntu-gpu: # runs-on: ubuntu-20.04-gpu-6c-112g-336h-16vr - runs-on: ubuntu-20.04-gpu-8c-32g-300h + runs-on: ubuntu-20.04-gpu-t4-4c-16g-176h # container: # image: aswf/ci-common:3-clang15 # env: