Skip to content

Commit

Permalink
Update iree packages to nightly, fix name of test, create README
Browse files Browse the repository at this point in the history
Signed-off-by: aviator19941 <[email protected]>
  • Loading branch information
aviator19941 committed Nov 16, 2024
1 parent 1547655 commit 0510450
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 17 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci-llama-large-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ jobs:
# Test with nightly releases, not what iree-turbine uses.
pip install --upgrade --pre --no-cache-dir -f https://iree.dev/pip-release-links.html \
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
iree-base-compiler \
iree-base-runtime \
- name: Run llama tests
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-all-llama --iree-hip-target=gfx942 --html=out/index.html
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --html=out/index.html

- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ci-llama-quick-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ jobs:
# Test with nightly releases, not what iree-turbine uses.
pip install --upgrade --pre --no-cache-dir -f https://iree.dev/pip-release-links.html \
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
iree-base-compiler \
iree-base-runtime \
- name: Run llama 8b tests
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --run-8b-llama
- name: Run llama 8b f16 decomposed test
run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --run-quick-llama-test

- name: Upload llama executable files
uses: actions/upload-artifact@v4
Expand Down
10 changes: 5 additions & 5 deletions sharktank/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,17 @@ def pytest_addoption(parser):
)

parser.addoption(
"--run-8b-llama",
"--run-quick-llama-test",
action="store_true",
dest="run-8b-llama",
dest="run-quick-llama-test",
default=False,
help="Enable llama 8b benchmarking tests",
help="Enable llama 8b f16 decomposed benchmarking test",
)

parser.addoption(
"--run-all-llama",
"--run-nightly-llama-tests",
action="store_true",
dest="run-all-llama",
dest="run-nightly-llama-tests",
default=False,
help="Enable all llama benchmarking tests",
)
Expand Down
14 changes: 14 additions & 0 deletions sharktank/tests/models/llama/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# How to run Llama 3.1 Benchmarking Tests
In order to run Llama 3.1 8B F16 Decomposed test:
```
pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s \
--run-quick-llama-test --iree-hip-target=gfx942
```

To filter by test, use the -k option. For example, to
run only the Llama 3.1 70B F16 Decomposed test:
```
pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s \
--run-nightly-llama-tests --iree-hip-target=gfx942 \
-k 'testBenchmark70B_f16_TP8_Decomposed'
```
14 changes: 7 additions & 7 deletions sharktank/tests/models/llama/benchmark_amdgpu_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
)

is_mi300x = pytest.mark.skipif("config.getoption('iree_hip_target') != 'gfx942'")
skipif_run_8b_llama = pytest.mark.skipif(
'config.getoption("run-8b-llama") and not config.getoption("run-all-llama")',
reason="Skipping largs tests when --run-8b is set.",
skipif_run_quick_llama_test = pytest.mark.skipif(
'config.getoption("run-quick-llama-test") and not config.getoption("run-nightly-llama-tests")',
reason="Skipping large tests when --run-quick-llama-test is set.",
)


Expand Down Expand Up @@ -180,7 +180,7 @@ def testBenchmark8B_f16_Decomposed(self):
cwd=self.repo_root,
)

@skipif_run_8b_llama
@skipif_run_quick_llama_test
def testBenchmark8B_f16_Non_Decomposed_Prefill(self):
output_file_name = self.dir_path_8b / "f16_torch_prefill"
output_mlir = self.llama8b_f16_torch_sdpa_artifacts.create_file(
Expand Down Expand Up @@ -214,7 +214,7 @@ def testBenchmark8B_f16_Non_Decomposed_Prefill(self):
cwd=self.repo_root,
)

@skipif_run_8b_llama
@skipif_run_quick_llama_test
@pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
def testBenchmark8B_f16_Non_Decomposed(self):
output_file_name = self.dir_path_8b / "f16_torch"
Expand Down Expand Up @@ -342,7 +342,7 @@ def testBenchmark8B_fp8_Non_Decomposed(self):


@is_mi300x
@skipif_run_8b_llama
@skipif_run_quick_llama_test
class BenchmarkLlama3_1_70B(BaseBenchmarkTest):
def setUp(self):
super().setUp()
Expand Down Expand Up @@ -622,7 +622,7 @@ def testBenchmark70B_fp8_TP8_Non_Decomposed(self):


@is_mi300x
@skipif_run_8b_llama
@skipif_run_quick_llama_test
class BenchmarkLlama3_1_405B(BaseBenchmarkTest):
def setUp(self):
super().setUp()
Expand Down

0 comments on commit 0510450

Please sign in to comment.