From a8803448e647261ea4c29a9937922f4c832bec27 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Mon, 15 Jul 2024 11:15:04 +0200
Subject: [PATCH 01/11] Added ReFrame test for MetalWalls

---
 eessi/testsuite/tests/apps/MetalWalls.py | 103 +++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 eessi/testsuite/tests/apps/MetalWalls.py

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
new file mode 100644
index 00000000..f3ba1b7a
--- /dev/null
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -0,0 +1,103 @@
+"""
+This module tests the binary 'mw' in available modules containing substring 'MetalWalls'.
+Test input files are defined in MetalWalls's repo under hackathonGPU/benchmark*,
+see https://github.com/reframe-hpc/reframe/blob/develop/hpctestlib/sciapps/metalwalls/benchmarks.py
+
+ReFrame terminology:
+
+"pipeline stages":
+https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#pipeline-hooks
+
+"test parameter": a list of values, which will generate different test variants.
+https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#reframe.core.builtins.parameter
+
+"test variant": a version of a test with a specific value for each test parameter
+https://reframe-hpc.readthedocs.io/en/stable/regression_test_api.html#test-variants
+
+"concrete test cases": all test combinations that will actually run:
+- test variants
+- valid system:partition+programming environment combinations
+https://reframe-hpc.readthedocs.io/en/stable/tutorial_deps.html#listing-dependencies
+
+Tests can be filtered by name, tag, programming environment, system, partition, or maintainer,
+see https://reframe-hpc.readthedocs.io/en/stable/manpage.html#test-filtering
+
+Hooks acting on all possible test combinations (before filtering) are called after the 'init' stage.
+Hooks acting on concrete test cases (after filtering) are called after the 'setup' stage.
+
+See also https://reframe-hpc.readthedocs.io/en/stable/pipeline.html
+"""
+import reframe as rfm
+from hpctestlib.sciapps.metalwalls.benchmarks import MetalWallsCheck
+from reframe.core.builtins import run_after
+from reframe.core.parameters import TestParam as parameter
+
+from eessi.testsuite import hooks
+from eessi.testsuite.constants import (COMPUTE_UNIT, CPU, DEVICE_TYPES, GPU,
+                                       SCALES, TAGS)
+from eessi.testsuite.utils import find_modules, log
+
+
+@rfm.simple_test
+class EESSI_MetalWalls_MW(MetalWallsCheck):
+    """MetalWalls benchmark tests.
+
+    `MetalWalls <https://gitlab.com/ampere2/metalwalls>`__ """
+
+    scale = parameter(SCALES.keys())
+
+    valid_systems = ['*']
+    valid_prog_environs = ['default']
+    time_limit = '30m'
+
+    module_name = parameter(find_modules('MetalWalls'))
+    # For now, MetalWalls is being build for CPU targets only
+    # compute_device = parameter([DEVICE_TYPES[CPU], DEVICE_TYPES[GPU]])
+    compute_device = parameter([DEVICE_TYPES[CPU], ])
+
+    @run_after('init')
+    def run_after_init(self):
+        """Hooks to run after the init phase"""
+
+        # Filter on which scales are supported by the partitions defined in the ReFrame configuration
+        hooks.filter_supported_scales(self)
+
+        # Make sure that GPU tests run in partitions that support running on a GPU,
+        # and that CPU-only tests run in partitions that support running CPU-only.
+        # Also support setting valid_systems on the cmd line.
+        hooks.filter_valid_systems_by_device_type(self, required_device_type=self.compute_device)
+
+        # Support selecting modules on the cmd line.
+        hooks.set_modules(self)
+
+        # Support selecting scales on the cmd line via tags.
+        hooks.set_tag_scale(self)
+
+    @run_after('init')
+    def set_tag_ci(self):
+        """Set tag CI on smallest benchmark, so it can be selected on the cmd line via --tag CI"""
+        if self.benchmark_info[0] == 'hackathonGPU/benchmark':
+            self.tags.add(TAGS['CI'])
+            log(f'tags set to {self.tags}')
+
+    @run_after('setup')
+    def run_after_setup(self):
+        """Hooks to run after the setup phase"""
+
+        # Calculate default requested resources based on the scale:
+        # 1 task per CPU for CPU-only tests, 1 task per GPU for GPU tests.
+        # Also support setting the resources on the cmd line.
+        if self.compute_device == DEVICE_TYPES[GPU]:
+            hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[GPU])
+        else:
+            hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[CPU])
+
+    @run_after('setup')
+    def set_omp_num_threads(self):
+        """
+        Set number of OpenMP threads via OMP_NUM_THREADS.
+        Set default number of OpenMP threads equal to number of CPUs per task.
+        """
+
+        self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
+        log(f'env_vars set to {self.env_vars}')
\ No newline at end of file

From 66262819622d01f0a9f255e17b6e99ed2f6f3365 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Mon, 15 Jul 2024 12:34:47 +0200
Subject: [PATCH 02/11] PEP

---
 eessi/testsuite/tests/apps/MetalWalls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index f3ba1b7a..76fd6d3e 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -100,4 +100,4 @@ def set_omp_num_threads(self):
         """
 
         self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
-        log(f'env_vars set to {self.env_vars}')
\ No newline at end of file
+        log(f'env_vars set to {self.env_vars}')

From 06f90c96b9c7e6f01a470a4122cbb4b68117e881 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Thu, 8 Aug 2024 17:02:58 +0200
Subject: [PATCH 03/11] Added process binding and memory requirements

---
 eessi/testsuite/tests/apps/MetalWalls.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 76fd6d3e..94c9fed9 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -92,6 +92,18 @@ def run_after_setup(self):
         else:
             hooks.assign_tasks_per_compute_unit(test=self, compute_unit=COMPUTE_UNIT[CPU])
 
+    @run_after('setup')
+    def set_binding(self):
+        hooks.set_compact_process_binding(self)
+
+    @run_after('setup')
+    def request_mem(self):
+        mem_per_task = 0.4
+        if self.benchmark_info[0] == 'hackathonGPU/benchmark5':
+            mem_per_task = 1.2
+        memory_required = self.num_tasks_per_node * mem_per_task + 2
+        hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024)
+
     @run_after('setup')
     def set_omp_num_threads(self):
         """

From 77fa436428373ae10bc916890918153106aa7a33 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Thu, 29 Aug 2024 13:41:39 +0200
Subject: [PATCH 04/11] Docstrings and skip test if corecnt > 256

---
 eessi/testsuite/tests/apps/MetalWalls.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 94c9fed9..9a2af792 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -94,16 +94,27 @@ def run_after_setup(self):
 
     @run_after('setup')
     def set_binding(self):
+        """Set binding to compact to improve performance reproducibility."""
         hooks.set_compact_process_binding(self)
 
     @run_after('setup')
     def request_mem(self):
+        """Request memory per node based on the benchmark."""
         mem_per_task = 0.4
         if self.benchmark_info[0] == 'hackathonGPU/benchmark5':
             mem_per_task = 1.2
         memory_required = self.num_tasks_per_node * mem_per_task + 2
         hooks.req_memory_per_node(test=self, app_mem_req=memory_required * 1024)
 
+    @run_after('setup')
+    def skip_max_corecnt(self):
+        """Skip tests if number of tasks per node exceeds maximum core count."""
+        max_corecnt = 256
+        self.skip_if(
+            self.num_tasks > max_corecnt,
+            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {self.bench_name}'
+        )
+
     @run_after('setup')
     def set_omp_num_threads(self):
         """

From 9aeeb9961f24eca35885078ada995b6913607453 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Tue, 3 Sep 2024 15:57:28 +0200
Subject: [PATCH 05/11] Replaced setting OMP_NUM_THREADS with new hook

---
 eessi/testsuite/tests/apps/MetalWalls.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 9a2af792..bfeaa0e7 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -121,6 +121,4 @@ def set_omp_num_threads(self):
         Set number of OpenMP threads via OMP_NUM_THREADS.
         Set default number of OpenMP threads equal to number of CPUs per task.
         """
-
-        self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
-        log(f'env_vars set to {self.env_vars}')
+        hooks.set_omp_num_threads(self)

From da17ec85965830c322a4d15ec2965f6006038a54 Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Mon, 23 Sep 2024 10:40:27 +0200
Subject: [PATCH 06/11] Fixed error

---
 eessi/testsuite/tests/apps/MetalWalls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index bfeaa0e7..057a294a 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -112,7 +112,7 @@ def skip_max_corecnt(self):
         max_corecnt = 256
         self.skip_if(
             self.num_tasks > max_corecnt,
-            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {self.bench_name}'
+            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {self.benchmark_info[0]}'
         )
 
     @run_after('setup')

From 3aa9f346ca5e486358a32366935bbc3ff7fa437f Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Mon, 23 Sep 2024 10:42:54 +0200
Subject: [PATCH 07/11] linting

---
 eessi/testsuite/tests/apps/MetalWalls.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 057a294a..989cc92b 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -110,9 +110,10 @@ def request_mem(self):
     def skip_max_corecnt(self):
         """Skip tests if number of tasks per node exceeds maximum core count."""
         max_corecnt = 256
+        bench_name = self.benchmark_info[0]
         self.skip_if(
             self.num_tasks > max_corecnt,
-            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {self.benchmark_info[0]}'
+            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {bench_name}'
         )
 
     @run_after('setup')

From cf9f0706fb34d2800ea6f7e0ecb8d574f3d3636f Mon Sep 17 00:00:00 2001
From: Davide Grassano <34096612+Crivella@users.noreply.github.com>
Date: Thu, 10 Oct 2024 15:45:19 +0200
Subject: [PATCH 08/11] Apply suggestions from code review

Co-authored-by: Caspar van Leeuwen <33718780+casparvl@users.noreply.github.com>
---
 eessi/testsuite/tests/apps/MetalWalls.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 989cc92b..35fcba10 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -109,11 +109,11 @@ def request_mem(self):
     @run_after('setup')
     def skip_max_corecnt(self):
         """Skip tests if number of tasks per node exceeds maximum core count."""
-        max_corecnt = 256
+        max_task_cnt = 256
         bench_name = self.benchmark_info[0]
         self.skip_if(
             self.num_tasks > max_corecnt,
-            f'Number of tasks per node {self.num_tasks} exceeds maximum core count {max_corecnt} for {bench_name}'
+            f'Number of tasks {self.num_tasks} exceeds maximum task count {max_task_cnt} for {bench_name}'
         )
 
     @run_after('setup')

From c2c3f2a7606e989c58b827265ad4671710b54b4a Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Thu, 10 Oct 2024 15:47:12 +0200
Subject: [PATCH 09/11] Fixed var name

---
 eessi/testsuite/tests/apps/MetalWalls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 35fcba10..3c29546f 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -112,7 +112,7 @@ def skip_max_corecnt(self):
         max_task_cnt = 256
         bench_name = self.benchmark_info[0]
         self.skip_if(
-            self.num_tasks > max_corecnt,
+            self.num_tasks > max_task_cnt,
             f'Number of tasks {self.num_tasks} exceeds maximum task count {max_task_cnt} for {bench_name}'
         )
 

From 6b04d5308928fcd152efe3402ca480c1b20919ea Mon Sep 17 00:00:00 2001
From: crivella <davide.grassano@epfl.ch>
Date: Thu, 10 Oct 2024 16:40:09 +0200
Subject: [PATCH 10/11] Added increased time limit for low-core runs

---
 eessi/testsuite/tests/apps/MetalWalls.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 3c29546f..20c053eb 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -80,6 +80,14 @@ def set_tag_ci(self):
             self.tags.add(TAGS['CI'])
             log(f'tags set to {self.tags}')
 
+    @run_after('init')
+    def set_increased_walltime(self):
+        """Increase the amount of time for the largest benchmark, when running with few cores."""
+        # List of benchmarks that require more time to run
+        large_benchmarks = ['hackathonGPU/benchmark2']
+        if self.num_tasks <= 4 and self.benchmark_info[0] in large_benchmarks:
+            self.time_limit = '120m'
+
     @run_after('setup')
     def run_after_setup(self):
         """Hooks to run after the setup phase"""

From 68cff9a38e73f66dd06854ee20e258497650fe5a Mon Sep 17 00:00:00 2001
From: crivella <davidecrivella@yahoo.it>
Date: Fri, 18 Oct 2024 11:36:55 +0200
Subject: [PATCH 11/11] Increased base timeout to 60m

---
 eessi/testsuite/tests/apps/MetalWalls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py
index 20c053eb..cc4e9036 100644
--- a/eessi/testsuite/tests/apps/MetalWalls.py
+++ b/eessi/testsuite/tests/apps/MetalWalls.py
@@ -48,7 +48,7 @@ class EESSI_MetalWalls_MW(MetalWallsCheck):
 
     valid_systems = ['*']
     valid_prog_environs = ['default']
-    time_limit = '30m'
+    time_limit = '60m'
 
     module_name = parameter(find_modules('MetalWalls'))
     # For now, MetalWalls is being build for CPU targets only