Let bsub retry on LSF message "Request from non-LSF host rejected" #9195

Merged · 7 commits · Nov 13, 2024
2 changes: 1 addition & 1 deletion .github/workflows/test_ert.yml
@@ -50,7 +50,7 @@ jobs:
- name: CLI Test
if: inputs.test-type == 'cli-tests'
run: |
-pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -n logical -v --benchmark-disable --dist loadgroup tests/ui_tests/cli
+pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -n logical --maxprocesses=2 -v --benchmark-disable --dist loadgroup tests/ui_tests/cli

- name: Unit Test
if: inputs.test-type == 'unit-tests'
7 changes: 3 additions & 4 deletions ci/testkomodo.sh
@@ -15,7 +15,7 @@ install_test_dependencies () {
pip install ".[dev]"
}

-run_ert_with_opm () {
+run_ert_with_opm() {
pushd "${CI_TEST_ROOT}"

cp -r "${CI_SOURCE_ROOT}/test-data/flow_example" ert_with_opm
@@ -24,7 +24,7 @@ run_ert_with_opm () {
ert test_run flow.ert ||
(
# In case ert fails, print log files if they are there:
-cat spe1_out/realization-0/iter-0/STATUS || true
+cat spe1_out/realization-0/iter-0/STATUS || true
cat spe1_out/realization-0/iter-0/ERROR || true
cat spe1_out/realization-0/iter-0/FLOW.stderr.0 || true
cat spe1_out/realization-0/iter-0/FLOW.stdout.0 || true
@@ -41,7 +41,7 @@ start_tests () {
pushd ${CI_TEST_ROOT}/tests

# Run all ert tests except tests evaluating memory consumption and tests requiring windows manager (GUI tests)
-pytest --eclipse-simulator -n logical --show-capture=stderr -v --max-worker-restart 0 \
+pytest --eclipse-simulator -n auto --show-capture=stderr -v --max-worker-restart 0 \
-m "not limit_memory and not requires_window_manager" --benchmark-disable --dist loadgroup
return_code_ert_main_tests=$?

@@ -72,7 +72,6 @@ start_tests () {

set -e


return_code_combined_tests=0
# We error if one or more returncodes are nonzero
if [ "$return_code_ert_main_tests" -ne 0 ]; then
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -60,7 +60,7 @@ dependencies = [
"python-dateutil",
"python-multipart",
"pyyaml",
"qtpy",
"qtpy==2.4.1",
"requests",
"resfo",
"scipy >= 1.10.1",
@@ -71,7 +71,7 @@ dependencies = [
"tqdm>=4.62.0",
"typing_extensions>=4.5",
"uvicorn >= 0.17.0",
"websockets",
"websockets < 14",
"xarray",
"xtgeo >= 3.3.0",
]
9 changes: 8 additions & 1 deletion src/ert/scheduler/lsf_driver.py
@@ -94,7 +94,14 @@ class RunningJob:
LSF_INFO_JSON_FILENAME = "lsf_info.json"
FLAKY_SSH_RETURNCODE = 255
JOB_ALREADY_FINISHED_BKILL_MSG = "Job has already finished"
BSUB_FAILURE_MESSAGES = ("Job not submitted",)
BSUB_FAILURE_MESSAGES = (
"Error in rusage section",
"Expeced number, string",
"No such queue",
"Too many processors requested",
"cannot be used in the resource requirement section",
"duplicate section",
)


def _parse_jobs_dict(jobs: Mapping[str, JobState]) -> dict[str, AnyJob]:
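The new tuple enumerates bsub error messages that are permanent, so retrying the same submission cannot help, while everything else, notably the transient "Request from non-LSF host rejected" message this PR targets, is now retried. As a rough illustration only (the helper name and structure below are hypothetical and not the actual lsf_driver API), the screening could look like this:

```python
# Hypothetical sketch, not the actual ert scheduler code: screen bsub output
# so permanent configuration errors fail fast while transient errors are retried.
FLAKY_SSH_RETURNCODE = 255
PERMANENT_BSUB_MESSAGES = (
    "Error in rusage section",
    "No such queue",
    "Too many processors requested",
)


def should_retry_bsub(returncode: int, stderr: str) -> bool:
    """Return True if a failed bsub invocation looks worth retrying."""
    if returncode == 0:
        return False  # submission succeeded, nothing to retry
    if any(message in stderr for message in PERMANENT_BSUB_MESSAGES):
        return False  # permanent error, the same request will fail again
    # Everything else is treated as transient, e.g. returncode 255 from a
    # flaky ssh hop or "Request from non-LSF host rejected".
    return True
```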
25 changes: 21 additions & 4 deletions tests/unit_tests/forward_model_runner/test_job.py
@@ -122,7 +122,7 @@ def max_memory_per_subprocess_layer(layers: int) -> int:
job = Job(
{
"executable": executable,
"argList": [str(layers), str(int(1e6))],
"argList": [str(layers), str(int(1e7))],
},
0,
)
@@ -144,7 +144,8 @@ def max_memory_per_subprocess_layer(layers: int) -> int:
assert max_seens[1] + memory_per_numbers_list < max_seens[2]


-@pytest.mark.flaky(reruns=3)
+@pytest.mark.integration_test
+@pytest.mark.flaky(reruns=5)
@pytest.mark.usefixtures("use_tmpdir")
def test_memory_profile_in_running_events():
scriptname = "increasing_memory.py"
@@ -190,10 +191,26 @@ def test_memory_profile_in_running_events():
# Avoid the tail of the array, then the process is tearing down
).all(), f"Emitted memory usage not increasing, got {emitted_rss_values[:-3]=}"

+memory_deltas = np.diff(np.array(emitted_rss_values[7:]))
+if not len(memory_deltas):
+    # This can happen if memory profiling is lagging behind the process
+    # we are trying to track.
+    memory_deltas = np.diff(np.array(emitted_rss_values[2:]))
+
+lenience_factor = 4
+# Ideally this is 1 which corresponds to being able to track every memory
+# allocation perfectly. But on loaded hardware, some of the allocations can be
+# missed due to process scheduling. Bump as needed.
+
assert (
-    np.diff(np.array(emitted_rss_values[7:])).max() < 3 * 1024 * 1024
+    max(memory_deltas) < lenience_factor * 1024 * 1024
    # Avoid the first steps, which includes the Python interpreters memory usage
-), f"Memory increased too sharply, missing a measurement? Got {emitted_rss_values[7:]=}"
+), (
+    "Memory increased too sharply, missing a measurement? "
+    f"Got {emitted_rss_values=} with selected diffs {memory_deltas}. "
+    "If the maximal number is at the beginning, it is probably the Python process "
+    "startup that is tracked."
+)

if sys.platform.startswith("darwin"):
# No oom_score on MacOS
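The reworked assertion simply bounds the largest jump between consecutive RSS samples. A self-contained sketch of that check, using made-up sample values rather than the event stream the real test consumes:

```python
import numpy as np

# Made-up RSS samples in bytes; the real test collects them from the emitted
# running events while the tracked script allocates roughly 1 MB per step.
emitted_rss_values = [30.0e6, 30.9e6, 31.8e6, 32.9e6, 33.7e6, 34.8e6]

# Prefer skipping the first samples (interpreter startup); fall back to a
# shorter skip if profiling lagged behind and few samples were captured.
memory_deltas = np.diff(np.array(emitted_rss_values[7:]))
if not len(memory_deltas):
    memory_deltas = np.diff(np.array(emitted_rss_values[2:]))

lenience_factor = 4  # tolerate a few missed samples on loaded hardware
assert max(memory_deltas) < lenience_factor * 1024 * 1024
```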
8 changes: 2 additions & 6 deletions tests/unit_tests/scheduler/test_generic_driver.py
@@ -162,13 +162,9 @@ async def test_kill_actually_kills(driver: Driver, tmp_path, pytestconfig):
# Allow more time when tested on a real compute cluster to avoid false positives.
job_kill_window = 60
test_grace_time = 120
-elif sys.platform.startswith("darwin"):
-    # Mitigate flakiness on low-power test nodes
-    job_kill_window = 5
-    test_grace_time = 8
else:
-    job_kill_window = 1
-    test_grace_time = 2
+    job_kill_window = 5  # Busy test nodes require a long kill window
+    test_grace_time = 8

async def kill_job_once_started(iens):
nonlocal driver
3 changes: 2 additions & 1 deletion tests/unit_tests/scheduler/test_lsf_driver.py
@@ -578,7 +578,6 @@ async def test_that_bsub_will_retry_and_fail(
" '&' cannot be used in the resource requirement section. Job not submitted.",
),
(255, "Error in rusage section. Job not submitted."),
(255, "Job not submitted."),
],
)
async def test_that_bsub_will_fail_without_retries(
@@ -604,6 +603,8 @@ async def test_that_bsub_will_fail_without_retries(
[
(0, "void"),
(FLAKY_SSH_RETURNCODE, ""),
(0, "Request from non-LSF host rejected"),
(FLAKY_SSH_RETURNCODE, "Request from non-LSF host rejected"),
],
)
async def test_that_bsub_will_retry_and_succeed(
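The two new parametrizations assert that a submission whose output contains "Request from non-LSF host rejected" is retried until it succeeds, both with exit code 0 and with the flaky-ssh exit code 255. The real test exercises this through the suite's mocked bsub; purely as an illustration (the helper below is hypothetical and not part of the test suite), a fake bsub that rejects the first attempt and accepts the second could be written like this:

```python
import stat
import subprocess
from pathlib import Path


def write_flaky_bsub(directory: Path) -> Path:
    """Write a fake bsub that fails once with the transient LSF message."""
    script = directory / "bsub"
    marker = directory / "attempted"
    script.write_text(
        "#!/bin/sh\n"
        f'if [ ! -f "{marker}" ]; then\n'
        f'  touch "{marker}"\n'
        "  echo 'Request from non-LSF host rejected' >&2\n"
        "  exit 255\n"
        "fi\n"
        "echo 'Job <1> is submitted to default queue <normal>.'\n"
    )
    script.chmod(script.stat().st_mode | stat.S_IEXEC)
    return script


if __name__ == "__main__":
    fake_bsub = write_flaky_bsub(Path.cwd())
    for attempt in (1, 2):
        # First call is rejected, second succeeds; a retrying driver would
        # therefore end up with a submitted job.
        result = subprocess.run([str(fake_bsub)], capture_output=True, text=True)
        print(attempt, result.returncode, (result.stdout or result.stderr).strip())
```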