From 5f4eaf81ad274c4c1c51edc6cc8b297c4df5caa5 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Mon, 9 Dec 2024 09:14:14 +0100 Subject: [PATCH 1/4] test: llm eval, add correctness score, port alertmanager test cases (#216) --- holmes/main.py | 1 + poetry.lock | 180 +++++++++--------- tests/llm/README.md | 8 +- .../kubectl_find_resource.txt | 48 ++--- .../kubectl_find_resource_pod_by_keyword.txt | 48 ++--- .../01_how_many_pods/test_case.yaml | 8 +- .../02_what_is_wrong_with_pod/test_case.yaml | 4 +- .../test_case.yaml | 10 +- .../04_related_k8s_events/test_case.yaml | 17 +- .../05_image_version/test_case.yaml | 3 +- .../06_explain_issue/test_case.yaml | 9 +- .../07_high_latency/test_case.yaml | 7 +- .../08_sock_shop_frontend/test_case.yaml | 11 +- .../09_crashpod/test_case.yaml | 5 +- .../10_image_pull_backoff/test_case.yaml | 20 +- .../11_init_containers/test_case.yaml | 9 +- .../12_job_crashing/test_case.yaml | 9 +- .../13_pending_node_selector/test_case.yaml | 8 +- .../14_pending_resources/test_case.yaml | 6 +- .../15_failed_readiness_probe/test_case.yaml | 18 +- .../16_failed_no_toolset_found/test_case.yaml | 5 +- .../17_oom_kill/test_case.yaml | 8 +- .../18_crash_looping_v2/test_case.yaml | 18 +- .../test_case.yaml | 5 +- .../kubectl_find_resource_pod_by_keyword.txt | 23 --- .../1_how_many_pods/kubectl_get_node.txt | 3 - .../1_how_many_pods/kubectl_get_pod.txt | 2 - .../1_how_many_pods/kubectl_top_pods.txt | 6 - .../20_long_log_file_search/test_case.yaml | 3 +- .../test_case.yaml | 12 +- .../22_high_latency_dbi_down/test_case.yaml | 5 +- .../test_case.yaml | 8 +- .../24_misconfigured_pvc/test_case.yaml | 6 +- .../test_case.yaml | 8 +- .../01_oom_kill/test_case.yaml | 32 +--- .../02_crashloop_backoff/test_case.yaml | 35 +--- .../03_cpu_throttling/test_case.yaml | 32 +--- .../04_image_pull_backoff/test_case.yaml | 25 +-- .../05_crashpod/test_case.yaml | 32 +--- .../06_job_failure/test_case.yaml | 22 +-- .../07_job_syntax_error/test_case.yaml | 32 +--- .../08_memory_pressure/test_case.yaml | 24 +-- .../09_high_latency/test_case.yaml | 22 +-- .../investigate_request.json | 15 ++ .../issue_data.json | 31 +++ .../kubectl_get_all.txt | 6 + .../resource_instructions.json | 6 + .../test_case.yaml | 3 + .../investigate_request.json | 22 +++ .../issue_data.json | 39 ++++ .../kubectl_describe.txt | 42 ++++ .../kubectl_describe_pod.txt | 64 +++++++ .../kubectl_get_all.txt | 54 ++++++ .../kubectl_get_deployment.txt | 6 + .../kubectl_get_pod.txt | 6 + .../kubectl_logs.txt | 5 + .../resource_instructions.json | 1 + .../test_case.yaml | 5 + .../investigate_request.json | 22 +++ .../12_KubePodCrashLooping/issue_data.json | 40 ++++ .../kubectl_describe.txt | 72 +++++++ .../12_KubePodCrashLooping/kubectl_logs.txt | 5 + .../kubectl_previous_logs.txt | 5 + .../resource_instructions.json | 1 + .../12_KubePodCrashLooping/test_case.yaml | 3 + .../investigate_request.json | 22 +++ .../13_KubePodNotReady/issue_data.json | 33 ++++ .../13_KubePodNotReady/kubectl_describe.txt | 41 ++++ .../13_KubePodNotReady/kubectl_events.txt | 7 + .../13_KubePodNotReady/kubectl_logs.txt | 4 + .../kubectl_previous_logs.txt | 4 + .../resource_instructions.json | 1 + .../13_KubePodNotReady/test_case.yaml | 2 + .../14_Watchdog/investigate_request.json | 15 ++ .../14_Watchdog/issue_data.json | 31 +++ .../14_Watchdog/resource_instructions.json | 1 + .../14_Watchdog/test_case.yaml | 4 + tests/llm/test_ask_holmes.py | 29 +-- tests/llm/test_investigate.py | 34 ++-- tests/llm/utils/braintrust.py | 6 +- tests/llm/utils/classifiers.py | 
56 +++++- tests/llm/utils/mock_utils.py | 7 +- 82 files changed, 992 insertions(+), 555 deletions(-) delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json create mode 100644 
tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml diff --git a/holmes/main.py b/holmes/main.py index bfc0b79d..be5bb12f 100644 --- a/holmes/main.py +++ b/holmes/main.py @@ -394,6 +394,7 @@ def alertmanager( results.append({"issue": issue.model_dump(), "result": result.model_dump()}) handle_result(result, console, destination, config, issue, False, True) + if json_output_file: write_json_file(json_output_file, results) diff --git a/poetry.lock b/poetry.lock index 203e57bf..93687fc9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,87 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.11.9" +version = "3.11.10" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" files = [ - {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0411777249f25d11bd2964a230b3ffafcbed6cd65d0f2b132bc2b8f5b8c347c7"}, - {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:499368eb904566fbdf1a3836a1532000ef1308f34a1bcbf36e6351904cced771"}, - {file = "aiohttp-3.11.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b5a5009b0159a8f707879dc102b139466d8ec6db05103ec1520394fdd8ea02c"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:176f8bb8931da0613bb0ed16326d01330066bb1e172dd97e1e02b1c27383277b"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6435a66957cdba1a0b16f368bde03ce9c79c57306b39510da6ae5312a1a5b2c1"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:202f40fb686e5f93908eee0c75d1e6fbe50a43e9bd4909bf3bf4a56b560ca180"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39625703540feb50b6b7f938b3856d1f4886d2e585d88274e62b1bd273fae09b"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6beeac698671baa558e82fa160be9761cf0eb25861943f4689ecf9000f8ebd0"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96726839a42429318017e67a42cca75d4f0d5248a809b3cc2e125445edd7d50d"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3f5461c77649358610fb9694e790956b4238ac5d9e697a17f63619c096469afe"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4313f3bc901255b22f01663eeeae167468264fdae0d32c25fc631d5d6e15b502"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:d6e274661c74195708fc4380a4ef64298926c5a50bb10fbae3d01627d7a075b7"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:db2914de2559809fdbcf3e48f41b17a493b58cb7988d3e211f6b63126c55fe82"}, - {file = "aiohttp-3.11.9-cp310-cp310-win32.whl", hash = "sha256:27935716f8d62c1c73010428db310fd10136002cfc6d52b0ba7bdfa752d26066"}, - {file = "aiohttp-3.11.9-cp310-cp310-win_amd64.whl", hash = "sha256:afbe85b50ade42ddff5669947afde9e8a610e64d2c80be046d67ec4368e555fa"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:afcda759a69c6a8be3aae764ec6733155aa4a5ad9aad4f398b52ba4037942fe3"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5bba6b83fde4ca233cfda04cbd4685ab88696b0c8eaf76f7148969eab5e248a"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:442356e8924fe1a121f8c87866b0ecdc785757fd28924b17c20493961b3d6697"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f737fef6e117856400afee4f17774cdea392b28ecf058833f5eca368a18cf1bf"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea142255d4901b03f89cb6a94411ecec117786a76fc9ab043af8f51dd50b5313"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e1e9e447856e9b7b3d38e1316ae9a8c92e7536ef48373de758ea055edfd5db5"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7f6173302f8a329ca5d1ee592af9e628d3ade87816e9958dcf7cdae2841def7"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c6147c6306f537cff59409609508a1d2eff81199f0302dd456bb9e7ea50c39"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e9d036a9a41fc78e8a3f10a86c2fc1098fca8fab8715ba9eb999ce4788d35df0"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2ac9fd83096df36728da8e2f4488ac3b5602238f602706606f3702f07a13a409"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d3108f0ad5c6b6d78eec5273219a5bbd884b4aacec17883ceefaac988850ce6e"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:96bbec47beb131bbf4bae05d8ef99ad9e5738f12717cfbbf16648b78b0232e87"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fc726c3fa8f606d07bd2b500e5dc4c0fd664c59be7788a16b9e34352c50b6b6b"}, - {file = "aiohttp-3.11.9-cp311-cp311-win32.whl", hash = "sha256:5720ebbc7a1b46c33a42d489d25d36c64c419f52159485e55589fbec648ea49a"}, - {file = "aiohttp-3.11.9-cp311-cp311-win_amd64.whl", hash = "sha256:17af09d963fa1acd7e4c280e9354aeafd9e3d47eaa4a6bfbd2171ad7da49f0c5"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1f2d7fd583fc79c240094b3e7237d88493814d4b300d013a42726c35a734bc9"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4b8a1b6c7a68c73191f2ebd3bf66f7ce02f9c374e309bdb68ba886bbbf1b938"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd3f711f4c99da0091ced41dccdc1bcf8be0281dc314d6d9c6b6cf5df66f37a9"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cb1a1326a0264480a789e6100dc3e07122eb8cd1ad6b784a3d47d13ed1d89c"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a7ddf981a0b953ade1c2379052d47ccda2f58ab678fca0671c7c7ca2f67aac2"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ffa45cc55b18d4ac1396d1ddb029f139b1d3480f1594130e62bceadf2e1a838"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cca505829cdab58c2495ff418c96092d225a1bbd486f79017f6de915580d3c44"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:44d323aa80a867cb6db6bebb4bbec677c6478e38128847f2c6b0f70eae984d72"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b2fab23003c4bb2249729a7290a76c1dda38c438300fdf97d4e42bf78b19c810"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:be0c7c98e38a1e3ad7a6ff64af8b6d6db34bf5a41b1478e24c3c74d9e7f8ed42"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5cc5e0d069c56645446c45a4b5010d4b33ac6c5ebfd369a791b5f097e46a3c08"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9bcf97b971289be69638d8b1b616f7e557e1342debc7fc86cf89d3f08960e411"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c7333e7239415076d1418dbfb7fa4df48f3a5b00f8fdf854fca549080455bc14"}, - {file = "aiohttp-3.11.9-cp312-cp312-win32.whl", hash = "sha256:9384b07cfd3045b37b05ed002d1c255db02fb96506ad65f0f9b776b762a7572e"}, - {file = "aiohttp-3.11.9-cp312-cp312-win_amd64.whl", hash = "sha256:f5252ba8b43906f206048fa569debf2cd0da0316e8d5b4d25abe53307f573941"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:282e0a7ddd36ebc411f156aeaa0491e8fe7f030e2a95da532cf0c84b0b70bc66"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebd3e6b0c7d4954cca59d241970011f8d3327633d555051c430bd09ff49dc494"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30f9f89ae625d412043f12ca3771b2ccec227cc93b93bb1f994db6e1af40a7d3"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a3b5b2c012d70c63d9d13c57ed1603709a4d9d7d473e4a9dfece0e4ea3d5f51"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ef1550bb5f55f71b97a6a395286db07f7f2c01c8890e613556df9a51da91e8d"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317251b9c9a2f1a9ff9cd093775b34c6861d1d7df9439ce3d32a88c275c995cd"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbe97839b009826a61b143d3ca4964c8590d7aed33d6118125e5b71691ca46"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:618b18c3a2360ac940a5503da14fa4f880c5b9bc315ec20a830357bcc62e6bae"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0cf4d814689e58f57ecd5d8c523e6538417ca2e72ff52c007c64065cef50fb2"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:15c4e489942d987d5dac0ba39e5772dcbed4cc9ae3710d1025d5ba95e4a5349c"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ec8df0ff5a911c6d21957a9182402aad7bf060eaeffd77c9ea1c16aecab5adbf"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ed95d66745f53e129e935ad726167d3a6cb18c5d33df3165974d54742c373868"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:647ec5bee7e4ec9f1034ab48173b5fa970d9a991e565549b965e93331f1328fe"}, - {file = "aiohttp-3.11.9-cp313-cp313-win32.whl", hash = "sha256:ef2c9499b7bd1e24e473dc1a85de55d72fd084eea3d8bdeec7ee0720decb54fa"}, - {file = "aiohttp-3.11.9-cp313-cp313-win_amd64.whl", hash = "sha256:84de955314aa5e8d469b00b14d6d714b008087a0222b0f743e7ffac34ef56aff"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:e738aabff3586091221044b7a584865ddc4d6120346d12e28e788307cd731043"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:28f29bce89c3b401a53d6fd4bee401ee943083bf2bdc12ef297c1d63155070b0"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31de2f10f63f96cc19e04bd2df9549559beadd0b2ee2da24a17e7ed877ca8c60"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f31cebd8c27a36af6c7346055ac564946e562080ee1a838da724585c67474f"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0bcb7f6976dc0b6b56efde13294862adf68dd48854111b422a336fa729a82ea6"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8b13b9950d8b2f8f58b6e5842c4b842b5887e2c32e3f4644d6642f1659a530"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9c23e62f3545c2216100603614f9e019e41b9403c47dd85b8e7e5015bf1bde0"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec656680fc53a13f849c71afd0c84a55c536206d524cbc831cde80abbe80489e"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:36df00e0541f264ce42d62280281541a47474dfda500bc5b7f24f70a7f87be7a"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8dcfd14c712aa9dd18049280bfb2f95700ff6a8bde645e09f17c3ed3f05a0130"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:14624d96f0d69cf451deed3173079a68c322279be6030208b045ab77e1e8d550"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4b01d9cfcb616eeb6d40f02e66bebfe7b06d9f2ef81641fdd50b8dd981166e0b"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:928f92f80e2e8d6567b87d3316c1fd9860ccfe36e87a9a7f5237d4cda8baa1ba"}, - {file = "aiohttp-3.11.9-cp39-cp39-win32.whl", hash = "sha256:c8a02f74ae419e3955af60f570d83187423e42e672a6433c5e292f1d23619269"}, - {file = "aiohttp-3.11.9-cp39-cp39-win_amd64.whl", hash = "sha256:0a97d657f6cf8782a830bb476c13f7d777cfcab8428ac49dde15c22babceb361"}, - {file = "aiohttp-3.11.9.tar.gz", hash = "sha256:a9266644064779840feec0e34f10a89b3ff1d2d6b751fe90017abcad1864fa7c"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cbad88a61fa743c5d283ad501b01c153820734118b65aee2bd7dbb735475ce0d"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80886dac673ceaef499de2f393fc80bb4481a129e6cb29e624a12e3296cc088f"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:61b9bae80ed1f338c42f57c16918853dc51775fb5cb61da70d590de14d8b5fb4"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e2e576caec5c6a6b93f41626c9c02fc87cd91538b81a3670b2e04452a63def6"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02c13415b5732fb6ee7ff64583a5e6ed1c57aa68f17d2bda79c04888dfdc2769"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfce37f31f20800a6a6620ce2cdd6737b82e42e06e6e9bd1b36f546feb3c44f"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bbbfff4c679c64e6e23cb213f57cc2c9165c9a65d63717108a644eb5a7398df"}, + {file = 
"aiohttp-3.11.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49c7dbbc1a559ae14fc48387a115b7d4bbc84b4a2c3b9299c31696953c2a5219"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68386d78743e6570f054fe7949d6cb37ef2b672b4d3405ce91fafa996f7d9b4d"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9ef405356ba989fb57f84cac66f7b0260772836191ccefbb987f414bcd2979d9"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5d6958671b296febe7f5f859bea581a21c1d05430d1bbdcf2b393599b1cdce77"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:99b7920e7165be5a9e9a3a7f1b680f06f68ff0d0328ff4079e5163990d046767"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0dc49f42422163efb7e6f1df2636fe3db72713f6cd94688e339dbe33fe06d61d"}, + {file = "aiohttp-3.11.10-cp310-cp310-win32.whl", hash = "sha256:40d1c7a7f750b5648642586ba7206999650208dbe5afbcc5284bcec6579c9b91"}, + {file = "aiohttp-3.11.10-cp310-cp310-win_amd64.whl", hash = "sha256:68ff6f48b51bd78ea92b31079817aff539f6c8fc80b6b8d6ca347d7c02384e33"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:77c4aa15a89847b9891abf97f3d4048f3c2d667e00f8a623c89ad2dccee6771b"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:909af95a72cedbefe5596f0bdf3055740f96c1a4baa0dd11fd74ca4de0b4e3f1"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:386fbe79863eb564e9f3615b959e28b222259da0c48fd1be5929ac838bc65683"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3de34936eb1a647aa919655ff8d38b618e9f6b7f250cc19a57a4bf7fd2062b6d"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c9527819b29cd2b9f52033e7fb9ff08073df49b4799c89cb5754624ecd98299"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a96e3e03300b41f261bbfd40dfdbf1c301e87eab7cd61c054b1f2e7c89b9e8"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98f5635f7b74bcd4f6f72fcd85bea2154b323a9f05226a80bc7398d0c90763b0"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:03b6002e20938fc6ee0918c81d9e776bebccc84690e2b03ed132331cca065ee5"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6362cc6c23c08d18ddbf0e8c4d5159b5df74fea1a5278ff4f2c79aed3f4e9f46"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3691ed7726fef54e928fe26344d930c0c8575bc968c3e239c2e1a04bd8cf7838"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31d5093d3acd02b31c649d3a69bb072d539d4c7659b87caa4f6d2bcf57c2fa2b"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8b3cf2dc0f0690a33f2d2b2cb15db87a65f1c609f53c37e226f84edb08d10f52"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fbbaea811a2bba171197b08eea288b9402faa2bab2ba0858eecdd0a4105753a3"}, + {file = "aiohttp-3.11.10-cp311-cp311-win32.whl", hash = "sha256:4b2c7ac59c5698a7a8207ba72d9e9c15b0fc484a560be0788b31312c2c5504e4"}, + {file = "aiohttp-3.11.10-cp311-cp311-win_amd64.whl", hash = 
"sha256:974d3a2cce5fcfa32f06b13ccc8f20c6ad9c51802bb7f829eae8a1845c4019ec"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b78f053a7ecfc35f0451d961dacdc671f4bcbc2f58241a7c820e9d82559844cf"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab7485222db0959a87fbe8125e233b5a6f01f4400785b36e8a7878170d8c3138"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cf14627232dfa8730453752e9cdc210966490992234d77ff90bc8dc0dce361d5"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:076bc454a7e6fd646bc82ea7f98296be0b1219b5e3ef8a488afbdd8e81fbac50"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:482cafb7dc886bebeb6c9ba7925e03591a62ab34298ee70d3dd47ba966370d2c"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf3d1a519a324af764a46da4115bdbd566b3c73fb793ffb97f9111dbc684fc4d"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24213ba85a419103e641e55c27dc7ff03536c4873470c2478cce3311ba1eee7b"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b99acd4730ad1b196bfb03ee0803e4adac371ae8efa7e1cbc820200fc5ded109"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:14cdb5a9570be5a04eec2ace174a48ae85833c2aadc86de68f55541f66ce42ab"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7e97d622cb083e86f18317282084bc9fbf261801b0192c34fe4b1febd9f7ae69"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:012f176945af138abc10c4a48743327a92b4ca9adc7a0e078077cdb5dbab7be0"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44224d815853962f48fe124748227773acd9686eba6dc102578defd6fc99e8d9"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c87bf31b7fdab94ae3adbe4a48e711bfc5f89d21cf4c197e75561def39e223bc"}, + {file = "aiohttp-3.11.10-cp312-cp312-win32.whl", hash = "sha256:06a8e2ee1cbac16fe61e51e0b0c269400e781b13bcfc33f5425912391a542985"}, + {file = "aiohttp-3.11.10-cp312-cp312-win_amd64.whl", hash = "sha256:be2b516f56ea883a3e14dda17059716593526e10fb6303189aaf5503937db408"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8cc5203b817b748adccb07f36390feb730b1bc5f56683445bfe924fc270b8816"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ef359ebc6949e3a34c65ce20230fae70920714367c63afd80ea0c2702902ccf"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9bca390cb247dbfaec3c664326e034ef23882c3f3bfa5fbf0b56cad0320aaca5"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811f23b3351ca532af598405db1093f018edf81368e689d1b508c57dcc6b6a32"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddf5f7d877615f6a1e75971bfa5ac88609af3b74796ff3e06879e8422729fd01"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ab29b8a0beb6f8eaf1e5049252cfe74adbaafd39ba91e10f18caeb0e99ffb34"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c49a76c1038c2dd116fa443eba26bbb8e6c37e924e2513574856de3b6516be99"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f3dc0e330575f5b134918976a645e79adf333c0a1439dcf6899a80776c9ab39"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:efb15a17a12497685304b2d976cb4939e55137df7b09fa53f1b6a023f01fcb4e"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:db1d0b28fcb7f1d35600150c3e4b490775251dea70f894bf15c678fdd84eda6a"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:15fccaf62a4889527539ecb86834084ecf6e9ea70588efde86e8bc775e0e7542"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:593c114a2221444f30749cc5e5f4012488f56bd14de2af44fe23e1e9894a9c60"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7852bbcb4d0d2f0c4d583f40c3bc750ee033265d80598d0f9cb6f372baa6b836"}, + {file = "aiohttp-3.11.10-cp313-cp313-win32.whl", hash = "sha256:65e55ca7debae8faaffee0ebb4b47a51b4075f01e9b641c31e554fd376595c6c"}, + {file = "aiohttp-3.11.10-cp313-cp313-win_amd64.whl", hash = "sha256:beb39a6d60a709ae3fb3516a1581777e7e8b76933bb88c8f4420d875bb0267c6"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0580f2e12de2138f34debcd5d88894786453a76e98febaf3e8fe5db62d01c9bf"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a55d2ad345684e7c3dd2c20d2f9572e9e1d5446d57200ff630e6ede7612e307f"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04814571cb72d65a6899db6099e377ed00710bf2e3eafd2985166f2918beaf59"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e44a9a3c053b90c6f09b1bb4edd880959f5328cf63052503f892c41ea786d99f"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:502a1464ccbc800b4b1995b302efaf426e8763fadf185e933c2931df7db9a199"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:613e5169f8ae77b1933e42e418a95931fb4867b2991fc311430b15901ed67079"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cca22a61b7fe45da8fc73c3443150c3608750bbe27641fc7558ec5117b27fdf"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86a5dfcc39309470bd7b68c591d84056d195428d5d2e0b5ccadfbaf25b026ebc"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:77ae58586930ee6b2b6f696c82cf8e78c8016ec4795c53e36718365f6959dc82"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:78153314f26d5abef3239b4a9af20c229c6f3ecb97d4c1c01b22c4f87669820c"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:98283b94cc0e11c73acaf1c9698dea80c830ca476492c0fe2622bd931f34b487"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:53bf2097e05c2accc166c142a2090e4c6fd86581bde3fd9b2d3f9e93dda66ac1"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c5532f0441fc09c119e1dca18fbc0687e64fbeb45aa4d6a87211ceaee50a74c4"}, + {file = "aiohttp-3.11.10-cp39-cp39-win32.whl", hash = "sha256:47ad15a65fb41c570cd0ad9a9ff8012489e68176e7207ec7b82a0940dddfd8be"}, + {file = "aiohttp-3.11.10-cp39-cp39-win_amd64.whl", hash = 
"sha256:c6b9e6d7e41656d78e37ce754813fa44b455c3d0d0dced2a047def7dc5570b74"}, + {file = "aiohttp-3.11.10.tar.gz", hash = "sha256:b1fc6b45010a8d0ff9e88f9f2418c6fd408c99c211257334aff41597ebece42e"}, ] [package.dependencies] @@ -136,24 +136,24 @@ files = [ [[package]] name = "anyio" -version = "4.6.2.post1" +version = "4.7.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" files = [ - {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, - {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, + {file = "anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352"}, + {file = "anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48"}, ] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -1634,13 +1634,13 @@ files = [ [[package]] name = "openai" -version = "1.56.2" +version = "1.57.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.56.2-py3-none-any.whl", hash = "sha256:82d0c48f9504e04c7797e9b799dcf7f49a246d99b6cbfd90f3193ea80815b69e"}, - {file = "openai-1.56.2.tar.gz", hash = "sha256:17312af69bc7670d4048f98ab5849f8784d98c39ac64fcde19406e3774a0c1e5"}, + {file = "openai-1.57.0-py3-none-any.whl", hash = "sha256:972e36960b821797952da3dc4532f486c28e28a2a332d7d0c5407f242e9d9c39"}, + {file = "openai-1.57.0.tar.gz", hash = "sha256:76f91971c4bdbd78380c9970581075e0337b5d497c2fbf7b5255078f4b31abf9"}, ] [package.dependencies] @@ -2702,17 +2702,17 @@ files = [ [[package]] name = "slack-bolt" -version = "1.21.2" +version = "1.21.3" description = "The Bolt Framework for Python" optional = false python-versions = ">=3.6" files = [ - {file = "slack_bolt-1.21.2-py2.py3-none-any.whl", hash = "sha256:6860fc8693ca543b653c5d49a09b8b542f5fb7a02638342a7ddd18d8bc6f3ba0"}, - {file = "slack_bolt-1.21.2.tar.gz", hash = "sha256:05ac2d454adfddfc629fb63c7a3723bd1432a24373119368bc81f2f52b029cbf"}, + {file = "slack_bolt-1.21.3-py2.py3-none-any.whl", hash = "sha256:f5c41bfd5758db2023a7591e93d05ebb218aaae704e0dc382ae400bc1d5f2670"}, + {file = "slack_bolt-1.21.3.tar.gz", hash = "sha256:f91ca5dc22da749a42e40a1885b466a4f2af1f65d40c544fccce56e4bb95f1fe"}, ] [package.dependencies] -slack-sdk = ">=3.33.1,<4" +slack_sdk = ">=3.33.5,<4" [[package]] name = "slack-sdk" diff --git a/tests/llm/README.md 
b/tests/llm/README.md index fe97ffc3..4d33f729 100644 --- a/tests/llm/README.md +++ b/tests/llm/README.md @@ -23,12 +23,13 @@ In this folder, add a `test_case.yaml` file: ```yaml user_prompt: 'Is pod xyz healthy? ' -expected_output: "Yes, pod xyz is healthy. It is running and there are no errors in the logs." +expected_output: + - pod xyz is running and healthy retrieval_context: - Any element of context. This will inform the evaluation score 'context' - These context elements are expected to be present in the output evaluation: # expected evaluation scores. The test will fail unless the LLM scores at least the following: - faithfulness: 0.5 # defaults to 0.3 + correctness: 0.5 # defaults to 0.3 context: 0 # defaults to 0 before-test: kubectl apply -f manifest.yaml after-test: kubectl delete -f manifest.yaml @@ -41,7 +42,7 @@ Here are the possible fields in the `test_case.yaml` yaml file: | Field | Type | Required/optional | Example value | Description | |-------------------|------------------|-------------------|-----------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | user_prompt | str | Required | Is pod xyz healthy? | The user prompt | -| expected_output | str | Required | Yes, pod xyz is healthy. It is running and there are no errors in the logs. | The expected answer from the LLM | +| expected_output | str or List[str] | Required | Yes, pod xyz is healthy. It is running and there are no errors in the logs. | The expected answer from the LLM. This can be a string or a list of expected elements. If it is a string, the response will be scored with 'faithfulness'. Otherwise it is 'correctness'. | | retrieval_context | List[str] | Optional | - pod xyz is running and healthy - there are no errors in the logs | Context that the LLM is expected to have used in its answer. If present, this generates a 'context' score proportional to the number of matching context elements found in the LLM's output. | | evaluation | Dict[str, float] | Optional | evaluation:
faithfulness: 1
context: 1
| The minimum expected scores. The test will fail unless these are met. Set to 0 for unstable tests. | | before-test | str | Optional | kubectl apply -f manifest.yaml | A command to run before the LLM evaluation. The CWD for this command is the same folder as the fixture. This step is skipped unless `RUN_LIVE` environment variable is set | @@ -67,3 +68,4 @@ The test may pass or not based on whether the evaluation scores are high enough. | BRAINTRUST_API_KEY | BRAINTRUST_API_KEY=sk-1dh1...swdO02 | The braintrust API key you get from your account. Log in https://www.braintrust.dev -> top right persona logo -> settings -> API keys. | | UPLOAD_DATASET | UPLOAD_DATASET=1 | Synchronise the dataset from the local machine to braintrust. This is usually safe as datasets are separated by branch name. | | EXPERIMENT_ID | EXPERIMENT_ID=nicolas_gemini_v1 | Override the experiment name in Braintrust. Helps with identifying and comparing experiments. Must be unique across ALL experiments. | +| MODEL | MODEL=anthropic/claude-3.5 | The model to use for generation. | diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt index fd0a5a8a..90a8ab70 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt @@ -1,23 +1,27 @@ {"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default 
prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: +NAME READY STATUS 
RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 28h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal +analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 295 (82s ago) 24h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal +customer-orders-57cc4b9565-tltfd 1/2 CreateContainerConfigError 0 24h 172.31.23.36 ip-172-31-21-139.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-4svkf 0/1 ImagePullBackOff 0 24h 172.31.45.91 ip-172-31-40-111.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 24h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nxggs 0/1 ImagePullBackOff 0 24h 172.31.23.28 ip-172-31-21-139.us-east-2.compute.internal +db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 246 (60s ago) 24h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-9pj7k 0/1 Error 0 24h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-vzm7z 0/1 Error 0 24h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal +logging-agent 0/1 Init:CrashLoopBackOff 296 (3m43s ago) 24h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal +order-processor 1/1 Running 258 (61m ago) 15d 172.31.23.45 ip-172-31-21-139.us-east-2.compute.internal +payment-processing-worker-74754cf949-k8jsd 0/1 CrashLoopBackOff 296 (3m38s ago) 24h 172.31.37.156 ip-172-31-40-111.us-east-2.compute.internal +prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 28h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal +robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 28h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal +robusta-grafana-597597b88-r8cql 3/3 Running 0 28h 172.31.38.193 ip-172-31-40-111.us-east-2.compute.internal +robusta-holmes-7869fc74bc-bmf2m 1/1 Running 0 28h 172.31.21.29 ip-172-31-21-139.us-east-2.compute.internal +robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 28h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal +robusta-kube-state-metrics-7964495ff6-77m58 1/1 Running 0 28h 172.31.40.222 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-7rqmn 1/1 Running 0 28h 172.31.21.139 ip-172-31-21-139.us-east-2.compute.internal +robusta-prometheus-node-exporter-dsd4f 1/1 Running 0 28h 172.31.40.111 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 28h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal +robusta-runner-6958c5bdd8-69gtn 1/1 Running 0 28h 172.31.25.172 ip-172-31-21-139.us-east-2.compute.internal +search-engine-service 0/1 Running 0 24h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal +user-profile-import 0/1 Pending 0 24h +user-profile-resources-685bfd9c86-sbcv6 0/1 Pending 0 24h diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt index fd0a5a8a..90a8ab70 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt @@ -1,23 +1,27 @@ {"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 
2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal 
app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 28h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal +analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 295 (82s ago) 24h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal +customer-orders-57cc4b9565-tltfd 1/2 CreateContainerConfigError 0 24h 172.31.23.36 ip-172-31-21-139.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-4svkf 0/1 ImagePullBackOff 0 24h 172.31.45.91 ip-172-31-40-111.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 24h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nxggs 0/1 ImagePullBackOff 0 24h 172.31.23.28 ip-172-31-21-139.us-east-2.compute.internal +db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 246 (60s ago) 24h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-9pj7k 0/1 Error 0 24h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-vzm7z 0/1 Error 0 24h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal +logging-agent 0/1 Init:CrashLoopBackOff 296 (3m43s ago) 24h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal +order-processor 1/1 Running 258 (61m ago) 15d 172.31.23.45 
ip-172-31-21-139.us-east-2.compute.internal +payment-processing-worker-74754cf949-k8jsd 0/1 CrashLoopBackOff 296 (3m38s ago) 24h 172.31.37.156 ip-172-31-40-111.us-east-2.compute.internal +prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 28h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal +robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 28h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal +robusta-grafana-597597b88-r8cql 3/3 Running 0 28h 172.31.38.193 ip-172-31-40-111.us-east-2.compute.internal +robusta-holmes-7869fc74bc-bmf2m 1/1 Running 0 28h 172.31.21.29 ip-172-31-21-139.us-east-2.compute.internal +robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 28h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal +robusta-kube-state-metrics-7964495ff6-77m58 1/1 Running 0 28h 172.31.40.222 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-7rqmn 1/1 Running 0 28h 172.31.21.139 ip-172-31-21-139.us-east-2.compute.internal +robusta-prometheus-node-exporter-dsd4f 1/1 Running 0 28h 172.31.40.111 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 28h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal +robusta-runner-6958c5bdd8-69gtn 1/1 Running 0 28h 172.31.25.172 ip-172-31-21-139.us-east-2.compute.internal +search-engine-service 0/1 Running 0 24h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal +user-profile-import 0/1 Pending 0 24h +user-profile-resources-685bfd9c86-sbcv6 0/1 Pending 0 24h diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml index 2ac5ca6a..98d58260 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml @@ -1,10 +1,12 @@ user_prompt: 'How many pods do I have running on << { "type": "node", "name": "ip-172-31-8-128.us-east-2.compute.internal" } >> ?' #user_prompt: "How many pods do I have running on node ip-172-31-8-128.us-east-2.compute.internal?" -expected_output: "5 pods are running on ip-172-31-8-128.us-east-2.compute.internal" +expected_output: + - 6 pods are in a running state on node ip-172-31-8-128.us-east-2.compute.internal retrieval_context: - There is a total of 12 pods on node ip-172-31-8-128.us-east-2.compute.internal - - There are 5 pods in running state - - 7 pods are not running as indicated by the STATUS column + - There are 6 pods in running state + - One of the pod is in running state but has a container in the process of starting or restarting + - 6 pods are not running as indicated by the STATUS column evaluation: answer_relevancy: 0 faithfulness: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml index 8933b508..3dbe5b72 100644 --- a/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml @@ -1,5 +1,7 @@ user_prompt: 'What is wrong with << { "type": "pod", "name": "robusta-runner-6958c5bdd8-69gtn" } >> ?' -expected_output: "The pod is currently running and healthy but has recently been killed due to it being out of memory. Increase the memory available to this pod." 
+expected_output: + - The pod was recently killed due to it being out of memory + - The pod is currently running and healthy retrieval_context: - | Last State: Terminate diff --git a/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml index 86c57bae..f30ba0ba 100644 --- a/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml @@ -1,13 +1,7 @@ user_prompt: 'what is the command to port-forward to << { "type": "pod", "name": "my_grafana_4j981" } >>' #user_prompt: "what is the command to port-forward to my grafana service?" -expected_output: | - To port-forward to the pod `my_grafana_4j981`, use the following command: - - ```bash - kubectl port-forward pod/my_grafana_4j981 3000:3000 -n default - ``` - - This command forwards port 3000 on your local machine to port 3000 on the pod. +expected_output: + - kubectl port-forward pod/my_grafana_4j981 3000:3000 -n default retrieval_context: - "The grafana service is running but does not have an external IP address" - "The name of the grafana service is my_grafana_4j981" diff --git a/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml index 8e47d997..4de7ca25 100644 --- a/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml @@ -1,13 +1,11 @@ user_prompt: 'What kubernetes events are related to << { "type": "pod", "name": "nginx-6958c5bdd8-69gtn" } >>' #user_prompt: "What kubernetes events are related to the nginx pod >>" -expected_output: | - Kubernetes events related to the `nginx-6958c5bdd8-69gtn` pod include: - - 1. **Scheduled**: Successfully assigned to node `aks-nodepool1-26081864-vmss000004`. - 2. **Pulling**: Pulling image "nginx". - 3. **Pulled**: Successfully pulled image "nginx". - 4. **Created**: Created container `nginxreplica`. - 5. **Started**: Started container `nginxreplica`. +expected_output: + - "Scheduled: Successfully assigned to node `aks-nodepool1-26081864-vmss000004`" + - "Pulling: Pulling image `nginx`" + - "Pulled: Successfully pulled image `nginx`" + - "Created: Created container `nginxreplica`" + - "Started: Started container `nginxreplica`" retrieval_context: - | Events: @@ -21,6 +19,3 @@ retrieval_context: evaluation: answer_relevancy: 0 faithfulness: 0 - contextual_precision: 0 - contextual_recall: 0 - contextual_relevancy: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml index 1af50eb8..783551b4 100644 --- a/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml @@ -1,5 +1,6 @@ user_prompt: 'what is the image version of << { "type": "pod", "name": "robusta-runner" } >>' #user_prompt: "what is the image version of my robusta-runner pod?" -expected_output: "The image version of the `robusta-runner` pod is `0.18.0`." +expected_output: + - The image version of the `robusta-runner` pod is `0.18.0`. 
retrieval_context: - "image: robustadev/robusta-runner:0.18.0" diff --git a/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml index abc8486a..e2f955c8 100644 --- a/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml @@ -1,12 +1,11 @@ user_prompt: 'Explain << { "type": "issue", "id": "asodfkq1209edyhqawdo2uydqawidh" } >>' -expected_output: "The finding indicates that the pod `my-cluster-kafka-0` in the `kafka` namespace was evicted due to low ephemeral storage on the node. The threshold was 2126213140, but only 1884540Ki was available. The `kafka` container was using 3264Ki of ephemeral storage, with no specific request set, leading to higher consumption. This resulted in a warning and the container being stopped. The issue occurred in the `payments-kafka-eu-eks-v2` cluster." +expected_output: + - The pod `my-cluster-kafka-0` in the `kafka` namespace was evicted due to low ephemeral storage on the node + - The threshold was 2126213140, but only 1884540Ki was available + - The `kafka` container was using 3264Ki of ephemeral storage retrieval_context: - "Pod my-cluster-kafka was evicted" - "The node was low on resource: ephemeral-storage. Threshold quantity: 2126213140, available: 1884540Ki. Container kafka was using 3264Ki, request is 0, has larger consumption of ephemeral-storage." - evaluation: answer_relevancy: 0 faithfulness: 0 - contextual_precision: 0 - contextual_recall: 0 - contextual_relevancy: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml index b6e5cd97..fcd525de 100644 --- a/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml @@ -1,7 +1,6 @@ user_prompt: "Why is there high latency with the customer-orders deployment?" -expected_output: | - High latency in the `customer-orders` deployment is due to slow database calls. - The logs show that database operations, specifically fetching data using a stored procedure, are taking between 5 to 10 seconds to complete. - This delay directly impacts the page rendering time, causing high latency in the application. +expected_output: + - High latency in the `customer-orders` deployment is due to slow database calls + - Database operations are taking between 5 to 10 seconds to complete before_test: kubectl apply -f ./helm/manifest.yaml after_test: kubectl delete -f ./helm/manifest.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml index 80995c6a..4eaf2dd6 100644 --- a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml @@ -1,12 +1,7 @@ user_prompt: "Why did the sock-shop front-end stop working?" -expected_output: | - The sock-shop front-end stopped working due to an internal server error related to MongoDB. The error message indicates that the MongoDB client driver may require an upgrade due to an unsupported OP_QUERY command. This results in a failure to process orders, causing a crash in the front-end application. - - To resolve this issue, consider upgrading the MongoDB client driver to a version that supports the current MongoDB server's command set. 
- - The sock-shop front-end stopped working due to an internal server error caused by an outdated MongoDB client driver. The error message indicates that the `find` command is unsupported, suggesting the client driver needs an upgrade. This led to a failure in processing orders, resulting in a crash with a `TypeError` when trying to access `customerOrders`. - - To resolve this, update the MongoDB client driver to a version that supports the current MongoDB server's commands. +expected_output: + - The sock-shop front-end stopped working due to an internal server error related to MongoDB. + - The MongoDB client driver may require an upgrade retrieval_context: - "The sock-shop front-end crashed" - "The mongodb client driver may require an upgrade" diff --git a/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml index 88d55457..52f2add3 100644 --- a/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml @@ -1,7 +1,6 @@ user_prompt: "What is the issue with payment-processing-worker?" -expected_output: | - The `payment-processing-worker` pod is crashing due to an undefined environment variable `DEPLOY_ENV`. The container is set to check for this variable, and if it's not defined, it exits immediately, causing a CrashLoopBackOff state. - To resolve this, define the `DEPLOY_ENV` environment variable in the pod's configuration. +expected_output: + - The `DEPLOY_ENV` environment variable is undefined or missing retrieval_context: - "logs from payment-processing-worker mention: Environment variable DEPLOY_ENV is undefined" # before_test: | diff --git a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml index 460e26b6..a3566f18 100644 --- a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml @@ -1,14 +1,8 @@ user_prompt: What is the issue with customer-relations-webapp? -expected_output: | - The `customer-relations-webapp` pods are failing to start due to an `ImagePullBackOff` error. The issue is caused by the inability to pull the Docker image `yourcompany/crw:latest`. The error message indicates that the image pull failed because the repository does not exist or may require authorization. - - Possible solutions: - 1. Verify the image name and tag are correct. - 2. Ensure the image is available in the specified repository. - 3. Check if authentication credentials are needed to access the repository and configure them in Kubernetes if necessary. 
- -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml -# sleep 20 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml +expected_output: + - The Docker image `yourcompany/crw:latest` cannot be pulled +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml + sleep 20 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml index e3c3ca52..51d667bd 100644 --- a/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml @@ -1,10 +1,7 @@ user_prompt: "What is the issue with logging-agent?" -expected_output: | - The `logging-agent` pod is failing to initialize due to an error in the init container `downloader`. - The command `wge` is not found, likely a typo for `wget`. - This causes the container to fail with a `RunContainerError`. - Correct the command in the init container to use `wget` instead of `wge`. - +expected_output: + - The command `wge` is not found + - The answer should suggest `wge` may be a typo for `wget`. # before_test: | # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/init_crashloop_backoff/create_init_crashloop_backoff.yaml # sleep 40 diff --git a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml index d17d33d7..242d99b9 100644 --- a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml @@ -1,11 +1,6 @@ user_prompt: "What is the issue with job java-api-checker" -expected_output: | - The `java-api-checker` job is experiencing a network exception. - It repeatedly fails to connect to the database at `prod-db:3333`, - with the error indicating that no available connection exists and the - queue has reached its maximum size of 256. - This suggests a potential issue with the database being unreachable or overloaded. - +expected_output: + - The `java-api-checker` job repeatedly fails to connect to the database at `prod-db:3333` # before_test: | # kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml || true # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml index 3d8e51b2..9330d190 100644 --- a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "What is the issue with user-profile-import" -expected_output: | - The `user-profile-import` pod is pending because it has a node selector that doesn't match any available nodes. - The node selector is set to `label=someLabel`, but no nodes in the cluster have this label.
- To resolve this, either update the node selector to match an existing node label or add the required label to a node. - +expected_output: + - The pod has a node selector that doesn't match any available nodes. + - The node selector is set to `label=someLabel`, but no nodes in the cluster have this label. # before_test: | # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml # sleep 30 diff --git a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml index 2f6f41fb..098487bf 100644 --- a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml @@ -1,8 +1,6 @@ user_prompt: "What is the issue with user-profile-resources" -expected_output: | - The `user-profile-resources` deployment is not running due to insufficient GPU resources. - The pod `user-profile-resources-659d4dd659-d2tlj` is pending because there are no available nodes with the required 5 GPUs. - Adjust the GPU resource requests or ensure nodes with sufficient GPUs are available. +expected_output: + - This must mention insufficient GPU resources before_test: | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_resources.yaml sleep 30 diff --git a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml index bf2b7661..811c3661 100644 --- a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml @@ -1,11 +1,9 @@ user_prompt: "What is the issue with search-engine-service" -expected_output: | - The `search-engine-service` pod is running but not ready due to a failing readiness probe. - The readiness probe is set to execute `sh -c exit 1`, which always fails, causing the pod to be marked as not ready. - Adjust the readiness probe to a command that accurately reflects the pod's readiness state. - -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml -# sleep 30 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml +expected_output: + - The `search-engine-service` pod is running but not ready due to a failing readiness probe + - The readiness probe fails +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml + sleep 30 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml index 1b7c314d..33539024 100644 --- a/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml @@ -1,4 +1,3 @@ user_prompt: "Why is my Digital ocean droplet not working fine" -expected_output: |
- You can add a Holmes Digital Ocean integration which will give me access yo your droplets. +expected_output: + - The answer should prompt the user to add a Holmes Digital Ocean integration to access droplets diff --git a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml index 1bcfeac2..9b273e11 100644 --- a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml @@ -1,10 +1,6 @@ user_prompt: "What is the issue with analytics-exporter-fast?" -expected_output: | - The `analytics-exporter-fast` pod is crashing due to being OOMKilled (Out of Memory Killed). - The container `memory-eater` is configured with a memory limit of 100Mi, and it is exceeding this limit, leading to termination. - - To resolve this, consider increasing the memory limit for the container or optimizing the application's memory usage. - +expected_output: + - The result mentions analytics-exporter-fast is getting OOMKILLED before_test: | kubectl apply -f ./fast_oom_deployment.yaml sleep 5 diff --git a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml index 71d86580..215b3236 100644 --- a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml @@ -1,11 +1,9 @@ user_prompt: "What is the issue with db-certs-authenticator?" -expected_output: | - The `db-certs-authenticator` pod experienced a crash due to an expired SSL certificate. - The logs indicate that the certificate expired on August 3, 2024. To resolve this, - update the SSL certificate using the `/update_certificate` API or update the config map with a valid certificate. - -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml -# sleep 120 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml +expected_output: + - The `db-certs-authenticator` pod experienced a crash due to an expired SSL certificate. + - The certificate expired on August 3, 2024 +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml + sleep 120 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml index 4a69b5ad..7915808f 100644 --- a/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml @@ -1,4 +1,3 @@ user_prompt: "Why is frontend-certs-validator in a crashloopbackoff?" -expected_output: I couldn't find a pod named "frontend-certs-validator." Please check the name and try again. -retrieval_context: - - "The pod frontend-certs-validator is not present in the cluster" +expected_output: + - There is no pod named "frontend-certs-validator." 
diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt deleted file mode 100644 index fd0a5a8a..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt +++ /dev/null @@ -1,23 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal 
app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt deleted file mode 100644 index 3c951eea..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt +++ /dev/null @@ -1,3 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"node","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME LABELS -ip-172-31-8-128.us-east-2.compute.internal Ready 25d v1.30.4-eks-a737599 172.31.8.128 3.147.70.176 Amazon Linux 2 5.10.225-213.878.amzn2.x86_64 containerd://1.7.11 
beta.kubernetes.io/arch=amd64,beta.kubernetes.io/instance-type=t3.medium,beta.kubernetes.io/os=linux,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup-image=ami-067ed4d12a282fb31,eks.amazonaws.com/nodegroup=nicolas-node-group,failure-domain.beta.kubernetes.io/region=us-east-2,failure-domain.beta.kubernetes.io/zone=us-east-2a,k8s.io/cloud-provider-aws=02bcd7cbb8e774ede4606ab79260ae31,kubernetes.io/arch=amd64,kubernetes.io/hostname=ip-172-31-8-128.us-east-2.compute.internal,kubernetes.io/os=linux,node.kubernetes.io/instance-type=t3.medium,topology.ebs.csi.aws.com/zone=us-east-2a,topology.k8s.aws/zone-id=use2-az1,topology.kubernetes.io/region=us-east-2,topology.kubernetes.io/zone=us-east-2a diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt deleted file mode 100644 index ebe27926..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt +++ /dev/null @@ -1,2 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","name":"ip-172-31-8-128.us-east-2.compute.internal"}} -Error from server (NotFound): pods "ip-172-31-8-128.us-east-2.compute.internal" not found diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt deleted file mode 100644 index a29a1535..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt +++ /dev/null @@ -1,6 +0,0 @@ -{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} -Command `kubectl top pods -A` failed with return code 1 -stdout: - -stderr: -error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml index 955f2a9a..cc689e93 100644 --- a/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml @@ -1,4 +1,5 @@ user_prompt: "Was there any 'Peer down' problem reported for network-connector pod?" -expected_output: The "Peer down" issue was reported in the logs of the `network-connector` pod. +expected_output: + - The "Peer down" issue was reported in the logs of the `network-connector` pod. before_test: kubectl apply -f ./long-log-pod.yaml after_test: kubectl delete -f ./long-log-pod.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml index 0eb0e513..a6685f28 100644 --- a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml @@ -1,12 +1,6 @@ user_prompt: "Why get-data job failed?" -expected_output: | - The `get-data` job failed because the `curl` command in the `curl-container` exited with code 6. - This exit code indicates a "Could not resolve host" error, meaning the specified URL - `http://kldsvklnsdiogjerkngernjk23432i90erfefjio.com/fsdkhjsadfjhsadfjsadu343` could not be resolved to an IP address. - - To fix this, ensure the URL is correct and accessible from the cluster. - If it's a typo or a temporary issue with the DNS, correcting the URL or ensuring DNS resolution should resolve the problem. 
- +expected_output: + - The specified URL `http://kldsvklnsdiogjerkngernjk23432i90erfefjio.com/fsdkhjsadfjhsadfjsadu343` could not be resolved to an IP address retrieval_context: - "The job ran and returned with an error: curl: (6) Could not resolve host: kldsvklnsdiogjerkngernjk23432i90erfefjio.com" before_test: | @@ -19,5 +13,5 @@ after_test: | kubectl delete -f ./job-service-account-event/get-data-job.yaml kubectl delete -f ./job-service-account-event/get-data-service-account.yaml evaluation: - faithfulness: 0.3 + correctness: 0.3 context: 0.3 diff --git a/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml index 0f2dc4ce..6e6fbb45 100644 --- a/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml @@ -1,7 +1,8 @@ # this requires first creating a secret w/ credentials for the RDS database # for instructions, see "Credentials for HolmesGPT RDS AI Test" in 1Password, but change the secret name to db-secrets-for-medium and the db to "promotions-db-for-medium" user_prompt: "Why is there high latency with the customer-orders-for-medium deployment?" -expected_output: "The result says the reason for failure is promotions-db-for-medium.cp8rwothwarq.us-east-2.rds.amazonaws.com is down, stopped, or in unavailable state." +expected_output: + - The reason for failure is promotions-db-for-medium.cp8rwothwarq.us-east-2.rds.amazonaws.com is down, stopped, or in unavailable state. before_test: | [ "$(aws rds describe-db-instances --db-instance-identifier promotions-db-for-medium --query "DBInstances[0].DBInstanceStatus" --output text)" != "stopped" ] && aws rds stop-db-instance --db-instance-identifier promotions-db-for-medium || echo "RDS instance is already stopped." kubectl apply -f ./slow-rds-query-for-medium.yaml @@ -9,4 +10,4 @@ before_test: | after_test: | kubectl delete -f ./slow-rds-query-for-medium.yaml evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml index f9140355..d5a3153a 100644 --- a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "Why is meme-deployment not working" -expected_output: | - The `meme-deployment` is experiencing issues due to a DNS resolution error. - The application is trying to connect to the host `memcom`, but it fails with the error "No address associated with hostname." - This indicates that the hostname `memcom` cannot be resolved to an IP address. - +expected_output: + - The `meme-deployment` is experiencing issues due to a DNS resolution error. + - This indicates that the hostname `memcom` cannot be resolved to an IP address. retrieval_context: - "There is a DNS resolution error" - "hostname `memcom` cannot be resolved to an IP address" diff --git a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml index 72386713..48b22ce0 100644 --- a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml @@ -1,7 +1,9 @@ user_prompt: "What is the issue with redis?" 
-expected_output: "The PersistentVolumeClaim (pvc) is misconfigured and that the storage class standard-v2 does not exist" +expected_output: + - The PersistentVolumeClaim (pvc) is misconfigured + - The storage class standard-v2 does not exist evaluation: - faithfulness: 0 + correctness: 0 before_test: | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pvc-misconfiguration/redis-deployment.yaml after_test: | diff --git a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml index eac17d77..40af6f50 100644 --- a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "Other processes in the cluster are unable to reach my-http-app-deployment, why? Investigate ingress class issues" -expected_output: | - 1. The `my-http-ingress` is using the `example-ingress-class`, which doesn't exist in the cluster. This could prevent routing to `my-http-app-deployment`. - 2. Ensure the ingress class `example-ingress-class` is defined and configured correctly. - 3. Verify that the ingress controller is running and associated with the correct ingress class. +expected_output: + - The ingress class `example-ingress-class` does not exist in the cluster before_test: kubectl apply -f./ingress_with_class.yaml after_test: kubectl delete -f./ingress_with_class.yaml evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml index b86b3027..0b66cdc7 100644 --- a/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml @@ -1,34 +1,8 @@ -expected_output: | - The alert indicates that the pod `oomkill-deployment-696dbdbf67-d47z6` in the `default` namespace was experiencing a `CrashLoopBackOff` state, which typically means the pod was repeatedly crashing and restarting. - - # Investigation - I attempted to gather more information by describing the pod and fetching its logs, but the pod `oomkill-deployment-696dbdbf67-d47z6` could not be found in the cluster. This suggests that the pod may have been deleted or the deployment was scaled down after the alert was triggered. - - # Conclusions and Possible Root causes - 1. *Pod Deletion*: The pod might have been manually deleted or automatically removed by a scaling operation or deployment update. - 2. *Deployment Update*: A new deployment or update might have replaced the pod, leading to its removal. - 3. *Resource Constraints*: If the pod was indeed crashing due to resource constraints (e.g., OOMKilled), it might have been removed as part of a cleanup process. - - # Next Steps - 1. Verify if the deployment `oomkill-deployment` is still present and check its current status using: - ```bash - kubectl get deployment oomkill-deployment -n default - ``` - 2. If the deployment exists, check the replica set and any new pods created: - ```bash - kubectl get rs -n default - kubectl get pods -n default - ``` - 3. Review any recent changes or events related to the deployment: - ```bash - kubectl describe deployment oomkill-deployment -n default - ``` - 4. If resource constraints were suspected, consider increasing the resources allocated to the pods in the deployment configuration.
- +expected_output: + - Pod `oomkill-deployment-696dbdbf67-d47z6` is experiencing a `CrashLoopBackOff` retrieval_context: - There is a total of 12 pods on node ip-172-31-8-128.us-east-2.compute.internal - There are 5 pods in running state - 7 pods are not running as indicated by the STATUS column evaluation: - answer_relevancy: 0 - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml index e9bebc6c..172915ee 100644 --- a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml @@ -1,28 +1,11 @@ -expected_output: | - Alert Explanation - - The pod inventory-management-api in the default namespace is experiencing a CrashLoopBackOff due to repeated failures in starting its container. - - Investigation - - - The pod inventory-management-api is using the nginx image and attempts to execute a command wge which is not found in the container's $PATH. This results in a StartError with an exit code 128. +expected_output: + - Pod inventory-management-api is experiencing a CrashLoopBackOff + - The pod attempts to execute a command wge which is not found + - Mention `wge` might be a typo and the intended command may be `wget` +retrieval_context: + - The pod inventory-management-api is using the nginx image + - The pod attempts to execute a command wge which is not found in the container's $PATH. This results in a StartError with an exit code 128. - The pod has restarted 4 times and is currently in a Waiting state with the reason CrashLoopBackOff. - - The error message indicates: exec: "wge": executable file not found in $PATH. - - Conclusions and Possible Root Causes - - - Incorrect Command: The command wge is likely a typo or misconfiguration. The intended command might be wget, which is a common utility for downloading files. - - Missing Executable: The required executable is not present in the container's image, leading to the failure to start the container. - - Next Steps - - - Correct the command in the pod's configuration to use the correct executable, likely wget, if that was the intended command. - - Verify the container image includes all necessary executables and dependencies. - - Update the pod configuration and redeploy the pod with the corrected command. You can do this by editing the pod's YAML configuration and applying the changes: - - ``` - kubectl edit pod inventory-management-api -n default - ``` - + - "The error message indicates: exec: 'wge': executable file not found in $PATH." evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml b/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml index 5c737af7..4ea66e0f 100644 --- a/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml @@ -1,24 +1,10 @@ -expected_output: | - Alert Explanation - - The frontend-service pod in the default namespace is experiencing 100% CPU throttling for the stress container. This means the container is being limited by its CPU quota, potentially affecting performance. - Investigation - - - The frontend-service pod is running on node ip-172-31-21-139.us-east-2.compute.internal. - - The stress container is configured with CPU limits and requests set to 10m. 
- - The container is running a command to stress the CPU with 100 threads, which is likely causing the CPU throttling. - - The pod is in a Running state with no restarts, indicating it hasn't crashed or been restarted recently. - - Current logs show the stress command is actively running, but there are no previous logs available, suggesting the container hasn't been restarted. - - Conclusions and Possible Root Causes - - - CPU Limit Configuration: The stress container has a CPU limit of 10m, which is insufficient for the workload it is trying to perform (100 CPU threads). This is causing the CPU throttling. - - Application Design: The application is designed to use a high number of CPU threads, which is not compatible with the current CPU limits. - - Next Steps - - - Consider removing or increasing the CPU limit for the stress container if the application requires more CPU resources. This can be done by editing the pod's resource configuration: - `kubectl edit pod frontend-service -n default` - - Adjust the resources.limits.cpu value to a higher number or remove it entirely if appropriate. +expected_output: + - The frontend-service pod in the default namespace is experiencing 100% CPU throttling for the stress container + - Suggest increasing the CPU limit +retrieval_context: + - The frontend-service pod is running on node ip-172-31-21-139.us-east-2.compute.internal + - The stress container is configured with CPU limits and requests set to 10m + - The container is running a command to stress the CPU with 100 threads + - The pod is in a Running state with no restarts evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml index 0017f9ac..7ae8c64c 100644 --- a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml @@ -1,20 +1,7 @@ -expected_output: | - # Alert Explanation - The pod `customer-relations-webapp-5d98ffcfd-tz4nc` in the `default` namespace is experiencing a warning event due to a failure in pulling the Docker image `yourcompany/crw:latest`. - - # Investigation - - The pod is in a `Pending` state with the container `crw-main-container` waiting due to `ErrImagePull`. - - The events indicate repeated failures to pull the image `yourcompany/crw:latest` due to "pull access denied" and "insufficient_scope: authorization failed". - - The logs for the current and previous container states are unavailable because the container has not started successfully. - - # Conclusions and Possible Root Causes - 1. *Image Pull Authorization Failure*: The image `yourcompany/crw:latest` cannot be pulled because of authorization issues. This could be due to missing or incorrect credentials for accessing the Docker registry. - 2. *Repository Access Issues*: The repository might not exist or the image tag `latest` might be incorrect, leading to the failure in resolving the image reference. - - # Next Steps - - Verify the credentials used for accessing the Docker registry and ensure they have the necessary permissions. - - Check if the image `yourcompany/crw:latest` exists in the Docker registry and is accessible. - - Update the Kubernetes secret used for Docker registry credentials if necessary. - - Consider specifying a specific image tag instead of `latest` to avoid potential issues with image resolution. 
+expected_output: + - The pod fails in pulling the Docker image `yourcompany/crw:latest` + - The answer should prompt the user to verify that the docker image exists +retrieval_context: + - The pod is in a `Pending` state with the container `crw-main-container` waiting due to `ErrImagePull` evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml b/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml index 4916b3b0..98ebed6c 100644 --- a/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml @@ -1,29 +1,5 @@ -expected_output: | - The pod `db-certs-authenticator-7ffd769f48-vndgd` in the `default` namespace is experiencing a `CrashLoopBackOff`, indicating that the application within the pod is repeatedly crashing and Kubernetes is attempting to restart it. - - # Investigation - 1. The runbook for `KubePodCrashLooping` suggests checking pod events, logs, and configuration for issues such as resource constraints, misconfigurations, or missing dependencies. - 2. Attempts to describe the pod and fetch logs failed because the pod `db-certs-authenticator-7ffd769f48-vndgd` is not currently found in the cluster. - 3. A search for the pod across all namespaces confirmed that it is not present. - - # Conclusions and Possible Root Causes - 1. *Pod Deletion*: The pod `db-certs-authenticator-7ffd769f48-vndgd` may have been deleted after the alert was triggered, which is why it is not found in the cluster. - 2. *Transient Issue*: The crash loop might have been a transient issue that resolved itself, leading to the pod being replaced or removed. - 3. *Configuration or Resource Issue*: If the pod was deleted due to a configuration or resource issue, it might recur when the pod is redeployed. - - # Next Steps - 1. **Verify Deployment**: Check the deployment `db-certs-authenticator` to ensure it is correctly configured and running. - ```bash - kubectl get deployment db-certs-authenticator -n default - ``` - 2. **Check for New Pods**: Look for any new pods created by the deployment to replace the crashed pod. - ```bash - kubectl get pods -n default -l app=db-certs-authenticator - ``` - 3. **Review Deployment Configuration**: Ensure that the deployment configuration does not have issues that could cause pods to crash. - ```bash - kubectl describe deployment db-certs-authenticator -n default - ``` - 4. **Monitor for Recurrence**: Keep an eye on the deployment for any new crash loop alerts to identify if the issue persists. +expected_output: + - The pod `db-certs-authenticator-7ffd769f48-vndgd` in the `default` namespace is experiencing a `CrashLoopBackOff` + - Mention the runbook `KubePodCrashLooping` evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml b/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml index 83075d3b..b7ca4874 100644 --- a/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml @@ -1,18 +1,6 @@ -expected_output: | - The job `java-api-checker` in the `default` namespace has failed because it reached the specified backoff limit. - - # Investigation - The job `java-api-checker` was configured with a `backoffLimit` of 1, meaning it would retry once upon failure. The job failed twice, as indicated by the events and logs. 
The logs from the pod `java-api-checker-slp89` show repeated network exceptions indicating that all attempts to connect to the database at `prod-db:3333` failed due to no available connections and a full queue. - - # Conclusions and Possible Root Causes - 1. *Database Connectivity Issues*: The primary issue is a failure to connect to the database at `prod-db:3333`. This could be due to network issues, the database being down, or the database being overloaded. - 2. *Configuration Error*: The job's configuration might not be suitable for the current database load, leading to connection failures. - 3. *Resource Limitations*: The database might be hitting its connection limits or queue size, preventing new connections. - - # Next Steps - 1. Verify the status and availability of the database at `prod-db:3333`. - 2. Check network connectivity between the Kubernetes cluster and the database. - 3. Review and possibly increase the database's connection limits or queue size. - 4. Consider adjusting the job's retry logic or backoff limit to allow more retries if transient issues are expected. +expected_output: + - The job `java-api-checker` in the `default` namespace has failed because it reached the specified backoff limit. + - The job `java-api-checker` was configured with a `backoffLimit` of 1 + - The logs from the pod `java-api-checker-slp89` show repeated network exceptions indicating that all attempts to connect to the database at `prod-db:3333` failed evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml b/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml index 3542506c..192fc4a4 100644 --- a/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml @@ -1,28 +1,6 @@ -expected_output: | - The `KubeJobFailed` alert indicates that the Kubernetes job `product-data-scraper` in the `default` namespace failed to complete successfully. - - # Investigation - 1. The job `product-data-scraper` was configured to complete 5 times but failed all 5 attempts, reaching the backoff limit of 4. - 2. The job's pods (`product-data-scraper-2fqrh`, `product-data-scraper-5fdj6`, etc.) were created successfully but encountered errors during execution. - 3. The logs from one of the pods, `product-data-scraper-2fqrh`, show a syntax error: `/bin/sh: syntax error: unexpected "fi"`. - - # Conclusions and Possible Root Causes - 1. *Syntax Error in Command*: The job's container command has a syntax error, specifically an unexpected "fi", which is causing the job to fail. - 2. *Job Configuration*: The job's backoff limit was reached due to repeated failures, indicating that the job's command needs correction. - - # Next Steps - 1. **Fix the Command**: Correct the syntax error in the job's command. Ensure that the shell script logic is correct. - 2. **Redeploy the Job**: After fixing the command, delete the failed job and redeploy it to clear the alert. - 3. **Verify**: Monitor the job to ensure it completes successfully without hitting the backoff limit. 
- - To fix the job, you can edit the job's YAML to correct the command and then apply the changes: - ```bash - kubectl edit job product-data-scraper -n default - ``` - After editing, delete the existing job and create a new one: - ```bash - kubectl delete job product-data-scraper -n default - kubectl apply -f .yaml - ``` +expected_output: + - The Kubernetes job `product-data-scraper` failed to complete successfully + - The job's pods (`product-data-scraper-2fqrh`, `product-data-scraper-5fdj6`, etc.) were created successfully but encountered errors during execution. + - "The logs from one of the pods, `product-data-scraper-2fqrh`, show a syntax error: `/bin/sh: syntax error: unexpected 'fi'`" evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml b/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml index 3463aa3d..d72d4c65 100644 --- a/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml @@ -1,21 +1,5 @@ -expected_output: | - The node `ip-172-31-40-111.us-east-2.compute.internal` is running at 100% of its pod capacity, which is set to 17 pods. This alert indicates that the node cannot schedule any additional pods. - - # Investigation - - The node `ip-172-31-40-111.us-east-2.compute.internal` is currently running 17 pods, which is its maximum capacity. - - Several pods are pending, indicating they cannot be scheduled due to the node's capacity limit. - - The node is labeled as `t3.medium`, which typically has a lower pod capacity compared to larger instance types. - - Some pods on this node do not have CPU or memory requests defined, which can lead to inefficient resource utilization. - - # Conclusions and Possible Root Causes - 1. *Node Capacity Limit Reached*: The node has reached its maximum pod capacity of 17, preventing new pods from being scheduled. - 2. *Resource Requests Not Defined*: The absence of CPU and memory requests for some pods may lead to suboptimal scheduling and resource allocation. - 3. *Instance Type Limitation*: The `t3.medium` instance type may inherently support fewer pods due to its resource constraints. - - # Next Steps - 1. **Redistribute Pods**: Consider redistributing pods to other nodes with available capacity. - 2. **Increase Node Capacity**: Add more nodes to the cluster or upgrade existing nodes to a larger instance type to accommodate more pods. - 3. **Define Resource Requests**: Ensure all pods have defined CPU and memory requests to improve scheduling efficiency. - 4. **Adjust Kubelet Configuration**: If feasible, increase the `maxPods` setting in the Kubelet configuration for nodes that can handle more pods. +expected_output: + - The node `ip-172-31-40-111.us-east-2.compute.internal` is running at 100% of its pod capacity, which is set to 17 pods + - The node cannot schedule any additional pods. 
evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml b/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml index d4d5f8bf..4be18dee 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml @@ -1,18 +1,6 @@ -expected_output: | - The `HighLatencyForCustomerCheckout` alert is triggered because HTTP requests to the `/checkout` endpoint in the `customer-orders` application are taking longer than 3 seconds to complete. - - # Investigation - 1. **Deployment Details**: The `customer-orders` deployment in the `default` namespace is running with one replica. The deployment uses the `fastapi-app` container from the image `us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:nicolas-no-db`. - 2. **Pod Logs**: The logs from the `fastapi-app` container show that database calls are taking a significant amount of time, ranging from 5 to 10 seconds, which is contributing to the high latency of the checkout page rendering. - - # Conclusions and Possible Root Causes - 1. *Database Latency*: The primary cause of the high latency appears to be the time taken for database calls. The logs indicate that the database call `sp_CheckUserNotifications` is taking between 5 to 10 seconds to complete, which directly impacts the response time of the `/checkout` endpoint. - 2. *Database Connection Issues*: The logs mention connecting to a database at `None`, which might indicate a misconfiguration or an issue with the database connection string. - - # Next Steps - 1. **Database Optimization**: Investigate the stored procedure `sp_CheckUserNotifications` for potential optimizations to reduce execution time. - 2. **Configuration Check**: Verify the database connection settings in the application configuration to ensure they are correct and pointing to the intended database. - 3. **Monitoring and Metrics**: Implement detailed monitoring on the database to identify any bottlenecks or resource constraints that could be causing delays. - 4. **Scaling**: Consider scaling the database resources or the application if the load is higher than expected. 
+expected_output: + - There is a HighLatencyForCustomerCheckout alert + - It is triggered by HTTP requests to the `/checkout` endpoint in the `customer-orders` application which are taking longer than 3 seconds to complete + - logs indicate that the database call `sp_CheckUserNotifications` is taking between 5 to 10 seconds to complete evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json new file mode 100644 index 00000000..5e740e4d --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json @@ -0,0 +1,15 @@ +{ + "source": "prometheus", + "title": "KubeControllerManagerDown", + "description": "", + "subject": {}, + "context": { + "issue_type": "KubeControllerManagerDown", + "robusta_issue_id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json new file mode 100644 index 00000000..e743ccbb --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json @@ -0,0 +1,31 @@ +{ + "id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "name": "KubeControllerManagerDown", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=absent%28up%7Bjob%3D%22kube-controller-manager%22%7D+%3D%3D+1%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubeControllerManagerDown", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "critical" + }, + "annotations": { + "description": "KubeControllerManager has disappeared from Prometheus target discovery.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown", + "summary": "Target disappeared from Prometheus target discovery." 
+ }, + "startsAt": "2024-11-29 06:30:39.202000+00:00", + "endsAt": "2024-11-29 09:18:09.202000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=absent%28up%7Bjob%3D%22kube-controller-manager%22%7D+%3D%3D+1%29&g0.tab=1", + "fingerprint": "467f7b1923c9cfcc", + "unique_id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "duration": "2:47:30", + "name": "KubeControllerManagerDown", + "definition": "absent(up{job=\"kube-controller-manager\"} == 1)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: critical\n*Start Time*: 2024-11-29 06:30:39 UTC\n*Duration*: 2 hours", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubeControllerManagerDown\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: critical\n*Annotations:*\n• *description*: KubeControllerManager has disappeared from Prometheus target discovery.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown\n• *summary*: Target disappeared from Prometheus target discovery.\n" +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt new file mode 100644 index 00000000..8e9fb22a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_all","match_params":{"kind":"node"}} +stdout: +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME LABELS +kind-control-plane Ready control-plane 11d v1.31.2 172.18.0.2 Debian GNU/Linux 12 (bookworm) 6.1.0-27-amd64 containerd://1.7.18 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=kind-control-plane,kubernetes.io/os=linux,node-role.kubernetes.io/control-plane= + +stderr: diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json new file mode 100644 index 00000000..41c749b6 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json @@ -0,0 +1,6 @@ +{ + "instructions": [ + "Check if the cluster is a managed cluster like EKS by fetching nodes and looking at their labels. If so, tell the user this is likely a known false positive in the kube-prometheus-stack alert because Prometheus can't scrape the scheduler which is managed by the cloud provider. On the other hand, if this is a self-managed Kubernetes, either the scheduler is really down (unlikely) or it is running but Prometheus can't scrape it." 
+ ], + "documents": [] +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml new file mode 100644 index 00000000..2b1e4ecc --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml @@ -0,0 +1,3 @@ +expected_output: + - Prometheus can't scrape the control plane + - this is not an alert that requires your attention diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json new file mode 100644 index 00000000..fae72113 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubeDeploymentReplicasMismatch", + "description": "", + "subject": { + "name": "robusta-kube-state-metrics-8667fd9775-kbsp7", + "namespace": "default", + "kind": "pod", + "node": null, + "container": "kube-state-metrics", + "cluster": null + }, + "context": { + "issue_type": "KubeDeploymentReplicasMismatch", + "robusta_issue_id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json new file mode 100644 index 00000000..6e482060 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json @@ -0,0 +1,39 @@ +{ + "id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "name": "KubeDeploymentReplicasMismatch", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=%28kube_deployment_spec_replicas%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D+%3E+kube_deployment_status_replicas_available%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%29+and+%28changes%28kube_deployment_status_replicas_updated%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%5B10m%5D%29+%3D%3D+0%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubeDeploymentReplicasMismatch", + "container": "kube-state-metrics", + "deployment": "payment-processing-worker", + "endpoint": "http", + "instance": "10.244.0.3:8080", + "job": "kube-state-metrics", + "namespace": "default", + "pod": "robusta-kube-state-metrics-8667fd9775-kbsp7", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "service": "robusta-kube-state-metrics", + "severity": "warning" + }, + "annotations": { + "description": "Deployment default/payment-processing-worker has not matched the expected number of replicas for longer than 15 minutes.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch", + "summary": "Deployment has not matched the expected number of replicas." 
+ }, + "startsAt": "2024-11-29 08:39:57.669000+00:00", + "endsAt": "2024-11-29 08:55:57.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=%28kube_deployment_spec_replicas%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D+%3E+kube_deployment_status_replicas_available%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%29+and+%28changes%28kube_deployment_status_replicas_updated%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%5B10m%5D%29+%3D%3D+0%29&g0.tab=1", + "fingerprint": "412453fa9d1e6ca0", + "unique_id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "duration": "0:16:00", + "name": "KubeDeploymentReplicasMismatch", + "definition": "(kube_deployment_spec_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} > kube_deployment_status_replicas_available{job=\"kube-state-metrics\",namespace=~\".*\"}) and (changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"}[10m]) == 0)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:39:57 UTC\n*Duration*: 16 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubeDeploymentReplicasMismatch\n• *container*: kube-state-metrics\n• *deployment*: payment-processing-worker\n• *endpoint*: http\n• *instance*: 10.244.0.3:8080\n• *job*: kube-state-metrics\n• *namespace*: default\n• *pod*: robusta-kube-state-metrics-8667fd9775-kbsp7\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *service*: robusta-kube-state-metrics\n• *severity*: warning\n*Annotations:*\n• *description*: Deployment default/payment-processing-worker has not matched the expected number of replicas for longer than 15 minutes.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch\n• *summary*: Deployment has not matched the expected number of replicas.\n" +} diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt new file mode 100644 index 00000000..636596b1 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt @@ -0,0 +1,42 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "deployment", "name": "payment-processing-worker", "namespace": "default"}} +stdout: +Name: payment-processing-worker +Namespace: default +CreationTimestamp: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: +Annotations: deployment.kubernetes.io/revision: 1 +Selector: app=payment-processing-worker +Replicas: 1 desired | 1 updated | 1 total | 0 available | 1 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 25% max unavailable, 25% max surge +Pod Template: + Labels: app=payment-processing-worker + Containers: + payment-processing-container: + Image: bash + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + Environment: + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available False MinimumReplicasUnavailable + Progressing False ProgressDeadlineExceeded +OldReplicaSets: +NewReplicaSet: payment-processing-worker-747ccfb9db (1/1 replicas created) 
+Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 28m deployment-controller Scaled up replica set payment-processing-worker-747ccfb9db to 1 + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt new file mode 100644 index 00000000..7d62aad4 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt @@ -0,0 +1,64 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind":"pod","name":"payment-processing-worker-747ccfb9db-pd6wz","namespace":"default"}} +stdout: +Name: payment-processing-worker-747ccfb9db-pd6wz +Namespace: default +Priority: 0 +Service Account: default +Node: kind-control-plane/172.18.0.2 +Start Time: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: app=payment-processing-worker + pod-template-hash=747ccfb9db +Annotations: +Status: Running +IP: 10.244.0.15 +IPs: + IP: 10.244.0.15 +Controlled By: ReplicaSet/payment-processing-worker-747ccfb9db +Containers: + payment-processing-container: + Container ID: containerd://5714b7a193dd4bd5cfeede3e2cd37a53ba88ce5f292974d5aaeda1c1d0b8167f + Image: bash + Image ID: docker.io/library/bash@sha256:4bbfbe07eceeed5ab9136b37faf4f5cff3c28a339087ce068a76f2c1733054e8 + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + State: Waiting + Reason: CrashLoopBackOff + Last State: Terminated + Reason: Completed + Exit Code: 0 + Started: Fri, 29 Nov 2024 14:01:30 +0100 + Finished: Fri, 29 Nov 2024 14:01:30 +0100 + Ready: False + Restart Count: 59 + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-lfjgv (ro) +Conditions: + Type Status + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True +Volumes: + kube-api-access-lfjgv: + Type: Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning BackOff 73s (x1289 over 4h41m) kubelet Back-off restarting failed container payment-processing-container in pod payment-processing-worker-747ccfb9db-pd6wz_default(b9cc2b01-9250-4360-941f-40e522628f6c) + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt new file mode 100644 index 00000000..e806771a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt @@ -0,0 +1,54 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_all","match_params":{"kind":"pod"}} +stdout: +NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES LABELS +default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 16 (6h50m ago) 10d 10.244.0.14 kind-control-plane 
alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 +default analytics-exporter-fast-76897854c-4m49l 0/1 CrashLoopBackOff 59 (32s ago) 4h39m 10.244.0.37 kind-control-plane app=analytics-exporter-fast,pod-template-hash=76897854c +default customer-orders-6c69574678-jl5wk 1/2 CreateContainerConfigError 0 4h41m 10.244.0.19 kind-control-plane app=customer-orders,pod-template-hash=6c69574678 +default customer-orders-for-medium-7744d956fb-dd4hh 1/2 CreateContainerConfigError 2 (6h50m ago) 2d6h 10.244.0.7 kind-control-plane app=customer-orders-for-medium,pod-template-hash=7744d956fb +default customer-relations-webapp-7c67c65579-kdf9r 0/1 ImagePullBackOff 0 4h41m 10.244.0.18 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default customer-relations-webapp-7c67c65579-l8rdf 0/1 ImagePullBackOff 0 4h41m 10.244.0.16 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default customer-relations-webapp-7c67c65579-m5b2s 0/1 ImagePullBackOff 0 4h41m 10.244.0.17 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default db-certs-authenticator-757f89d977-xjnbs 0/1 CrashLoopBackOff 49 (49s ago) 4h39m 10.244.0.40 kind-control-plane app=flask,pod-template-hash=757f89d977 +default get-data-ldxkt 0/1 Error 0 2d6h kind-control-plane batch.kubernetes.io/controller-uid=5315b81a-e300-47c0-8115-8e8e2316c910,batch.kubernetes.io/job-name=get-data,controller-uid=5315b81a-e300-47c0-8115-8e8e2316c910,job-name=get-data +default java-api-checker-frj2z 0/1 Error 0 4h39m 10.244.0.39 kind-control-plane batch.kubernetes.io/controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,job-name=java-api-checker +default java-api-checker-svhxl 0/1 Error 0 4h40m 10.244.0.35 kind-control-plane batch.kubernetes.io/controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,job-name=java-api-checker +default logging-agent 0/1 Init:CrashLoopBackOff 59 (3m44s ago) 4h41m 10.244.0.34 kind-control-plane +default my-http-app-deployment-75cdcff8b4-tgscj 1/1 Running 2 (6h50m ago) 2d5h 10.244.0.5 kind-control-plane app=my-http-app,pod-template-hash=75cdcff8b4 +default network-connector 1/1 Running 0 4h39m 10.244.0.41 kind-control-plane +default payment-processing-worker-747ccfb9db-pd6wz 0/1 CrashLoopBackOff 59 (4m14s ago) 4h41m 10.244.0.15 kind-control-plane app=payment-processing-worker,pod-template-hash=747ccfb9db +default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 16 (6h50m ago) 10d 10.244.0.13 kind-control-plane 
app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 +default robusta-forwarder-5c5fdbbf57-vcm8t 1/1 Running 8 (6h50m ago) 10d 10.244.0.4 kind-control-plane app=robusta-forwarder,pod-template-hash=5c5fdbbf57 +default robusta-grafana-8588b8fb85-fcm7f 3/3 Running 24 (6h50m ago) 10d 10.244.0.9 kind-control-plane app.kubernetes.io/instance=robusta,app.kubernetes.io/name=grafana,pod-template-hash=8588b8fb85 +default robusta-holmes-78fcd58947-djt9g 1/1 Running 10 (6h50m ago) 7d5h 10.244.0.6 kind-control-plane app=holmes,pod-template-hash=78fcd58947 +default robusta-kube-prometheus-st-operator-6885c8f675-56dbg 1/1 Running 16 (6h49m ago) 10d 10.244.0.12 kind-control-plane app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=6885c8f675,release=robusta +default robusta-kube-state-metrics-8667fd9775-kbsp7 1/1 Running 15 (6h50m ago) 10d 10.244.0.3 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=kube-state-metrics,app.kubernetes.io/part-of=kube-state-metrics,app.kubernetes.io/version=2.10.1,helm.sh/chart=kube-state-metrics-5.15.3,pod-template-hash=8667fd9775,release=robusta +default robusta-prometheus-node-exporter-g5kxh 1/1 Running 8 (6h50m ago) 10d 172.18.0.2 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7b4f94f6bf,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta +default robusta-runner-746d848db9-f8ns9 1/1 Running 17 (6h50m ago) 10d 10.244.0.2 kind-control-plane app=robusta-runner,pod-template-hash=746d848db9,robustaComponent=runner +default search-engine-service 0/1 Running 0 4h40m 10.244.0.36 kind-control-plane +default user-profile-import 0/1 Pending 0 4h40m +default user-profile-resources-659d4dd659-q7xnw 0/1 Pending 0 4h40m app=user-profile-resources,pod-template-hash=659d4dd659 +kube-system coredns-7c65d6cfc9-c24ls 1/1 Running 9 (6h50m ago) 11d 10.244.0.10 kind-control-plane k8s-app=kube-dns,pod-template-hash=7c65d6cfc9 +kube-system coredns-7c65d6cfc9-cphtd 1/1 Running 9 (6h50m ago) 11d 10.244.0.11 kind-control-plane k8s-app=kube-dns,pod-template-hash=7c65d6cfc9 +kube-system etcd-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=etcd,tier=control-plane +kube-system kindnet-wns4v 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane app=kindnet,controller-revision-hash=f87fcbc4c,k8s-app=kindnet,pod-template-generation=1,tier=node +kube-system kube-apiserver-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-apiserver,tier=control-plane +kube-system 
kube-controller-manager-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-controller-manager,tier=control-plane +kube-system kube-proxy-fhk5k 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane controller-revision-hash=77987969cc,k8s-app=kube-proxy,pod-template-generation=1 +kube-system kube-scheduler-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-scheduler,tier=control-plane +local-path-storage local-path-provisioner-57c5987fd4-gdjv2 1/1 Running 18 (6h49m ago) 11d 10.244.0.8 kind-control-plane app=local-path-provisioner,pod-template-hash=57c5987fd4 +sock-shop carts-db-676c6b5865-5rsfq 1/1 Running 0 4h41m 10.244.0.21 kind-control-plane name=carts-db,pod-template-hash=676c6b5865 +sock-shop carts-f54496f6d-kl2dj 1/1 Running 0 4h41m 10.244.0.20 kind-control-plane name=carts,pod-template-hash=f54496f6d +sock-shop catalogue-db-c948fd796-6mgbm 1/1 Running 0 4h41m 10.244.0.23 kind-control-plane name=catalogue-db,pod-template-hash=c948fd796 +sock-shop catalogue-f7687cb4-crpnn 1/1 Running 0 4h41m 10.244.0.22 kind-control-plane name=catalogue,pod-template-hash=f7687cb4 +sock-shop front-end-6bc5876d74-qkqwc 1/1 Running 0 4h41m 10.244.0.24 kind-control-plane name=front-end,pod-template-hash=6bc5876d74 +sock-shop orders-697cb4957d-t6slr 1/1 Running 0 4h41m 10.244.0.25 kind-control-plane name=orders,pod-template-hash=697cb4957d +sock-shop orders-db-658fc79675-5sdzp 1/1 Running 0 4h41m 10.244.0.26 kind-control-plane name=orders-db,pod-template-hash=658fc79675 +sock-shop payment-84bbbfd97f-4stdr 1/1 Running 0 4h41m 10.244.0.27 kind-control-plane name=payment,pod-template-hash=84bbbfd97f +sock-shop queue-master-566bfb8644-gjmh6 1/1 Running 0 4h41m 10.244.0.28 kind-control-plane name=queue-master,pod-template-hash=566bfb8644 +sock-shop rabbitmq-556cb847-4c26z 2/2 Running 0 4h41m 10.244.0.29 kind-control-plane name=rabbitmq,pod-template-hash=556cb847 +sock-shop session-db-59cc4b855f-7t59g 1/1 Running 0 4h41m 10.244.0.30 kind-control-plane name=session-db,pod-template-hash=59cc4b855f +sock-shop shipping-c4d77b86d-rzlwp 1/1 Running 0 4h41m 10.244.0.31 kind-control-plane name=shipping,pod-template-hash=c4d77b86d +sock-shop user-574f64957c-hxgd4 1/1 Running 0 4h41m 10.244.0.32 kind-control-plane name=user,pod-template-hash=574f64957c +sock-shop user-db-7f98f68489-tb2dx 1/1 Running 0 4h41m 10.244.0.33 kind-control-plane name=user-db,pod-template-hash=7f98f68489 + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt new file mode 100644 index 00000000..a9f46724 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get","match_params":{"kind": "deployment", "name": "payment-processing-worker", "namespace": "default"}} +stdout: +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR LABELS +payment-processing-worker 0/1 1 0 28m payment-processing-container bash app=payment-processing-worker + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt new file mode 100644 index 00000000..ee0c3ace --- /dev/null +++ 
b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get","match_params":{"kind": "pod", "name": "robusta-kube-state-metrics-8667fd9775-kbsp7", "namespace": "default"}} +stdout: +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES LABELS +robusta-kube-state-metrics-8667fd9775-kbsp7 1/1 Running 15 (157m ago) 10d 10.244.0.3 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=kube-state-metrics,app.kubernetes.io/part-of=kube-state-metrics,app.kubernetes.io/version=2.10.1,helm.sh/chart=kube-state-metrics-5.15.3,pod-template-hash=8667fd9775,release=robusta + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt new file mode 100644 index 00000000..772dee4e --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name":"payment-processing-worker-747ccfb9db-pd6wz","namespace":"default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml new file mode 100644 index 00000000..3cab0fee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml @@ -0,0 +1,5 @@ +expected_output: + - The `payment-processing-worker` deployment has not matched the expected number of replicas for over 15 minutes. + - The `payment-processing-worker` deployment is configured with 1 desired replica, but currently has 0 available replicas. + - "The deployment's condition indicates `Available: False` due to `MinimumReplicasUnavailable` and `Progressing: False` due to `ProgressDeadlineExceeded`" + - The pod `robusta-kube-state-metrics-8667fd9775-kbsp7` is running and has restarted 15 times over the past 10 days, but this is unrelated to the `payment-processing-worker` deployment. 
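For reference, the fixtures above use the list-based expected_output format that this patch introduces, where each bullet is scored by the new correctness classifier and compared against the threshold in the test case's evaluation block. A minimal, hypothetical test_case.yaml sketch (the wording of the bullets, the 0.7 threshold, and the comment are illustrative only, not taken from this patch) might look like:

    expected_output:
      - The payment-processing-worker deployment is unavailable because its only pod is in CrashLoopBackOff
      - The container exits because the DEPLOY_ENV environment variable is undefined
    evaluation:
      correctness: 0.7   # minimum correctness score the LLM answer must reach for the test to pass
    generate_mocks: False

Under this scheme the correctness score is intended to be roughly the fraction of expected elements that appear in the investigation output, so a threshold below 1 tolerates partially complete answers.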
diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json new file mode 100644 index 00000000..aaf669f4 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubePodCrashLooping", + "description": "", + "subject": { + "name": "payment-processing-worker-747ccfb9db-pd6wz", + "namespace": "default", + "kind": "pod", + "node": null, + "container": "payment-processing-container", + "cluster": null + }, + "context": { + "issue_type": "KubePodCrashLooping", + "robusta_issue_id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json new file mode 100644 index 00000000..75189dfb --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json @@ -0,0 +1,40 @@ +{ + "id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "name": "KubePodCrashLooping", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=max_over_time%28kube_pod_container_status_waiting_reason%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Creason%3D%22CrashLoopBackOff%22%7D%5B5m%5D%29+%3E%3D+1&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubePodCrashLooping", + "container": "payment-processing-container", + "endpoint": "http", + "instance": "10.244.0.3:8080", + "job": "kube-state-metrics", + "namespace": "default", + "pod": "payment-processing-worker-747ccfb9db-pd6wz", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "reason": "CrashLoopBackOff", + "service": "robusta-kube-state-metrics", + "severity": "warning", + "uid": "b9cc2b01-9250-4360-941f-40e522628f6c" + }, + "annotations": { + "description": "Pod default/payment-processing-worker-747ccfb9db-pd6wz (payment-processing-container) is in waiting state (reason: \"CrashLoopBackOff\").", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping", + "summary": "Pod is crash looping." 
+ }, + "startsAt": "2024-11-29 08:40:57.669000+00:00", + "endsAt": "2024-11-29 09:16:27.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=max_over_time%28kube_pod_container_status_waiting_reason%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Creason%3D%22CrashLoopBackOff%22%7D%5B5m%5D%29+%3E%3D+1&g0.tab=1", + "fingerprint": "a38bd979bba359db", + "unique_id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "duration": "0:35:30", + "name": "KubePodCrashLooping", + "definition": "max_over_time(kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",namespace=~\".*\",reason=\"CrashLoopBackOff\"}[5m]) >= 1" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:40:57 UTC\n*Duration*: 35 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubePodCrashLooping\n• *container*: payment-processing-container\n• *endpoint*: http\n• *instance*: 10.244.0.3:8080\n• *job*: kube-state-metrics\n• *namespace*: default\n• *pod*: payment-processing-worker-747ccfb9db-pd6wz\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *reason*: CrashLoopBackOff\n• *service*: robusta-kube-state-metrics\n• *severity*: warning\n• *uid*: b9cc2b01-9250-4360-941f-40e522628f6c\n*Annotations:*\n• *description*: Pod default/payment-processing-worker-747ccfb9db-pd6wz (payment-processing-container) is in waiting state (reason: \"CrashLoopBackOff\").\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping\n• *summary*: Pod is crash looping.\n" +} diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt new file mode 100644 index 00000000..a884efec --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt @@ -0,0 +1,72 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "pod", "name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Name: payment-processing-worker-747ccfb9db-pd6wz +Namespace: default +Priority: 0 +Service Account: default +Node: kind-control-plane/172.18.0.2 +Start Time: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: app=payment-processing-worker + pod-template-hash=747ccfb9db +Annotations: +Status: Running +IP: 10.244.0.15 +IPs: + IP: 10.244.0.15 +Controlled By: ReplicaSet/payment-processing-worker-747ccfb9db +Containers: + payment-processing-container: + Container ID: containerd://fc89df0a97d127b9d9af7e5fa222d79655c679caadb798f92598825cf7b9b6f1 + Image: bash + Image ID: docker.io/library/bash@sha256:4bbfbe07eceeed5ab9136b37faf4f5cff3c28a339087ce068a76f2c1733054e8 + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + State: Waiting + Reason: CrashLoopBackOff + Last State: Terminated + Reason: Completed + Exit Code: 0 + Started: Fri, 29 Nov 2024 10:10:59 +0100 + Finished: Fri, 29 Nov 2024 10:10:59 +0100 + Ready: False + Restart Count: 14 + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-lfjgv (ro) +Conditions: + Type Status + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True +Volumes: + kube-api-access-lfjgv: + Type: 
Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal Scheduled 48m default-scheduler Successfully assigned default/payment-processing-worker-747ccfb9db-pd6wz to kind-control-plane + Normal Pulled 48m kubelet Successfully pulled image "bash" in 1.079s (1.079s including waiting). Image size: 6335174 bytes. + Normal Pulled 48m kubelet Successfully pulled image "bash" in 827ms (5.062s including waiting). Image size: 6335174 bytes. + Normal Pulled 48m kubelet Successfully pulled image "bash" in 822ms (822ms including waiting). Image size: 6335174 bytes. + Normal Created 47m (x4 over 48m) kubelet Created container payment-processing-container + Normal Started 47m (x4 over 48m) kubelet Started container payment-processing-container + Normal Pulled 47m kubelet Successfully pulled image "bash" in 805ms (805ms including waiting). Image size: 6335174 bytes. + Normal Pulling 46m (x5 over 48m) kubelet Pulling image "bash" + Warning BackOff 3m30s (x206 over 48m) kubelet Back-off restarting failed container payment-processing-container in pod payment-processing-worker-747ccfb9db-pd6wz_default(b9cc2b01-9250-4360-941f-40e522628f6c) + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt new file mode 100644 index 00000000..626ee21f --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt new file mode 100644 index 00000000..49f5f32d --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_previous_logs","match_params":{"name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml new file mode 100644 index 00000000..04ad5d3e --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml @@ -0,0 +1,3 @@ +expected_output: + - The DEPLOY_ENV environment variable is missing +generate_mocks: True diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json 
b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json new file mode 100644 index 00000000..6fc4434c --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubePodNotReady", + "description": "", + "subject": { + "name": "user-profile-import", + "namespace": "default", + "kind": "pod", + "node": null, + "container": null, + "cluster": null + }, + "context": { + "issue_type": "KubePodNotReady", + "robusta_issue_id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json new file mode 100644 index 00000000..d321db2a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json @@ -0,0 +1,33 @@ +{ + "id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "name": "KubePodNotReady", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=sum+by+%28namespace%2C+pod%2C+cluster%29+%28max+by+%28namespace%2C+pod%2C+cluster%29+%28kube_pod_status_phase%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Cphase%3D~%22Pending%7CUnknown%7CFailed%22%7D%29+%2A+on+%28namespace%2C+pod%2C+cluster%29+group_left+%28owner_kind%29+topk+by+%28namespace%2C+pod%2C+cluster%29+%281%2C+max+by+%28namespace%2C+pod%2C+owner_kind%2C+cluster%29+%28kube_pod_owner%7Bowner_kind%21%3D%22Job%22%7D%29%29%29+%3E+0&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubePodNotReady", + "namespace": "default", + "pod": "user-profile-import", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "warning" + }, + "annotations": { + "description": "Pod default/user-profile-import has been in a non-ready state for longer than 15 minutes.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready", + "summary": "Pod has been in a non-ready state for more than 15 minutes." 
+ }, + "startsAt": "2024-11-29 08:40:27.669000+00:00", + "endsAt": "2024-11-29 09:17:27.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=sum+by+%28namespace%2C+pod%2C+cluster%29+%28max+by+%28namespace%2C+pod%2C+cluster%29+%28kube_pod_status_phase%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Cphase%3D~%22Pending%7CUnknown%7CFailed%22%7D%29+%2A+on+%28namespace%2C+pod%2C+cluster%29+group_left+%28owner_kind%29+topk+by+%28namespace%2C+pod%2C+cluster%29+%281%2C+max+by+%28namespace%2C+pod%2C+owner_kind%2C+cluster%29+%28kube_pod_owner%7Bowner_kind%21%3D%22Job%22%7D%29%29%29+%3E+0&g0.tab=1", + "fingerprint": "02d9413092785cdd", + "unique_id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "duration": "0:37:00", + "name": "KubePodNotReady", + "definition": "sum by (namespace, pod, cluster) (max by (namespace, pod, cluster) (kube_pod_status_phase{job=\"kube-state-metrics\",namespace=~\".*\",phase=~\"Pending|Unknown|Failed\"}) * on (namespace, pod, cluster) group_left (owner_kind) topk by (namespace, pod, cluster) (1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"}))) > 0" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:40:27 UTC\n*Duration*: 37 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubePodNotReady\n• *namespace*: default\n• *pod*: user-profile-import\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: warning\n*Annotations:*\n• *description*: Pod default/user-profile-import has been in a non-ready state for longer than 15 minutes.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready\n• *summary*: Pod has been in a non-ready state for more than 15 minutes.\n" +} diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt new file mode 100644 index 00000000..65296093 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt @@ -0,0 +1,41 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "pod", "name": "user-profile-import", "namespace": "default"}} +stdout: +Name: user-profile-import +Namespace: default +Priority: 0 +Service Account: default +Node: +Labels: +Annotations: +Status: Pending +IP: +IPs: +Containers: + nginx: + Image: nginx + Port: + Host Port: + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-47fsg (ro) +Conditions: + Type Status + PodScheduled False +Volumes: + kube-api-access-47fsg: + Type: Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: label=someLabel +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning FailedScheduling 48m default-scheduler 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + Warning FailedScheduling 33m (x3 over 43m) default-scheduler 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. 
preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt new file mode 100644 index 00000000..014bd45f --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt @@ -0,0 +1,7 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_events","match_params":{"resource_type": "pod", "pod_name": "user-profile-import", "namespace": "default"}} +stdout: +LAST SEEN TYPE REASON OBJECT MESSAGE +48m Warning FailedScheduling Pod/user-profile-import 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. +33m (x3 over 43m) Warning FailedScheduling Pod/user-profile-import 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt new file mode 100644 index 00000000..c31a05e3 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt @@ -0,0 +1,4 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name": "user-profile-import", "namespace": "default"}} +stdout: + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt new file mode 100644 index 00000000..f7541481 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt @@ -0,0 +1,4 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_previous_logs","match_params":{"name": "user-profile-import", "namespace": "default"}} +stdout: + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml new file mode 100644 index 00000000..3594c542 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml @@ -0,0 +1,2 @@ +expected_output: + - This must mention insufficient GPU resources diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json b/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json new file mode 100644 index 00000000..9d97f345 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json @@ -0,0 +1,15 @@ +{ + "source": "prometheus", + "title": "Watchdog", + "description": "", + "subject": {}, + "context": { + "issue_type": "Watchdog", + "robusta_issue_id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git 
a/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json b/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json new file mode 100644 index 00000000..c0d2bf21 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json @@ -0,0 +1,31 @@ +{ + "id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "name": "Watchdog", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=vector%281%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "Watchdog", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "none" + }, + "annotations": { + "description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/general/watchdog", + "summary": "An alert that should always be firing to certify that Alertmanager is working properly." + }, + "startsAt": "2024-11-29 06:15:32.570000+00:00", + "endsAt": "2024-11-29 09:18:02.570000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=vector%281%29&g0.tab=1", + "fingerprint": "f482596675d33088", + "unique_id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "duration": "3:02:30", + "name": "Watchdog", + "definition": "vector(1)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: none\n*Start Time*: 2024-11-29 06:15:32 UTC\n*Duration*: 3 hours", + "presentation_all_metadata": "*Labels:*\n• *alertname*: Watchdog\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: none\n*Annotations:*\n• *description*: This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog\n• *summary*: An alert that should always be firing to certify that Alertmanager is working properly.\n" +} diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json b/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml b/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml new file mode 100644 index 00000000..247d94ed --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml @@ -0,0 +1,4 @@ +expected_output: + - This is not an issue + - The `Watchdog` alert is designed to always be active. It serves as a health check for the alerting pipeline, ensuring that the Alertmanager is functioning correctly. 
+ - This alert should always be firing, and if it stops, it indicates a problem with the alerting system. diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index b6b8cfc2..ca7b4039 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -8,13 +8,12 @@ from holmes.core.tool_calling_llm import LLMResult, ToolCallingLLM from holmes.core.tools import ToolExecutor import tests.llm.utils.braintrust as braintrust_util -from tests.llm.utils.classifiers import get_context_classifier +from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality from tests.llm.utils.commands import after_test, before_test from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import readable_timestamp from tests.llm.utils.mock_toolset import MockToolsets -from autoevals.llm import Factuality from tests.llm.utils.mock_utils import AskHolmesTestCase, MockHelper from tests.llm.utils.system import get_machine_state_tags from os import path @@ -53,13 +52,13 @@ def idfn(val): @pytest.mark.llm @pytest.mark.skipif(not os.environ.get('BRAINTRUST_API_KEY'), reason="BRAINTRUST_API_KEY must be set to run LLM evaluations") @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) -def test_ask_holmes_with_braintrust(experiment_name, test_case): +def test_ask_holmes(experiment_name, test_case): bt_helper = braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) - eval_factuality = Factuality() + try: before_test(test_case) @@ -76,26 +75,32 @@ def test_ask_holmes_with_braintrust(experiment_name, test_case): output = result.result expected = test_case.expected_output - scores = { - "faithfulness": eval_factuality(output, expected, input=input).score - } + + scores = {} + + if isinstance(expected, list): + scores["correctness"] = evaluate_correctness(output=output, expected_elements=expected).score + else: + scores["faithfulness"] = evaluate_factuality(output=output, expected=expected, input=input).score if len(test_case.retrieval_context) > 0: - evaluate_context_usage = get_context_classifier(test_case.retrieval_context) - scores["context"] = evaluate_context_usage(output, expected, input=input).score + scores["context"] = evaluate_context_usage(output=output, context_items=test_case.retrieval_context, input=input).score bt_helper.end_evaluation( eval=eval, input=input, output=output or "", - expected=expected, + expected=str(expected), id=test_case.id, scores=scores ) print(f"** OUTPUT **\n{output}") print(f"** SCORES **\n{scores}") - assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + if scores.get("faithfulness"): + assert scores.get("faithfulness", 0) >= test_case.evaluation.faithfulness + if scores.get("correctness"): + assert scores.get("correctness", 0) >= test_case.evaluation.correctness assert scores.get("context", 0) >= test_case.evaluation.context @@ -113,7 +118,7 @@ def ask_holmes(test_case:AskHolmesTestCase) -> LLMResult: ai = ToolCallingLLM( tool_executor=tool_executor, max_steps=10, - llm=DefaultLLM("gpt-4o") + llm=DefaultLLM(os.environ.get("MODEL", "gpt-4o")) ) chat_request = ChatRequest(ask=test_case.user_prompt) diff --git a/tests/llm/test_investigate.py b/tests/llm/test_investigate.py index 226a3cf6..7f0a3703 100644 --- a/tests/llm/test_investigate.py +++ b/tests/llm/test_investigate.py @@ -3,7 +3,6 @@ from pathlib import Path from typing import Optional -from 
autoevals import Factuality import pytest from rich.console import Console @@ -12,7 +11,7 @@ from holmes.core.investigation import investigate_issues from holmes.core.supabase_dal import SupabaseDal from holmes.core.tools import ToolExecutor, ToolsetPattern -from tests.llm.utils.classifiers import get_context_classifier, get_logs_explanation_classifier +from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality, evaluate_previous_logs_mention from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import get_machine_state_tags, readable_timestamp from tests.llm.utils.mock_dal import MockSupabaseDal @@ -46,8 +45,6 @@ def create_tool_executor( return ToolExecutor(mock.mocked_toolsets) - - def get_test_cases(): unique_test_id = os.environ.get("PYTEST_XDIST_TESTRUNUID", readable_timestamp()) @@ -77,9 +74,9 @@ def idfn(val): @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) def test_investigate(experiment_name, test_case): - eval_factuality = Factuality() - config = MockConfig(test_case) + config.model = os.environ.get("MODEL", "gpt-4o") + mock_dal = MockSupabaseDal( test_case_folder=Path(test_case.folder), generate_mocks=test_case.generate_mocks, @@ -108,28 +105,31 @@ def test_investigate(experiment_name, test_case): output = result.analysis - evaluate_logs_explanation = get_logs_explanation_classifier() - factuality = eval_factuality(output, expected, input=input) - previous_logs = evaluate_logs_explanation(output, expected, input=input) - scores = { - "faithfulness": factuality.score, - "previous_logs": previous_logs.score - } + scores = {} + + if isinstance(expected, list): + scores["correctness"] = evaluate_correctness(output=output, expected_elements=expected).score + else: + scores["faithfulness"] = evaluate_factuality(output=output, expected=expected, input=input).score + scores["previous_logs"] = evaluate_previous_logs_mention(output=output).score if len(test_case.retrieval_context) > 0: - evaluate_context_usage = get_context_classifier(test_case.retrieval_context) - scores["context"] = evaluate_context_usage(output, expected, input=input).score + scores["context"] = evaluate_context_usage(input=input, output=output, context_items=test_case.retrieval_context).score bt_helper.end_evaluation( eval=eval, input=input, output=output or "", - expected=expected, + expected=str(expected), id=test_case.id, scores=scores ) print(f"** OUTPUT **\n{output}") print(f"** SCORES **\n{scores}") - assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + if scores.get("faithfulness"): + assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + + if scores.get("correctness"): + assert scores.get("correctness") >= test_case.evaluation.correctness assert scores.get("context", 0) >= test_case.evaluation.context diff --git a/tests/llm/utils/braintrust.py b/tests/llm/utils/braintrust.py index f79c39b3..6cca2314 100644 --- a/tests/llm/utils/braintrust.py +++ b/tests/llm/utils/braintrust.py @@ -1,6 +1,6 @@ import braintrust -from braintrust import Dataset, Experiment, ReadonlyExperiment +from braintrust import Dataset, Experiment, ReadonlyExperiment, Span import logging from typing import Any, List, Optional @@ -78,7 +78,7 @@ def upload_test_cases(self, test_cases:List[HolmesTestCase]): def resolve_dataset_item(self, test_case:HolmesTestCase) -> Optional[Any]: return find_dataset_row_by_test_case(self.dataset, test_case) - def start_evaluation(self, experiment_name:str, name:str): + 
def start_evaluation(self, experiment_name:str, name:str) -> Span: if not self.experiment: experiment:Experiment|ReadonlyExperiment = braintrust.init( project=self.project_name, @@ -93,7 +93,7 @@ def start_evaluation(self, experiment_name:str, name:str): self.experiment = experiment return self.experiment.start_span(name=name) - def end_evaluation(self, eval:Any, input:str, output:str, expected:str, id:str, scores:dict[str, Any]): + def end_evaluation(self, eval:Span, input:str, output:str, expected:str, id:str, scores:dict[str, Any]): if not self.experiment: raise Exception("start_evaluation() must be called before end_evaluation()") diff --git a/tests/llm/utils/classifiers.py b/tests/llm/utils/classifiers.py index 32570675..08281e9c 100644 --- a/tests/llm/utils/classifiers.py +++ b/tests/llm/utils/classifiers.py @@ -1,7 +1,10 @@ -from typing import List -from autoevals import LLMClassifier +from typing import List, Optional +from autoevals import Factuality, LLMClassifier +import os -def get_context_classifier(context_items:List[str]): +classifier_model = os.environ.get("CLASSIFIER_MODEL", "gpt-4o-mini") + +def evaluate_context_usage(context_items:List[str], output:Optional[str], input:Optional[str]): context = "\n- ".join(context_items) prompt_prefix = f""" CONTEXT @@ -24,19 +27,21 @@ def get_context_classifier(context_items:List[str]): Evaluate which of the following describes the OUTPUT best and return the related letter: A. No item mentioned in the CONTEXT is mentioned in the ANSWER B. Less than half of items present in the CONTEXT are mentioned in the ANSWER C. More than half of items present in the CONTEXT are mentioned in the ANSWER D. All items present in the CONTEXT are mentioned in the ANSWER """ - return LLMClassifier( + classifier = LLMClassifier( name="ContextPrecision", prompt_template=prompt_prefix, choice_scores={"A": 0, "B": 0.33, "C": 0.67, "D": 1}, use_cot=True, + model=classifier_model ) + return classifier(input=input, output=output, expected=None) -def get_logs_explanation_classifier(): +def evaluate_previous_logs_mention(output:Optional[str]): prompt_prefix = """ OUTPUT @@ -52,9 +57,46 @@ def get_logs_explanation_classifier(): D. OUTPUT mentions both "logs" and "previous logs" but presents both as having the same meaning """ - return LLMClassifier( + classifier = LLMClassifier( name="LogsExplanation", prompt_template=prompt_prefix, choice_scores={"A": 1, "B": 1, "C": 0, "D": 1}, use_cot=True, + model=classifier_model ) + return classifier(input=None, output=output, expected=None) + + +def evaluate_correctness(expected_elements:List[str], output:Optional[str]): + prompt_prefix = """ + You are evaluating the correctness of a response by an LLM. You must return a score between 0 and 1 that represents the correctness of the response/OUTPUT from the LLM. + The correctness is defined by the number of EXPECTED ELEMENTS present in the output. Correctness is 1 if all elements are present and 0 if none are present. + The correctness score should be proportional to the number of EXPECTED ELEMENTS present in the OUTPUT. + + EXPECTED ELEMENTS + ================= + + - {{"\n- ".join(expected_elements)}} + + OUTPUT + ====== + + {{output}} + + + Return a score between 0 and 1 that is proportional to the number of EXPECTED ELEMENTS present in the OUTPUT.
+ """ + + classifier = LLMClassifier( + name="Correctness", + prompt_template=prompt_prefix, + choice_scores={"A": 1, "B": 1, "C": 0, "D": 1}, + use_cot=True, + model=classifier_model + ) + return classifier(input=input, output=output, expected=None) + + +def evaluate_factuality(input:Optional[str], output:Optional[str], expected:Optional[str]): + eval_factuality = Factuality() + return eval_factuality(input=input, output=output, expected=expected) diff --git a/tests/llm/utils/mock_utils.py b/tests/llm/utils/mock_utils.py index 5e8c104e..b9d3cc31 100644 --- a/tests/llm/utils/mock_utils.py +++ b/tests/llm/utils/mock_utils.py @@ -1,13 +1,12 @@ import json -import braintrust from typing_extensions import Dict import yaml import logging import os import re from pathlib import Path -from typing import Generic, List, Optional, TypeVar, Union, cast +from typing import List, Optional, TypeVar, Union, cast from pydantic import BaseModel, TypeAdapter from holmes.core.models import InvestigateRequest @@ -19,12 +18,12 @@ def read_file(file_path:Path): with open(file_path, 'r', encoding='utf-8') as file: return file.read().strip() - TEST_CASE_ID_PATTERN = r'^[\d+]_(?:[a-z]+_)*[a-z]+$' CONFIG_FILE_NAME = "test_case.yaml" class LLMEvaluation(BaseModel): faithfulness: float = 0.3 + correctness: float = 0.3 context: float = 0 class Message(BaseModel): @@ -36,7 +35,7 @@ class HolmesTestCase(BaseModel): id: str folder: str generate_mocks: bool = False # If True, generate mocks - expected_output: str # Whether an output is expected + expected_output: Union[str, List[str]] # Whether an output is expected evaluation: LLMEvaluation = LLMEvaluation() retrieval_context: List[str] = [] # Elements helping to evaluate the correctness of the LLM response tool_mocks: List[ToolMock] = [] From c52f4e0388ad8bc3a32c7ac1d4b7e1168cd59f63 Mon Sep 17 00:00:00 2001 From: Tomer Date: Mon, 9 Dec 2024 11:53:27 +0200 Subject: [PATCH 2/4] Use Contributor Covenant Code of Conduct v2.1 (#223) --- CODE_OF_CONDUCT.md | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..1dfc9964 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +conduct@robusta.dev. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations

From e36db3422bcac05d9356ecc0d957e008a1316d35 Mon Sep 17 00:00:00 2001
From: Tomer
Date: Mon, 9 Dec 2024 13:14:27 +0200
Subject: [PATCH 3/4] add contributing doc page (#225)

---
 CONTRIBUTING.md | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..6d95164c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,35 @@
+# Contributing
+
+## Before you get started
+
+### Code of Conduct
+
+Please make sure to read and observe our [Code of Conduct](https://github.com/robusta-dev/holmesgpt?tab=coc-ov-file).
+
+### Install requirements
+- Python `3.11`
+  - poetry `1.8.4` & up
+- An LLM API key is required to use and test HolmesGPT
+  - OpenAI's `gpt-4o` is recommended.
+  - For details see [Getting an API Key](https://github.com/robusta-dev/holmesgpt?tab=readme-ov-file#getting-an-api-key).
+
+## Reporting bugs
+
+We encourage code contributions and also appreciate it when issues are reported.
+
+- Create a new issue and label it as `bug`
+- Clearly state how to reproduce the bug:
+  - Which LLM you've used
+  - Which steps are required to reproduce
+  - Whether it always reproduces or only occasionally (LLM answers may differ between runs)
+
+
+## Contributing Code
+
+- Fork the repository and clone it locally.
+- Create a new branch and make your changes
+- Add or update tests to ensure your changes are covered.
+- Run `pytest` to verify all tests pass.
+- Keep pull requests small and focused. If you have multiple changes, open a separate PR for each.
+- Create a pull request back to the upstream repository.
+- Wait for a review and address any comments From c4743a016866e2a216897518059d326e4d97aaa7 Mon Sep 17 00:00:00 2001 From: Tomer Date: Mon, 9 Dec 2024 13:46:42 +0200 Subject: [PATCH 4/4] Create MAINTAINERS.md (#226) --- MAINTAINERS.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 MAINTAINERS.md diff --git a/MAINTAINERS.md b/MAINTAINERS.md new file mode 100644 index 00000000..4cad4de5 --- /dev/null +++ b/MAINTAINERS.md @@ -0,0 +1,19 @@ +# Maintainers + +## Current + +| Maintainer | GitHub ID | +| --------------------|---------------------------------------------------| +| Natan Yellin | [aantn](https://github.com/aantn) | +| Arik Alon | [arikalon1](https://github.com/arikalon1) | +| Avi Kotlicky | [Avi-Robusta](https://github.com/Avi-Robusta) | +| Dmytro Chievtaiev | [itisallgood](https://github.com/itisallgood) | +| Pavan Gudiwada | [pavangudiwada](https://github.com/pavangudiwada) | +| Nicolas Herment | [nherment](https://github.com/nherment) | +| Tomer Keshet | [Sheeproid](https://github.com/Sheeproid) | +| Roi Glinik | [RoiGlinik](https://github.com/RoiGlinik) | +| Moshe Morad | [moshemorad](https://github.com/moshemorad) | + +## Emeritus + +N/A
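
For reviewers who want to try the new correctness scorer from PATCH 1/4 outside the test suite, here is a minimal sketch; it is not part of the patches above. It assumes a repository checkout with the Poetry environment installed and credentials for the classifier model (`CLASSIFIER_MODEL`, defaulting to `gpt-4o-mini`) available in the environment; the expected elements and the sample output below are made up for illustration only.

```python
# Minimal sketch: calls evaluate_correctness() from PATCH 1/4 directly.
# Assumes the repo's test dependencies are installed and LLM credentials
# (e.g. OPENAI_API_KEY) are configured; run from the repository root.
from tests.llm.utils.classifiers import evaluate_correctness

# Hypothetical expected elements and model output, for illustration only.
expected_elements = [
    "the pod was OOMKilled",
    "the memory limit is 128Mi",
]
output = "The pod was OOMKilled because it exceeded its 128Mi memory limit."

result = evaluate_correctness(expected_elements=expected_elements, output=output)
print(result.score)  # higher when more of the expected elements appear in the output

# test_investigate.py compares this score against the per-test-case threshold,
# e.g. `assert result.score >= test_case.evaluation.correctness` (default 0.3).
```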