From 5f4eaf81ad274c4c1c51edc6cc8b297c4df5caa5 Mon Sep 17 00:00:00 2001 From: Nicolas Herment Date: Mon, 9 Dec 2024 09:14:14 +0100 Subject: [PATCH 1/4] test: llm eval, add correctness score, port alertmanager test cases (#216) --- holmes/main.py | 1 + poetry.lock | 180 +++++++++--------- tests/llm/README.md | 8 +- .../kubectl_find_resource.txt | 48 ++--- .../kubectl_find_resource_pod_by_keyword.txt | 48 ++--- .../01_how_many_pods/test_case.yaml | 8 +- .../02_what_is_wrong_with_pod/test_case.yaml | 4 +- .../test_case.yaml | 10 +- .../04_related_k8s_events/test_case.yaml | 17 +- .../05_image_version/test_case.yaml | 3 +- .../06_explain_issue/test_case.yaml | 9 +- .../07_high_latency/test_case.yaml | 7 +- .../08_sock_shop_frontend/test_case.yaml | 11 +- .../09_crashpod/test_case.yaml | 5 +- .../10_image_pull_backoff/test_case.yaml | 20 +- .../11_init_containers/test_case.yaml | 9 +- .../12_job_crashing/test_case.yaml | 9 +- .../13_pending_node_selector/test_case.yaml | 8 +- .../14_pending_resources/test_case.yaml | 6 +- .../15_failed_readiness_probe/test_case.yaml | 18 +- .../16_failed_no_toolset_found/test_case.yaml | 5 +- .../17_oom_kill/test_case.yaml | 8 +- .../18_crash_looping_v2/test_case.yaml | 18 +- .../test_case.yaml | 5 +- .../kubectl_find_resource_pod_by_keyword.txt | 23 --- .../1_how_many_pods/kubectl_get_node.txt | 3 - .../1_how_many_pods/kubectl_get_pod.txt | 2 - .../1_how_many_pods/kubectl_top_pods.txt | 6 - .../20_long_log_file_search/test_case.yaml | 3 +- .../test_case.yaml | 12 +- .../22_high_latency_dbi_down/test_case.yaml | 5 +- .../test_case.yaml | 8 +- .../24_misconfigured_pvc/test_case.yaml | 6 +- .../test_case.yaml | 8 +- .../01_oom_kill/test_case.yaml | 32 +--- .../02_crashloop_backoff/test_case.yaml | 35 +--- .../03_cpu_throttling/test_case.yaml | 32 +--- .../04_image_pull_backoff/test_case.yaml | 25 +-- .../05_crashpod/test_case.yaml | 32 +--- .../06_job_failure/test_case.yaml | 22 +-- .../07_job_syntax_error/test_case.yaml | 32 +--- .../08_memory_pressure/test_case.yaml | 24 +-- .../09_high_latency/test_case.yaml | 22 +-- .../investigate_request.json | 15 ++ .../issue_data.json | 31 +++ .../kubectl_get_all.txt | 6 + .../resource_instructions.json | 6 + .../test_case.yaml | 3 + .../investigate_request.json | 22 +++ .../issue_data.json | 39 ++++ .../kubectl_describe.txt | 42 ++++ .../kubectl_describe_pod.txt | 64 +++++++ .../kubectl_get_all.txt | 54 ++++++ .../kubectl_get_deployment.txt | 6 + .../kubectl_get_pod.txt | 6 + .../kubectl_logs.txt | 5 + .../resource_instructions.json | 1 + .../test_case.yaml | 5 + .../investigate_request.json | 22 +++ .../12_KubePodCrashLooping/issue_data.json | 40 ++++ .../kubectl_describe.txt | 72 +++++++ .../12_KubePodCrashLooping/kubectl_logs.txt | 5 + .../kubectl_previous_logs.txt | 5 + .../resource_instructions.json | 1 + .../12_KubePodCrashLooping/test_case.yaml | 3 + .../investigate_request.json | 22 +++ .../13_KubePodNotReady/issue_data.json | 33 ++++ .../13_KubePodNotReady/kubectl_describe.txt | 41 ++++ .../13_KubePodNotReady/kubectl_events.txt | 7 + .../13_KubePodNotReady/kubectl_logs.txt | 4 + .../kubectl_previous_logs.txt | 4 + .../resource_instructions.json | 1 + .../13_KubePodNotReady/test_case.yaml | 2 + .../14_Watchdog/investigate_request.json | 15 ++ .../14_Watchdog/issue_data.json | 31 +++ .../14_Watchdog/resource_instructions.json | 1 + .../14_Watchdog/test_case.yaml | 4 + tests/llm/test_ask_holmes.py | 29 +-- tests/llm/test_investigate.py | 34 ++-- tests/llm/utils/braintrust.py | 6 +- tests/llm/utils/classifiers.py | 
56 +++++- tests/llm/utils/mock_utils.py | 7 +- 82 files changed, 992 insertions(+), 555 deletions(-) delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt delete mode 100644 tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt create mode 100644 tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json create mode 100644 
tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json create mode 100644 tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml diff --git a/holmes/main.py b/holmes/main.py index bfc0b79d..be5bb12f 100644 --- a/holmes/main.py +++ b/holmes/main.py @@ -394,6 +394,7 @@ def alertmanager( results.append({"issue": issue.model_dump(), "result": result.model_dump()}) handle_result(result, console, destination, config, issue, False, True) + if json_output_file: write_json_file(json_output_file, results) diff --git a/poetry.lock b/poetry.lock index 203e57bf..93687fc9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,87 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.11.9" +version = "3.11.10" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" files = [ - {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0411777249f25d11bd2964a230b3ffafcbed6cd65d0f2b132bc2b8f5b8c347c7"}, - {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:499368eb904566fbdf1a3836a1532000ef1308f34a1bcbf36e6351904cced771"}, - {file = "aiohttp-3.11.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b5a5009b0159a8f707879dc102b139466d8ec6db05103ec1520394fdd8ea02c"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:176f8bb8931da0613bb0ed16326d01330066bb1e172dd97e1e02b1c27383277b"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6435a66957cdba1a0b16f368bde03ce9c79c57306b39510da6ae5312a1a5b2c1"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:202f40fb686e5f93908eee0c75d1e6fbe50a43e9bd4909bf3bf4a56b560ca180"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39625703540feb50b6b7f938b3856d1f4886d2e585d88274e62b1bd273fae09b"}, - {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6beeac698671baa558e82fa160be9761cf0eb25861943f4689ecf9000f8ebd0"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96726839a42429318017e67a42cca75d4f0d5248a809b3cc2e125445edd7d50d"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3f5461c77649358610fb9694e790956b4238ac5d9e697a17f63619c096469afe"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4313f3bc901255b22f01663eeeae167468264fdae0d32c25fc631d5d6e15b502"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:d6e274661c74195708fc4380a4ef64298926c5a50bb10fbae3d01627d7a075b7"}, - {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:db2914de2559809fdbcf3e48f41b17a493b58cb7988d3e211f6b63126c55fe82"}, - {file = "aiohttp-3.11.9-cp310-cp310-win32.whl", hash = "sha256:27935716f8d62c1c73010428db310fd10136002cfc6d52b0ba7bdfa752d26066"}, - {file = "aiohttp-3.11.9-cp310-cp310-win_amd64.whl", hash = "sha256:afbe85b50ade42ddff5669947afde9e8a610e64d2c80be046d67ec4368e555fa"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:afcda759a69c6a8be3aae764ec6733155aa4a5ad9aad4f398b52ba4037942fe3"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5bba6b83fde4ca233cfda04cbd4685ab88696b0c8eaf76f7148969eab5e248a"}, - {file = "aiohttp-3.11.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:442356e8924fe1a121f8c87866b0ecdc785757fd28924b17c20493961b3d6697"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f737fef6e117856400afee4f17774cdea392b28ecf058833f5eca368a18cf1bf"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea142255d4901b03f89cb6a94411ecec117786a76fc9ab043af8f51dd50b5313"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e1e9e447856e9b7b3d38e1316ae9a8c92e7536ef48373de758ea055edfd5db5"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7f6173302f8a329ca5d1ee592af9e628d3ade87816e9958dcf7cdae2841def7"}, - {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c6147c6306f537cff59409609508a1d2eff81199f0302dd456bb9e7ea50c39"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e9d036a9a41fc78e8a3f10a86c2fc1098fca8fab8715ba9eb999ce4788d35df0"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2ac9fd83096df36728da8e2f4488ac3b5602238f602706606f3702f07a13a409"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d3108f0ad5c6b6d78eec5273219a5bbd884b4aacec17883ceefaac988850ce6e"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:96bbec47beb131bbf4bae05d8ef99ad9e5738f12717cfbbf16648b78b0232e87"}, - {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fc726c3fa8f606d07bd2b500e5dc4c0fd664c59be7788a16b9e34352c50b6b6b"}, - {file = "aiohttp-3.11.9-cp311-cp311-win32.whl", hash = "sha256:5720ebbc7a1b46c33a42d489d25d36c64c419f52159485e55589fbec648ea49a"}, - {file = "aiohttp-3.11.9-cp311-cp311-win_amd64.whl", hash = "sha256:17af09d963fa1acd7e4c280e9354aeafd9e3d47eaa4a6bfbd2171ad7da49f0c5"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1f2d7fd583fc79c240094b3e7237d88493814d4b300d013a42726c35a734bc9"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4b8a1b6c7a68c73191f2ebd3bf66f7ce02f9c374e309bdb68ba886bbbf1b938"}, - {file = "aiohttp-3.11.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd3f711f4c99da0091ced41dccdc1bcf8be0281dc314d6d9c6b6cf5df66f37a9"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cb1a1326a0264480a789e6100dc3e07122eb8cd1ad6b784a3d47d13ed1d89c"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a7ddf981a0b953ade1c2379052d47ccda2f58ab678fca0671c7c7ca2f67aac2"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ffa45cc55b18d4ac1396d1ddb029f139b1d3480f1594130e62bceadf2e1a838"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cca505829cdab58c2495ff418c96092d225a1bbd486f79017f6de915580d3c44"}, - {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:44d323aa80a867cb6db6bebb4bbec677c6478e38128847f2c6b0f70eae984d72"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b2fab23003c4bb2249729a7290a76c1dda38c438300fdf97d4e42bf78b19c810"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:be0c7c98e38a1e3ad7a6ff64af8b6d6db34bf5a41b1478e24c3c74d9e7f8ed42"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5cc5e0d069c56645446c45a4b5010d4b33ac6c5ebfd369a791b5f097e46a3c08"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9bcf97b971289be69638d8b1b616f7e557e1342debc7fc86cf89d3f08960e411"}, - {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c7333e7239415076d1418dbfb7fa4df48f3a5b00f8fdf854fca549080455bc14"}, - {file = "aiohttp-3.11.9-cp312-cp312-win32.whl", hash = "sha256:9384b07cfd3045b37b05ed002d1c255db02fb96506ad65f0f9b776b762a7572e"}, - {file = "aiohttp-3.11.9-cp312-cp312-win_amd64.whl", hash = "sha256:f5252ba8b43906f206048fa569debf2cd0da0316e8d5b4d25abe53307f573941"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:282e0a7ddd36ebc411f156aeaa0491e8fe7f030e2a95da532cf0c84b0b70bc66"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebd3e6b0c7d4954cca59d241970011f8d3327633d555051c430bd09ff49dc494"}, - {file = "aiohttp-3.11.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30f9f89ae625d412043f12ca3771b2ccec227cc93b93bb1f994db6e1af40a7d3"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a3b5b2c012d70c63d9d13c57ed1603709a4d9d7d473e4a9dfece0e4ea3d5f51"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ef1550bb5f55f71b97a6a395286db07f7f2c01c8890e613556df9a51da91e8d"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317251b9c9a2f1a9ff9cd093775b34c6861d1d7df9439ce3d32a88c275c995cd"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbe97839b009826a61b143d3ca4964c8590d7aed33d6118125e5b71691ca46"}, - {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:618b18c3a2360ac940a5503da14fa4f880c5b9bc315ec20a830357bcc62e6bae"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0cf4d814689e58f57ecd5d8c523e6538417ca2e72ff52c007c64065cef50fb2"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:15c4e489942d987d5dac0ba39e5772dcbed4cc9ae3710d1025d5ba95e4a5349c"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ec8df0ff5a911c6d21957a9182402aad7bf060eaeffd77c9ea1c16aecab5adbf"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ed95d66745f53e129e935ad726167d3a6cb18c5d33df3165974d54742c373868"}, - {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:647ec5bee7e4ec9f1034ab48173b5fa970d9a991e565549b965e93331f1328fe"}, - {file = "aiohttp-3.11.9-cp313-cp313-win32.whl", hash = "sha256:ef2c9499b7bd1e24e473dc1a85de55d72fd084eea3d8bdeec7ee0720decb54fa"}, - {file = "aiohttp-3.11.9-cp313-cp313-win_amd64.whl", hash = "sha256:84de955314aa5e8d469b00b14d6d714b008087a0222b0f743e7ffac34ef56aff"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:e738aabff3586091221044b7a584865ddc4d6120346d12e28e788307cd731043"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:28f29bce89c3b401a53d6fd4bee401ee943083bf2bdc12ef297c1d63155070b0"}, - {file = "aiohttp-3.11.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31de2f10f63f96cc19e04bd2df9549559beadd0b2ee2da24a17e7ed877ca8c60"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f31cebd8c27a36af6c7346055ac564946e562080ee1a838da724585c67474f"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0bcb7f6976dc0b6b56efde13294862adf68dd48854111b422a336fa729a82ea6"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8b13b9950d8b2f8f58b6e5842c4b842b5887e2c32e3f4644d6642f1659a530"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9c23e62f3545c2216100603614f9e019e41b9403c47dd85b8e7e5015bf1bde0"}, - {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec656680fc53a13f849c71afd0c84a55c536206d524cbc831cde80abbe80489e"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:36df00e0541f264ce42d62280281541a47474dfda500bc5b7f24f70a7f87be7a"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8dcfd14c712aa9dd18049280bfb2f95700ff6a8bde645e09f17c3ed3f05a0130"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:14624d96f0d69cf451deed3173079a68c322279be6030208b045ab77e1e8d550"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4b01d9cfcb616eeb6d40f02e66bebfe7b06d9f2ef81641fdd50b8dd981166e0b"}, - {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:928f92f80e2e8d6567b87d3316c1fd9860ccfe36e87a9a7f5237d4cda8baa1ba"}, - {file = "aiohttp-3.11.9-cp39-cp39-win32.whl", hash = "sha256:c8a02f74ae419e3955af60f570d83187423e42e672a6433c5e292f1d23619269"}, - {file = "aiohttp-3.11.9-cp39-cp39-win_amd64.whl", hash = "sha256:0a97d657f6cf8782a830bb476c13f7d777cfcab8428ac49dde15c22babceb361"}, - {file = "aiohttp-3.11.9.tar.gz", hash = "sha256:a9266644064779840feec0e34f10a89b3ff1d2d6b751fe90017abcad1864fa7c"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cbad88a61fa743c5d283ad501b01c153820734118b65aee2bd7dbb735475ce0d"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80886dac673ceaef499de2f393fc80bb4481a129e6cb29e624a12e3296cc088f"}, + {file = "aiohttp-3.11.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:61b9bae80ed1f338c42f57c16918853dc51775fb5cb61da70d590de14d8b5fb4"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e2e576caec5c6a6b93f41626c9c02fc87cd91538b81a3670b2e04452a63def6"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02c13415b5732fb6ee7ff64583a5e6ed1c57aa68f17d2bda79c04888dfdc2769"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfce37f31f20800a6a6620ce2cdd6737b82e42e06e6e9bd1b36f546feb3c44f"}, + {file = "aiohttp-3.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bbbfff4c679c64e6e23cb213f57cc2c9165c9a65d63717108a644eb5a7398df"}, + {file = 
"aiohttp-3.11.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49c7dbbc1a559ae14fc48387a115b7d4bbc84b4a2c3b9299c31696953c2a5219"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68386d78743e6570f054fe7949d6cb37ef2b672b4d3405ce91fafa996f7d9b4d"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9ef405356ba989fb57f84cac66f7b0260772836191ccefbb987f414bcd2979d9"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5d6958671b296febe7f5f859bea581a21c1d05430d1bbdcf2b393599b1cdce77"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:99b7920e7165be5a9e9a3a7f1b680f06f68ff0d0328ff4079e5163990d046767"}, + {file = "aiohttp-3.11.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0dc49f42422163efb7e6f1df2636fe3db72713f6cd94688e339dbe33fe06d61d"}, + {file = "aiohttp-3.11.10-cp310-cp310-win32.whl", hash = "sha256:40d1c7a7f750b5648642586ba7206999650208dbe5afbcc5284bcec6579c9b91"}, + {file = "aiohttp-3.11.10-cp310-cp310-win_amd64.whl", hash = "sha256:68ff6f48b51bd78ea92b31079817aff539f6c8fc80b6b8d6ca347d7c02384e33"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:77c4aa15a89847b9891abf97f3d4048f3c2d667e00f8a623c89ad2dccee6771b"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:909af95a72cedbefe5596f0bdf3055740f96c1a4baa0dd11fd74ca4de0b4e3f1"}, + {file = "aiohttp-3.11.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:386fbe79863eb564e9f3615b959e28b222259da0c48fd1be5929ac838bc65683"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3de34936eb1a647aa919655ff8d38b618e9f6b7f250cc19a57a4bf7fd2062b6d"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c9527819b29cd2b9f52033e7fb9ff08073df49b4799c89cb5754624ecd98299"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a96e3e03300b41f261bbfd40dfdbf1c301e87eab7cd61c054b1f2e7c89b9e8"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98f5635f7b74bcd4f6f72fcd85bea2154b323a9f05226a80bc7398d0c90763b0"}, + {file = "aiohttp-3.11.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:03b6002e20938fc6ee0918c81d9e776bebccc84690e2b03ed132331cca065ee5"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6362cc6c23c08d18ddbf0e8c4d5159b5df74fea1a5278ff4f2c79aed3f4e9f46"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3691ed7726fef54e928fe26344d930c0c8575bc968c3e239c2e1a04bd8cf7838"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31d5093d3acd02b31c649d3a69bb072d539d4c7659b87caa4f6d2bcf57c2fa2b"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:8b3cf2dc0f0690a33f2d2b2cb15db87a65f1c609f53c37e226f84edb08d10f52"}, + {file = "aiohttp-3.11.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fbbaea811a2bba171197b08eea288b9402faa2bab2ba0858eecdd0a4105753a3"}, + {file = "aiohttp-3.11.10-cp311-cp311-win32.whl", hash = "sha256:4b2c7ac59c5698a7a8207ba72d9e9c15b0fc484a560be0788b31312c2c5504e4"}, + {file = "aiohttp-3.11.10-cp311-cp311-win_amd64.whl", hash = 
"sha256:974d3a2cce5fcfa32f06b13ccc8f20c6ad9c51802bb7f829eae8a1845c4019ec"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b78f053a7ecfc35f0451d961dacdc671f4bcbc2f58241a7c820e9d82559844cf"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab7485222db0959a87fbe8125e233b5a6f01f4400785b36e8a7878170d8c3138"}, + {file = "aiohttp-3.11.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cf14627232dfa8730453752e9cdc210966490992234d77ff90bc8dc0dce361d5"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:076bc454a7e6fd646bc82ea7f98296be0b1219b5e3ef8a488afbdd8e81fbac50"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:482cafb7dc886bebeb6c9ba7925e03591a62ab34298ee70d3dd47ba966370d2c"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf3d1a519a324af764a46da4115bdbd566b3c73fb793ffb97f9111dbc684fc4d"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24213ba85a419103e641e55c27dc7ff03536c4873470c2478cce3311ba1eee7b"}, + {file = "aiohttp-3.11.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b99acd4730ad1b196bfb03ee0803e4adac371ae8efa7e1cbc820200fc5ded109"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:14cdb5a9570be5a04eec2ace174a48ae85833c2aadc86de68f55541f66ce42ab"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7e97d622cb083e86f18317282084bc9fbf261801b0192c34fe4b1febd9f7ae69"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:012f176945af138abc10c4a48743327a92b4ca9adc7a0e078077cdb5dbab7be0"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44224d815853962f48fe124748227773acd9686eba6dc102578defd6fc99e8d9"}, + {file = "aiohttp-3.11.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c87bf31b7fdab94ae3adbe4a48e711bfc5f89d21cf4c197e75561def39e223bc"}, + {file = "aiohttp-3.11.10-cp312-cp312-win32.whl", hash = "sha256:06a8e2ee1cbac16fe61e51e0b0c269400e781b13bcfc33f5425912391a542985"}, + {file = "aiohttp-3.11.10-cp312-cp312-win_amd64.whl", hash = "sha256:be2b516f56ea883a3e14dda17059716593526e10fb6303189aaf5503937db408"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8cc5203b817b748adccb07f36390feb730b1bc5f56683445bfe924fc270b8816"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ef359ebc6949e3a34c65ce20230fae70920714367c63afd80ea0c2702902ccf"}, + {file = "aiohttp-3.11.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9bca390cb247dbfaec3c664326e034ef23882c3f3bfa5fbf0b56cad0320aaca5"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811f23b3351ca532af598405db1093f018edf81368e689d1b508c57dcc6b6a32"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddf5f7d877615f6a1e75971bfa5ac88609af3b74796ff3e06879e8422729fd01"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ab29b8a0beb6f8eaf1e5049252cfe74adbaafd39ba91e10f18caeb0e99ffb34"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c49a76c1038c2dd116fa443eba26bbb8e6c37e924e2513574856de3b6516be99"}, + {file = "aiohttp-3.11.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f3dc0e330575f5b134918976a645e79adf333c0a1439dcf6899a80776c9ab39"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:efb15a17a12497685304b2d976cb4939e55137df7b09fa53f1b6a023f01fcb4e"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:db1d0b28fcb7f1d35600150c3e4b490775251dea70f894bf15c678fdd84eda6a"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:15fccaf62a4889527539ecb86834084ecf6e9ea70588efde86e8bc775e0e7542"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:593c114a2221444f30749cc5e5f4012488f56bd14de2af44fe23e1e9894a9c60"}, + {file = "aiohttp-3.11.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7852bbcb4d0d2f0c4d583f40c3bc750ee033265d80598d0f9cb6f372baa6b836"}, + {file = "aiohttp-3.11.10-cp313-cp313-win32.whl", hash = "sha256:65e55ca7debae8faaffee0ebb4b47a51b4075f01e9b641c31e554fd376595c6c"}, + {file = "aiohttp-3.11.10-cp313-cp313-win_amd64.whl", hash = "sha256:beb39a6d60a709ae3fb3516a1581777e7e8b76933bb88c8f4420d875bb0267c6"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0580f2e12de2138f34debcd5d88894786453a76e98febaf3e8fe5db62d01c9bf"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a55d2ad345684e7c3dd2c20d2f9572e9e1d5446d57200ff630e6ede7612e307f"}, + {file = "aiohttp-3.11.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04814571cb72d65a6899db6099e377ed00710bf2e3eafd2985166f2918beaf59"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e44a9a3c053b90c6f09b1bb4edd880959f5328cf63052503f892c41ea786d99f"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:502a1464ccbc800b4b1995b302efaf426e8763fadf185e933c2931df7db9a199"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:613e5169f8ae77b1933e42e418a95931fb4867b2991fc311430b15901ed67079"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cca22a61b7fe45da8fc73c3443150c3608750bbe27641fc7558ec5117b27fdf"}, + {file = "aiohttp-3.11.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86a5dfcc39309470bd7b68c591d84056d195428d5d2e0b5ccadfbaf25b026ebc"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:77ae58586930ee6b2b6f696c82cf8e78c8016ec4795c53e36718365f6959dc82"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:78153314f26d5abef3239b4a9af20c229c6f3ecb97d4c1c01b22c4f87669820c"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:98283b94cc0e11c73acaf1c9698dea80c830ca476492c0fe2622bd931f34b487"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:53bf2097e05c2accc166c142a2090e4c6fd86581bde3fd9b2d3f9e93dda66ac1"}, + {file = "aiohttp-3.11.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c5532f0441fc09c119e1dca18fbc0687e64fbeb45aa4d6a87211ceaee50a74c4"}, + {file = "aiohttp-3.11.10-cp39-cp39-win32.whl", hash = "sha256:47ad15a65fb41c570cd0ad9a9ff8012489e68176e7207ec7b82a0940dddfd8be"}, + {file = "aiohttp-3.11.10-cp39-cp39-win_amd64.whl", hash = 
"sha256:c6b9e6d7e41656d78e37ce754813fa44b455c3d0d0dced2a047def7dc5570b74"}, + {file = "aiohttp-3.11.10.tar.gz", hash = "sha256:b1fc6b45010a8d0ff9e88f9f2418c6fd408c99c211257334aff41597ebece42e"}, ] [package.dependencies] @@ -136,24 +136,24 @@ files = [ [[package]] name = "anyio" -version = "4.6.2.post1" +version = "4.7.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" files = [ - {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, - {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, + {file = "anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352"}, + {file = "anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48"}, ] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] @@ -1634,13 +1634,13 @@ files = [ [[package]] name = "openai" -version = "1.56.2" +version = "1.57.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.56.2-py3-none-any.whl", hash = "sha256:82d0c48f9504e04c7797e9b799dcf7f49a246d99b6cbfd90f3193ea80815b69e"}, - {file = "openai-1.56.2.tar.gz", hash = "sha256:17312af69bc7670d4048f98ab5849f8784d98c39ac64fcde19406e3774a0c1e5"}, + {file = "openai-1.57.0-py3-none-any.whl", hash = "sha256:972e36960b821797952da3dc4532f486c28e28a2a332d7d0c5407f242e9d9c39"}, + {file = "openai-1.57.0.tar.gz", hash = "sha256:76f91971c4bdbd78380c9970581075e0337b5d497c2fbf7b5255078f4b31abf9"}, ] [package.dependencies] @@ -2702,17 +2702,17 @@ files = [ [[package]] name = "slack-bolt" -version = "1.21.2" +version = "1.21.3" description = "The Bolt Framework for Python" optional = false python-versions = ">=3.6" files = [ - {file = "slack_bolt-1.21.2-py2.py3-none-any.whl", hash = "sha256:6860fc8693ca543b653c5d49a09b8b542f5fb7a02638342a7ddd18d8bc6f3ba0"}, - {file = "slack_bolt-1.21.2.tar.gz", hash = "sha256:05ac2d454adfddfc629fb63c7a3723bd1432a24373119368bc81f2f52b029cbf"}, + {file = "slack_bolt-1.21.3-py2.py3-none-any.whl", hash = "sha256:f5c41bfd5758db2023a7591e93d05ebb218aaae704e0dc382ae400bc1d5f2670"}, + {file = "slack_bolt-1.21.3.tar.gz", hash = "sha256:f91ca5dc22da749a42e40a1885b466a4f2af1f65d40c544fccce56e4bb95f1fe"}, ] [package.dependencies] -slack-sdk = ">=3.33.1,<4" +slack_sdk = ">=3.33.5,<4" [[package]] name = "slack-sdk" diff --git a/tests/llm/README.md 
b/tests/llm/README.md index fe97ffc3..4d33f729 100644 --- a/tests/llm/README.md +++ b/tests/llm/README.md @@ -23,12 +23,13 @@ In this folder, add a `test_case.yaml` file: ```yaml user_prompt: 'Is pod xyz healthy? ' -expected_output: "Yes, pod xyz is healthy. It is running and there are no errors in the logs." +expected_output: + - pod xyz is running and healthy retrieval_context: - Any element of context. This will inform the evaluation score 'context' - These context elements are expected to be present in the output evaluation: # expected evaluation scores. The test will fail unless the LLM scores at least the following: - faithfulness: 0.5 # defaults to 0.3 + correctness: 0.5 # defaults to 0.3 context: 0 # defaults to 0 before-test: kubectl apply -f manifest.yaml after-test: kubectl delete -f manifest.yaml @@ -41,7 +42,7 @@ Here are the possible fields in the `test_case.yaml` yaml file: | Field | Type | Required/optional | Example value | Description | |-------------------|------------------|-------------------|-----------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | user_prompt | str | Required | Is pod xyz healthy? | The user prompt | -| expected_output | str | Required | Yes, pod xyz is healthy. It is running and there are no errors in the logs. | The expected answer from the LLM | +| expected_output | str or List[str] | Required | Yes, pod xyz is healthy. It is running and there are no errors in the logs. | The expected answer from the LLM. This can be a string or a list of expected elements. If it is a string, the response will be scored with 'faithfulness'. Otherwise it is 'correctness'. | | retrieval_context | List[str] | Optional | - pod xyz is running and healthy - there are no errors in the logs | Context that the LLM is expected to have used in its answer. If present, this generates a 'context' score proportional to the number of matching context elements found in the LLM's output. | | evaluation | Dict[str, float] | Optional | evaluation:
faithfulness: 1
context: 1
| The minimum expected scores. The test will fail unless these are met. Set to 0 for unstable tests. | | before-test | str | Optional | kubectl apply -f manifest.yaml | A command to run before the LLM evaluation. The CWD for this command is the same folder as the fixture. This step is skipped unless `RUN_LIVE` environment variable is set | @@ -67,3 +68,4 @@ The test may pass or not based on whether the evaluation scores are high enough. | BRAINTRUST_API_KEY | BRAINTRUST_API_KEY=sk-1dh1...swdO02 | The braintrust API key you get from your account. Log in https://www.braintrust.dev -> top right persona logo -> settings -> API keys. | | UPLOAD_DATASET | UPLOAD_DATASET=1 | Synchronise the dataset from the local machine to braintrust. This is usually safe as datasets are separated by branch name. | | EXPERIMENT_ID | EXPERIMENT_ID=nicolas_gemini_v1 | Override the experiment name in Braintrust. Helps with identifying and comparing experiments. Must be unique across ALL experiments. | +| MODEL | MODEL=anthropic/claude-3.5 | The model to use for generation. | diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt index fd0a5a8a..90a8ab70 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource.txt @@ -1,23 +1,27 @@ {"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default 
prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: +NAME READY STATUS 
RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 28h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal +analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 295 (82s ago) 24h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal +customer-orders-57cc4b9565-tltfd 1/2 CreateContainerConfigError 0 24h 172.31.23.36 ip-172-31-21-139.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-4svkf 0/1 ImagePullBackOff 0 24h 172.31.45.91 ip-172-31-40-111.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 24h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nxggs 0/1 ImagePullBackOff 0 24h 172.31.23.28 ip-172-31-21-139.us-east-2.compute.internal +db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 246 (60s ago) 24h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-9pj7k 0/1 Error 0 24h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-vzm7z 0/1 Error 0 24h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal +logging-agent 0/1 Init:CrashLoopBackOff 296 (3m43s ago) 24h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal +order-processor 1/1 Running 258 (61m ago) 15d 172.31.23.45 ip-172-31-21-139.us-east-2.compute.internal +payment-processing-worker-74754cf949-k8jsd 0/1 CrashLoopBackOff 296 (3m38s ago) 24h 172.31.37.156 ip-172-31-40-111.us-east-2.compute.internal +prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 28h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal +robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 28h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal +robusta-grafana-597597b88-r8cql 3/3 Running 0 28h 172.31.38.193 ip-172-31-40-111.us-east-2.compute.internal +robusta-holmes-7869fc74bc-bmf2m 1/1 Running 0 28h 172.31.21.29 ip-172-31-21-139.us-east-2.compute.internal +robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 28h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal +robusta-kube-state-metrics-7964495ff6-77m58 1/1 Running 0 28h 172.31.40.222 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-7rqmn 1/1 Running 0 28h 172.31.21.139 ip-172-31-21-139.us-east-2.compute.internal +robusta-prometheus-node-exporter-dsd4f 1/1 Running 0 28h 172.31.40.111 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 28h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal +robusta-runner-6958c5bdd8-69gtn 1/1 Running 0 28h 172.31.25.172 ip-172-31-21-139.us-east-2.compute.internal +search-engine-service 0/1 Running 0 24h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal +user-profile-import 0/1 Pending 0 24h +user-profile-resources-685bfd9c86-sbcv6 0/1 Pending 0 24h diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt index fd0a5a8a..90a8ab70 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/kubectl_find_resource_pod_by_keyword.txt @@ -1,23 +1,27 @@ {"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 
2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal 
app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 28h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal +analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 295 (82s ago) 24h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal +customer-orders-57cc4b9565-tltfd 1/2 CreateContainerConfigError 0 24h 172.31.23.36 ip-172-31-21-139.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-4svkf 0/1 ImagePullBackOff 0 24h 172.31.45.91 ip-172-31-40-111.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 24h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal +customer-relations-webapp-5d98ffcfd-nxggs 0/1 ImagePullBackOff 0 24h 172.31.23.28 ip-172-31-21-139.us-east-2.compute.internal +db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 246 (60s ago) 24h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-9pj7k 0/1 Error 0 24h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal +java-api-checker-vzm7z 0/1 Error 0 24h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal +logging-agent 0/1 Init:CrashLoopBackOff 296 (3m43s ago) 24h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal +order-processor 1/1 Running 258 (61m ago) 15d 172.31.23.45 
ip-172-31-21-139.us-east-2.compute.internal +payment-processing-worker-74754cf949-k8jsd 0/1 CrashLoopBackOff 296 (3m38s ago) 24h 172.31.37.156 ip-172-31-40-111.us-east-2.compute.internal +prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 28h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal +robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 28h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal +robusta-grafana-597597b88-r8cql 3/3 Running 0 28h 172.31.38.193 ip-172-31-40-111.us-east-2.compute.internal +robusta-holmes-7869fc74bc-bmf2m 1/1 Running 0 28h 172.31.21.29 ip-172-31-21-139.us-east-2.compute.internal +robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 28h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal +robusta-kube-state-metrics-7964495ff6-77m58 1/1 Running 0 28h 172.31.40.222 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-7rqmn 1/1 Running 0 28h 172.31.21.139 ip-172-31-21-139.us-east-2.compute.internal +robusta-prometheus-node-exporter-dsd4f 1/1 Running 0 28h 172.31.40.111 ip-172-31-40-111.us-east-2.compute.internal +robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 28h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal +robusta-runner-6958c5bdd8-69gtn 1/1 Running 0 28h 172.31.25.172 ip-172-31-21-139.us-east-2.compute.internal +search-engine-service 0/1 Running 0 24h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal +user-profile-import 0/1 Pending 0 24h +user-profile-resources-685bfd9c86-sbcv6 0/1 Pending 0 24h diff --git a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml index 2ac5ca6a..98d58260 100644 --- a/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/01_how_many_pods/test_case.yaml @@ -1,10 +1,12 @@ user_prompt: 'How many pods do I have running on << { "type": "node", "name": "ip-172-31-8-128.us-east-2.compute.internal" } >> ?' #user_prompt: "How many pods do I have running on node ip-172-31-8-128.us-east-2.compute.internal?" -expected_output: "5 pods are running on ip-172-31-8-128.us-east-2.compute.internal" +expected_output: + - 6 pods are in a running state on node ip-172-31-8-128.us-east-2.compute.internal retrieval_context: - There is a total of 12 pods on node ip-172-31-8-128.us-east-2.compute.internal - - There are 5 pods in running state - - 7 pods are not running as indicated by the STATUS column + - There are 6 pods in running state + - One of the pod is in running state but has a container in the process of starting or restarting + - 6 pods are not running as indicated by the STATUS column evaluation: answer_relevancy: 0 faithfulness: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml index 8933b508..3dbe5b72 100644 --- a/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/02_what_is_wrong_with_pod/test_case.yaml @@ -1,5 +1,7 @@ user_prompt: 'What is wrong with << { "type": "pod", "name": "robusta-runner-6958c5bdd8-69gtn" } >> ?' -expected_output: "The pod is currently running and healthy but has recently been killed due to it being out of memory. Increase the memory available to this pod." 
+expected_output: + - The pod was recently killed due to it being out of memory + - The pod is currently running and healthy retrieval_context: - | Last State: Terminate diff --git a/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml index 86c57bae..f30ba0ba 100644 --- a/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/03_what_is_the_command_to_port_forward/test_case.yaml @@ -1,13 +1,7 @@ user_prompt: 'what is the command to port-forward to << { "type": "pod", "name": "my_grafana_4j981" } >>' #user_prompt: "what is the command to port-forward to my grafana service?" -expected_output: | - To port-forward to the pod `my_grafana_4j981`, use the following command: - - ```bash - kubectl port-forward pod/my_grafana_4j981 3000:3000 -n default - ``` - - This command forwards port 3000 on your local machine to port 3000 on the pod. +expected_output: + - kubectl port-forward pod/my_grafana_4j981 3000:3000 -n default retrieval_context: - "The grafana service is running but does not have an external IP address" - "The name of the grafana service is my_grafana_4j981" diff --git a/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml index 8e47d997..4de7ca25 100644 --- a/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/04_related_k8s_events/test_case.yaml @@ -1,13 +1,11 @@ user_prompt: 'What kubernetes events are related to << { "type": "pod", "name": "nginx-6958c5bdd8-69gtn" } >>' #user_prompt: "What kubernetes events are related to the nginx pod >>" -expected_output: | - Kubernetes events related to the `nginx-6958c5bdd8-69gtn` pod include: - - 1. **Scheduled**: Successfully assigned to node `aks-nodepool1-26081864-vmss000004`. - 2. **Pulling**: Pulling image "nginx". - 3. **Pulled**: Successfully pulled image "nginx". - 4. **Created**: Created container `nginxreplica`. - 5. **Started**: Started container `nginxreplica`. +expected_output: + - "Scheduled: Successfully assigned to node `aks-nodepool1-26081864-vmss000004`" + - "Pulling: Pulling image `nginx`" + - "Pulled: Successfully pulled image `nginx`" + - "Created: Created container `nginxreplica`" + - "Started: Started container `nginxreplica`" retrieval_context: - | Events: @@ -21,6 +19,3 @@ retrieval_context: evaluation: answer_relevancy: 0 faithfulness: 0 - contextual_precision: 0 - contextual_recall: 0 - contextual_relevancy: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml index 1af50eb8..783551b4 100644 --- a/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/05_image_version/test_case.yaml @@ -1,5 +1,6 @@ user_prompt: 'what is the image version of << { "type": "pod", "name": "robusta-runner" } >>' #user_prompt: "what is the image version of my robusta-runner pod?" -expected_output: "The image version of the `robusta-runner` pod is `0.18.0`." +expected_output: + - The image version of the `robusta-runner` pod is `0.18.0`. 
retrieval_context: - "image: robustadev/robusta-runner:0.18.0" diff --git a/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml index abc8486a..e2f955c8 100644 --- a/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/06_explain_issue/test_case.yaml @@ -1,12 +1,11 @@ user_prompt: 'Explain << { "type": "issue", "id": "asodfkq1209edyhqawdo2uydqawidh" } >>' -expected_output: "The finding indicates that the pod `my-cluster-kafka-0` in the `kafka` namespace was evicted due to low ephemeral storage on the node. The threshold was 2126213140, but only 1884540Ki was available. The `kafka` container was using 3264Ki of ephemeral storage, with no specific request set, leading to higher consumption. This resulted in a warning and the container being stopped. The issue occurred in the `payments-kafka-eu-eks-v2` cluster." +expected_output: + - The pod `my-cluster-kafka-0` in the `kafka` namespace was evicted due to low ephemeral storage on the node + - The threshold was 2126213140, but only 1884540Ki was available + - The `kafka` container was using 3264Ki of ephemeral storage retrieval_context: - "Pod my-cluster-kafka was evicted" - "The node was low on resource: ephemeral-storage. Threshold quantity: 2126213140, available: 1884540Ki. Container kafka was using 3264Ki, request is 0, has larger consumption of ephemeral-storage." - evaluation: answer_relevancy: 0 faithfulness: 0 - contextual_precision: 0 - contextual_recall: 0 - contextual_relevancy: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml index b6e5cd97..fcd525de 100644 --- a/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/07_high_latency/test_case.yaml @@ -1,7 +1,6 @@ user_prompt: "Why is there high latency with the customer-orders deployment?" -expected_output: | - High latency in the `customer-orders` deployment is due to slow database calls. - The logs show that database operations, specifically fetching data using a stored procedure, are taking between 5 to 10 seconds to complete. - This delay directly impacts the page rendering time, causing high latency in the application. +expected_output: + - High latency in the `customer-orders` deployment is due to slow database calls + - Database operations are taking between 5 to 10 seconds to complete before_test: kubectl apply -f ./helm/manifest.yaml after_test: kubectl delete -f ./helm/manifest.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml index 80995c6a..4eaf2dd6 100644 --- a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/test_case.yaml @@ -1,12 +1,7 @@ user_prompt: "Why did the sock-shop front-end stop working?" -expected_output: | - The sock-shop front-end stopped working due to an internal server error related to MongoDB. The error message indicates that the MongoDB client driver may require an upgrade due to an unsupported OP_QUERY command. This results in a failure to process orders, causing a crash in the front-end application. - - To resolve this issue, consider upgrading the MongoDB client driver to a version that supports the current MongoDB server's command set. 
- - The sock-shop front-end stopped working due to an internal server error caused by an outdated MongoDB client driver. The error message indicates that the `find` command is unsupported, suggesting the client driver needs an upgrade. This led to a failure in processing orders, resulting in a crash with a `TypeError` when trying to access `customerOrders`. - - To resolve this, update the MongoDB client driver to a version that supports the current MongoDB server's commands. +expected_output: + - The sock-shop front-end stopped working due to an internal server error related to MongoDB. + - The MongoDB client driver may require an upgrade retrieval_context: - "The sock-shop front-end crashed" - "The mongodb client driver may require an upgrade" diff --git a/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml index 88d55457..52f2add3 100644 --- a/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/09_crashpod/test_case.yaml @@ -1,7 +1,6 @@ user_prompt: "What is the issue with payment-processing-worker?" -expected_output: | - The `payment-processing-worker` pod is crashing due to an undefined environment variable `DEPLOY_ENV`. The container is set to check for this variable, and if it's not defined, it exits immediately, causing a CrashLoopBackOff state. - To resolve this, define the `DEPLOY_ENV` environment variable in the pod's configuration. +expected_output: + - The `DEPLOY_ENV` environment variable is undefined or missing retrieval_context: - "logs from payment-processing-worker mention: Environment variable DEPLOY_ENV is undefined" # before_test: | diff --git a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml index 460e26b6..a3566f18 100644 --- a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/test_case.yaml @@ -1,14 +1,8 @@ user_prompt: What is the issue with customer-relations-webapp? -expected_output: | - The `customer-relations-webapp` pods are failing to start due to an `ImagePullBackOff` error. The issue is caused by the inability to pull the Docker image `yourcompany/crw:latest`. The error message indicates that the image pull failed because the repository does not exist or may require authorization. - - Possible solutions: - 1. Verify the image name and tag are correct. - 2. Ensure the image is available in the specified repository. - 3. Check if authentication credentials are needed to access the repository and configure them in Kubernetes if necessary. 
- -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml -# sleep 20 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml +expected_output: + - The Docker image `yourcompany/crw:latest` cannot be pulled +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml + sleep 20 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/image_pull_backoff/no_such_image.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml index e3c3ca52..51d667bd 100644 --- a/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/11_init_containers/test_case.yaml @@ -1,10 +1,7 @@ user_prompt: "What is the issue with logging-agent?" -expected_output: | - The `logging-agent` pod is failing to initialize due to an error in the init container `downloader`. - The command `wge` is not found, likely a typo for `wget`. - This causes the container to fail with a `RunContainerError`. - Correct the command in the init container to use `wget` instead of `wge`. - +expected_output: + - The command `wge` is not found + - The answer should suggest `wge` may be a typo for `wget`. # before_test: | # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/init_crashloop_backoff/create_init_crashloop_backoff.yaml # sleep 40 diff --git a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml index d17d33d7..242d99b9 100644 --- a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/test_case.yaml @@ -1,11 +1,6 @@ user_prompt: "What is the issue with job java-api-checker" -expected_output: | - The `java-api-checker` job is experiencing a network exception. - It repeatedly fails to connect to the database at `prod-db:3333`, - with the error indicating that no available connection exists and the - queue has reached its maximum size of 256. - This suggests a potential issue with the database being unreachable or overloaded. - +expected_output: + - The `java-api-checker` job repeatedly fails to connect to the database at `prod-db:3333` # before_test: | # kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml || true # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml index 3d8e51b2..9330d190 100644 --- a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "What is the issue with user-profile-import" -expected_output: | - The `user-profile-import` pod is pending because it has a node selector that doesn't match any available nodes. - The node selector is set to `label=someLabel`, but no nodes in the cluster have this label.
- To resolve this, either update the node selector to match an existing node label or add the required label to a node. - +expected_output: + - The pod has a node selector that doesn't match any available nodes. + - The node selector is set to `label=someLabel`, but no nodes in the cluster have this label. # before_test: | # kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml # sleep 30 diff --git a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml index 2f6f41fb..098487bf 100644 --- a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/14_pending_resources/test_case.yaml @@ -1,8 +1,6 @@ user_prompt: "What is the issue with user-profile-resources" -expected_output: | - The `user-profile-resources` deployment is not running due to insufficient GPU resources. - The pod `user-profile-resources-659d4dd659-d2tlj` is pending because there are no available nodes with the required 5 GPUs. - Adjust the GPU resource requests or ensure nodes with sufficient GPUs are available. +expected_output: + - This must mention insufficient GPU resources before_test: | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_resources.yaml sleep 30 diff --git a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml index bf2b7661..811c3661 100644 --- a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/test_case.yaml @@ -1,11 +1,9 @@ user_prompt: "What is the issue with search-engine-service" -expected_output: | - The `search-engine-service` pod is running but not ready due to a failing readiness probe. - The readiness probe is set to execute `sh -c exit 1`, which always fails, causing the pod to be marked as not ready. - Adjust the readiness probe to a command that accurately reflects the pod's readiness state. - -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml -# sleep 30 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml +expected_output: + - The `search-engine-service` pod is running but not ready due to a failing readiness probe + - The readiness probe fails +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml + sleep 30 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/readiness_probe_fail/failing_readiness_probe.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml index 1b7c314d..33539024 100644 --- a/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/16_failed_no_toolset_found/test_case.yaml @@ -1,4 +1,3 @@ user_prompt: "Why is my Digital ocean droplet not working fine" -expected_output: |
- You can add a Holmes Digital Ocean integration which will give me access yo your droplets. +expected_output: + - The answer should prompt the user to add a Holmes Digital Ocean integration to access droplets diff --git a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml index 1bcfeac2..9b273e11 100644 --- a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/test_case.yaml @@ -1,10 +1,6 @@ user_prompt: "What is the issue with analytics-exporter-fast?" -expected_output: | - The `analytics-exporter-fast` pod is crashing due to being OOMKilled (Out of Memory Killed). - The container `memory-eater` is configured with a memory limit of 100Mi, and it is exceeding this limit, leading to termination. - - To resolve this, consider increasing the memory limit for the container or optimizing the application's memory usage. - +expected_output: + - The result mentions analytics-exporter-fast is getting OOMKILLED before_test: | kubectl apply -f ./fast_oom_deployment.yaml sleep 5 diff --git a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml index 71d86580..215b3236 100644 --- a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/test_case.yaml @@ -1,11 +1,9 @@ user_prompt: "What is the issue with db-certs-authenticator?" -expected_output: | - The `db-certs-authenticator` pod experienced a crash due to an expired SSL certificate. - The logs indicate that the certificate expired on August 3, 2024. To resolve this, - update the SSL certificate using the `/update_certificate` API or update the config map with a valid certificate. - -# before_test: | -# kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml -# sleep 120 -# after_test: | -# kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml +expected_output: + - The `db-certs-authenticator` pod experienced a crash due to an expired SSL certificate. + - The certificate expired on August 3, 2024 +before_test: | + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml + sleep 120 +after_test: | + kubectl delete -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/crashpod.v2/crashloop-cert-app.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml index 4a69b5ad..7915808f 100644 --- a/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/19_detect_missing_app_details/test_case.yaml @@ -1,4 +1,3 @@ user_prompt: "Why is frontend-certs-validator in a crashloopbackoff?" -expected_output: I couldn't find a pod named "frontend-certs-validator." Please check the name and try again. -retrieval_context: - - "The pod frontend-certs-validator is not present in the cluster" +expected_output: + - There is no pod named "frontend-certs-validator." 
diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt deleted file mode 100644 index fd0a5a8a..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_find_resource_pod_by_keyword.txt +++ /dev/null @@ -1,23 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -stdout: -default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 0 3d22h 172.31.5.200 ip-172-31-8-128.us-east-2.compute.internal alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 -default analytics-exporter-fast-8cf8c9446-6rqwc 0/1 CrashLoopBackOff 1061 (2m51s ago) 3d18h 172.31.15.122 ip-172-31-8-128.us-east-2.compute.internal app=analytics-exporter-fast,pod-template-hash=8cf8c9446 -default customer-relations-webapp-5d98ffcfd-nj5gs 0/1 ImagePullBackOff 0 3d18h 172.31.14.171 ip-172-31-8-128.us-east-2.compute.internal app=customer-relations,pod-template-hash=5d98ffcfd,visualize=true -default db-certs-authenticator-7ffd769f48-d9pxl 0/1 CrashLoopBackOff 886 (69s ago) 3d18h 172.31.3.214 ip-172-31-8-128.us-east-2.compute.internal app=flask,pod-template-hash=7ffd769f48 -default java-api-checker-9pj7k 0/1 Error 0 3d18h 172.31.12.200 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default java-api-checker-vzm7z 0/1 Error 0 3d18h 172.31.13.205 ip-172-31-8-128.us-east-2.compute.internal batch.kubernetes.io/controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=ea3f2c52-3382-4cbc-8958-41832511a3e7,job-name=java-api-checker -default logging-agent 0/1 Init:CrashLoopBackOff 1067 (15s ago) 3d18h 172.31.1.249 ip-172-31-8-128.us-east-2.compute.internal -default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 0 3d22h 172.31.11.168 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 -default robusta-forwarder-89f44d49b-fxtrh 1/1 Running 0 3d22h 172.31.3.106 ip-172-31-8-128.us-east-2.compute.internal app=robusta-forwarder,pod-template-hash=89f44d49b -default robusta-kube-prometheus-st-operator-7fc5db7f4d-dr46l 1/1 Running 0 3d22h 172.31.6.195 ip-172-31-8-128.us-east-2.compute.internal 
app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=7fc5db7f4d,release=robusta -default robusta-prometheus-node-exporter-t2b5k 1/1 Running 0 3d22h 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7bf445876b,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta -default search-engine-service 0/1 Running 0 3d18h 172.31.11.151 ip-172-31-8-128.us-east-2.compute.internal -kube-system aws-node-m47xg 2/2 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=aws-vpc-cni,app.kubernetes.io/name=aws-node,controller-revision-hash=54f5998898,k8s-app=aws-node,pod-template-generation=1 -kube-system ebs-csi-controller-7bb676b68d-cs2gx 6/6 Running 0 25d 172.31.12.254 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-controller,pod-template-hash=7bb676b68d -kube-system ebs-csi-node-pgrvq 3/3 Running 0 25d 172.31.2.194 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/component=csi-driver,app.kubernetes.io/managed-by=EKS,app.kubernetes.io/name=aws-ebs-csi-driver,app.kubernetes.io/version=1.35.0,app=ebs-csi-node,controller-revision-hash=6bc69bc4b9,pod-template-generation=1 -kube-system eks-pod-identity-agent-vgz8h 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal app.kubernetes.io/instance=eks-pod-identity-agent,app.kubernetes.io/name=eks-pod-identity-agent,controller-revision-hash=74bcb67854,pod-template-generation=1 -kube-system kube-proxy-l7vqp 1/1 Running 0 25d 172.31.8.128 ip-172-31-8-128.us-east-2.compute.internal controller-revision-hash=6b64cc6947,k8s-app=kube-proxy,pod-template-generation=1 -sock-shop user-5bd96d75fb-ld8xv 1/1 Running 0 3d18h 172.31.0.106 ip-172-31-8-128.us-east-2.compute.internal name=user,pod-template-hash=5bd96d75fb -sock-shop user-db-5dc5c5f488-dw6xw 1/1 Running 0 3d18h 172.31.0.66 ip-172-31-8-128.us-east-2.compute.internal name=user-db,pod-template-hash=5dc5c5f488 - -stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt deleted file mode 100644 index 3c951eea..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_node.txt +++ /dev/null @@ -1,3 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"node","keyword":"ip-172-31-8-128.us-east-2.compute.internal"}} -NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME LABELS -ip-172-31-8-128.us-east-2.compute.internal Ready 25d v1.30.4-eks-a737599 172.31.8.128 3.147.70.176 Amazon Linux 2 5.10.225-213.878.amzn2.x86_64 containerd://1.7.11 
beta.kubernetes.io/arch=amd64,beta.kubernetes.io/instance-type=t3.medium,beta.kubernetes.io/os=linux,eks.amazonaws.com/capacityType=ON_DEMAND,eks.amazonaws.com/nodegroup-image=ami-067ed4d12a282fb31,eks.amazonaws.com/nodegroup=nicolas-node-group,failure-domain.beta.kubernetes.io/region=us-east-2,failure-domain.beta.kubernetes.io/zone=us-east-2a,k8s.io/cloud-provider-aws=02bcd7cbb8e774ede4606ab79260ae31,kubernetes.io/arch=amd64,kubernetes.io/hostname=ip-172-31-8-128.us-east-2.compute.internal,kubernetes.io/os=linux,node.kubernetes.io/instance-type=t3.medium,topology.ebs.csi.aws.com/zone=us-east-2a,topology.k8s.aws/zone-id=use2-az1,topology.kubernetes.io/region=us-east-2,topology.kubernetes.io/zone=us-east-2a diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt deleted file mode 100644 index ebe27926..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_get_pod.txt +++ /dev/null @@ -1,2 +0,0 @@ -{"toolset_name":"kubernetes/core","tool_name":"kubectl_find_resource","match_params":{"kind":"pod","name":"ip-172-31-8-128.us-east-2.compute.internal"}} -Error from server (NotFound): pods "ip-172-31-8-128.us-east-2.compute.internal" not found diff --git a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt b/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt deleted file mode 100644 index a29a1535..00000000 --- a/tests/llm/fixtures/test_ask_holmes/1_how_many_pods/kubectl_top_pods.txt +++ /dev/null @@ -1,6 +0,0 @@ -{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_top_pods","match_params":{}} -Command `kubectl top pods -A` failed with return code 1 -stdout: - -stderr: -error: Metrics API not available diff --git a/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml index 955f2a9a..cc689e93 100644 --- a/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/20_long_log_file_search/test_case.yaml @@ -1,4 +1,5 @@ user_prompt: "Was there any 'Peer down' problem reported for network-connector pod?" -expected_output: The "Peer down" issue was reported in the logs of the `network-connector` pod. +expected_output: + - The "Peer down" issue was reported in the logs of the `network-connector` pod. before_test: kubectl apply -f ./long-log-pod.yaml after_test: kubectl delete -f ./long-log-pod.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml index 0eb0e513..a6685f28 100644 --- a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/test_case.yaml @@ -1,12 +1,6 @@ user_prompt: "Why get-data job failed?" -expected_output: | - The `get-data` job failed because the `curl` command in the `curl-container` exited with code 6. - This exit code indicates a "Could not resolve host" error, meaning the specified URL - `http://kldsvklnsdiogjerkngernjk23432i90erfefjio.com/fsdkhjsadfjhsadfjsadu343` could not be resolved to an IP address. - - To fix this, ensure the URL is correct and accessible from the cluster. - If it's a typo or a temporary issue with the DNS, correcting the URL or ensuring DNS resolution should resolve the problem. 
- +expected_output: + - The specified URL `http://kldsvklnsdiogjerkngernjk23432i90erfefjio.com/fsdkhjsadfjhsadfjsadu343` could not be resolved to an IP address retrieval_context: - "The job ran and returned with an error: curl: (6) Could not resolve host: kldsvklnsdiogjerkngernjk23432i90erfefjio.com" before_test: | @@ -19,5 +13,5 @@ after_test: | kubectl delete -f ./job-service-account-event/get-data-job.yaml kubectl delete -f ./job-service-account-event/get-data-service-account.yaml evaluation: - faithfulness: 0.3 + correctness: 0.3 context: 0.3 diff --git a/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml index 0f2dc4ce..6e6fbb45 100644 --- a/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/22_high_latency_dbi_down/test_case.yaml @@ -1,7 +1,8 @@ # this requires first creating a secret w/ credentials for the RDS database # for instructions, see "Credentials for HolmesGPT RDS AI Test" in 1Password, but change the secret name to db-secrets-for-medium and the db to "promotions-db-for-medium" user_prompt: "Why is there high latency with the customer-orders-for-medium deployment?" -expected_output: "The result says the reason for failure is promotions-db-for-medium.cp8rwothwarq.us-east-2.rds.amazonaws.com is down, stopped, or in unavailable state." +expected_output: + - The reason for failure is promotions-db-for-medium.cp8rwothwarq.us-east-2.rds.amazonaws.com is down, stopped, or in unavailable state. before_test: | [ "$(aws rds describe-db-instances --db-instance-identifier promotions-db-for-medium --query "DBInstances[0].DBInstanceStatus" --output text)" != "stopped" ] && aws rds stop-db-instance --db-instance-identifier promotions-db-for-medium || echo "RDS instance is already stopped." kubectl apply -f ./slow-rds-query-for-medium.yaml @@ -9,4 +10,4 @@ before_test: | after_test: | kubectl delete -f ./slow-rds-query-for-medium.yaml evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml index f9140355..d5a3153a 100644 --- a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "Why is meme-deployment not working" -expected_output: | - The `meme-deployment` is experiencing issues due to a DNS resolution error. - The application is trying to connect to the host `memcom`, but it fails with the error "No address associated with hostname." - This indicates that the hostname `memcom` cannot be resolved to an IP address. - +expected_output: + - The `meme-deployment` is experiencing issues due to a DNS resolution error. + - This indicates that the hostname `memcom` cannot be resolved to an IP address. retrieval_context: - "There is a DNS resolution error" - "hostname `memcom` cannot be resolved to an IP address" diff --git a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml index 72386713..48b22ce0 100644 --- a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/test_case.yaml @@ -1,7 +1,9 @@ user_prompt: "What is the issue with redis?" 
-expected_output: "The PersistentVolumeClaim (pvc) is misconfigured and that the storage class standard-v2 does not exist" +expected_output: + - The PersistentVolumeClaim (pvc) is misconfigured + - The storage class standard-v2 does not exist evaluation: - faithfulness: 0 + correctness: 0 before_test: | kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pvc-misconfiguration/redis-deployment.yaml after_test: | diff --git a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml index eac17d77..40af6f50 100644 --- a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/test_case.yaml @@ -1,9 +1,7 @@ user_prompt: "Other processes in the cluster are unable to reach my-http-app-deployment, why? Investigate ingress class issues" -expected_output: | - 1. The `my-http-ingress` is using the `example-ingress-class`, which doesn't exist in the cluster. This could prevent routing to `my-http-app-deployment`. - 2. Ensure the ingress class `example-ingress-class` is defined and configured correctly. - 3. Verify that the ingress controller is running and associated with the correct ingress class. +expected_output: + - The ingress class `example-ingress-class` does not exist in the cluster before_test: kubectl apply -f./ingress_with_class.yaml after_test: kubectl delete -f./ingress_with_class.yaml evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml index b86b3027..0b66cdc7 100644 --- a/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/01_oom_kill/test_case.yaml @@ -1,34 +1,8 @@ -expected_output: | - The alert indicates that the pod `oomkill-deployment-696dbdbf67-d47z6` in the `default` namespace was experiencing a `CrashLoopBackOff` state, which typically means the pod was repeatedly crashing and restarting. - - # Investigation - I attempted to gather more information by describing the pod and fetching its logs, but the pod `oomkill-deployment-696dbdbf67-d47z6` could not be found in the cluster. This suggests that the pod may have been deleted or the deployment was scaled down after the alert was triggered. - - # Conclusions and Possible Root causes - 1. *Pod Deletion*: The pod might have been manually deleted or automatically removed by a scaling operation or deployment update. - 2. *Deployment Update*: A new deployment or update might have replaced the pod, leading to its removal. - 3. *Resource Constraints*: If the pod was indeed crashing due to resource constraints (e.g., OOMKilled), it might have been removed as part of a cleanup process. - - # Next Steps - 1. Verify if the deployment `oomkill-deployment` is still present and check its current status using: - ```bash - kubectl get deployment oomkill-deployment -n default - ``` - 2. If the deployment exists, check the replica set and any new pods created: - ```bash - kubectl get rs -n default - kubectl get pods -n default - ``` - 3. Review any recent changes or events related to the deployment: - ```bash - kubectl describe deployment oomkill-deployment -n default - ``` - 4. If resource constraints were suspected, consider increasing the resources allocated to the pods in the deployment configuration.
- +expected_output: + - Pod `oomkill-deployment-696dbdbf67-d47z6` is experiencing a `CrashLoopBackOff` retrieval_context: - There is a total of 12 pods on node ip-172-31-8-128.us-east-2.compute.internal - There are 5 pods in running state - 7 pods are not running as indicated by the STATUS column evaluation: - answer_relevancy: 0 - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml index e9bebc6c..172915ee 100644 --- a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/test_case.yaml @@ -1,28 +1,11 @@ -expected_output: | - Alert Explanation - - The pod inventory-management-api in the default namespace is experiencing a CrashLoopBackOff due to repeated failures in starting its container. - - Investigation - - - The pod inventory-management-api is using the nginx image and attempts to execute a command wge which is not found in the container's $PATH. This results in a StartError with an exit code 128. +expected_output: + - Pod inventory-management-api is experiencing a CrashLoopBackOff + - The pod attempts to execute a command wge which is not found + - Mention `wge` might be a typo and the intended command may be `wget` +retrieval_context: + - The pod inventory-management-api is using the nginx image + - The pod attempts to execute a command wge which is not found in the container's $PATH. This results in a StartError with an exit code 128. - The pod has restarted 4 times and is currently in a Waiting state with the reason CrashLoopBackOff. - - The error message indicates: exec: "wge": executable file not found in $PATH. - - Conclusions and Possible Root Causes - - - Incorrect Command: The command wge is likely a typo or misconfiguration. The intended command might be wget, which is a common utility for downloading files. - - Missing Executable: The required executable is not present in the container's image, leading to the failure to start the container. - - Next Steps - - - Correct the command in the pod's configuration to use the correct executable, likely wget, if that was the intended command. - - Verify the container image includes all necessary executables and dependencies. - - Update the pod configuration and redeploy the pod with the corrected command. You can do this by editing the pod's YAML configuration and applying the changes: - - ``` - kubectl edit pod inventory-management-api -n default - ``` - + - "The error message indicates: exec: 'wge': executable file not found in $PATH." evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml b/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml index 5c737af7..4ea66e0f 100644 --- a/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/03_cpu_throttling/test_case.yaml @@ -1,24 +1,10 @@ -expected_output: | - Alert Explanation - - The frontend-service pod in the default namespace is experiencing 100% CPU throttling for the stress container. This means the container is being limited by its CPU quota, potentially affecting performance. - Investigation - - - The frontend-service pod is running on node ip-172-31-21-139.us-east-2.compute.internal. - - The stress container is configured with CPU limits and requests set to 10m. 
- - The container is running a command to stress the CPU with 100 threads, which is likely causing the CPU throttling. - - The pod is in a Running state with no restarts, indicating it hasn't crashed or been restarted recently. - - Current logs show the stress command is actively running, but there are no previous logs available, suggesting the container hasn't been restarted. - - Conclusions and Possible Root Causes - - - CPU Limit Configuration: The stress container has a CPU limit of 10m, which is insufficient for the workload it is trying to perform (100 CPU threads). This is causing the CPU throttling. - - Application Design: The application is designed to use a high number of CPU threads, which is not compatible with the current CPU limits. - - Next Steps - - - Consider removing or increasing the CPU limit for the stress container if the application requires more CPU resources. This can be done by editing the pod's resource configuration: - `kubectl edit pod frontend-service -n default` - - Adjust the resources.limits.cpu value to a higher number or remove it entirely if appropriate. +expected_output: + - The frontend-service pod in the default namespace is experiencing 100% CPU throttling for the stress container + - Suggest increasing the CPU limit +retrieval_context: + - The frontend-service pod is running on node ip-172-31-21-139.us-east-2.compute.internal + - The stress container is configured with CPU limits and requests set to 10m + - The container is running a command to stress the CPU with 100 threads + - The pod is in a Running state with no restarts evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml index 0017f9ac..7ae8c64c 100644 --- a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/test_case.yaml @@ -1,20 +1,7 @@ -expected_output: | - # Alert Explanation - The pod `customer-relations-webapp-5d98ffcfd-tz4nc` in the `default` namespace is experiencing a warning event due to a failure in pulling the Docker image `yourcompany/crw:latest`. - - # Investigation - - The pod is in a `Pending` state with the container `crw-main-container` waiting due to `ErrImagePull`. - - The events indicate repeated failures to pull the image `yourcompany/crw:latest` due to "pull access denied" and "insufficient_scope: authorization failed". - - The logs for the current and previous container states are unavailable because the container has not started successfully. - - # Conclusions and Possible Root Causes - 1. *Image Pull Authorization Failure*: The image `yourcompany/crw:latest` cannot be pulled because of authorization issues. This could be due to missing or incorrect credentials for accessing the Docker registry. - 2. *Repository Access Issues*: The repository might not exist or the image tag `latest` might be incorrect, leading to the failure in resolving the image reference. - - # Next Steps - - Verify the credentials used for accessing the Docker registry and ensure they have the necessary permissions. - - Check if the image `yourcompany/crw:latest` exists in the Docker registry and is accessible. - - Update the Kubernetes secret used for Docker registry credentials if necessary. - - Consider specifying a specific image tag instead of `latest` to avoid potential issues with image resolution. 
+expected_output: + - The pod fails in pulling the Docker image `yourcompany/crw:latest` + - The answer should prompt the user to verify that the docker image exists +retrieval_context: + - The pod is in a `Pending` state with the container `crw-main-container` waiting due to `ErrImagePull` evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml b/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml index 4916b3b0..98ebed6c 100644 --- a/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/05_crashpod/test_case.yaml @@ -1,29 +1,5 @@ -expected_output: | - The pod `db-certs-authenticator-7ffd769f48-vndgd` in the `default` namespace is experiencing a `CrashLoopBackOff`, indicating that the application within the pod is repeatedly crashing and Kubernetes is attempting to restart it. - - # Investigation - 1. The runbook for `KubePodCrashLooping` suggests checking pod events, logs, and configuration for issues such as resource constraints, misconfigurations, or missing dependencies. - 2. Attempts to describe the pod and fetch logs failed because the pod `db-certs-authenticator-7ffd769f48-vndgd` is not currently found in the cluster. - 3. A search for the pod across all namespaces confirmed that it is not present. - - # Conclusions and Possible Root Causes - 1. *Pod Deletion*: The pod `db-certs-authenticator-7ffd769f48-vndgd` may have been deleted after the alert was triggered, which is why it is not found in the cluster. - 2. *Transient Issue*: The crash loop might have been a transient issue that resolved itself, leading to the pod being replaced or removed. - 3. *Configuration or Resource Issue*: If the pod was deleted due to a configuration or resource issue, it might recur when the pod is redeployed. - - # Next Steps - 1. **Verify Deployment**: Check the deployment `db-certs-authenticator` to ensure it is correctly configured and running. - ```bash - kubectl get deployment db-certs-authenticator -n default - ``` - 2. **Check for New Pods**: Look for any new pods created by the deployment to replace the crashed pod. - ```bash - kubectl get pods -n default -l app=db-certs-authenticator - ``` - 3. **Review Deployment Configuration**: Ensure that the deployment configuration does not have issues that could cause pods to crash. - ```bash - kubectl describe deployment db-certs-authenticator -n default - ``` - 4. **Monitor for Recurrence**: Keep an eye on the deployment for any new crash loop alerts to identify if the issue persists. +expected_output: + - The pod `db-certs-authenticator-7ffd769f48-vndgd` in the `default` namespace is experiencing a `CrashLoopBackOff` + - Mention the runbook `KubePodCrashLooping` evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml b/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml index 83075d3b..b7ca4874 100644 --- a/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/06_job_failure/test_case.yaml @@ -1,18 +1,6 @@ -expected_output: | - The job `java-api-checker` in the `default` namespace has failed because it reached the specified backoff limit. - - # Investigation - The job `java-api-checker` was configured with a `backoffLimit` of 1, meaning it would retry once upon failure. The job failed twice, as indicated by the events and logs. 
The logs from the pod `java-api-checker-slp89` show repeated network exceptions indicating that all attempts to connect to the database at `prod-db:3333` failed due to no available connections and a full queue. - - # Conclusions and Possible Root Causes - 1. *Database Connectivity Issues*: The primary issue is a failure to connect to the database at `prod-db:3333`. This could be due to network issues, the database being down, or the database being overloaded. - 2. *Configuration Error*: The job's configuration might not be suitable for the current database load, leading to connection failures. - 3. *Resource Limitations*: The database might be hitting its connection limits or queue size, preventing new connections. - - # Next Steps - 1. Verify the status and availability of the database at `prod-db:3333`. - 2. Check network connectivity between the Kubernetes cluster and the database. - 3. Review and possibly increase the database's connection limits or queue size. - 4. Consider adjusting the job's retry logic or backoff limit to allow more retries if transient issues are expected. +expected_output: + - The job `java-api-checker` in the `default` namespace has failed because it reached the specified backoff limit. + - The job `java-api-checker` was configured with a `backoffLimit` of 1 + - The logs from the pod `java-api-checker-slp89` show repeated network exceptions indicating that all attempts to connect to the database at `prod-db:3333` failed evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml b/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml index 3542506c..192fc4a4 100644 --- a/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/07_job_syntax_error/test_case.yaml @@ -1,28 +1,6 @@ -expected_output: | - The `KubeJobFailed` alert indicates that the Kubernetes job `product-data-scraper` in the `default` namespace failed to complete successfully. - - # Investigation - 1. The job `product-data-scraper` was configured to complete 5 times but failed all 5 attempts, reaching the backoff limit of 4. - 2. The job's pods (`product-data-scraper-2fqrh`, `product-data-scraper-5fdj6`, etc.) were created successfully but encountered errors during execution. - 3. The logs from one of the pods, `product-data-scraper-2fqrh`, show a syntax error: `/bin/sh: syntax error: unexpected "fi"`. - - # Conclusions and Possible Root Causes - 1. *Syntax Error in Command*: The job's container command has a syntax error, specifically an unexpected "fi", which is causing the job to fail. - 2. *Job Configuration*: The job's backoff limit was reached due to repeated failures, indicating that the job's command needs correction. - - # Next Steps - 1. **Fix the Command**: Correct the syntax error in the job's command. Ensure that the shell script logic is correct. - 2. **Redeploy the Job**: After fixing the command, delete the failed job and redeploy it to clear the alert. - 3. **Verify**: Monitor the job to ensure it completes successfully without hitting the backoff limit. 
- - To fix the job, you can edit the job's YAML to correct the command and then apply the changes: - ```bash - kubectl edit job product-data-scraper -n default - ``` - After editing, delete the existing job and create a new one: - ```bash - kubectl delete job product-data-scraper -n default - kubectl apply -f .yaml - ``` +expected_output: + - The Kubernetes job `product-data-scraper` failed to complete successfully + - The job's pods (`product-data-scraper-2fqrh`, `product-data-scraper-5fdj6`, etc.) were created successfully but encountered errors during execution. + - "The logs from one of the pods, `product-data-scraper-2fqrh`, show a syntax error: `/bin/sh: syntax error: unexpected 'fi'`" evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml b/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml index 3463aa3d..d72d4c65 100644 --- a/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/08_memory_pressure/test_case.yaml @@ -1,21 +1,5 @@ -expected_output: | - The node `ip-172-31-40-111.us-east-2.compute.internal` is running at 100% of its pod capacity, which is set to 17 pods. This alert indicates that the node cannot schedule any additional pods. - - # Investigation - - The node `ip-172-31-40-111.us-east-2.compute.internal` is currently running 17 pods, which is its maximum capacity. - - Several pods are pending, indicating they cannot be scheduled due to the node's capacity limit. - - The node is labeled as `t3.medium`, which typically has a lower pod capacity compared to larger instance types. - - Some pods on this node do not have CPU or memory requests defined, which can lead to inefficient resource utilization. - - # Conclusions and Possible Root Causes - 1. *Node Capacity Limit Reached*: The node has reached its maximum pod capacity of 17, preventing new pods from being scheduled. - 2. *Resource Requests Not Defined*: The absence of CPU and memory requests for some pods may lead to suboptimal scheduling and resource allocation. - 3. *Instance Type Limitation*: The `t3.medium` instance type may inherently support fewer pods due to its resource constraints. - - # Next Steps - 1. **Redistribute Pods**: Consider redistributing pods to other nodes with available capacity. - 2. **Increase Node Capacity**: Add more nodes to the cluster or upgrade existing nodes to a larger instance type to accommodate more pods. - 3. **Define Resource Requests**: Ensure all pods have defined CPU and memory requests to improve scheduling efficiency. - 4. **Adjust Kubelet Configuration**: If feasible, increase the `maxPods` setting in the Kubelet configuration for nodes that can handle more pods. +expected_output: + - The node `ip-172-31-40-111.us-east-2.compute.internal` is running at 100% of its pod capacity, which is set to 17 pods + - The node cannot schedule any additional pods. 
evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml b/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml index d4d5f8bf..4be18dee 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml +++ b/tests/llm/fixtures/test_investigate/09_high_latency/test_case.yaml @@ -1,18 +1,6 @@ -expected_output: | - The `HighLatencyForCustomerCheckout` alert is triggered because HTTP requests to the `/checkout` endpoint in the `customer-orders` application are taking longer than 3 seconds to complete. - - # Investigation - 1. **Deployment Details**: The `customer-orders` deployment in the `default` namespace is running with one replica. The deployment uses the `fastapi-app` container from the image `us-central1-docker.pkg.dev/genuine-flight-317411/devel/rds-demo:nicolas-no-db`. - 2. **Pod Logs**: The logs from the `fastapi-app` container show that database calls are taking a significant amount of time, ranging from 5 to 10 seconds, which is contributing to the high latency of the checkout page rendering. - - # Conclusions and Possible Root Causes - 1. *Database Latency*: The primary cause of the high latency appears to be the time taken for database calls. The logs indicate that the database call `sp_CheckUserNotifications` is taking between 5 to 10 seconds to complete, which directly impacts the response time of the `/checkout` endpoint. - 2. *Database Connection Issues*: The logs mention connecting to a database at `None`, which might indicate a misconfiguration or an issue with the database connection string. - - # Next Steps - 1. **Database Optimization**: Investigate the stored procedure `sp_CheckUserNotifications` for potential optimizations to reduce execution time. - 2. **Configuration Check**: Verify the database connection settings in the application configuration to ensure they are correct and pointing to the intended database. - 3. **Monitoring and Metrics**: Implement detailed monitoring on the database to identify any bottlenecks or resource constraints that could be causing delays. - 4. **Scaling**: Consider scaling the database resources or the application if the load is higher than expected. 
+expected_output: + - There is a HighLatencyForCustomerCheckout alert + - It is triggered by HTTP requests to the `/checkout` endpoint in the `customer-orders` application which are taking longer than 3 seconds to complete + - logs indicate that the database call `sp_CheckUserNotifications` is taking between 5 to 10 seconds to complete evaluation: - faithfulness: 0 + correctness: 0 diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json new file mode 100644 index 00000000..5e740e4d --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/investigate_request.json @@ -0,0 +1,15 @@ +{ + "source": "prometheus", + "title": "KubeControllerManagerDown", + "description": "", + "subject": {}, + "context": { + "issue_type": "KubeControllerManagerDown", + "robusta_issue_id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json new file mode 100644 index 00000000..e743ccbb --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/issue_data.json @@ -0,0 +1,31 @@ +{ + "id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "name": "KubeControllerManagerDown", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=absent%28up%7Bjob%3D%22kube-controller-manager%22%7D+%3D%3D+1%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubeControllerManagerDown", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "critical" + }, + "annotations": { + "description": "KubeControllerManager has disappeared from Prometheus target discovery.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown", + "summary": "Target disappeared from Prometheus target discovery." 
+ }, + "startsAt": "2024-11-29 06:30:39.202000+00:00", + "endsAt": "2024-11-29 09:18:09.202000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=absent%28up%7Bjob%3D%22kube-controller-manager%22%7D+%3D%3D+1%29&g0.tab=1", + "fingerprint": "467f7b1923c9cfcc", + "unique_id": "KubeControllerManagerDown-467f7b1923c9cfcc-2024-11-29 06:30:39.202000+00:00", + "duration": "2:47:30", + "name": "KubeControllerManagerDown", + "definition": "absent(up{job=\"kube-controller-manager\"} == 1)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: critical\n*Start Time*: 2024-11-29 06:30:39 UTC\n*Duration*: 2 hours", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubeControllerManagerDown\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: critical\n*Annotations:*\n• *description*: KubeControllerManager has disappeared from Prometheus target discovery.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown\n• *summary*: Target disappeared from Prometheus target discovery.\n" +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt new file mode 100644 index 00000000..8e9fb22a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/kubectl_get_all.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_all","match_params":{"kind":"node"}} +stdout: +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME LABELS +kind-control-plane Ready control-plane 11d v1.31.2 172.18.0.2 Debian GNU/Linux 12 (bookworm) 6.1.0-27-amd64 containerd://1.7.18 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=kind-control-plane,kubernetes.io/os=linux,node-role.kubernetes.io/control-plane= + +stderr: diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json new file mode 100644 index 00000000..41c749b6 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/resource_instructions.json @@ -0,0 +1,6 @@ +{ + "instructions": [ + "Check if the cluster is a managed cluster like EKS by fetching nodes and looking at their labels. If so, tell the user this is likely a known false positive in the kube-prometheus-stack alert because Prometheus can't scrape the scheduler which is managed by the cloud provider. On the other hand, if this is a self-managed Kubernetes, either the scheduler is really down (unlikely) or it is running but Prometheus can't scrape it." 
+ ], + "documents": [] +} diff --git a/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml new file mode 100644 index 00000000..2b1e4ecc --- /dev/null +++ b/tests/llm/fixtures/test_investigate/10_kube_controller_manager_down/test_case.yaml @@ -0,0 +1,3 @@ +expected_output: + - Prometheus can't scrape the control plane + - this is not an alert that requires your attention diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json new file mode 100644 index 00000000..fae72113 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubeDeploymentReplicasMismatch", + "description": "", + "subject": { + "name": "robusta-kube-state-metrics-8667fd9775-kbsp7", + "namespace": "default", + "kind": "pod", + "node": null, + "container": "kube-state-metrics", + "cluster": null + }, + "context": { + "issue_type": "KubeDeploymentReplicasMismatch", + "robusta_issue_id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json new file mode 100644 index 00000000..6e482060 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/issue_data.json @@ -0,0 +1,39 @@ +{ + "id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "name": "KubeDeploymentReplicasMismatch", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=%28kube_deployment_spec_replicas%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D+%3E+kube_deployment_status_replicas_available%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%29+and+%28changes%28kube_deployment_status_replicas_updated%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%5B10m%5D%29+%3D%3D+0%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubeDeploymentReplicasMismatch", + "container": "kube-state-metrics", + "deployment": "payment-processing-worker", + "endpoint": "http", + "instance": "10.244.0.3:8080", + "job": "kube-state-metrics", + "namespace": "default", + "pod": "robusta-kube-state-metrics-8667fd9775-kbsp7", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "service": "robusta-kube-state-metrics", + "severity": "warning" + }, + "annotations": { + "description": "Deployment default/payment-processing-worker has not matched the expected number of replicas for longer than 15 minutes.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch", + "summary": "Deployment has not matched the expected number of replicas." 
+ }, + "startsAt": "2024-11-29 08:39:57.669000+00:00", + "endsAt": "2024-11-29 08:55:57.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=%28kube_deployment_spec_replicas%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D+%3E+kube_deployment_status_replicas_available%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%29+and+%28changes%28kube_deployment_status_replicas_updated%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%7D%5B10m%5D%29+%3D%3D+0%29&g0.tab=1", + "fingerprint": "412453fa9d1e6ca0", + "unique_id": "KubeDeploymentReplicasMismatch-412453fa9d1e6ca0-2024-11-29 08:39:57.669000+00:00", + "duration": "0:16:00", + "name": "KubeDeploymentReplicasMismatch", + "definition": "(kube_deployment_spec_replicas{job=\"kube-state-metrics\",namespace=~\".*\"} > kube_deployment_status_replicas_available{job=\"kube-state-metrics\",namespace=~\".*\"}) and (changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\",namespace=~\".*\"}[10m]) == 0)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:39:57 UTC\n*Duration*: 16 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubeDeploymentReplicasMismatch\n• *container*: kube-state-metrics\n• *deployment*: payment-processing-worker\n• *endpoint*: http\n• *instance*: 10.244.0.3:8080\n• *job*: kube-state-metrics\n• *namespace*: default\n• *pod*: robusta-kube-state-metrics-8667fd9775-kbsp7\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *service*: robusta-kube-state-metrics\n• *severity*: warning\n*Annotations:*\n• *description*: Deployment default/payment-processing-worker has not matched the expected number of replicas for longer than 15 minutes.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch\n• *summary*: Deployment has not matched the expected number of replicas.\n" +} diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt new file mode 100644 index 00000000..636596b1 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe.txt @@ -0,0 +1,42 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "deployment", "name": "payment-processing-worker", "namespace": "default"}} +stdout: +Name: payment-processing-worker +Namespace: default +CreationTimestamp: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: +Annotations: deployment.kubernetes.io/revision: 1 +Selector: app=payment-processing-worker +Replicas: 1 desired | 1 updated | 1 total | 0 available | 1 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 25% max unavailable, 25% max surge +Pod Template: + Labels: app=payment-processing-worker + Containers: + payment-processing-container: + Image: bash + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + Environment: + Mounts: + Volumes: + Node-Selectors: + Tolerations: +Conditions: + Type Status Reason + ---- ------ ------ + Available False MinimumReplicasUnavailable + Progressing False ProgressDeadlineExceeded +OldReplicaSets: +NewReplicaSet: payment-processing-worker-747ccfb9db (1/1 replicas created) 
+Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 28m deployment-controller Scaled up replica set payment-processing-worker-747ccfb9db to 1 + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt new file mode 100644 index 00000000..7d62aad4 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt @@ -0,0 +1,64 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind":"pod","name":"payment-processing-worker-747ccfb9db-pd6wz","namespace":"default"}} +stdout: +Name: payment-processing-worker-747ccfb9db-pd6wz +Namespace: default +Priority: 0 +Service Account: default +Node: kind-control-plane/172.18.0.2 +Start Time: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: app=payment-processing-worker + pod-template-hash=747ccfb9db +Annotations: +Status: Running +IP: 10.244.0.15 +IPs: + IP: 10.244.0.15 +Controlled By: ReplicaSet/payment-processing-worker-747ccfb9db +Containers: + payment-processing-container: + Container ID: containerd://5714b7a193dd4bd5cfeede3e2cd37a53ba88ce5f292974d5aaeda1c1d0b8167f + Image: bash + Image ID: docker.io/library/bash@sha256:4bbfbe07eceeed5ab9136b37faf4f5cff3c28a339087ce068a76f2c1733054e8 + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + State: Waiting + Reason: CrashLoopBackOff + Last State: Terminated + Reason: Completed + Exit Code: 0 + Started: Fri, 29 Nov 2024 14:01:30 +0100 + Finished: Fri, 29 Nov 2024 14:01:30 +0100 + Ready: False + Restart Count: 59 + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-lfjgv (ro) +Conditions: + Type Status + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True +Volumes: + kube-api-access-lfjgv: + Type: Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning BackOff 73s (x1289 over 4h41m) kubelet Back-off restarting failed container payment-processing-container in pod payment-processing-worker-747ccfb9db-pd6wz_default(b9cc2b01-9250-4360-941f-40e522628f6c) + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt new file mode 100644 index 00000000..e806771a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_all.txt @@ -0,0 +1,54 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get_all","match_params":{"kind":"pod"}} +stdout: +NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES LABELS +default alertmanager-robusta-kube-prometheus-st-alertmanager-0 2/2 Running 16 (6h50m ago) 10d 10.244.0.14 kind-control-plane 
alertmanager=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/instance=robusta-kube-prometheus-st-alertmanager,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=alertmanager,app.kubernetes.io/version=0.26.0,apps.kubernetes.io/pod-index=0,controller-revision-hash=alertmanager-robusta-kube-prometheus-st-alertmanager-57cd7fb46f,statefulset.kubernetes.io/pod-name=alertmanager-robusta-kube-prometheus-st-alertmanager-0 +default analytics-exporter-fast-76897854c-4m49l 0/1 CrashLoopBackOff 59 (32s ago) 4h39m 10.244.0.37 kind-control-plane app=analytics-exporter-fast,pod-template-hash=76897854c +default customer-orders-6c69574678-jl5wk 1/2 CreateContainerConfigError 0 4h41m 10.244.0.19 kind-control-plane app=customer-orders,pod-template-hash=6c69574678 +default customer-orders-for-medium-7744d956fb-dd4hh 1/2 CreateContainerConfigError 2 (6h50m ago) 2d6h 10.244.0.7 kind-control-plane app=customer-orders-for-medium,pod-template-hash=7744d956fb +default customer-relations-webapp-7c67c65579-kdf9r 0/1 ImagePullBackOff 0 4h41m 10.244.0.18 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default customer-relations-webapp-7c67c65579-l8rdf 0/1 ImagePullBackOff 0 4h41m 10.244.0.16 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default customer-relations-webapp-7c67c65579-m5b2s 0/1 ImagePullBackOff 0 4h41m 10.244.0.17 kind-control-plane app=customer-relations,pod-template-hash=7c67c65579,visualize=true +default db-certs-authenticator-757f89d977-xjnbs 0/1 CrashLoopBackOff 49 (49s ago) 4h39m 10.244.0.40 kind-control-plane app=flask,pod-template-hash=757f89d977 +default get-data-ldxkt 0/1 Error 0 2d6h kind-control-plane batch.kubernetes.io/controller-uid=5315b81a-e300-47c0-8115-8e8e2316c910,batch.kubernetes.io/job-name=get-data,controller-uid=5315b81a-e300-47c0-8115-8e8e2316c910,job-name=get-data +default java-api-checker-frj2z 0/1 Error 0 4h39m 10.244.0.39 kind-control-plane batch.kubernetes.io/controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,job-name=java-api-checker +default java-api-checker-svhxl 0/1 Error 0 4h40m 10.244.0.35 kind-control-plane batch.kubernetes.io/controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,batch.kubernetes.io/job-name=java-api-checker,controller-uid=e5d16a17-76e1-43a0-82bc-897d284b3bf7,job-name=java-api-checker +default logging-agent 0/1 Init:CrashLoopBackOff 59 (3m44s ago) 4h41m 10.244.0.34 kind-control-plane +default my-http-app-deployment-75cdcff8b4-tgscj 1/1 Running 2 (6h50m ago) 2d5h 10.244.0.5 kind-control-plane app=my-http-app,pod-template-hash=75cdcff8b4 +default network-connector 1/1 Running 0 4h39m 10.244.0.41 kind-control-plane +default payment-processing-worker-747ccfb9db-pd6wz 0/1 CrashLoopBackOff 59 (4m14s ago) 4h41m 10.244.0.15 kind-control-plane app=payment-processing-worker,pod-template-hash=747ccfb9db +default prometheus-robusta-kube-prometheus-st-prometheus-0 2/2 Running 16 (6h50m ago) 10d 10.244.0.13 kind-control-plane 
app.kubernetes.io/instance=robusta-kube-prometheus-st-prometheus,app.kubernetes.io/managed-by=prometheus-operator,app.kubernetes.io/name=prometheus,app.kubernetes.io/version=2.48.1,apps.kubernetes.io/pod-index=0,controller-revision-hash=prometheus-robusta-kube-prometheus-st-prometheus-55d87c869b,operator.prometheus.io/name=robusta-kube-prometheus-st-prometheus,operator.prometheus.io/shard=0,prometheus=robusta-kube-prometheus-st-prometheus,statefulset.kubernetes.io/pod-name=prometheus-robusta-kube-prometheus-st-prometheus-0 +default robusta-forwarder-5c5fdbbf57-vcm8t 1/1 Running 8 (6h50m ago) 10d 10.244.0.4 kind-control-plane app=robusta-forwarder,pod-template-hash=5c5fdbbf57 +default robusta-grafana-8588b8fb85-fcm7f 3/3 Running 24 (6h50m ago) 10d 10.244.0.9 kind-control-plane app.kubernetes.io/instance=robusta,app.kubernetes.io/name=grafana,pod-template-hash=8588b8fb85 +default robusta-holmes-78fcd58947-djt9g 1/1 Running 10 (6h50m ago) 7d5h 10.244.0.6 kind-control-plane app=holmes,pod-template-hash=78fcd58947 +default robusta-kube-prometheus-st-operator-6885c8f675-56dbg 1/1 Running 16 (6h49m ago) 10d 10.244.0.12 kind-control-plane app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/part-of=kube-prometheus-stack,app.kubernetes.io/version=55.7.0,app=kube-prometheus-stack-operator,chart=kube-prometheus-stack-55.7.0,heritage=Helm,pod-template-hash=6885c8f675,release=robusta +default robusta-kube-state-metrics-8667fd9775-kbsp7 1/1 Running 15 (6h50m ago) 10d 10.244.0.3 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=kube-state-metrics,app.kubernetes.io/part-of=kube-state-metrics,app.kubernetes.io/version=2.10.1,helm.sh/chart=kube-state-metrics-5.15.3,pod-template-hash=8667fd9775,release=robusta +default robusta-prometheus-node-exporter-g5kxh 1/1 Running 8 (6h50m ago) 10d 172.18.0.2 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/part-of=prometheus-node-exporter,app.kubernetes.io/version=1.7.0,controller-revision-hash=7b4f94f6bf,helm.sh/chart=prometheus-node-exporter-4.24.0,jobLabel=node-exporter,pod-template-generation=1,release=robusta +default robusta-runner-746d848db9-f8ns9 1/1 Running 17 (6h50m ago) 10d 10.244.0.2 kind-control-plane app=robusta-runner,pod-template-hash=746d848db9,robustaComponent=runner +default search-engine-service 0/1 Running 0 4h40m 10.244.0.36 kind-control-plane +default user-profile-import 0/1 Pending 0 4h40m +default user-profile-resources-659d4dd659-q7xnw 0/1 Pending 0 4h40m app=user-profile-resources,pod-template-hash=659d4dd659 +kube-system coredns-7c65d6cfc9-c24ls 1/1 Running 9 (6h50m ago) 11d 10.244.0.10 kind-control-plane k8s-app=kube-dns,pod-template-hash=7c65d6cfc9 +kube-system coredns-7c65d6cfc9-cphtd 1/1 Running 9 (6h50m ago) 11d 10.244.0.11 kind-control-plane k8s-app=kube-dns,pod-template-hash=7c65d6cfc9 +kube-system etcd-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=etcd,tier=control-plane +kube-system kindnet-wns4v 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane app=kindnet,controller-revision-hash=f87fcbc4c,k8s-app=kindnet,pod-template-generation=1,tier=node +kube-system kube-apiserver-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-apiserver,tier=control-plane +kube-system 
kube-controller-manager-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-controller-manager,tier=control-plane +kube-system kube-proxy-fhk5k 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane controller-revision-hash=77987969cc,k8s-app=kube-proxy,pod-template-generation=1 +kube-system kube-scheduler-kind-control-plane 1/1 Running 9 (6h50m ago) 11d 172.18.0.2 kind-control-plane component=kube-scheduler,tier=control-plane +local-path-storage local-path-provisioner-57c5987fd4-gdjv2 1/1 Running 18 (6h49m ago) 11d 10.244.0.8 kind-control-plane app=local-path-provisioner,pod-template-hash=57c5987fd4 +sock-shop carts-db-676c6b5865-5rsfq 1/1 Running 0 4h41m 10.244.0.21 kind-control-plane name=carts-db,pod-template-hash=676c6b5865 +sock-shop carts-f54496f6d-kl2dj 1/1 Running 0 4h41m 10.244.0.20 kind-control-plane name=carts,pod-template-hash=f54496f6d +sock-shop catalogue-db-c948fd796-6mgbm 1/1 Running 0 4h41m 10.244.0.23 kind-control-plane name=catalogue-db,pod-template-hash=c948fd796 +sock-shop catalogue-f7687cb4-crpnn 1/1 Running 0 4h41m 10.244.0.22 kind-control-plane name=catalogue,pod-template-hash=f7687cb4 +sock-shop front-end-6bc5876d74-qkqwc 1/1 Running 0 4h41m 10.244.0.24 kind-control-plane name=front-end,pod-template-hash=6bc5876d74 +sock-shop orders-697cb4957d-t6slr 1/1 Running 0 4h41m 10.244.0.25 kind-control-plane name=orders,pod-template-hash=697cb4957d +sock-shop orders-db-658fc79675-5sdzp 1/1 Running 0 4h41m 10.244.0.26 kind-control-plane name=orders-db,pod-template-hash=658fc79675 +sock-shop payment-84bbbfd97f-4stdr 1/1 Running 0 4h41m 10.244.0.27 kind-control-plane name=payment,pod-template-hash=84bbbfd97f +sock-shop queue-master-566bfb8644-gjmh6 1/1 Running 0 4h41m 10.244.0.28 kind-control-plane name=queue-master,pod-template-hash=566bfb8644 +sock-shop rabbitmq-556cb847-4c26z 2/2 Running 0 4h41m 10.244.0.29 kind-control-plane name=rabbitmq,pod-template-hash=556cb847 +sock-shop session-db-59cc4b855f-7t59g 1/1 Running 0 4h41m 10.244.0.30 kind-control-plane name=session-db,pod-template-hash=59cc4b855f +sock-shop shipping-c4d77b86d-rzlwp 1/1 Running 0 4h41m 10.244.0.31 kind-control-plane name=shipping,pod-template-hash=c4d77b86d +sock-shop user-574f64957c-hxgd4 1/1 Running 0 4h41m 10.244.0.32 kind-control-plane name=user,pod-template-hash=574f64957c +sock-shop user-db-7f98f68489-tb2dx 1/1 Running 0 4h41m 10.244.0.33 kind-control-plane name=user-db,pod-template-hash=7f98f68489 + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt new file mode 100644 index 00000000..a9f46724 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_deployment.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get","match_params":{"kind": "deployment", "name": "payment-processing-worker", "namespace": "default"}} +stdout: +NAME READY UP-TO-DATE AVAILABLE AGE CONTAINERS IMAGES SELECTOR LABELS +payment-processing-worker 0/1 1 0 28m payment-processing-container bash app=payment-processing-worker + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt new file mode 100644 index 00000000..ee0c3ace --- /dev/null +++ 
b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_get_pod.txt @@ -0,0 +1,6 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_get","match_params":{"kind": "pod", "name": "robusta-kube-state-metrics-8667fd9775-kbsp7", "namespace": "default"}} +stdout: +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES LABELS +robusta-kube-state-metrics-8667fd9775-kbsp7 1/1 Running 15 (157m ago) 10d 10.244.0.3 kind-control-plane app.kubernetes.io/component=metrics,app.kubernetes.io/instance=robusta,app.kubernetes.io/managed-by=Helm,app.kubernetes.io/name=kube-state-metrics,app.kubernetes.io/part-of=kube-state-metrics,app.kubernetes.io/version=2.10.1,helm.sh/chart=kube-state-metrics-5.15.3,pod-template-hash=8667fd9775,release=robusta + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt new file mode 100644 index 00000000..772dee4e --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name":"payment-processing-worker-747ccfb9db-pd6wz","namespace":"default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml new file mode 100644 index 00000000..3cab0fee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/test_case.yaml @@ -0,0 +1,5 @@ +expected_output: + - The `payment-processing-worker` deployment has not matched the expected number of replicas for over 15 minutes. + - The `payment-processing-worker` deployment is configured with 1 desired replica, but currently has 0 available replicas. + - "The deployment's condition indicates `Available: False` due to `MinimumReplicasUnavailable` and `Progressing: False` due to `ProgressDeadlineExceeded`" + - The pod `robusta-kube-state-metrics-8667fd9775-kbsp7` is running and has restarted 15 times over the past 10 days, but this is unrelated to the `payment-processing-worker` deployment. 
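For reference, the fixtures above use the list-based expected_output format that this patch introduces, where each bullet is scored by the new correctness classifier and compared against the threshold in the test case's evaluation block. A minimal, hypothetical test_case.yaml sketch (the wording of the bullets, the 0.7 threshold, and the comment are illustrative only, not taken from this patch) might look like:

    expected_output:
      - The payment-processing-worker deployment is unavailable because its only pod is in CrashLoopBackOff
      - The container exits because the DEPLOY_ENV environment variable is undefined
    evaluation:
      correctness: 0.7   # minimum correctness score the LLM answer must reach for the test to pass
    generate_mocks: False

Under this scheme the correctness score is intended to be roughly the fraction of expected elements that appear in the investigation output, so a threshold below 1 tolerates partially complete answers.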
diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json new file mode 100644 index 00000000..aaf669f4 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubePodCrashLooping", + "description": "", + "subject": { + "name": "payment-processing-worker-747ccfb9db-pd6wz", + "namespace": "default", + "kind": "pod", + "node": null, + "container": "payment-processing-container", + "cluster": null + }, + "context": { + "issue_type": "KubePodCrashLooping", + "robusta_issue_id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json new file mode 100644 index 00000000..75189dfb --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/issue_data.json @@ -0,0 +1,40 @@ +{ + "id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "name": "KubePodCrashLooping", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=max_over_time%28kube_pod_container_status_waiting_reason%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Creason%3D%22CrashLoopBackOff%22%7D%5B5m%5D%29+%3E%3D+1&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubePodCrashLooping", + "container": "payment-processing-container", + "endpoint": "http", + "instance": "10.244.0.3:8080", + "job": "kube-state-metrics", + "namespace": "default", + "pod": "payment-processing-worker-747ccfb9db-pd6wz", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "reason": "CrashLoopBackOff", + "service": "robusta-kube-state-metrics", + "severity": "warning", + "uid": "b9cc2b01-9250-4360-941f-40e522628f6c" + }, + "annotations": { + "description": "Pod default/payment-processing-worker-747ccfb9db-pd6wz (payment-processing-container) is in waiting state (reason: \"CrashLoopBackOff\").", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping", + "summary": "Pod is crash looping." 
+ }, + "startsAt": "2024-11-29 08:40:57.669000+00:00", + "endsAt": "2024-11-29 09:16:27.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=max_over_time%28kube_pod_container_status_waiting_reason%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Creason%3D%22CrashLoopBackOff%22%7D%5B5m%5D%29+%3E%3D+1&g0.tab=1", + "fingerprint": "a38bd979bba359db", + "unique_id": "KubePodCrashLooping-a38bd979bba359db-2024-11-29 08:40:57.669000+00:00", + "duration": "0:35:30", + "name": "KubePodCrashLooping", + "definition": "max_over_time(kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\",namespace=~\".*\",reason=\"CrashLoopBackOff\"}[5m]) >= 1" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:40:57 UTC\n*Duration*: 35 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubePodCrashLooping\n• *container*: payment-processing-container\n• *endpoint*: http\n• *instance*: 10.244.0.3:8080\n• *job*: kube-state-metrics\n• *namespace*: default\n• *pod*: payment-processing-worker-747ccfb9db-pd6wz\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *reason*: CrashLoopBackOff\n• *service*: robusta-kube-state-metrics\n• *severity*: warning\n• *uid*: b9cc2b01-9250-4360-941f-40e522628f6c\n*Annotations:*\n• *description*: Pod default/payment-processing-worker-747ccfb9db-pd6wz (payment-processing-container) is in waiting state (reason: \"CrashLoopBackOff\").\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping\n• *summary*: Pod is crash looping.\n" +} diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt new file mode 100644 index 00000000..a884efec --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_describe.txt @@ -0,0 +1,72 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "pod", "name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Name: payment-processing-worker-747ccfb9db-pd6wz +Namespace: default +Priority: 0 +Service Account: default +Node: kind-control-plane/172.18.0.2 +Start Time: Fri, 29 Nov 2024 09:24:29 +0100 +Labels: app=payment-processing-worker + pod-template-hash=747ccfb9db +Annotations: +Status: Running +IP: 10.244.0.15 +IPs: + IP: 10.244.0.15 +Controlled By: ReplicaSet/payment-processing-worker-747ccfb9db +Containers: + payment-processing-container: + Container ID: containerd://fc89df0a97d127b9d9af7e5fa222d79655c679caadb798f92598825cf7b9b6f1 + Image: bash + Image ID: docker.io/library/bash@sha256:4bbfbe07eceeed5ab9136b37faf4f5cff3c28a339087ce068a76f2c1733054e8 + Port: + Host Port: + Command: + /bin/sh + Args: + -c + if [[ -z "${DEPLOY_ENV}" ]]; then echo Environment variable DEPLOY_ENV is undefined ; else while true; do echo hello; sleep 10;done; fi + State: Waiting + Reason: CrashLoopBackOff + Last State: Terminated + Reason: Completed + Exit Code: 0 + Started: Fri, 29 Nov 2024 10:10:59 +0100 + Finished: Fri, 29 Nov 2024 10:10:59 +0100 + Ready: False + Restart Count: 14 + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-lfjgv (ro) +Conditions: + Type Status + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True +Volumes: + kube-api-access-lfjgv: + Type: 
Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal Scheduled 48m default-scheduler Successfully assigned default/payment-processing-worker-747ccfb9db-pd6wz to kind-control-plane + Normal Pulled 48m kubelet Successfully pulled image "bash" in 1.079s (1.079s including waiting). Image size: 6335174 bytes. + Normal Pulled 48m kubelet Successfully pulled image "bash" in 827ms (5.062s including waiting). Image size: 6335174 bytes. + Normal Pulled 48m kubelet Successfully pulled image "bash" in 822ms (822ms including waiting). Image size: 6335174 bytes. + Normal Created 47m (x4 over 48m) kubelet Created container payment-processing-container + Normal Started 47m (x4 over 48m) kubelet Started container payment-processing-container + Normal Pulled 47m kubelet Successfully pulled image "bash" in 805ms (805ms including waiting). Image size: 6335174 bytes. + Normal Pulling 46m (x5 over 48m) kubelet Pulling image "bash" + Warning BackOff 3m30s (x206 over 48m) kubelet Back-off restarting failed container payment-processing-container in pod payment-processing-worker-747ccfb9db-pd6wz_default(b9cc2b01-9250-4360-941f-40e522628f6c) + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt new file mode 100644 index 00000000..626ee21f --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt new file mode 100644 index 00000000..49f5f32d --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/kubectl_previous_logs.txt @@ -0,0 +1,5 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_previous_logs","match_params":{"name": "payment-processing-worker-747ccfb9db-pd6wz", "namespace": "default"}} +stdout: +Environment variable DEPLOY_ENV is undefined + +stderr: diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml new file mode 100644 index 00000000..04ad5d3e --- /dev/null +++ b/tests/llm/fixtures/test_investigate/12_KubePodCrashLooping/test_case.yaml @@ -0,0 +1,3 @@ +expected_output: + - The DEPLOY_ENV environment variable is missing +generate_mocks: True diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json 
b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json new file mode 100644 index 00000000..6fc4434c --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/investigate_request.json @@ -0,0 +1,22 @@ +{ + "source": "prometheus", + "title": "KubePodNotReady", + "description": "", + "subject": { + "name": "user-profile-import", + "namespace": "default", + "kind": "pod", + "node": null, + "container": null, + "cluster": null + }, + "context": { + "issue_type": "KubePodNotReady", + "robusta_issue_id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json new file mode 100644 index 00000000..d321db2a --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/issue_data.json @@ -0,0 +1,33 @@ +{ + "id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "name": "KubePodNotReady", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=sum+by+%28namespace%2C+pod%2C+cluster%29+%28max+by+%28namespace%2C+pod%2C+cluster%29+%28kube_pod_status_phase%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Cphase%3D~%22Pending%7CUnknown%7CFailed%22%7D%29+%2A+on+%28namespace%2C+pod%2C+cluster%29+group_left+%28owner_kind%29+topk+by+%28namespace%2C+pod%2C+cluster%29+%281%2C+max+by+%28namespace%2C+pod%2C+owner_kind%2C+cluster%29+%28kube_pod_owner%7Bowner_kind%21%3D%22Job%22%7D%29%29%29+%3E+0&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "KubePodNotReady", + "namespace": "default", + "pod": "user-profile-import", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "warning" + }, + "annotations": { + "description": "Pod default/user-profile-import has been in a non-ready state for longer than 15 minutes.", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready", + "summary": "Pod has been in a non-ready state for more than 15 minutes." 
+ }, + "startsAt": "2024-11-29 08:40:27.669000+00:00", + "endsAt": "2024-11-29 09:17:27.669000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=sum+by+%28namespace%2C+pod%2C+cluster%29+%28max+by+%28namespace%2C+pod%2C+cluster%29+%28kube_pod_status_phase%7Bjob%3D%22kube-state-metrics%22%2Cnamespace%3D~%22.%2A%22%2Cphase%3D~%22Pending%7CUnknown%7CFailed%22%7D%29+%2A+on+%28namespace%2C+pod%2C+cluster%29+group_left+%28owner_kind%29+topk+by+%28namespace%2C+pod%2C+cluster%29+%281%2C+max+by+%28namespace%2C+pod%2C+owner_kind%2C+cluster%29+%28kube_pod_owner%7Bowner_kind%21%3D%22Job%22%7D%29%29%29+%3E+0&g0.tab=1", + "fingerprint": "02d9413092785cdd", + "unique_id": "KubePodNotReady-02d9413092785cdd-2024-11-29 08:40:27.669000+00:00", + "duration": "0:37:00", + "name": "KubePodNotReady", + "definition": "sum by (namespace, pod, cluster) (max by (namespace, pod, cluster) (kube_pod_status_phase{job=\"kube-state-metrics\",namespace=~\".*\",phase=~\"Pending|Unknown|Failed\"}) * on (namespace, pod, cluster) group_left (owner_kind) topk by (namespace, pod, cluster) (1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"Job\"}))) > 0" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: warning\n*Start Time*: 2024-11-29 08:40:27 UTC\n*Duration*: 37 minutes", + "presentation_all_metadata": "*Labels:*\n• *alertname*: KubePodNotReady\n• *namespace*: default\n• *pod*: user-profile-import\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: warning\n*Annotations:*\n• *description*: Pod default/user-profile-import has been in a non-ready state for longer than 15 minutes.\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready\n• *summary*: Pod has been in a non-ready state for more than 15 minutes.\n" +} diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt new file mode 100644 index 00000000..65296093 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt @@ -0,0 +1,41 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_describe","match_params":{"kind": "pod", "name": "user-profile-import", "namespace": "default"}} +stdout: +Name: user-profile-import +Namespace: default +Priority: 0 +Service Account: default +Node: +Labels: +Annotations: +Status: Pending +IP: +IPs: +Containers: + nginx: + Image: nginx + Port: + Host Port: + Environment: + Mounts: + /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-47fsg (ro) +Conditions: + Type Status + PodScheduled False +Volumes: + kube-api-access-47fsg: + Type: Projected (a volume that contains injected data from multiple sources) + TokenExpirationSeconds: 3607 + ConfigMapName: kube-root-ca.crt + ConfigMapOptional: + DownwardAPI: true +QoS Class: BestEffort +Node-Selectors: label=someLabel +Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + node.kubernetes.io/unreachable:NoExecute op=Exists for 300s +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning FailedScheduling 48m default-scheduler 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + Warning FailedScheduling 33m (x3 over 43m) default-scheduler 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. 
preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt new file mode 100644 index 00000000..014bd45f --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_events.txt @@ -0,0 +1,7 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_events","match_params":{"resource_type": "pod", "pod_name": "user-profile-import", "namespace": "default"}} +stdout: +LAST SEEN TYPE REASON OBJECT MESSAGE +48m Warning FailedScheduling Pod/user-profile-import 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. +33m (x3 over 43m) Warning FailedScheduling Pod/user-profile-import 0/1 nodes are available: 1 node(s) didn't match Pod's node affinity/selector. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt new file mode 100644 index 00000000..c31a05e3 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_logs.txt @@ -0,0 +1,4 @@ +{"toolset_name":"kubernetes/core","tool_name":"kubectl_logs","match_params":{"name": "user-profile-import", "namespace": "default"}} +stdout: + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt new file mode 100644 index 00000000..f7541481 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_previous_logs.txt @@ -0,0 +1,4 @@ +{"toolset_name":"kubernetes/live-metrics","tool_name":"kubectl_previous_logs","match_params":{"name": "user-profile-import", "namespace": "default"}} +stdout: + +stderr: diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml new file mode 100644 index 00000000..3594c542 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/test_case.yaml @@ -0,0 +1,2 @@ +expected_output: + - This must mention insufficient GPU resources diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json b/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json new file mode 100644 index 00000000..9d97f345 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/investigate_request.json @@ -0,0 +1,15 @@ +{ + "source": "prometheus", + "title": "Watchdog", + "description": "", + "subject": {}, + "context": { + "issue_type": "Watchdog", + "robusta_issue_id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "source": "prometheus" + }, + "source_instance_id": "ApiRequest", + "include_tool_calls": true, + "include_tool_call_results": true, + "prompt_template": "builtin://generic_investigation.jinja2" +} diff --git 
a/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json b/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json new file mode 100644 index 00000000..c0d2bf21 --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/issue_data.json @@ -0,0 +1,31 @@ +{ + "id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "name": "Watchdog", + "source_type": "prometheus", + "source_instance_id": "http://localhost:9093", + "url": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=vector%281%29&g0.tab=1", + "raw": { + "status": "active", + "labels": { + "alertname": "Watchdog", + "prometheus": "default/robusta-kube-prometheus-st-prometheus", + "severity": "none" + }, + "annotations": { + "description": "This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n", + "runbook_url": "https://runbooks.prometheus-operator.dev/runbooks/general/watchdog", + "summary": "An alert that should always be firing to certify that Alertmanager is working properly." + }, + "startsAt": "2024-11-29 06:15:32.570000+00:00", + "endsAt": "2024-11-29 09:18:02.570000+00:00", + "generatorURL": "http://robusta-kube-prometheus-st-prometheus.default:9090/graph?g0.expr=vector%281%29&g0.tab=1", + "fingerprint": "f482596675d33088", + "unique_id": "Watchdog-f482596675d33088-2024-11-29 06:15:32.570000+00:00", + "duration": "3:02:30", + "name": "Watchdog", + "definition": "vector(1)" + }, + "presentation_status": null, + "presentation_key_metadata": "*Severity*: none\n*Start Time*: 2024-11-29 06:15:32 UTC\n*Duration*: 3 hours", + "presentation_all_metadata": "*Labels:*\n• *alertname*: Watchdog\n• *prometheus*: default/robusta-kube-prometheus-st-prometheus\n• *severity*: none\n*Annotations:*\n• *description*: This is an alert meant to ensure that the entire alerting pipeline is functional.\nThis alert is always firing, therefore it should always be firing in Alertmanager\nand always fire against a receiver. There are integrations with various notification\nmechanisms that send a notification when this alert is not firing. For example the\n\"DeadMansSnitch\" integration in PagerDuty.\n\n• *runbook_url*: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog\n• *summary*: An alert that should always be firing to certify that Alertmanager is working properly.\n" +} diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json b/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/resource_instructions.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml b/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml new file mode 100644 index 00000000..247d94ed --- /dev/null +++ b/tests/llm/fixtures/test_investigate/14_Watchdog/test_case.yaml @@ -0,0 +1,4 @@ +expected_output: + - This is not an issue + - The `Watchdog` alert is designed to always be active. It serves as a health check for the alerting pipeline, ensuring that the Alertmanager is functioning correctly. 
+ - This alert should always be firing, and if it stops, it indicates a problem with the alerting system. diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index b6b8cfc2..ca7b4039 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -8,13 +8,12 @@ from holmes.core.tool_calling_llm import LLMResult, ToolCallingLLM from holmes.core.tools import ToolExecutor import tests.llm.utils.braintrust as braintrust_util -from tests.llm.utils.classifiers import get_context_classifier +from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality from tests.llm.utils.commands import after_test, before_test from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import readable_timestamp from tests.llm.utils.mock_toolset import MockToolsets -from autoevals.llm import Factuality from tests.llm.utils.mock_utils import AskHolmesTestCase, MockHelper from tests.llm.utils.system import get_machine_state_tags from os import path @@ -53,13 +52,13 @@ def idfn(val): @pytest.mark.llm @pytest.mark.skipif(not os.environ.get('BRAINTRUST_API_KEY'), reason="BRAINTRUST_API_KEY must be set to run LLM evaluations") @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) -def test_ask_holmes_with_braintrust(experiment_name, test_case): +def test_ask_holmes(experiment_name, test_case): bt_helper = braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) - eval_factuality = Factuality() + try: before_test(test_case) @@ -76,26 +75,32 @@ def test_ask_holmes_with_braintrust(experiment_name, test_case): output = result.result expected = test_case.expected_output - scores = { - "faithfulness": eval_factuality(output, expected, input=input).score - } + + scores = {} + + if isinstance(expected, list): + scores["correctness"] = evaluate_correctness(output=output, expected_elements=expected).score + else: + scores["faithfulness"] = evaluate_factuality(output=output, expected=expected, input=input).score if len(test_case.retrieval_context) > 0: - evaluate_context_usage = get_context_classifier(test_case.retrieval_context) - scores["context"] = evaluate_context_usage(output, expected, input=input).score + scores["context"] = evaluate_context_usage(output=output, context_items=test_case.retrieval_context, input=input).score bt_helper.end_evaluation( eval=eval, input=input, output=output or "", - expected=expected, + expected=str(expected), id=test_case.id, scores=scores ) print(f"** OUTPUT **\n{output}") print(f"** SCORES **\n{scores}") - assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + if scores.get("faithfulness"): + assert scores.get("faithfulness", 0) >= test_case.evaluation.faithfulness + if scores.get("correctness"): + assert scores.get("correctness", 0) >= test_case.evaluation.correctness assert scores.get("context", 0) >= test_case.evaluation.context @@ -113,7 +118,7 @@ def ask_holmes(test_case:AskHolmesTestCase) -> LLMResult: ai = ToolCallingLLM( tool_executor=tool_executor, max_steps=10, - llm=DefaultLLM("gpt-4o") + llm=DefaultLLM(os.environ.get("MODEL", "gpt-4o")) ) chat_request = ChatRequest(ask=test_case.user_prompt) diff --git a/tests/llm/test_investigate.py b/tests/llm/test_investigate.py index 226a3cf6..7f0a3703 100644 --- a/tests/llm/test_investigate.py +++ b/tests/llm/test_investigate.py @@ -3,7 +3,6 @@ from pathlib import Path from typing import Optional -from 
autoevals import Factuality import pytest from rich.console import Console @@ -12,7 +11,7 @@ from holmes.core.investigation import investigate_issues from holmes.core.supabase_dal import SupabaseDal from holmes.core.tools import ToolExecutor, ToolsetPattern -from tests.llm.utils.classifiers import get_context_classifier, get_logs_explanation_classifier +from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality, evaluate_previous_logs_mention from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import get_machine_state_tags, readable_timestamp from tests.llm.utils.mock_dal import MockSupabaseDal @@ -46,8 +45,6 @@ def create_tool_executor( return ToolExecutor(mock.mocked_toolsets) - - def get_test_cases(): unique_test_id = os.environ.get("PYTEST_XDIST_TESTRUNUID", readable_timestamp()) @@ -77,9 +74,9 @@ def idfn(val): @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) def test_investigate(experiment_name, test_case): - eval_factuality = Factuality() - config = MockConfig(test_case) + config.model = os.environ.get("MODEL", "gpt-4o") + mock_dal = MockSupabaseDal( test_case_folder=Path(test_case.folder), generate_mocks=test_case.generate_mocks, @@ -108,28 +105,31 @@ def test_investigate(experiment_name, test_case): output = result.analysis - evaluate_logs_explanation = get_logs_explanation_classifier() - factuality = eval_factuality(output, expected, input=input) - previous_logs = evaluate_logs_explanation(output, expected, input=input) - scores = { - "faithfulness": factuality.score, - "previous_logs": previous_logs.score - } + scores = {} + + if isinstance(expected, list): + scores["correctness"] = evaluate_correctness(output=output, expected_elements=expected).score + else: + scores["faithfulness"] = evaluate_factuality(output=output, expected=expected, input=input).score + scores["previous_logs"] = evaluate_previous_logs_mention(output=output).score if len(test_case.retrieval_context) > 0: - evaluate_context_usage = get_context_classifier(test_case.retrieval_context) - scores["context"] = evaluate_context_usage(output, expected, input=input).score + scores["context"] = evaluate_context_usage(input=input, output=output, context_items=test_case.retrieval_context).score bt_helper.end_evaluation( eval=eval, input=input, output=output or "", - expected=expected, + expected=str(expected), id=test_case.id, scores=scores ) print(f"** OUTPUT **\n{output}") print(f"** SCORES **\n{scores}") - assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + if scores.get("faithfulness"): + assert scores.get("faithfulness") >= test_case.evaluation.faithfulness + + if scores.get("correctness"): + assert scores.get("correctness") >= test_case.evaluation.correctness assert scores.get("context", 0) >= test_case.evaluation.context diff --git a/tests/llm/utils/braintrust.py b/tests/llm/utils/braintrust.py index f79c39b3..6cca2314 100644 --- a/tests/llm/utils/braintrust.py +++ b/tests/llm/utils/braintrust.py @@ -1,6 +1,6 @@ import braintrust -from braintrust import Dataset, Experiment, ReadonlyExperiment +from braintrust import Dataset, Experiment, ReadonlyExperiment, Span import logging from typing import Any, List, Optional @@ -78,7 +78,7 @@ def upload_test_cases(self, test_cases:List[HolmesTestCase]): def resolve_dataset_item(self, test_case:HolmesTestCase) -> Optional[Any]: return find_dataset_row_by_test_case(self.dataset, test_case) - def start_evaluation(self, experiment_name:str, name:str): + 
def start_evaluation(self, experiment_name:str, name:str) -> Span: if not self.experiment: experiment:Experiment|ReadonlyExperiment = braintrust.init( project=self.project_name, @@ -93,7 +93,7 @@ def start_evaluation(self, experiment_name:str, name:str): self.experiment = experiment return self.experiment.start_span(name=name) - def end_evaluation(self, eval:Any, input:str, output:str, expected:str, id:str, scores:dict[str, Any]): + def end_evaluation(self, eval:Span, input:str, output:str, expected:str, id:str, scores:dict[str, Any]): if not self.experiment: raise Exception("start_evaluation() must be called before end_evaluation()") diff --git a/tests/llm/utils/classifiers.py b/tests/llm/utils/classifiers.py index 32570675..08281e9c 100644 --- a/tests/llm/utils/classifiers.py +++ b/tests/llm/utils/classifiers.py @@ -1,7 +1,10 @@ -from typing import List -from autoevals import LLMClassifier +from typing import List, Optional +from autoevals import Factuality, LLMClassifier +import os -def get_context_classifier(context_items:List[str]): +classifier_model = os.environ.get("CLASSIFIER_MODEL", "gpt-4o-mini") + +def evaluate_context_usage(context_items:List[str], output:Optional[str], input:Optional[str]): context = "\n- ".join(context_items) prompt_prefix = f""" CONTEXT @@ -24,19 +27,21 @@ def get_context_classifier(context_items:List[str]): Evaluate which of the following describes the OUTPUT best and return the related letter: A. No item mentioned in the CONTEXT is mentioned in the ANSWER B. Less than half of items present in the CONTEXT are mentioned in the ANSWER C. More than half of items present in the CONTEXT are mentioned in the ANSWER D. All items present in the CONTEXT are mentioned in the ANSWER """ - return LLMClassifier( + classifier = LLMClassifier( name="ContextPrecision", prompt_template=prompt_prefix, choice_scores={"A": 0, "B": 0.33, "C": 0.67, "D": 1}, use_cot=True, + model=classifier_model ) + return classifier(input=input, output=output, expected=None) -def get_logs_explanation_classifier(): +def evaluate_previous_logs_mention(output:Optional[str]): prompt_prefix = """ OUTPUT @@ -52,9 +57,46 @@ def get_logs_explanation_classifier(): D. OUTPUT mentions both "logs" and "previous logs" but presents both as having the same meaning """ - return LLMClassifier( + classifier = LLMClassifier( name="LogsExplanation", prompt_template=prompt_prefix, choice_scores={"A": 1, "B": 1, "C": 0, "D": 1}, use_cot=True, + model=classifier_model ) + return classifier(input=None, output=output, expected=None) + + +def evaluate_correctness(expected_elements:List[str], output:Optional[str]): + prompt_prefix = """ + You are evaluating the correctness of a response by an LLM. You must return a score between 0 and 1 that represents the correctness of the response/OUTPUT from the LLM. + The correctness is defined by the number of EXPECTED ELEMENTS present in the output. Correctness is 1 if all elements are present and 0 if none are present. + The correctness score should be proportional to the number of EXPECTED ELEMENTS present in the OUTPUT. + + EXPECTED ELEMENTS + ================= + + - {{"\n- ".join(expected_elements)}} + + OUTPUT + ====== + + {{output}} + + + Return a score between 0 and 1 that is proportional to the number of EXPECTED ELEMENTS present in the OUTPUT.
+ """ + + classifier = LLMClassifier( + name="Correctness", + prompt_template=prompt_prefix, + choice_scores={"A": 1, "B": 1, "C": 0, "D": 1}, + use_cot=True, + model=classifier_model + ) + return classifier(input=input, output=output, expected=None) + + +def evaluate_factuality(input:Optional[str], output:Optional[str], expected:Optional[str]): + eval_factuality = Factuality() + return eval_factuality(input=input, output=output, expected=expected) diff --git a/tests/llm/utils/mock_utils.py b/tests/llm/utils/mock_utils.py index 5e8c104e..b9d3cc31 100644 --- a/tests/llm/utils/mock_utils.py +++ b/tests/llm/utils/mock_utils.py @@ -1,13 +1,12 @@ import json -import braintrust from typing_extensions import Dict import yaml import logging import os import re from pathlib import Path -from typing import Generic, List, Optional, TypeVar, Union, cast +from typing import List, Optional, TypeVar, Union, cast from pydantic import BaseModel, TypeAdapter from holmes.core.models import InvestigateRequest @@ -19,12 +18,12 @@ def read_file(file_path:Path): with open(file_path, 'r', encoding='utf-8') as file: return file.read().strip() - TEST_CASE_ID_PATTERN = r'^[\d+]_(?:[a-z]+_)*[a-z]+$' CONFIG_FILE_NAME = "test_case.yaml" class LLMEvaluation(BaseModel): faithfulness: float = 0.3 + correctness: float = 0.3 context: float = 0 class Message(BaseModel): @@ -36,7 +35,7 @@ class HolmesTestCase(BaseModel): id: str folder: str generate_mocks: bool = False # If True, generate mocks - expected_output: str # Whether an output is expected + expected_output: Union[str, List[str]] # Whether an output is expected evaluation: LLMEvaluation = LLMEvaluation() retrieval_context: List[str] = [] # Elements helping to evaluate the correctness of the LLM response tool_mocks: List[ToolMock] = [] From c52f4e0388ad8bc3a32c7ac1d4b7e1168cd59f63 Mon Sep 17 00:00:00 2001 From: Tomer Date: Mon, 9 Dec 2024 11:53:27 +0200 Subject: [PATCH 2/4] Use Contributor Covenant Code of Conduct v2.1 (#223) --- CODE_OF_CONDUCT.md | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..1dfc9964 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +conduct@robusta.dev. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations

From e36db3422bcac05d9356ecc0d957e008a1316d35 Mon Sep 17 00:00:00 2001
From: Tomer
Date: Mon, 9 Dec 2024 13:14:27 +0200
Subject: [PATCH 3/4] add contributing doc page (#225)

---
 CONTRIBUTING.md | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..6d95164c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,35 @@
+# Contributing
+
+## Before you get started
+
+### Code of Conduct
+
+Please make sure to read and observe our [Code of Conduct](https://github.com/robusta-dev/holmesgpt?tab=coc-ov-file).
+
+### Install requirements
+- Python `3.11`
+  - poetry `1.8.4` & up
+- An LLM API key is required to use and test HolmesGPT
+  - OpenAI's `gpt-4o` is recommended.
+  - For details see [Getting an API Key](https://github.com/robusta-dev/holmesgpt?tab=readme-ov-file#getting-an-api-key).
+
+## Reporting bugs
+
+We encourage code contributions and also appreciate it when issues are reported.
+
+- Create a new issue and label it as `bug`
+- Clearly state how to reproduce the bug:
+  - Which LLM you've used
+  - Which steps are required to reproduce
+  - Whether it always reproduces or only occasionally (LLM answers may differ between runs)
+
+
+## Contributing Code
+
+- Fork the repository and clone it locally.
+- Create a new branch and make your changes
+- Add or update tests to ensure your changes are covered.
+- Run `pytest` to verify all tests pass.
+- Keep pull requests small and focused. If you have multiple changes, open a separate PR for each.
+- Create a pull request back to the upstream repository.
+- Wait for a review and address any comments From c4743a016866e2a216897518059d326e4d97aaa7 Mon Sep 17 00:00:00 2001 From: Tomer Date: Mon, 9 Dec 2024 13:46:42 +0200 Subject: [PATCH 4/4] Create MAINTAINERS.md (#226) --- MAINTAINERS.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 MAINTAINERS.md diff --git a/MAINTAINERS.md b/MAINTAINERS.md new file mode 100644 index 00000000..4cad4de5 --- /dev/null +++ b/MAINTAINERS.md @@ -0,0 +1,19 @@ +# Maintainers + +## Current + +| Maintainer | GitHub ID | +| --------------------|---------------------------------------------------| +| Natan Yellin | [aantn](https://github.com/aantn) | +| Arik Alon | [arikalon1](https://github.com/arikalon1) | +| Avi Kotlicky | [Avi-Robusta](https://github.com/Avi-Robusta) | +| Dmytro Chievtaiev | [itisallgood](https://github.com/itisallgood) | +| Pavan Gudiwada | [pavangudiwada](https://github.com/pavangudiwada) | +| Nicolas Herment | [nherment](https://github.com/nherment) | +| Tomer Keshet | [Sheeproid](https://github.com/Sheeproid) | +| Roi Glinik | [RoiGlinik](https://github.com/RoiGlinik) | +| Moshe Morad | [moshemorad](https://github.com/moshemorad) | + +## Emeritus + +N/A
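
For reviewers who want to try the new correctness scorer from PATCH 1/4 outside the test suite, here is a minimal sketch; it is not part of the patches above. It assumes a repository checkout with the Poetry environment installed and credentials for the classifier model (`CLASSIFIER_MODEL`, defaulting to `gpt-4o-mini`) available in the environment; the expected elements and the sample output below are made up for illustration only.

```python
# Minimal sketch: calls evaluate_correctness() from PATCH 1/4 directly.
# Assumes the repo's test dependencies are installed and LLM credentials
# (e.g. OPENAI_API_KEY) are configured; run from the repository root.
from tests.llm.utils.classifiers import evaluate_correctness

# Hypothetical expected elements and model output, for illustration only.
expected_elements = [
    "the pod was OOMKilled",
    "the memory limit is 128Mi",
]
output = "The pod was OOMKilled because it exceeded its 128Mi memory limit."

result = evaluate_correctness(expected_elements=expected_elements, output=output)
print(result.score)  # higher when more of the expected elements appear in the output

# test_investigate.py compares this score against the per-test-case threshold,
# e.g. `assert result.score >= test_case.evaluation.correctness` (default 0.3).
```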