From 6a999b044f59b1541df74c3af34ab6c20458be2b Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Dec 2024 07:54:56 +0000 Subject: [PATCH 01/10] Fix issue #5637: [Bug]: Incorrectly interpreting a Python error as a success message --- openhands/events/observation/commands.py | 12 +++++++-- tests/unit/test_observation_serialization.py | 27 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index b522b5c47283..f3fa24e39151 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -40,7 +40,15 @@ class IPythonRunCellObservation(Observation): @property def error(self) -> bool: - return False # IPython cells do not return exit codes + # Check for common error indicators in IPython output + error_indicators = [ + 'ERROR:', + 'Error:', + 'Exception:', + 'Parameter `', + 'is required for command:', + ] + return any(indicator in self.content for indicator in error_indicators) @property def message(self) -> str: @@ -48,7 +56,7 @@ def message(self) -> str: @property def success(self) -> bool: - return True # IPython cells are always considered successful + return not self.error def __str__(self) -> str: return f'**IPythonRunCellObservation**\n{self.content}' diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py index 67a95449b719..3ab0e21253ef 100644 --- a/tests/unit/test_observation_serialization.py +++ b/tests/unit/test_observation_serialization.py @@ -40,6 +40,33 @@ def serialization_deserialization( # Additional tests for various observation subclasses can be included here +def test_ipython_error_detection(): + from openhands.events.observation import IPythonRunCellObservation + + # Test error detection for various error patterns + error_cases = [ + 'ERROR: Something went wrong', + 'Error: Invalid syntax', + 'Exception: Division by zero', + 'Parameter `old_str` is required for command: str_replace', + ] + for error_content in error_cases: + obs = IPythonRunCellObservation(content=error_content, code='print("test")') + serialized = event_to_dict(obs) + assert ( + serialized['success'] is False + ), f'Failed to detect error in: {error_content}' + assert obs.error is True, f'Failed to detect error in: {error_content}' + + # Test success case + obs = IPythonRunCellObservation( + content='Hello World!', code='print("Hello World!")' + ) + serialized = event_to_dict(obs) + assert serialized['success'] is True, 'Failed to detect success' + assert obs.error is False, 'Failed to detect success' + + def test_success_field_serialization(): # Test success=True obs = CmdOutputObservation( From d56755baff31386804ec736f925b4e8e9cbeeb7f Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:00:49 +0100 Subject: [PATCH 02/10] Update openhands/events/observation/commands.py --- openhands/events/observation/commands.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index f3fa24e39151..dd1df6c06c52 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -45,7 +45,6 @@ def error(self) -> bool: 'ERROR:', 'Error:', 'Exception:', - 'Parameter `', 'is required for command:', ] return any(indicator in self.content for indicator in error_indicators) From 1b03d13adf4d73e4a8d65569629d43f9ed116a86 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:01:03 +0100 Subject: [PATCH 03/10] Update openhands/events/observation/commands.py --- openhands/events/observation/commands.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index dd1df6c06c52..3fc1c61cdadd 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -45,7 +45,6 @@ def error(self) -> bool: 'ERROR:', 'Error:', 'Exception:', - 'is required for command:', ] return any(indicator in self.content for indicator in error_indicators) From 59e8c6a26d42a39f83deedb0c456c3b1108dc1ee Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:01:21 +0100 Subject: [PATCH 04/10] Update tests/unit/test_observation_serialization.py --- tests/unit/test_observation_serialization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py index 3ab0e21253ef..3173a850d5db 100644 --- a/tests/unit/test_observation_serialization.py +++ b/tests/unit/test_observation_serialization.py @@ -48,7 +48,6 @@ def test_ipython_error_detection(): 'ERROR: Something went wrong', 'Error: Invalid syntax', 'Exception: Division by zero', - 'Parameter `old_str` is required for command: str_replace', ] for error_content in error_cases: obs = IPythonRunCellObservation(content=error_content, code='print("test")') From 984b9ccc55a1e11de433b051bc85a3031618ee29 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Dec 2024 08:10:36 +0000 Subject: [PATCH 05/10] Fix pr #5644: Fix issue #5637: [Bug]: Incorrectly interpreting a Python error as a success message --- frontend/src/components/features/controls/agent-status-bar.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/components/features/controls/agent-status-bar.tsx b/frontend/src/components/features/controls/agent-status-bar.tsx index 62511897151b..68df25ec7888 100644 --- a/frontend/src/components/features/controls/agent-status-bar.tsx +++ b/frontend/src/components/features/controls/agent-status-bar.tsx @@ -23,6 +23,7 @@ export function AgentStatusBar() { } if (curStatusMessage?.type === "error") { toast.error(message); + setStatusMessage(AGENT_STATUS_MAP[AgentState.ERROR].message); return; } if (curAgentState === AgentState.LOADING && message.trim()) { From 087d34cf6166a252d2016718985edcf96c84529d Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:18:21 +0100 Subject: [PATCH 06/10] Update frontend/src/components/features/controls/agent-status-bar.tsx --- frontend/src/components/features/controls/agent-status-bar.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/src/components/features/controls/agent-status-bar.tsx b/frontend/src/components/features/controls/agent-status-bar.tsx index 68df25ec7888..62511897151b 100644 --- a/frontend/src/components/features/controls/agent-status-bar.tsx +++ b/frontend/src/components/features/controls/agent-status-bar.tsx @@ -23,7 +23,6 @@ export function AgentStatusBar() { } if (curStatusMessage?.type === "error") { toast.error(message); - setStatusMessage(AGENT_STATUS_MAP[AgentState.ERROR].message); return; } if (curAgentState === AgentState.LOADING && message.trim()) { From 39ce6cd764ed461a5f8b4e5e0c6d7c2549a49b1b Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Dec 2024 08:24:40 +0000 Subject: [PATCH 07/10] Fix pr #5644: Fix issue #5637: [Bug]: Incorrectly interpreting a Python error as a success message --- frontend/src/state/chat-slice.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts index 47d2b651754d..f0bee9eb5508 100644 --- a/frontend/src/state/chat-slice.ts +++ b/frontend/src/state/chat-slice.ts @@ -149,9 +149,8 @@ export const chatSlice = createSlice({ } else if (observationID === "run_ipython") { // For IPython, we consider it successful if there's no error message const ipythonObs = observation.payload as IPythonObservation; - causeMessage.success = !ipythonObs.message - .toLowerCase() - .includes("error"); + // Check for error in the message field which contains error information + causeMessage.success = !ipythonObs.message; } if (observationID === "run" || observationID === "run_ipython") { From 1334166f8573d236851c699b8a9fe0535aee68a5 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:26:34 +0100 Subject: [PATCH 08/10] Update frontend/src/state/chat-slice.ts --- frontend/src/state/chat-slice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts index f0bee9eb5508..da3d56427fea 100644 --- a/frontend/src/state/chat-slice.ts +++ b/frontend/src/state/chat-slice.ts @@ -150,7 +150,7 @@ export const chatSlice = createSlice({ // For IPython, we consider it successful if there's no error message const ipythonObs = observation.payload as IPythonObservation; // Check for error in the message field which contains error information - causeMessage.success = !ipythonObs.message; + causeMessage.success = !ipythonObs.error; } if (observationID === "run" || observationID === "run_ipython") { From 6ba683ab438678608a4efc017dff250b475c195d Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:27:28 +0100 Subject: [PATCH 09/10] Update frontend/src/state/chat-slice.ts --- frontend/src/state/chat-slice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts index da3d56427fea..ab37b6d8bc3b 100644 --- a/frontend/src/state/chat-slice.ts +++ b/frontend/src/state/chat-slice.ts @@ -150,7 +150,7 @@ export const chatSlice = createSlice({ // For IPython, we consider it successful if there's no error message const ipythonObs = observation.payload as IPythonObservation; // Check for error in the message field which contains error information - causeMessage.success = !ipythonObs.error; + causeMessage.success = ipythonObs.success; } if (observationID === "run" || observationID === "run_ipython") { From 295ac6d7149300626f7e21b78ee762cf2230f900 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 17 Dec 2024 09:29:34 +0100 Subject: [PATCH 10/10] Update frontend/src/state/chat-slice.ts --- frontend/src/state/chat-slice.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/state/chat-slice.ts b/frontend/src/state/chat-slice.ts index ab37b6d8bc3b..da3d56427fea 100644 --- a/frontend/src/state/chat-slice.ts +++ b/frontend/src/state/chat-slice.ts @@ -150,7 +150,7 @@ export const chatSlice = createSlice({ // For IPython, we consider it successful if there's no error message const ipythonObs = observation.payload as IPythonObservation; // Check for error in the message field which contains error information - causeMessage.success = ipythonObs.success; + causeMessage.success = !ipythonObs.error; } if (observationID === "run" || observationID === "run_ipython") {