Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue #5637: [Bug]: Incorrectly interpreting a Python error as a success message #5644

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
12 changes: 10 additions & 2 deletions openhands/events/observation/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,23 @@ class IPythonRunCellObservation(Observation):

@property
def error(self) -> bool:
return False # IPython cells do not return exit codes
# Check for common error indicators in IPython output
error_indicators = [
'ERROR:',
'Error:',
'Exception:',
'Parameter `',
enyst marked this conversation as resolved.
Show resolved Hide resolved
'is required for command:',
enyst marked this conversation as resolved.
Show resolved Hide resolved
]
return any(indicator in self.content for indicator in error_indicators)

@property
def message(self) -> str:
return 'Code executed in IPython cell.'

@property
def success(self) -> bool:
return True # IPython cells are always considered successful
return not self.error

def __str__(self) -> str:
return f'**IPythonRunCellObservation**\n{self.content}'
27 changes: 27 additions & 0 deletions tests/unit/test_observation_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,33 @@ def serialization_deserialization(


# Additional tests for various observation subclasses can be included here
def test_ipython_error_detection():
from openhands.events.observation import IPythonRunCellObservation

# Test error detection for various error patterns
error_cases = [
'ERROR: Something went wrong',
'Error: Invalid syntax',
'Exception: Division by zero',
'Parameter `old_str` is required for command: str_replace',
enyst marked this conversation as resolved.
Show resolved Hide resolved
]
for error_content in error_cases:
obs = IPythonRunCellObservation(content=error_content, code='print("test")')
serialized = event_to_dict(obs)
assert (
serialized['success'] is False
), f'Failed to detect error in: {error_content}'
assert obs.error is True, f'Failed to detect error in: {error_content}'

# Test success case
obs = IPythonRunCellObservation(
content='Hello World!', code='print("Hello World!")'
)
serialized = event_to_dict(obs)
assert serialized['success'] is True, 'Failed to detect success'
assert obs.error is False, 'Failed to detect success'


def test_success_field_serialization():
# Test success=True
obs = CmdOutputObservation(
Expand Down
Loading