diff --git a/examples/01_standalone_sdk/02_custom_tools.py b/examples/01_standalone_sdk/02_custom_tools.py index 4c399d467f..cb30ed0443 100644 --- a/examples/01_standalone_sdk/02_custom_tools.py +++ b/examples/01_standalone_sdk/02_custom_tools.py @@ -92,8 +92,10 @@ def __call__(self, action: GrepAction, conversation=None) -> GrepObservation: # files: set[str] = set() # grep returns exit code 1 when no matches; treat as empty - if result.output.strip(): - for line in result.output.strip().splitlines(): + output_text = result.text + + if output_text.strip(): + for line in output_text.strip().splitlines(): matches.append(line) # Expect "path:line:content" — take the file part before first ":" file_path = line.split(":", 1)[0] diff --git a/openhands-sdk/openhands/sdk/mcp/definition.py b/openhands-sdk/openhands/sdk/mcp/definition.py index 8d4d544880..771729e11d 100644 --- a/openhands-sdk/openhands/sdk/mcp/definition.py +++ b/openhands-sdk/openhands/sdk/mcp/definition.py @@ -1,7 +1,6 @@ """MCPTool definition and implementation.""" import json -from collections.abc import Sequence from typing import Any import mcp.types @@ -51,14 +50,6 @@ def to_mcp_arguments(self) -> dict: class MCPToolObservation(Observation): """Observation from MCP tool execution.""" - content: list[TextContent | ImageContent] = Field( - default_factory=list, - description="Content returned from the MCP tool converted " - "to LLM Ready TextContent or ImageContent", - ) - is_error: bool = Field( - default=False, description="Whether the call resulted in an error" - ) tool_name: str = Field(description="Name of the tool that was called") @classmethod @@ -66,13 +57,16 @@ def from_call_tool_result( cls, tool_name: str, result: mcp.types.CallToolResult ) -> "MCPToolObservation": """Create an MCPToolObservation from a CallToolResult.""" - content: list[mcp.types.ContentBlock] = result.content - convrted_content = [] - for block in content: + + native_content: list[mcp.types.ContentBlock] = result.content + content: list[TextContent | ImageContent] = [ + TextContent(text=f"[Tool '{tool_name}' executed.]") + ] + for block in native_content: if isinstance(block, mcp.types.TextContent): - convrted_content.append(TextContent(text=block.text)) + content.append(TextContent(text=block.text)) elif isinstance(block, mcp.types.ImageContent): - convrted_content.append( + content.append( ImageContent( image_urls=[f"data:{block.mimeType};base64,{block.data}"], ) @@ -81,36 +75,32 @@ def from_call_tool_result( logger.warning( f"Unsupported MCP content block type: {type(block)}. Ignoring." ) + return cls( - content=convrted_content, + content=content, is_error=result.isError, tool_name=tool_name, ) - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - """Format the observation for agent display.""" - initial_message = f"[Tool '{self.tool_name}' executed.]\n" - if self.is_error: - initial_message += "[An error occurred during execution.]\n" - return [TextContent(text=initial_message)] + self.content - @property def visualize(self) -> Text: """Return Rich Text representation of this observation.""" - content = Text() - content.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold") + text = Text() + if self.is_error: - content.append("[Error during execution]\n", style="bold red") + text.append("❌ ", style="red bold") + text.append(self.ERROR_MESSAGE_HEADER, style="bold red") + + text.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold") for block in self.content: if isinstance(block, TextContent): # try to see if block.text is a JSON try: parsed = json.loads(block.text) - content.append(display_dict(parsed)) + text.append(display_dict(parsed)) continue except (json.JSONDecodeError, TypeError): - content.append(block.text + "\n") + text.append(block.text + "\n") elif isinstance(block, ImageContent): - content.append(f"[Image with {len(block.image_urls)} URLs]\n") - return content + text.append(f"[Image with {len(block.image_urls)} URLs]\n") + return text diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py index 0e28b9eda5..04b3f10b3b 100644 --- a/openhands-sdk/openhands/sdk/mcp/tool.py +++ b/openhands-sdk/openhands/sdk/mcp/tool.py @@ -12,7 +12,6 @@ from litellm import ChatCompletionToolParam from pydantic import Field, ValidationError -from openhands.sdk.llm import TextContent from openhands.sdk.logger import get_logger from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation @@ -69,8 +68,8 @@ async def call_tool(self, action: MCPToolAction) -> MCPToolObservation: except Exception as e: error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" logger.error(error_msg, exc_info=True) - return MCPToolObservation( - content=[TextContent(text=error_msg)], + return MCPToolObservation.from_text( + text=error_msg, is_error=True, tool_name=self.tool_name, ) @@ -154,8 +153,8 @@ def __call__( # Surface validation errors as an observation instead of crashing error_msg = f"Validation error for MCP tool '{self.name}' args: {e}" logger.error(error_msg, exc_info=True) - return MCPToolObservation( - content=[TextContent(text=error_msg)], + return MCPToolObservation.from_text( + text=error_msg, is_error=True, tool_name=self.name, ) diff --git a/openhands-sdk/openhands/sdk/tool/builtins/finish.py b/openhands-sdk/openhands/sdk/tool/builtins/finish.py index 3c40f3b586..0bb12f849b 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/finish.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/finish.py @@ -4,7 +4,6 @@ from pydantic import Field from rich.text import Text -from openhands.sdk.llm.message import ImageContent, TextContent from openhands.sdk.tool.tool import ( Action, Observation, @@ -32,16 +31,15 @@ def visualize(self) -> Text: class FinishObservation(Observation): - message: str = Field(description="Final message sent to the user.") - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.message)] + """ + Observation returned after finishing a task. + The FinishAction itself contains the message sent to the user so no + extra fields are needed here. + """ @property def visualize(self) -> Text: - """Return Rich Text representation - empty since action shows the message.""" - # Don't duplicate the finish message display - action already shows it + """Return an empty Text representation since the message is in the action.""" return Text() @@ -65,7 +63,7 @@ def __call__( action: FinishAction, conversation: "BaseConversation | None" = None, # noqa: ARG002 ) -> FinishObservation: - return FinishObservation(message=action.message) + return FinishObservation.from_text(text=action.message) class FinishTool(ToolDefinition[FinishAction, FinishObservation]): diff --git a/openhands-sdk/openhands/sdk/tool/builtins/think.py b/openhands-sdk/openhands/sdk/tool/builtins/think.py index b28418795b..ca641ce94d 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/think.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/think.py @@ -4,7 +4,6 @@ from pydantic import Field from rich.text import Text -from openhands.sdk.llm.message import ImageContent, TextContent from openhands.sdk.tool.tool import ( Action, Observation, @@ -46,20 +45,15 @@ def visualize(self) -> Text: class ThinkObservation(Observation): - """Observation returned after logging a thought.""" - - content: str = Field( - default="Your thought has been logged.", description="Confirmation message." - ) - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.content)] + """ + Observation returned after logging a thought. + The ThinkAction itself contains the thought logged so no extra + fields are needed here. + """ @property def visualize(self) -> Text: - """Return Rich Text representation - empty since action shows the thought.""" - # Don't duplicate the thought display - action already shows it + """Return an empty Text representation since the thought is in the action.""" return Text() @@ -81,7 +75,7 @@ def __call__( _: ThinkAction, conversation: "BaseConversation | None" = None, # noqa: ARG002 ) -> ThinkObservation: - return ThinkObservation() + return ThinkObservation.from_text(text="Your thought has been logged.") class ThinkTool(ToolDefinition[ThinkAction, ThinkObservation]): diff --git a/openhands-sdk/openhands/sdk/tool/schema.py b/openhands-sdk/openhands/sdk/tool/schema.py index 1c041aa659..1102df7005 100644 --- a/openhands-sdk/openhands/sdk/tool/schema.py +++ b/openhands-sdk/openhands/sdk/tool/schema.py @@ -1,6 +1,6 @@ -from abc import ABC, abstractmethod +from abc import ABC from collections.abc import Sequence -from typing import Any, ClassVar, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar from pydantic import ConfigDict, Field, create_model from rich.text import Text @@ -13,6 +13,9 @@ from openhands.sdk.utils.visualize import display_dict +if TYPE_CHECKING: + from typing import Self + S = TypeVar("S", bound="Schema") @@ -190,23 +193,84 @@ def visualize(self) -> Text: class Observation(Schema, ABC): """Base schema for output observation.""" + ERROR_MESSAGE_HEADER: ClassVar[str] = "[An error occurred during execution.]\n" + + content: list[TextContent | ImageContent] = Field( + default_factory=list, + description=( + "Content returned from the tool as a list of " + "TextContent/ImageContent objects. " + "When there is an error, it should be written in this field." + ), + ) + is_error: bool = Field( + default=False, description="Whether the observation indicates an error" + ) + + @classmethod + def from_text( + cls, + text: str, + is_error: bool = False, + **kwargs: Any, + ) -> "Self": + """Utility to create an Observation from a simple text string. + + Args: + text: The text content to include in the observation. + is_error: Whether this observation represents an error. + **kwargs: Additional fields for the observation subclass. + + Returns: + An Observation instance with the text wrapped in a TextContent. + """ + return cls(content=[TextContent(text=text)], is_error=is_error, **kwargs) + + @property + def text(self) -> str: + """Extract all text content from the observation. + + Returns: + Concatenated text from all TextContent items in content. + """ + return "".join( + item.text for item in self.content if isinstance(item, TextContent) + ) + @property - @abstractmethod def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - """Get the observation string to show to the agent.""" + """ + Default content formatting for converting observation to LLM readable content. + Subclasses can override to provide richer content (e.g., images, diffs). + """ + llm_content: list[TextContent | ImageContent] = [] + + # If is_error is true, prepend error message + if self.is_error: + llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER)) + + # Add content (now always a list) + llm_content.extend(self.content) + + return llm_content @property def visualize(self) -> Text: - """Return Rich Text representation of this action. + """Return Rich Text representation of this observation. - This method can be overridden by subclasses to customize visualization. - The base implementation displays all action fields systematically. + Subclasses can override for custom visualization; by default we show the + same text that would be sent to the LLM. """ - content = Text() + text = Text() + + if self.is_error: + text.append("❌ ", style="red bold") + text.append(self.ERROR_MESSAGE_HEADER, style="bold red") + text_parts = content_to_str(self.to_llm_content) if text_parts: full_content = "".join(text_parts) - content.append(full_content) + text.append(full_content) else: - content.append("[no text content]") - return content + text.append("[no text content]") + return text diff --git a/openhands-tools/openhands/tools/browser_use/definition.py b/openhands-tools/openhands/tools/browser_use/definition.py index a9a66cfbb7..6a02026b2b 100644 --- a/openhands-tools/openhands/tools/browser_use/definition.py +++ b/openhands-tools/openhands/tools/browser_use/definition.py @@ -28,20 +28,24 @@ class BrowserObservation(Observation): """Base observation for browser operations.""" - output: str = Field(description="The output message from the browser operation") - error: str | None = Field(default=None, description="Error message if any") screenshot_data: str | None = Field( default=None, description="Base64 screenshot data if available" ) @property def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - if self.error: - return [TextContent(text=f"Error: {self.error}")] + llm_content: list[TextContent | ImageContent] = [] - content: list[TextContent | ImageContent] = [ - TextContent(text=maybe_truncate(self.output, MAX_BROWSER_OUTPUT_SIZE)) - ] + # If is_error is true, prepend error message + if self.is_error: + llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER)) + + # Get text content and truncate if needed + content_text = self.text + if content_text: + llm_content.append( + TextContent(text=maybe_truncate(content_text, MAX_BROWSER_OUTPUT_SIZE)) + ) if self.screenshot_data: mime_type = "image/png" @@ -55,9 +59,9 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]: mime_type = "image/webp" # Convert base64 to data URL format for ImageContent data_url = f"data:{mime_type};base64,{self.screenshot_data}" - content.append(ImageContent(image_urls=[data_url])) + llm_content.append(ImageContent(image_urls=[data_url])) - return content + return llm_content # ============================================ diff --git a/openhands-tools/openhands/tools/browser_use/impl.py b/openhands-tools/openhands/tools/browser_use/impl.py index cbe6aebf1b..19bdce7dc8 100644 --- a/openhands-tools/openhands/tools/browser_use/impl.py +++ b/openhands-tools/openhands/tools/browser_use/impl.py @@ -225,13 +225,13 @@ async def _execute_action(self, action): result = await self.close_tab(action.tab_id) else: error_msg = f"Unsupported action type: {type(action)}" - return BrowserObservation(output="", error=error_msg) + return BrowserObservation.from_text(text=error_msg, is_error=True) - return BrowserObservation(output=result) + return BrowserObservation.from_text(text=result) except Exception as e: error_msg = f"Browser operation failed: {str(e)}" logger.error(error_msg, exc_info=True) - return BrowserObservation(output="", error=error_msg) + return BrowserObservation.from_text(text=error_msg, is_error=True) async def _ensure_initialized(self): """Ensure browser session is initialized.""" @@ -281,14 +281,15 @@ async def get_state(self, include_screenshot: bool = False): # Return clean JSON + separate screenshot data clean_json = json.dumps(result_data, indent=2) - return BrowserObservation( - output=clean_json, screenshot_data=screenshot_data + return BrowserObservation.from_text( + text=clean_json, + screenshot_data=screenshot_data, ) except json.JSONDecodeError: # If JSON parsing fails, return as-is pass - return BrowserObservation(output=result_json) + return BrowserObservation.from_text(text=result_json) # Tab Management async def list_tabs(self) -> str: diff --git a/openhands-tools/openhands/tools/delegate/definition.py b/openhands-tools/openhands/tools/delegate/definition.py index 36835e8c7b..0b5ee4fba0 100644 --- a/openhands-tools/openhands/tools/delegate/definition.py +++ b/openhands-tools/openhands/tools/delegate/definition.py @@ -5,7 +5,6 @@ from pydantic import Field -from openhands.sdk.llm.message import ImageContent, TextContent from openhands.sdk.tool.tool import ( Action, Observation, @@ -44,15 +43,7 @@ class DelegateAction(Action): class DelegateObservation(Observation): """Observation from delegation operations.""" - command: CommandLiteral = Field( - description="The command that was executed. Either `spawn` or `delegate`." - ) - message: str = Field(description="Result message from the operation") - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - """Get the observation content to show to the agent.""" - return [TextContent(text=self.message)] + command: CommandLiteral = Field(description="The command that was executed") TOOL_DESCRIPTION = """Delegation tool for spawning sub-agents and delegating tasks to them. diff --git a/openhands-tools/openhands/tools/delegate/impl.py b/openhands-tools/openhands/tools/delegate/impl.py index 2b79797c16..da68f9400b 100644 --- a/openhands-tools/openhands/tools/delegate/impl.py +++ b/openhands-tools/openhands/tools/delegate/impl.py @@ -58,9 +58,13 @@ def __call__( # type: ignore[override] elif action.command == "delegate": return self._delegate_tasks(action) else: - return DelegateObservation( + return DelegateObservation.from_text( + text=( + f"Unsupported command: {action.command}. " + "Available commands: spawn, delegate" + ), command=action.command, - message=f"Unsupported command: {action.command}", + is_error=True, ) def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation: @@ -74,19 +78,21 @@ def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation: DelegateObservation indicating success/failure and which agents were spawned """ if not action.ids: - return DelegateObservation( - command="spawn", - message="Error: at least one ID is required for spawn action", + return DelegateObservation.from_text( + text="At least one ID is required for spawn action", + command=action.command, + is_error=True, ) if len(self._sub_agents) + len(action.ids) > self._max_children: - return DelegateObservation( - command="spawn", - message=( + return DelegateObservation.from_text( + text=( f"Cannot spawn {len(action.ids)} agents. " f"Already have {len(self._sub_agents)} agents, " f"maximum is {self._max_children}" ), + command=action.command, + is_error=True, ) try: @@ -115,16 +121,17 @@ def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation: agent_list = ", ".join(action.ids) message = f"Successfully spawned {len(action.ids)} sub-agents: {agent_list}" - return DelegateObservation( - command="spawn", - message=message, + return DelegateObservation.from_text( + text=message, + command=action.command, ) except Exception as e: logger.error(f"Error: failed to spawn agents: {e}", exc_info=True) - return DelegateObservation( - command="spawn", - message=f"Error: failed to spawn agents: {str(e)}", + return DelegateObservation.from_text( + text=f"failed to spawn agents: {str(e)}", + command=action.command, + is_error=True, ) def _delegate_tasks(self, action: "DelegateAction") -> "DelegateObservation": @@ -139,20 +146,22 @@ def _delegate_tasks(self, action: "DelegateAction") -> "DelegateObservation": DelegateObservation with consolidated results from all sub-agents """ if not action.tasks: - return DelegateObservation( - command="delegate", - message="Error: at least one task is required for delegate action", + return DelegateObservation.from_text( + text="at least one task is required for delegate action", + command=action.command, + is_error=True, ) # Check that all requested agent IDs exist missing_agents = set(action.tasks.keys()) - set(self._sub_agents.keys()) if missing_agents: - return DelegateObservation( - command="delegate", - message=( - f"Error: sub-agents not found: {', '.join(missing_agents)}. " + return DelegateObservation.from_text( + text=( + f"sub-agents not found: {', '.join(missing_agents)}. " f"Available agents: {', '.join(self._sub_agents.keys())}" ), + command=action.command, + is_error=True, ) try: @@ -211,24 +220,25 @@ def run_task(agent_id: str, conversation: LocalConversation, task: str): all_results.append(f"Agent {agent_id}: No result") # Create comprehensive message with results - message = f"Completed delegation of {len(action.tasks)} tasks" + output_text = f"Completed delegation of {len(action.tasks)} tasks" if errors: - message += f" with {len(errors)} errors" + output_text += f" with {len(errors)} errors" if all_results: results_text = "\n".join( f"{i}. {result}" for i, result in enumerate(all_results, 1) ) - message += f"\n\nResults:\n{results_text}" + output_text += f"\n\nResults:\n{results_text}" - return DelegateObservation( - command="delegate", - message=message, + return DelegateObservation.from_text( + text=output_text, + command=action.command, ) except Exception as e: logger.error(f"Failed to delegate tasks: {e}", exc_info=True) - return DelegateObservation( - command="delegate", - message=f"Error: failed to delegate tasks: {str(e)}", + return DelegateObservation.from_text( + text=f"failed to delegate tasks: {str(e)}", + command=action.command, + is_error=True, ) diff --git a/openhands-tools/openhands/tools/execute_bash/definition.py b/openhands-tools/openhands/tools/execute_bash/definition.py index 19721362ab..32c118691b 100644 --- a/openhands-tools/openhands/tools/execute_bash/definition.py +++ b/openhands-tools/openhands/tools/execute_bash/definition.py @@ -81,19 +81,13 @@ def visualize(self) -> Text: class ExecuteBashObservation(Observation): """A ToolResult that can be rendered as a CLI output.""" - output: str = Field(description="The raw output from the tool.") command: str | None = Field( - default=None, description="The bash command that was executed. Can be empty string if the observation is from a previous command that hit soft timeout and is not yet finished.", # noqa ) exit_code: int | None = Field( default=None, description="The exit code of the command. -1 indicates the process hit the soft timeout and is not yet finished.", # noqa ) - error: bool = Field( - default=False, - description="Whether there was an error during command execution.", - ) timeout: bool = Field( default=False, description="Whether the command execution timed out." ) @@ -109,31 +103,41 @@ def command_id(self) -> int | None: @property def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - ret = f"{self.metadata.prefix}{self.output}{self.metadata.suffix}" + llm_content: list[TextContent | ImageContent] = [] + + # If is_error is true, prepend error message + if self.is_error: + llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER)) + + # ExecuteBashObservation always has content as a single TextContent + content_text = self.text + + ret = f"{self.metadata.prefix}{content_text}{self.metadata.suffix}" if self.metadata.working_dir: ret += f"\n[Current working directory: {self.metadata.working_dir}]" if self.metadata.py_interpreter_path: ret += f"\n[Python interpreter: {self.metadata.py_interpreter_path}]" if self.metadata.exit_code != -1: ret += f"\n[Command finished with exit code {self.metadata.exit_code}]" - if self.error: - ret = f"[There was an error during command execution.]\n{ret}" - return [TextContent(text=maybe_truncate(ret, MAX_CMD_OUTPUT_SIZE))] + llm_content.append(TextContent(text=maybe_truncate(ret, MAX_CMD_OUTPUT_SIZE))) + + return llm_content @property def visualize(self) -> Text: """Return Rich Text representation with terminal-style output formatting.""" - content = Text() + text = Text() - # Add error indicator if present - if self.error: - content.append("❌ ", style="red bold") - content.append("Command execution error\n", style="red") + if self.is_error: + text.append("❌ ", style="red bold") + text.append(self.ERROR_MESSAGE_HEADER, style="bold red") - # Add command output with proper styling - if self.output: + # ExecuteBashObservation always has content as a single TextContent + content_text = self.text + + if content_text: # Style the output based on content - output_lines = self.output.split("\n") + output_lines = content_text.split("\n") for line in output_lines: if line.strip(): # Color error-like lines differently @@ -141,28 +145,28 @@ def visualize(self) -> Text: keyword in line.lower() for keyword in ["error", "failed", "exception", "traceback"] ): - content.append(line, style="red") + text.append(line, style="red") elif any( keyword in line.lower() for keyword in ["warning", "warn"] ): - content.append(line, style="yellow") + text.append(line, style="yellow") elif line.startswith("+ "): # bash -x output - content.append(line, style="cyan") + text.append(line, style="cyan") else: - content.append(line, style="white") - content.append("\n") + text.append(line, style="white") + text.append("\n") # Add metadata with styling if hasattr(self, "metadata") and self.metadata: if self.metadata.working_dir: - content.append("\n📁 ", style="blue") - content.append( + text.append("\n📁 ", style="blue") + text.append( f"Working directory: {self.metadata.working_dir}", style="blue" ) if self.metadata.py_interpreter_path: - content.append("\n🐍 ", style="green") - content.append( + text.append("\n🐍 ", style="green") + text.append( f"Python interpreter: {self.metadata.py_interpreter_path}", style="green", ) @@ -172,20 +176,16 @@ def visualize(self) -> Text: and self.metadata.exit_code is not None ): if self.metadata.exit_code == 0: - content.append("\n✅ ", style="green") - content.append( - f"Exit code: {self.metadata.exit_code}", style="green" - ) + text.append("\n✅ ", style="green") + text.append(f"Exit code: {self.metadata.exit_code}", style="green") elif self.metadata.exit_code == -1: - content.append("\n⏳ ", style="yellow") - content.append( - "Process still running (soft timeout)", style="yellow" - ) + text.append("\n⏳ ", style="yellow") + text.append("Process still running (soft timeout)", style="yellow") else: - content.append("\n❌ ", style="red") - content.append(f"Exit code: {self.metadata.exit_code}", style="red") + text.append("\n❌ ", style="red") + text.append(f"Exit code: {self.metadata.exit_code}", style="red") - return content + return text TOOL_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. diff --git a/openhands-tools/openhands/tools/execute_bash/impl.py b/openhands-tools/openhands/tools/execute_bash/impl.py index 911e544ec5..79c39d45f8 100644 --- a/openhands-tools/openhands/tools/execute_bash/impl.py +++ b/openhands-tools/openhands/tools/execute_bash/impl.py @@ -1,6 +1,7 @@ import json from typing import TYPE_CHECKING, Literal +from openhands.sdk.llm import TextContent from openhands.sdk.logger import get_logger from openhands.sdk.tool import ToolExecutor @@ -111,8 +112,8 @@ def reset(self) -> ExecuteBashObservation: f"Terminal session reset successfully with working_dir: {original_work_dir}" ) - return ExecuteBashObservation( - output=( + return ExecuteBashObservation.from_text( + text=( "Terminal session has been reset. All previous environment " "variables and session state have been cleared." ), @@ -141,11 +142,16 @@ def __call__( ) self._export_envs(command_action, conversation) command_result = self.session.execute(command_action) + + # Extract text from content + reset_text = reset_result.text + command_text = command_result.text + observation = command_result.model_copy( update={ - "output": ( - reset_result.output + "\n\n" + command_result.output - ), + "content": [ + TextContent(text=f"{reset_text}\n\n{command_text}") + ], "command": f"[RESET] {action.command}", } ) @@ -158,15 +164,15 @@ def __call__( observation = self.session.execute(action) # Apply automatic secrets masking - if observation.output and conversation is not None: + content_text = observation.text + + if content_text and conversation is not None: try: secret_registry = conversation.state.secret_registry - masked_output = secret_registry.mask_secrets_in_output( - observation.output - ) - if masked_output: - data = observation.model_dump(exclude={"output"}) - return ExecuteBashObservation(**data, output=masked_output) + masked_content = secret_registry.mask_secrets_in_output(content_text) + if masked_content: + data = observation.model_dump(exclude={"content"}) + return ExecuteBashObservation.from_text(text=masked_content, **data) except Exception: pass diff --git a/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py b/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py index 0d46638650..35042d35ef 100644 --- a/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py +++ b/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py @@ -186,9 +186,9 @@ def _handle_completed_command( self.prev_status = TerminalCommandStatus.COMPLETED self.prev_output = "" # Reset previous command output self._ready_for_next_command() - return ExecuteBashObservation( - output=command_output, + return ExecuteBashObservation.from_text( command=command, + text=command_output, metadata=metadata, ) @@ -220,9 +220,9 @@ def _handle_nochange_timeout_command( metadata, continue_prefix="[Below is the output of the previous command.]\n", ) - return ExecuteBashObservation( - output=command_output, + return ExecuteBashObservation.from_text( command=command, + text=command_output, metadata=metadata, ) @@ -255,10 +255,9 @@ def _handle_hard_timeout_command( metadata, continue_prefix="[Below is the output of the previous command.]\n", ) - - return ExecuteBashObservation( - output=command_output, + return ExecuteBashObservation.from_text( command=command, + text=command_output, metadata=metadata, ) @@ -313,27 +312,32 @@ def execute(self, action: ExecuteBashAction) -> ExecuteBashObservation: TerminalCommandStatus.HARD_TIMEOUT, }: if command == "": - return ExecuteBashObservation( - output="ERROR: No previous running command to retrieve logs from.", - error=True, + return ExecuteBashObservation.from_text( + text="No previous running command to retrieve logs from.", + command=command, + is_error=True, ) if is_input: - return ExecuteBashObservation( - output="ERROR: No previous running command to interact with.", - error=True, + return ExecuteBashObservation.from_text( + text="No previous running command to interact with.", + command=command, + is_error=True, ) # Check if the command is a single command or multiple commands splited_commands = split_bash_commands(command) if len(splited_commands) > 1: - return ExecuteBashObservation( - output=( - f"ERROR: Cannot execute multiple commands at once.\n" - f"Please run each command separately OR chain them into a single " - f"command via && or ;\nProvided commands:\n" - f"{'\n'.join(f'({i + 1}) {cmd}' for i, cmd in enumerate(splited_commands))}" # noqa: E501 + commands_list = "\n".join( + f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_commands) + ) + return ExecuteBashObservation.from_text( + text=( + "Cannot execute multiple commands at once.\n" + "Please run each command separately OR chain them into a single " + f"command via && or ;\nProvided commands:\n{commands_list}" ), - error=True, + command=command, + is_error=True, ) # Get initial state before sending command @@ -384,10 +388,11 @@ def execute(self, action: ExecuteBashAction) -> ExecuteBashObservation: metadata, continue_prefix="[Below is the output of the previous command.]\n", ) - obs = ExecuteBashObservation( - output=command_output, + obs = ExecuteBashObservation.from_text( command=command, + text=command_output, metadata=metadata, + is_error=True, ) logger.debug(f"RETURNING OBSERVATION (previous-command): {obs}") return obs diff --git a/openhands-tools/openhands/tools/file_editor/definition.py b/openhands-tools/openhands/tools/file_editor/definition.py index d2041687d3..ba6fdd7656 100644 --- a/openhands-tools/openhands/tools/file_editor/definition.py +++ b/openhands-tools/openhands/tools/file_editor/definition.py @@ -11,7 +11,6 @@ from rich.text import Text -from openhands.sdk.llm import ImageContent, TextContent from openhands.sdk.tool import ( Action, Observation, @@ -69,12 +68,12 @@ class FileEditorObservation(Observation): """A ToolResult that can be rendered as a CLI output.""" command: CommandLiteral = Field( - description="The commands to run. Allowed options are: `view`, `create`, " - "`str_replace`, `insert`, `undo_edit`." - ) - output: str = Field( - default="", description="The output message from the tool for the LLM to see." + description=( + "The command that was run: `view`, `create`, `str_replace`, " + "`insert`, or `undo_edit`." + ) ) + path: str | None = Field(default=None, description="The file path that was edited.") prev_exist: bool = Field( default=True, @@ -86,16 +85,9 @@ class FileEditorObservation(Observation): new_content: str | None = Field( default=None, description="The content of the file after the edit." ) - error: str | None = Field(default=None, description="Error message if any.") _diff_cache: Text | None = PrivateAttr(default=None) - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - if self.error: - return [TextContent(text=self.error)] - return [TextContent(text=self.output)] - @property def visualize(self) -> Text: """Return Rich Text representation of this observation. @@ -103,6 +95,11 @@ def visualize(self) -> Text: Shows diff visualization for meaningful changes (file creation, successful edits), otherwise falls back to agent observation. """ + text = Text() + + if self.is_error: + text.append("❌ ", style="red bold") + text.append(self.ERROR_MESSAGE_HEADER, style="bold red") if not self._has_meaningful_diff: return super().visualize @@ -110,7 +107,7 @@ def visualize(self) -> Text: assert self.path is not None, "path should be set for meaningful diff" # Generate and cache diff visualization if not self._diff_cache: - change_applied = self.command != "view" and not self.error + change_applied = self.command != "view" and not self.is_error self._diff_cache = visualize_diff( self.path, self.old_content, @@ -119,12 +116,14 @@ def visualize(self) -> Text: change_applied=change_applied, ) - return self._diff_cache + # Combine error prefix with diff visualization + text.append(self._diff_cache) + return text @property def _has_meaningful_diff(self) -> bool: """Check if there's a meaningful diff to display.""" - if self.error: + if self.is_error: return False if not self.path: diff --git a/openhands-tools/openhands/tools/file_editor/editor.py b/openhands-tools/openhands/tools/file_editor/editor.py index 509bba6323..99adb7a409 100644 --- a/openhands-tools/openhands/tools/file_editor/editor.py +++ b/openhands-tools/openhands/tools/file_editor/editor.py @@ -108,12 +108,12 @@ def __call__( raise EditorToolParameterMissingError(command, "file_text") self.write_file(_path, file_text) self._history_manager.add_history(_path, file_text) - return FileEditorObservation( + return FileEditorObservation.from_text( + text=f"File created successfully at: {_path}", command=command, path=str(_path), new_content=file_text, prev_exist=False, - output=f"File created successfully at: {_path}", ) elif command == "str_replace": if old_str is None: @@ -253,9 +253,9 @@ def str_replace( "Review the changes and make sure they are as expected. Edit the " "file again if necessary." ) - return FileEditorObservation( + return FileEditorObservation.from_text( + text=success_message, command="str_replace", - output=success_message, prev_exist=True, path=str(path), old_content=file_content, @@ -293,30 +293,36 @@ def view( rf"-path '{path}/*/\.*' \) | sort", truncate_notice=DIRECTORY_CONTENT_TRUNCATED_NOTICE, ) - if not stderr: - # Add trailing slashes to directories - paths = stdout.strip().split("\n") if stdout.strip() else [] - formatted_paths = [] - for p in paths: - if Path(p).is_dir(): - formatted_paths.append(f"{p}/") - else: - formatted_paths.append(p) - - msg = [ - f"Here's the files and directories up to 2 levels deep in {path}, " - "excluding hidden items:\n" + "\n".join(formatted_paths) - ] - if hidden_count > 0: - msg.append( - f"\n{hidden_count} hidden files/directories in this directory " - f"are excluded. You can use 'ls -la {path}' to see them." - ) - stdout = "\n".join(msg) - return FileEditorObservation( + if stderr: + return FileEditorObservation.from_text( + text=stderr, + command="view", + is_error=True, + path=str(path), + prev_exist=True, + ) + # Add trailing slashes to directories + paths = stdout.strip().split("\n") if stdout.strip() else [] + formatted_paths = [] + for p in paths: + if Path(p).is_dir(): + formatted_paths.append(f"{p}/") + else: + formatted_paths.append(p) + + msg = [ + f"Here's the files and directories up to 2 levels deep in {path}, " + "excluding hidden items:\n" + "\n".join(formatted_paths) + ] + if hidden_count > 0: + msg.append( + f"\n{hidden_count} hidden files/directories in this directory " + f"are excluded. You can use 'ls -la {path}' to see them." + ) + stdout = "\n".join(msg) + return FileEditorObservation.from_text( + text=stdout, command="view", - output=stdout, - error=stderr, path=str(path), prev_exist=True, ) @@ -330,9 +336,9 @@ def view( file_content = self.read_file(path) output = self._make_output(file_content, str(path), start_line) - return FileEditorObservation( + return FileEditorObservation.from_text( + text=output, command="view", - output=output, path=str(path), prev_exist=True, ) @@ -383,10 +389,10 @@ def view( if warning_message: output = f"NOTE: {warning_message}\n{output}" - return FileEditorObservation( + return FileEditorObservation.from_text( + text=output, command="view", path=str(path), - output=output, prev_exist=True, ) @@ -496,9 +502,9 @@ def insert( "Review the changes and make sure they are as expected (correct " "indentation, no duplicate lines, etc). Edit the file again if necessary." ) - return FileEditorObservation( + return FileEditorObservation.from_text( + text=success_message, command="insert", - output=success_message, prev_exist=True, path=str(path), old_content=file_text, @@ -565,12 +571,12 @@ def undo_edit(self, path: Path) -> FileEditorObservation: self.write_file(path, old_text) - return FileEditorObservation( - command="undo_edit", - output=( + return FileEditorObservation.from_text( + text=( f"Last edit to {path} undone successfully. " f"{self._make_output(old_text, str(path))}" ), + command="undo_edit", path=str(path), prev_exist=True, old_content=current_text, diff --git a/openhands-tools/openhands/tools/file_editor/impl.py b/openhands-tools/openhands/tools/file_editor/impl.py index afc1f7906d..a653a6a24a 100644 --- a/openhands-tools/openhands/tools/file_editor/impl.py +++ b/openhands-tools/openhands/tools/file_editor/impl.py @@ -43,12 +43,15 @@ def __call__( if self.allowed_edits_files is not None and action.command != "view": action_path = Path(action.path).resolve() if action_path not in self.allowed_edits_files: - return FileEditorObservation( + return FileEditorObservation.from_text( + text=( + f"Operation '{action.command}' is not allowed " + f"on file '{action_path}'. " + f"Only the following files can be edited: " + f"{sorted(str(p) for p in self.allowed_edits_files)}" + ), command=action.command, - error=f"Operation '{action.command}' is not allowed " - f"on file '{action_path}'. " - f"Only the following files can be edited: " - f"{sorted(str(p) for p in self.allowed_edits_files)}", + is_error=True, ) result: FileEditorObservation | None = None @@ -63,7 +66,9 @@ def __call__( insert_line=action.insert_line, ) except ToolError as e: - result = FileEditorObservation(command=action.command, error=e.message) + result = FileEditorObservation.from_text( + text=e.message, command=action.command, is_error=True + ) assert result is not None, "file_editor should always return a result" return result @@ -95,6 +100,8 @@ def file_editor( insert_line=insert_line, ) except ToolError as e: - result = FileEditorObservation(command=command, error=e.message) + result = FileEditorObservation.from_text( + text=e.message, command=command, is_error=True + ) assert result is not None, "file_editor should always return a result" return result diff --git a/openhands-tools/openhands/tools/glob/definition.py b/openhands-tools/openhands/tools/glob/definition.py index 4e76ca9e20..a0c64b5005 100644 --- a/openhands-tools/openhands/tools/glob/definition.py +++ b/openhands-tools/openhands/tools/glob/definition.py @@ -6,11 +6,6 @@ from pydantic import Field - -if TYPE_CHECKING: - from openhands.sdk.conversation.state import ConversationState - -from openhands.sdk.llm import ImageContent, TextContent from openhands.sdk.tool import ( Action, Observation, @@ -20,6 +15,10 @@ ) +if TYPE_CHECKING: + from openhands.sdk.conversation.state import ConversationState + + class GlobAction(Action): """Schema for glob pattern matching operations.""" @@ -46,32 +45,6 @@ class GlobObservation(Observation): truncated: bool = Field( default=False, description="Whether results were truncated to 100 files" ) - error: str | None = Field(default=None, description="Error message if any") - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - """Convert observation to LLM content.""" - if self.error: - return [TextContent(text=f"Error: {self.error}")] - - if not self.files: - content = ( - f"No files found matching pattern '{self.pattern}' " - f"in directory '{self.search_path}'" - ) - else: - file_list = "\n".join(self.files) - content = ( - f"Found {len(self.files)} file(s) matching pattern " - f"'{self.pattern}' in '{self.search_path}':\n{file_list}" - ) - if self.truncated: - content += ( - "\n\n[Results truncated to first 100 files. " - "Consider using a more specific pattern.]" - ) - - return [TextContent(text=content)] TOOL_DESCRIPTION = """Fast file pattern matching tool. diff --git a/openhands-tools/openhands/tools/glob/impl.py b/openhands-tools/openhands/tools/glob/impl.py index 1b566d5d4c..45261cb970 100644 --- a/openhands-tools/openhands/tools/glob/impl.py +++ b/openhands-tools/openhands/tools/glob/impl.py @@ -67,11 +67,12 @@ def __call__( ) if not search_path.is_dir(): - return GlobObservation( + return GlobObservation.from_text( + text=f"Search path '{search_path}' is not a valid directory", files=[], pattern=original_pattern, search_path=str(search_path), - error=f"Search path '{search_path}' is not a valid directory", + is_error=True, ) if self._ripgrep_available: @@ -79,7 +80,26 @@ def __call__( else: files, truncated = self._execute_with_glob(pattern, search_path) - return GlobObservation( + # Format content message + if not files: + content = ( + f"No files found matching pattern '{original_pattern}' " + f"in directory '{search_path}'" + ) + else: + file_list = "\n".join(files) + content = ( + f"Found {len(files)} file(s) matching pattern " + f"'{original_pattern}' in '{search_path}':\n{file_list}" + ) + if truncated: + content += ( + "\n\n[Results truncated to first 100 files. " + "Consider using a more specific pattern.]" + ) + + return GlobObservation.from_text( + text=content, files=files, pattern=original_pattern, search_path=str(search_path), @@ -96,11 +116,12 @@ def __call__( except Exception: error_search_path = "unknown" - return GlobObservation( + return GlobObservation.from_text( + text=str(e), files=[], pattern=action.pattern, search_path=error_search_path, - error=str(e), + is_error=True, ) def _execute_with_ripgrep( diff --git a/openhands-tools/openhands/tools/grep/definition.py b/openhands-tools/openhands/tools/grep/definition.py index 4f378424e9..c603208fe0 100644 --- a/openhands-tools/openhands/tools/grep/definition.py +++ b/openhands-tools/openhands/tools/grep/definition.py @@ -6,11 +6,6 @@ from pydantic import Field - -if TYPE_CHECKING: - from openhands.sdk.conversation.state import ConversationState - -from openhands.sdk.llm import ImageContent, TextContent from openhands.sdk.tool import ( Action, Observation, @@ -20,6 +15,10 @@ ) +if TYPE_CHECKING: + from openhands.sdk.conversation.state import ConversationState + + class GrepAction(Action): """Schema for grep content search operations.""" @@ -52,43 +51,6 @@ class GrepObservation(Observation): truncated: bool = Field( default=False, description="Whether results were truncated to 100 files" ) - error: str | None = Field(default=None, description="Error message if any") - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - """Convert observation to LLM content.""" - if self.error: - return [TextContent(text=f"Error: {self.error}")] - - if not self.matches: - include_info = ( - f" (filtered by '{self.include_pattern}')" - if self.include_pattern - else "" - ) - content = ( - f"No files found containing pattern '{self.pattern}' " - f"in directory '{self.search_path}'{include_info}" - ) - else: - include_info = ( - f" (filtered by '{self.include_pattern}')" - if self.include_pattern - else "" - ) - file_list = "\n".join(self.matches) - content = ( - f"Found {len(self.matches)} file(s) containing pattern " - f"'{self.pattern}' in '{self.search_path}'{include_info}:\n" - f"{file_list}" - ) - if self.truncated: - content += ( - "\n\n[Results truncated to first 100 files. " - "Consider using a more specific pattern.]" - ) - - return [TextContent(text=content)] TOOL_DESCRIPTION = """Fast content search tool. diff --git a/openhands-tools/openhands/tools/grep/impl.py b/openhands-tools/openhands/tools/grep/impl.py index ef99434221..e08baf1ee4 100644 --- a/openhands-tools/openhands/tools/grep/impl.py +++ b/openhands-tools/openhands/tools/grep/impl.py @@ -55,12 +55,13 @@ def __call__( if action.path: search_path = Path(action.path).resolve() if not search_path.is_dir(): - return GrepObservation( + return GrepObservation.from_text( + text=f"Search path '{action.path}' is not a valid directory", matches=[], pattern=action.pattern, search_path=str(search_path), include_pattern=action.include, - error=f"Search path '{action.path}' is not a valid directory", + is_error=True, ) else: search_path = self.working_dir @@ -69,12 +70,13 @@ def __call__( try: re.compile(action.pattern) except re.error as e: - return GrepObservation( + return GrepObservation.from_text( + text=f"Invalid regex pattern: {e}", matches=[], pattern=action.pattern, search_path=str(search_path), include_pattern=action.include, - error=f"Invalid regex pattern: {e}", + is_error=True, ) if self._ripgrep_available: @@ -92,14 +94,46 @@ def __call__( except Exception: error_search_path = "unknown" - return GrepObservation( + return GrepObservation.from_text( + text=str(e), matches=[], pattern=action.pattern, search_path=error_search_path, include_pattern=action.include, - error=str(e), + is_error=True, ) + def _format_output( + self, + matches: list[str], + pattern: str, + search_path: str, + include_pattern: str | None, + truncated: bool, + ) -> str: + """Format the grep observation output message.""" + if not matches: + include_info = ( + f" (filtered by '{include_pattern}')" if include_pattern else "" + ) + return ( + f"No files found containing pattern '{pattern}' " + f"in directory '{search_path}'{include_info}" + ) + + include_info = f" (filtered by '{include_pattern}')" if include_pattern else "" + file_list = "\n".join(matches) + output = ( + f"Found {len(matches)} file(s) containing pattern " + f"'{pattern}' in '{search_path}'{include_info}:\n{file_list}" + ) + if truncated: + output += ( + "\n\n[Results truncated to first 100 files. " + "Consider using a more specific pattern.]" + ) + return output + def _execute_with_ripgrep( self, action: GrepAction, search_path: Path ) -> GrepObservation: @@ -135,7 +169,16 @@ def _execute_with_ripgrep( truncated = len(matches) >= 100 - return GrepObservation( + output = self._format_output( + matches=matches, + pattern=action.pattern, + search_path=str(search_path), + include_pattern=action.include, + truncated=truncated, + ) + + return GrepObservation.from_text( + text=output, matches=matches, pattern=action.pattern, search_path=str(search_path), @@ -189,7 +232,16 @@ def _execute_with_grep( truncated = len(matches) >= 100 - return GrepObservation( + output = self._format_output( + matches=matches, + pattern=action.pattern, + search_path=str(search_path), + include_pattern=action.include, + truncated=truncated, + ) + + return GrepObservation.from_text( + text=output, matches=matches, pattern=action.pattern, search_path=str(search_path), diff --git a/openhands-tools/openhands/tools/planning_file_editor/impl.py b/openhands-tools/openhands/tools/planning_file_editor/impl.py index 1b2f45a43c..20ba166e9d 100644 --- a/openhands-tools/openhands/tools/planning_file_editor/impl.py +++ b/openhands-tools/openhands/tools/planning_file_editor/impl.py @@ -60,6 +60,7 @@ def __call__( # Convert FileEditorObservation to PlanningFileEditorObservation return PlanningFileEditorObservation( command=action.command, - output=file_editor_obs.output, - error=file_editor_obs.error, + content=file_editor_obs.content, + is_error=file_editor_obs.is_error, + path=file_editor_obs.path, ) diff --git a/openhands-tools/openhands/tools/task_tracker/definition.py b/openhands-tools/openhands/tools/task_tracker/definition.py index 350f276fa2..3829fd9924 100644 --- a/openhands-tools/openhands/tools/task_tracker/definition.py +++ b/openhands-tools/openhands/tools/task_tracker/definition.py @@ -12,7 +12,6 @@ from rich.text import Text -from openhands.sdk import ImageContent, TextContent from openhands.sdk.logger import get_logger from openhands.sdk.tool import ( Action, @@ -72,22 +71,21 @@ def visualize(self) -> Text: class TaskTrackerObservation(Observation): """This data class represents the result of a task tracking operation.""" - content: str = Field( - default="", description="The formatted task list or status message" + command: Literal["view", "plan"] = Field( + description='The command that was executed: "view" or "plan".' ) - command: str = Field(default="", description="The command that was executed") task_list: list[TaskItem] = Field( default_factory=list, description="The current task list" ) - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.content)] - @property def visualize(self) -> Text: """Return Rich Text representation with task list formatting.""" - content = Text() + text = Text() + + if self.is_error: + text.append("❌ ", style="red bold") + text.append(self.ERROR_MESSAGE_HEADER, style="bold red") if self.task_list: # Count tasks by status @@ -99,11 +97,11 @@ def visualize(self) -> Text: # Show status summary if self.command == "plan": - content.append("✅ ", style="green") - content.append("Task list updated: ", style="green") + text.append("✅ ", style="green") + text.append("Task list updated: ", style="green") else: # view command - content.append("📋 ", style="blue") - content.append("Current task list: ", style="blue") + text.append("📋 ", style="blue") + text.append("Current task list: ", style="blue") # Status counts status_parts = [] @@ -115,33 +113,33 @@ def visualize(self) -> Text: status_parts.append(f"{done_count} done") if status_parts: - content.append(", ".join(status_parts), style="white") - content.append("\n\n") + text.append(", ".join(status_parts), style="white") + text.append("\n\n") # Show the actual task list for i, task in enumerate(self.task_list, 1): # Status icon if task.status == "done": - content.append("✅ ", style="green") + text.append("✅ ", style="green") elif task.status == "in_progress": - content.append("🔄 ", style="yellow") + text.append("🔄 ", style="yellow") else: # todo - content.append("⏳ ", style="blue") + text.append("⏳ ", style="blue") # Task title - content.append(f"{i}. {task.title}", style="white") + text.append(f"{i}. {task.title}", style="white") # NEW: show notes under the title if present if task.notes: - content.append("\n Notes: " + task.notes, style="italic") + text.append("\n Notes: " + task.notes, style="italic") if i < len(self.task_list): - content.append("\n") + text.append("\n") else: - content.append("📝 ", style="blue") - content.append("Task list is empty") + text.append("📝 ", style="blue") + text.append("Task list is empty") - return content + return text class TaskTrackerExecutor(ToolExecutor[TaskTrackerAction, TaskTrackerObservation]): @@ -176,28 +174,34 @@ def __call__( # Save to file if save_dir is provided if self.save_dir: self._save_tasks() - return TaskTrackerObservation( - content="Task list has been updated with " - + f"{len(self._task_list)} item(s).", + return TaskTrackerObservation.from_text( + text=( + f"Task list has been updated with {len(self._task_list)} item(s)." + ), command=action.command, task_list=self._task_list, ) elif action.command == "view": # Return the current task list if not self._task_list: - return TaskTrackerObservation( - content='No task list found. Use the "plan" command to create one.', + return TaskTrackerObservation.from_text( + text=('No task list found. Use the "plan" command to create one.'), command=action.command, task_list=[], ) content = self._format_task_list(self._task_list) - return TaskTrackerObservation( - content=content, command=action.command, task_list=self._task_list + return TaskTrackerObservation.from_text( + text=content, + command=action.command, + task_list=self._task_list, ) else: - return TaskTrackerObservation( - content=f"Unknown command: {action.command}. " - + 'Supported commands are "view" and "plan".', + return TaskTrackerObservation.from_text( + text=( + f"Unknown command: {action.command}. " + 'Supported commands are "view" and "plan".' + ), + is_error=True, command=action.command, task_list=[], ) diff --git a/tests/cross/test_agent_secrets_integration.py b/tests/cross/test_agent_secrets_integration.py index 3d9fdcf3c9..c6c3894b30 100644 --- a/tests/cross/test_agent_secrets_integration.py +++ b/tests/cross/test_agent_secrets_integration.py @@ -234,13 +234,13 @@ def get_value(self): try: action = ExecuteBashAction(command="echo $API_KEY") result = bash_executor(action, conversation=conversation) - assert "test-api-key" not in result.output - assert "" in result.output + assert "test-api-key" not in result.text + assert "" in result.text action = ExecuteBashAction(command="echo $DB_PASSWORD") result = bash_executor(action, conversation=conversation) - assert "dynamic-secret" not in result.output - assert "" in result.output + assert "dynamic-secret" not in result.text + assert "" in result.text finally: bash_executor.close() @@ -265,13 +265,13 @@ def get_value(self): try: action = ExecuteBashAction(command="echo $DB_PASSWORD") result = bash_executor(action, conversation=conversation) - assert "changing-secret" not in result.output - assert "" in result.output + assert "changing-secret" not in result.text + assert "" in result.text action = ExecuteBashAction(command="echo $DB_PASSWORD") result = bash_executor(action, conversation=conversation) - assert "changing-secret" not in result.output - assert "" in result.output + assert "changing-secret" not in result.text + assert "" in result.text finally: bash_executor.close() @@ -303,13 +303,13 @@ def get_value(self): action = ExecuteBashAction(command="echo $DB_PASSWORD") result = bash_executor(action, conversation=conversation) print(result) - assert "changing-secret" not in result.output - assert "" in result.output + assert "changing-secret" not in result.text + assert "" in result.text action = ExecuteBashAction(command="echo $DB_PASSWORD") result = bash_executor(action, conversation=conversation) - assert "changing-secret" not in result.output - assert "" in result.output + assert "changing-secret" not in result.text + assert "" in result.text assert dynamic_secret.raised_on_second finally: diff --git a/tests/cross/test_stuck_detector.py b/tests/cross/test_stuck_detector.py index 8ce47b9c09..f7e75afa79 100644 --- a/tests/cross/test_stuck_detector.py +++ b/tests/cross/test_stuck_detector.py @@ -58,8 +58,10 @@ def test_history_too_short(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output="file1.txt\nfile2.txt", command="ls", exit_code=0 + observation=ExecuteBashObservation.from_text( + text="file1.txt\nfile2.txt", + command="ls", + exit_code=0, ), action_id=action.id, tool_name="bash", @@ -107,8 +109,10 @@ def test_repeating_action_observation_not_stuck_less_than_4_repeats(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output="file1.txt\nfile2.txt", command="ls", exit_code=0 + observation=ExecuteBashObservation.from_text( + text="file1.txt\nfile2.txt", + command="ls", + exit_code=0, ), action_id=action.id, tool_name="bash", @@ -156,8 +160,10 @@ def test_repeating_action_observation_stuck(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output="file1.txt\nfile2.txt", command="ls", exit_code=0 + observation=ExecuteBashObservation.from_text( + text="file1.txt\nfile2.txt", + command="ls", + exit_code=0, ), action_id=action.id, tool_name="bash", @@ -297,8 +303,10 @@ def test_not_stuck_with_different_actions(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output=f"output from {cmd}", command=cmd, exit_code=0 + observation=ExecuteBashObservation.from_text( + text=f"output from {cmd}", + command=cmd, + exit_code=0, ), action_id=action.id, tool_name="bash", @@ -346,8 +354,10 @@ def test_reset_after_user_message(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output="file1.txt\nfile2.txt", command="ls", exit_code=0 + observation=ExecuteBashObservation.from_text( + text="file1.txt\nfile2.txt", + command="ls", + exit_code=0, ), action_id=action.id, tool_name="bash", @@ -389,8 +399,8 @@ def test_reset_after_user_message(): observation = ObservationEvent( source="environment", - observation=ExecuteBashObservation( - output="/home/user", command="pwd", exit_code=0 + observation=ExecuteBashObservation.from_text( + text="/home/user", command="pwd", exit_code=0 ), action_id=action.id, tool_name="bash", diff --git a/tests/sdk/context/test_view_batch_atomicity.py b/tests/sdk/context/test_view_batch_atomicity.py index ac37464cef..0060b351ce 100644 --- a/tests/sdk/context/test_view_batch_atomicity.py +++ b/tests/sdk/context/test_view_batch_atomicity.py @@ -55,8 +55,8 @@ def create_observation_event( tool_call_id: str, content: str = "Success", tool_name: str = "test_tool" ) -> ObservationEvent: """Helper to create an ObservationEvent.""" - observation = MCPToolObservation( - content=[TextContent(text=content)], + observation = MCPToolObservation.from_text( + text=content, tool_name=tool_name, ) return ObservationEvent( diff --git a/tests/sdk/conversation/local/test_confirmation_mode.py b/tests/sdk/conversation/local/test_confirmation_mode.py index bd9ecbe064..0176aee01d 100644 --- a/tests/sdk/conversation/local/test_confirmation_mode.py +++ b/tests/sdk/conversation/local/test_confirmation_mode.py @@ -552,7 +552,8 @@ def test_single_finish_action_skips_confirmation_entirely(self): e for e in self.conversation.state.events if isinstance(e, ObservationEvent) ] assert len(obs_events) == 1 - assert obs_events[0].observation.message == "Task completed successfully!" # type: ignore[attr-defined] + # FinishObservation should contain the finish message in content + assert obs_events[0].observation.text == "Task completed successfully!" def test_think_and_finish_action_skips_confirmation_entirely(self): """First step: ThinkAction (skips confirmation). Second step: FinishAction.""" @@ -594,13 +595,13 @@ def test_think_and_finish_action_skips_confirmation_entirely(self): ] assert len(obs_events) == 2 - # 1) ThinkAction observation + # 1) ThinkAction observation - should contain the standard message assert hasattr(obs_events[0].observation, "content") - assert obs_events[0].observation.content == "Your thought has been logged." # type: ignore[attr-defined] + assert obs_events[0].observation.text == "Your thought has been logged." - # 2) FinishAction observation - assert hasattr(obs_events[1].observation, "message") - assert obs_events[1].observation.message == "Analysis complete" # type: ignore[attr-defined] + # 2) FinishAction observation - should contain the finish message + assert hasattr(obs_events[1].observation, "content") + assert obs_events[1].observation.text == "Analysis complete" def test_pause_during_confirmation_preserves_waiting_status(self): """Test that pausing during WAITING_FOR_CONFIRMATION preserves the status. diff --git a/tests/sdk/conversation/test_visualizer.py b/tests/sdk/conversation/test_visualizer.py index fcac0c8a33..040c9c00f9 100644 --- a/tests/sdk/conversation/test_visualizer.py +++ b/tests/sdk/conversation/test_visualizer.py @@ -1,7 +1,6 @@ """Tests for the conversation visualizer and event visualization.""" import json -from collections.abc import Sequence from rich.text import Text @@ -19,7 +18,6 @@ UserRejectObservation, ) from openhands.sdk.llm import ( - ImageContent, Message, MessageToolCall, TextContent, @@ -153,14 +151,10 @@ def test_observation_event_visualize(): from openhands.sdk.tool import Observation class VisualizerMockObservation(Observation): - content: str = "Command output" - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.content)] + pass observation = VisualizerMockObservation( - content="total 4\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 ." + content=[TextContent(text="total 4\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 .")] ) event = ObservationEvent( observation=observation, diff --git a/tests/sdk/event/test_event_serialization.py b/tests/sdk/event/test_event_serialization.py index fd3a3d17ee..e922bfe8ca 100644 --- a/tests/sdk/event/test_event_serialization.py +++ b/tests/sdk/event/test_event_serialization.py @@ -1,7 +1,5 @@ """Comprehensive tests for event serialization and deserialization.""" -from collections.abc import Sequence - import pytest from pydantic import ValidationError @@ -16,7 +14,6 @@ SystemPromptEvent, ) from openhands.sdk.llm import ( - ImageContent, Message, MessageToolCall, TextContent, @@ -32,17 +29,15 @@ class EventsSerializationMockAction(Action): """Mock action for testing.""" def execute(self) -> "EventsSerializationMockObservation": - return EventsSerializationMockObservation(content="mock result") + return EventsSerializationMockObservation( + content=[TextContent(text="mock result")] + ) class EventsSerializationMockObservation(Observation): """Mock observation for testing.""" - content: str - - @property - def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.content)] + pass def test_event_base_serialization() -> None: @@ -100,7 +95,9 @@ def test_action_event_serialization() -> None: def test_observation_event_serialization() -> None: """Test ObservationEvent serialization/deserialization.""" - observation = EventsSerializationMockObservation(content="test result") + observation = EventsSerializationMockObservation( + content=[TextContent(text="test result")] + ) event = ObservationEvent( observation=observation, action_id="action_123", diff --git a/tests/sdk/mcp/test_mcp_tool.py b/tests/sdk/mcp/test_mcp_tool.py index 3ca4f9c8bf..74c075e7a1 100644 --- a/tests/sdk/mcp/test_mcp_tool.py +++ b/tests/sdk/mcp/test_mcp_tool.py @@ -5,7 +5,7 @@ import mcp.types -from openhands.sdk.llm import TextContent +from openhands.sdk.llm import ImageContent, TextContent from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolObservation from openhands.sdk.mcp.tool import MCPToolDefinition, MCPToolExecutor @@ -37,9 +37,12 @@ def test_from_call_tool_result_success(self): ) assert observation.tool_name == "test_tool" - assert len(observation.content) == 1 + assert observation.content is not None + assert len(observation.content) == 2 assert isinstance(observation.content[0], TextContent) - assert observation.content[0].text == "Operation completed successfully" + assert observation.content[0].text == "[Tool 'test_tool' executed.]" + assert isinstance(observation.content[1], TextContent) + assert observation.content[1].text == "Operation completed successfully" assert observation.is_error is False def test_from_call_tool_result_error(self): @@ -54,10 +57,12 @@ def test_from_call_tool_result_error(self): ) assert observation.tool_name == "test_tool" - assert len(observation.content) == 1 - assert isinstance(observation.content[0], TextContent) - assert observation.content[0].text == "Operation failed" assert observation.is_error is True + assert len(observation.content) == 2 + assert isinstance(observation.content[0], TextContent) + assert observation.content[0].text == "[Tool 'test_tool' executed.]" + assert isinstance(observation.content[1], TextContent) + assert observation.content[1].text == "Operation failed" def test_from_call_tool_result_with_image(self): """Test creating observation from MCP result with image content.""" @@ -76,44 +81,53 @@ def test_from_call_tool_result_with_image(self): ) assert observation.tool_name == "test_tool" - assert len(observation.content) == 2 + assert observation.content is not None + assert len(observation.content) == 3 + # First item is header assert isinstance(observation.content[0], TextContent) - assert observation.content[0].text == "Here's the image:" - # Second content should be ImageContent - assert hasattr(observation.content[1], "image_urls") + assert observation.content[0].text == "[Tool 'test_tool' executed.]" + # Second item is text + assert isinstance(observation.content[1], TextContent) + assert observation.content[1].text == "Here's the image:" + # Third item is image + assert isinstance(observation.content[2], ImageContent) + assert hasattr(observation.content[2], "image_urls") assert observation.is_error is False def test_to_llm_content_success(self): """Test agent observation formatting for success.""" - observation = MCPToolObservation( + observation = MCPToolObservation.from_text( + text="[Tool 'test_tool' executed.]\nSuccess result", tool_name="test_tool", - content=[TextContent(text="Success result")], - is_error=False, ) agent_obs = observation.to_llm_content - assert len(agent_obs) == 2 + assert len(agent_obs) == 1 assert isinstance(agent_obs[0], TextContent) assert "[Tool 'test_tool' executed.]" in agent_obs[0].text - assert "[An error occurred during execution.]" not in agent_obs[0].text - assert isinstance(agent_obs[1], TextContent) - assert agent_obs[1].text == "Success result" + assert "Success result" in agent_obs[0].text + assert MCPToolObservation.ERROR_MESSAGE_HEADER not in agent_obs[0].text def test_to_llm_content_error(self): """Test agent observation formatting for error.""" - observation = MCPToolObservation( + observation = MCPToolObservation.from_text( + text=( + "[Tool 'test_tool' executed.]\n" + "[An error occurred during execution.]\n" + "Error occurred" + ), tool_name="test_tool", - content=[TextContent(text="Error occurred")], is_error=True, ) agent_obs = observation.to_llm_content assert len(agent_obs) == 2 assert isinstance(agent_obs[0], TextContent) + assert agent_obs[0].text == MCPToolObservation.ERROR_MESSAGE_HEADER assert isinstance(agent_obs[1], TextContent) - assert "[Tool 'test_tool' executed.]" in agent_obs[0].text - assert "[An error occurred during execution.]" in agent_obs[0].text - assert agent_obs[1].text == "Error occurred" + assert "[Tool 'test_tool' executed.]" in agent_obs[1].text + assert "[An error occurred during execution.]" in agent_obs[1].text + assert "Error occurred" in agent_obs[1].text class TestMCPToolExecutor: @@ -188,14 +202,10 @@ def test_call_tool_exception(self): # Mock call_async_from_sync to return an error observation def mock_call_async_from_sync(coro_func, **kwargs): - return MCPToolObservation( - content=[ - TextContent( - text="Error calling MCP tool test_tool: Connection failed" - ) - ], - is_error=True, + return MCPToolObservation.from_text( + text="Error calling MCP tool test_tool: Connection failed", tool_name="test_tool", + is_error=True, ) self.mock_client.call_async_from_sync = mock_call_async_from_sync @@ -203,10 +213,10 @@ def mock_call_async_from_sync(coro_func, **kwargs): observation = self.executor(mock_action) assert isinstance(observation, MCPToolObservation) - assert isinstance(observation.content[0], TextContent) assert observation.tool_name == "test_tool" assert observation.is_error is True - assert "Connection failed" in observation.content[0].text + assert observation.is_error is True + assert "Connection failed" in observation.text class TestMCPTool: diff --git a/tests/sdk/mcp/test_mcp_tool_kind_field.py b/tests/sdk/mcp/test_mcp_tool_kind_field.py index a62444615f..af67777c57 100644 --- a/tests/sdk/mcp/test_mcp_tool_kind_field.py +++ b/tests/sdk/mcp/test_mcp_tool_kind_field.py @@ -85,11 +85,17 @@ def test_real_mcp_tool_execution_without_kind_field(fetch_tool): observation = fetch_tool(action) # Verify we got a valid response (not an error about 'kind') + # Check output if no error, otherwise check error message + from openhands.sdk.llm import TextContent + assert observation.content is not None - assert len(observation.content) > 0 + # Extract text from content blocks (content is always a list now) + text_parts = [ + block.text for block in observation.content if isinstance(block, TextContent) + ] + content_str = " ".join(text_parts) # Check that the response doesn't contain validation error about 'kind' - content_str = str(observation.content) if "error" in content_str.lower(): # If there's an error, make sure it's not about 'kind' field assert "kind" not in content_str.lower(), ( diff --git a/tests/sdk/tool/test_registry.py b/tests/sdk/tool/test_registry.py index 49213bd87a..40d219913e 100644 --- a/tests/sdk/tool/test_registry.py +++ b/tests/sdk/tool/test_registry.py @@ -26,7 +26,7 @@ class _HelloAction(Action): class _HelloObservation(Observation): - message: str + message: str = "" @property def to_llm_content(self) -> Sequence[TextContent | ImageContent]: diff --git a/tests/tools/browser_use/conftest.py b/tests/tools/browser_use/conftest.py index 0cce6b37dd..d30f51faf6 100644 --- a/tests/tools/browser_use/conftest.py +++ b/tests/tools/browser_use/conftest.py @@ -4,6 +4,7 @@ import pytest +from openhands.sdk.tool.schema import TextContent from openhands.tools.browser_use.definition import BrowserObservation from openhands.tools.browser_use.impl import BrowserToolExecutor @@ -30,9 +31,11 @@ def create_mock_browser_response( screenshot_data: str | None = None, ): """Helper to create mock browser responses.""" - return BrowserObservation( - output=output, error=error, screenshot_data=screenshot_data - ) + if error: + return BrowserObservation.from_text( + text=error, is_error=True, screenshot_data=screenshot_data + ) + return BrowserObservation.from_text(text=output, screenshot_data=screenshot_data) def assert_browser_observation_success( @@ -40,9 +43,15 @@ def assert_browser_observation_success( ): """Assert that a browser observation indicates success.""" assert isinstance(observation, BrowserObservation) - assert observation.error is None + assert observation.is_error is False if expected_output: - assert expected_output in observation.output + if isinstance(observation.content, str): + output_text = observation.content + else: + output_text = "".join( + [c.text for c in observation.content if isinstance(c, TextContent)] + ) + assert expected_output in output_text def assert_browser_observation_error( @@ -50,6 +59,6 @@ def assert_browser_observation_error( ): """Assert that a browser observation contains an error.""" assert isinstance(observation, BrowserObservation) - assert observation.error is not None + assert observation.is_error is True if expected_error: - assert expected_error in observation.error + assert expected_error in observation.text diff --git a/tests/tools/browser_use/test_browser_executor.py b/tests/tools/browser_use/test_browser_executor.py index d200f1e727..25377b26da 100644 --- a/tests/tools/browser_use/test_browser_executor.py +++ b/tests/tools/browser_use/test_browser_executor.py @@ -72,8 +72,8 @@ async def test_browser_executor_action_routing_get_state( mock_get_state, mock_browser_executor ): """Test that get_state actions are routed correctly and return directly.""" - expected_observation = BrowserObservation( - output="State retrieved", screenshot_data="base64data" + expected_observation = BrowserObservation.from_text( + text="State retrieved", screenshot_data="base64data" ) mock_get_state.return_value = expected_observation @@ -105,7 +105,7 @@ async def test_browser_executor_error_wrapping(mock_navigate, mock_browser_execu result = await mock_browser_executor._execute_action(action) assert_browser_observation_error(result, "Browser operation failed") - assert "Browser error occurred" in result.error + assert "Browser error occurred" in result.text def test_browser_executor_async_execution(mock_browser_executor): @@ -113,7 +113,7 @@ def test_browser_executor_async_execution(mock_browser_executor): with patch.object( mock_browser_executor, "_execute_action", new_callable=AsyncMock ) as mock_execute: - expected_result = BrowserObservation(output="Test result") + expected_result = BrowserObservation.from_text(text="Test result") mock_execute.return_value = expected_result action = BrowserNavigateAction(url="https://example.com") diff --git a/tests/tools/browser_use/test_browser_executor_e2e.py b/tests/tools/browser_use/test_browser_executor_e2e.py index 1c2cd9e4dd..551b5d4bef 100644 --- a/tests/tools/browser_use/test_browser_executor_e2e.py +++ b/tests/tools/browser_use/test_browser_executor_e2e.py @@ -171,11 +171,9 @@ def test_navigate_action( result = browser_executor(action) assert isinstance(result, BrowserObservation) - assert result.error is None - assert ( - "successfully" in result.output.lower() - or "navigated" in result.output.lower() - ) + assert not result.is_error + output_text = result.text.lower() + assert "successfully" in output_text or "navigated" in output_text def test_get_state_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -190,8 +188,8 @@ def test_get_state_action( result = browser_executor(action) assert isinstance(result, BrowserObservation) - assert result.error is None - assert "Browser Test Page" in result.output + assert not result.is_error + assert "Browser Test Page" in result.text def test_get_state_with_screenshot( self, browser_executor: BrowserToolExecutor, test_server: str @@ -206,7 +204,7 @@ def test_get_state_with_screenshot( result = browser_executor(action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error assert result.screenshot_data is not None assert len(result.screenshot_data) > 0 @@ -224,14 +222,14 @@ def test_click_action( # Parse the state to find button index # The test button should be indexed in the interactive elements - assert "Click Me" in state_result.output + assert "Click Me" in state_result.text # Try to click the first interactive element (likely the button) click_action = BrowserClickAction(index=0) result = browser_executor(click_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: str): """Test typing text into an input field.""" @@ -244,7 +242,8 @@ def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: s state_result = browser_executor(get_state_action) # Look for input field in the state - assert "test-input" in state_result.output or "Type here" in state_result.output + state_output = state_result.text + assert "test-input" in state_output or "Type here" in state_output # Find the input field index and type into it # This assumes the input field is one of the interactive elements @@ -252,7 +251,7 @@ def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: s result = browser_executor(type_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error def test_scroll_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -267,14 +266,14 @@ def test_scroll_action( result = browser_executor(scroll_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error # Scroll back up scroll_up_action = BrowserScrollAction(direction="up") result = browser_executor(scroll_up_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error def test_get_content_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -289,8 +288,8 @@ def test_get_content_action( result = browser_executor(content_action) assert isinstance(result, BrowserObservation) - assert result.error is None - assert "Browser Test Page" in result.output + assert not result.is_error + assert "Browser Test Page" in result.text # Get content with links content_with_links_action = BrowserGetContentAction( @@ -299,8 +298,8 @@ def test_get_content_action( result = browser_executor(content_with_links_action) assert isinstance(result, BrowserObservation) - assert result.error is None - assert "Browser Test Page" in result.output + assert not result.is_error + assert "Browser Test Page" in result.text def test_navigate_new_tab( self, browser_executor: BrowserToolExecutor, test_server: str @@ -311,7 +310,7 @@ def test_navigate_new_tab( result = browser_executor(action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error def test_list_tabs_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -326,9 +325,9 @@ def test_list_tabs_action( result = browser_executor(list_tabs_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error # Should contain tab information - assert len(result.output) > 0 + assert len(result.text) > 0 def test_go_back_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -348,7 +347,7 @@ def test_go_back_action( result = browser_executor(back_action) assert isinstance(result, BrowserObservation) - assert result.error is None + assert not result.is_error def test_switch_tab_action( self, browser_executor: BrowserToolExecutor, test_server: str @@ -370,7 +369,7 @@ def test_switch_tab_action( # Parse tab information to get a tab ID # This is a simplified approach - in practice you'd parse the JSON response - if "tab" in tabs_result.output.lower(): + if "tab" in tabs_result.text.lower(): # Try to switch to first tab (assuming tab ID format) switch_action = BrowserSwitchTabAction(tab_id="0") result = browser_executor(switch_action) @@ -428,24 +427,24 @@ def test_concurrent_actions( """Test that multiple actions can be executed in sequence.""" # Navigate navigate_result = browser_executor(BrowserNavigateAction(url=test_server)) - assert navigate_result.error is None + assert not navigate_result.is_error # Get state state_result = browser_executor(BrowserGetStateAction(include_screenshot=False)) - assert state_result.error is None + assert not state_result.is_error # Scroll scroll_result = browser_executor(BrowserScrollAction(direction="down")) - assert scroll_result.error is None + assert not scroll_result.is_error # Get content content_result = browser_executor( BrowserGetContentAction(extract_links=False, start_from_char=0) ) - assert content_result.error is None + assert not content_result.is_error # All actions should complete successfully assert all( - result.error is None + not result.is_error for result in [navigate_result, state_result, scroll_result, content_result] ) diff --git a/tests/tools/browser_use/test_browser_observation.py b/tests/tools/browser_use/test_browser_observation.py index b8ae22e000..09992781ee 100644 --- a/tests/tools/browser_use/test_browser_observation.py +++ b/tests/tools/browser_use/test_browser_observation.py @@ -6,37 +6,37 @@ def test_browser_observation_basic_output(): """Test basic BrowserObservation creation with output.""" - observation = BrowserObservation(output="Test output") + observation = BrowserObservation.from_text(text="Test output") - assert observation.output == "Test output" - assert observation.error is None + assert observation.text == "Test output" + assert observation.is_error is False assert observation.screenshot_data is None def test_browser_observation_with_error(): """Test BrowserObservation with error.""" - observation = BrowserObservation(output="", error="Test error") + observation = BrowserObservation.from_text(text="Test error", is_error=True) - assert observation.output == "" - assert observation.error == "Test error" + assert observation.text == "Test error" + assert observation.is_error is True assert observation.screenshot_data is None def test_browser_observation_with_screenshot(): """Test BrowserObservation with screenshot data.""" screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg==" # noqa: E501 - observation = BrowserObservation( - output="Screenshot taken", screenshot_data=screenshot_data + observation = BrowserObservation.from_text( + text="Screenshot taken", screenshot_data=screenshot_data ) - assert observation.output == "Screenshot taken" - assert observation.error is None + assert observation.text == "Screenshot taken" + assert observation.is_error is False assert observation.screenshot_data == screenshot_data def test_browser_observation_to_llm_content_text_only(): """Test to_llm_content property with text only.""" - observation = BrowserObservation(output="Test output") + observation = BrowserObservation.from_text(text="Test output") agent_obs = observation.to_llm_content assert len(agent_obs) == 1 @@ -47,8 +47,8 @@ def test_browser_observation_to_llm_content_text_only(): def test_browser_observation_to_llm_content_with_screenshot(): """Test to_llm_content property with screenshot.""" screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg==" # noqa: E501 - observation = BrowserObservation( - output="Screenshot taken", screenshot_data=screenshot_data + observation = BrowserObservation.from_text( + text="Screenshot taken", screenshot_data=screenshot_data ) agent_obs = observation.to_llm_content @@ -63,19 +63,21 @@ def test_browser_observation_to_llm_content_with_screenshot(): def test_browser_observation_to_llm_content_with_error(): """Test to_llm_content property with error.""" - observation = BrowserObservation(output="", error="Test error") + observation = BrowserObservation.from_text(text="Test error", is_error=True) agent_obs = observation.to_llm_content - assert len(agent_obs) == 1 + assert len(agent_obs) == 2 assert isinstance(agent_obs[0], TextContent) - assert agent_obs[0].text == "Error: Test error" + assert agent_obs[0].text == BrowserObservation.ERROR_MESSAGE_HEADER + assert isinstance(agent_obs[1], TextContent) + assert "Test error" in agent_obs[1].text def test_browser_observation_output_truncation(): """Test output truncation for very long outputs.""" # Create a very long output string long_output = "x" * 100000 # 100k characters - observation = BrowserObservation(output=long_output) + observation = BrowserObservation.from_text(text=long_output) agent_obs = observation.to_llm_content @@ -89,7 +91,9 @@ def test_browser_observation_output_truncation(): def test_browser_observation_screenshot_data_url_conversion(): """Test that screenshot data is properly converted to data URL.""" screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg==" # noqa: E501 - observation = BrowserObservation(output="Test", screenshot_data=screenshot_data) + observation = BrowserObservation.from_text( + text="Test", screenshot_data=screenshot_data + ) agent_obs = observation.to_llm_content expected_data_url = f"data:image/png;base64,{screenshot_data}" @@ -101,10 +105,10 @@ def test_browser_observation_screenshot_data_url_conversion(): def test_browser_observation_empty_screenshot_handling(): """Test handling of empty or None screenshot data.""" - observation = BrowserObservation(output="Test", screenshot_data="") + observation = BrowserObservation.from_text(text="Test", screenshot_data="") agent_obs = observation.to_llm_content assert len(agent_obs) == 1 # Only text content, no image - observation = BrowserObservation(output="Test", screenshot_data=None) + observation = BrowserObservation.from_text(text="Test", screenshot_data=None) agent_obs = observation.to_llm_content assert len(agent_obs) == 1 # Only text content, no image diff --git a/tests/tools/delegation/test_delegation.py b/tests/tools/delegation/test_delegation.py index ee1a26a465..37d808a6b9 100644 --- a/tests/tools/delegation/test_delegation.py +++ b/tests/tools/delegation/test_delegation.py @@ -6,7 +6,7 @@ from pydantic import SecretStr from openhands.sdk.conversation.state import ConversationExecutionStatus -from openhands.sdk.llm import LLM +from openhands.sdk.llm import LLM, TextContent from openhands.tools.delegate import ( DelegateAction, DelegateExecutor, @@ -65,24 +65,36 @@ def test_delegate_action_creation(): def test_delegate_observation_creation(): """Test creating DelegateObservation instances.""" - # Test spawn observation - spawn_observation = DelegateObservation( + # Test spawn observation with string output + spawn_observation = DelegateObservation.from_text( + text="spawn: Sub-agents created successfully", command="spawn", - message="Sub-agents created successfully", ) - assert spawn_observation.command == "spawn" - assert spawn_observation.message == "Sub-agents created successfully" - # spawn observation doesn't have results field anymore - - # Test delegate observation - delegate_observation = DelegateObservation( + assert isinstance(spawn_observation.content, list) + assert spawn_observation.text == "spawn: Sub-agents created successfully" + # Verify to_llm_content returns TextContent + llm_content = spawn_observation.to_llm_content + assert len(llm_content) == 1 + assert isinstance(llm_content[0], TextContent) + assert llm_content[0].text == "spawn: Sub-agents created successfully" + + # Test delegate observation with string output + delegate_observation = DelegateObservation.from_text( + text=( + "delegate: Tasks completed successfully\n\nResults:\n" + "1. Result 1\n2. Result 2" + ), command="delegate", - message="Tasks completed successfully\n\nResults:\n1. Result 1\n2. Result 2", ) - assert delegate_observation.command == "delegate" - assert "Tasks completed successfully" in delegate_observation.message - assert "Result 1" in delegate_observation.message - assert "Result 2" in delegate_observation.message + assert isinstance(delegate_observation.content, list) + assert "Tasks completed successfully" in delegate_observation.text + assert "Result 1" in delegate_observation.text + assert "Result 2" in delegate_observation.text + # Verify to_llm_content + llm_content = delegate_observation.to_llm_content + assert len(llm_content) == 1 + assert isinstance(llm_content[0], TextContent) + assert "Tasks completed successfully" in llm_content[0].text def test_delegate_executor_delegate(): @@ -92,7 +104,8 @@ def test_delegate_executor_delegate(): # First spawn some agents spawn_action = DelegateAction(command="spawn", ids=["agent1", "agent2"]) spawn_observation = executor(spawn_action, parent_conversation) - assert "Successfully spawned" in spawn_observation.message + assert isinstance(spawn_observation.content, list) + assert "Successfully spawned" in spawn_observation.text # Then delegate tasks to them delegate_action = DelegateAction( @@ -101,22 +114,23 @@ def test_delegate_executor_delegate(): ) with patch.object(executor, "_delegate_tasks") as mock_delegate: - mock_observation = DelegateObservation( - command="delegate", - message=( - "Tasks completed successfully\n\nResults:\n" + mock_observation = DelegateObservation.from_text( + text=( + "delegate: Tasks completed successfully\n\nResults:\n" "1. Agent agent1: Code analysis complete\n" "2. Agent agent2: Tests written" ), + command="delegate", ) mock_delegate.return_value = mock_observation observation = executor(delegate_action, parent_conversation) assert isinstance(observation, DelegateObservation) - assert observation.command == "delegate" - assert "Agent agent1: Code analysis complete" in observation.message - assert "Agent agent2: Tests written" in observation.message + assert isinstance(observation.content, list) + text_content = observation.text + assert "Agent agent1: Code analysis complete" in text_content + assert "Agent agent2: Tests written" in text_content def test_delegate_executor_missing_task(): @@ -129,10 +143,13 @@ def test_delegate_executor_missing_task(): observation = executor(action, parent_conversation) assert isinstance(observation, DelegateObservation) - assert observation.command == "delegate" + # Error message should be in the error field + assert observation.is_error + assert observation.is_error is True + content_text = observation.text assert ( - "task is required" in observation.message.lower() - or "at least one task" in observation.message.lower() + "task is required" in content_text.lower() + or "at least one task" in content_text.lower() ) diff --git a/tests/tools/execute_bash/test_bash_ps1_metadata.py b/tests/tools/execute_bash/test_bash_ps1_metadata.py index df5cdc6b74..9dd94b54e3 100644 --- a/tests/tools/execute_bash/test_bash_ps1_metadata.py +++ b/tests/tools/execute_bash/test_bash_ps1_metadata.py @@ -1,6 +1,5 @@ import json -from openhands.sdk import TextContent from openhands.tools.execute_bash.constants import ( CMD_OUTPUT_METADATA_PS1_REGEX, CMD_OUTPUT_PS1_BEGIN, @@ -270,14 +269,19 @@ def test_ps1_metadata_regex_pattern(): def test_cmd_output_observation_properties(): """Test ExecuteBashObservation class properties""" + from openhands.sdk.tool.schema import TextContent + # Test with successful command metadata = CmdOutputMetadata(exit_code=0, pid=123) - obs = ExecuteBashObservation( - command="ls", output="file1\nfile2", exit_code=0, metadata=metadata + obs = ExecuteBashObservation.from_text( + text="file1\nfile2", + command="ls", + exit_code=0, + metadata=metadata, ) assert obs.command_id == 123 assert obs.exit_code == 0 - assert not obs.error + assert not obs.is_error assert len(obs.to_llm_content) == 1 assert isinstance(obs.to_llm_content[0], TextContent) assert "exit code 0" in obs.to_llm_content[0].text @@ -287,16 +291,21 @@ def test_cmd_output_observation_properties(): # Test with failed command metadata = CmdOutputMetadata(exit_code=1, pid=456) - obs = ExecuteBashObservation( - command="invalid", output="error", exit_code=1, error=True, metadata=metadata + obs = ExecuteBashObservation.from_text( + text="Command failed", + command="invalid", + exit_code=1, + is_error=True, + metadata=metadata, ) assert obs.command_id == 456 assert obs.exit_code == 1 - assert obs.error - assert len(obs.to_llm_content) == 1 + assert obs.is_error + assert len(obs.to_llm_content) == 2 assert isinstance(obs.to_llm_content[0], TextContent) - assert "exit code 1" in obs.to_llm_content[0].text - assert obs.error + assert obs.to_llm_content[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER + assert isinstance(obs.to_llm_content[1], TextContent) + assert "Command failed" in obs.to_llm_content[1].text def test_ps1_metadata_empty_fields(): diff --git a/tests/tools/execute_bash/test_bash_reset.py b/tests/tools/execute_bash/test_bash_reset.py index e19e7d2286..8335bd0556 100644 --- a/tests/tools/execute_bash/test_bash_reset.py +++ b/tests/tools/execute_bash/test_bash_reset.py @@ -43,13 +43,13 @@ def test_bash_reset_basic(): action = ExecuteBashAction(command="echo $TEST_VAR") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert "hello" in result.output + assert "hello" in result.text # Reset the terminal reset_action = ExecuteBashAction(command="", reset=True) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text assert reset_result.command == "[RESET]" # Verify the variable is no longer set after reset @@ -57,7 +57,7 @@ def test_bash_reset_basic(): result = tool(action) assert isinstance(result, ExecuteBashObservation) # The variable should be empty after reset - assert result.output.strip() == "" + assert result.text.strip() == "" def test_bash_reset_with_command(): @@ -78,15 +78,15 @@ def test_bash_reset_with_command(): ) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output - assert "hello from fresh terminal" in reset_result.output + assert "Terminal session has been reset" in reset_result.text + assert "hello from fresh terminal" in reset_result.text assert reset_result.command == "[RESET] echo 'hello from fresh terminal'" # Verify the variable is no longer set (confirming reset worked) action = ExecuteBashAction(command="echo $TEST_VAR") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert result.output.strip() == "" + assert result.text.strip() == "" def test_bash_reset_working_directory(): @@ -99,7 +99,7 @@ def test_bash_reset_working_directory(): action = ExecuteBashAction(command="pwd") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert temp_dir in result.output + assert temp_dir in result.text # Change directory action = ExecuteBashAction(command="cd /home") @@ -110,19 +110,19 @@ def test_bash_reset_working_directory(): action = ExecuteBashAction(command="pwd") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert "/home" in result.output + assert "/home" in result.text # Reset the terminal reset_action = ExecuteBashAction(command="", reset=True) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text # Verify working directory is back to original action = ExecuteBashAction(command="pwd") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert temp_dir in result.output + assert temp_dir in result.text def test_bash_reset_multiple_times(): @@ -135,25 +135,25 @@ def test_bash_reset_multiple_times(): reset_action = ExecuteBashAction(command="", reset=True) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text # Execute a command after first reset action = ExecuteBashAction(command="echo 'after first reset'") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert "after first reset" in result.output + assert "after first reset" in result.text # Second reset reset_action = ExecuteBashAction(command="", reset=True) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text # Execute a command after second reset action = ExecuteBashAction(command="echo 'after second reset'") result = tool(action) assert isinstance(result, ExecuteBashObservation) - assert "after second reset" in result.output + assert "after second reset" in result.text def test_bash_reset_with_timeout(): @@ -166,7 +166,7 @@ def test_bash_reset_with_timeout(): reset_action = ExecuteBashAction(command="", reset=True, timeout=5.0) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text assert reset_result.command == "[RESET]" @@ -196,5 +196,5 @@ def test_bash_reset_only_with_empty_command(): reset_action = ExecuteBashAction(command="", reset=True) reset_result = tool(reset_action) assert isinstance(reset_result, ExecuteBashObservation) - assert "Terminal session has been reset" in reset_result.output + assert "Terminal session has been reset" in reset_result.text assert reset_result.command == "[RESET]" diff --git a/tests/tools/execute_bash/test_bash_session.py b/tests/tools/execute_bash/test_bash_session.py index e9afd067af..ee827c9a34 100644 --- a/tests/tools/execute_bash/test_bash_session.py +++ b/tests/tools/execute_bash/test_bash_session.py @@ -17,7 +17,10 @@ from openhands.sdk import TextContent from openhands.sdk.logger import get_logger -from openhands.tools.execute_bash.definition import ExecuteBashAction +from openhands.tools.execute_bash.definition import ( + ExecuteBashAction, + ExecuteBashObservation, +) from openhands.tools.execute_bash.terminal import ( TerminalCommandStatus, create_terminal_session, @@ -43,7 +46,7 @@ def test_session_initialization(terminal_type): session.initialize() obs = session.execute(ExecuteBashAction(command="pwd")) - assert temp_dir in obs.output + assert temp_dir in obs.text assert "[The command completed with exit code 0.]" in obs.metadata.suffix session.close() @@ -66,7 +69,7 @@ def test_cwd_property(tmp_path, terminal_type): # For other implementations, just verify the command executed successfully obs = session.execute(ExecuteBashAction(command="pwd")) - assert str(random_dir) in obs.output + assert str(random_dir) in obs.text # Note: CWD tracking may vary between terminal implementations # For tmux, it should track properly. For subprocess, it may not. @@ -84,7 +87,7 @@ def test_basic_command(terminal_type): # Test simple command obs = session.execute(ExecuteBashAction(command="echo 'hello world'")) - assert "hello world" in obs.output + assert "hello world" in obs.text assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" # Note: prefix may vary between terminal implementations assert obs.metadata.exit_code == 0 @@ -95,16 +98,16 @@ def test_basic_command(terminal_type): # Note: Exit code handling may vary between terminal implementations # The important thing is that the error message is captured - assert "nonexistent_command: command not found" in obs.output + assert "nonexistent_command: command not found" in obs.text assert session.prev_status == TerminalCommandStatus.COMPLETED # Test multiple commands in sequence obs = session.execute( ExecuteBashAction(command='echo "first" && echo "second" && echo "third"') ) - assert "first" in obs.output - assert "second" in obs.output - assert "third" in obs.output + assert "first" in obs.text + assert "second" in obs.text + assert "third" in obs.text assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" # Note: prefix may vary between terminal implementations assert obs.metadata.exit_code == 0 @@ -125,7 +128,7 @@ def test_environment_variable_persistence(terminal_type): # Use the environment variable in a subsequent command obs = session.execute(ExecuteBashAction(command="echo $TEST_VAR")) - assert "hello world" in obs.output + assert "hello world" in obs.text assert obs.metadata.exit_code == 0 session.close() @@ -151,8 +154,8 @@ def test_environment_variable_inheritance_from_parent(terminal_type): # Check if the environment variable is available in the terminal obs = session.execute(ExecuteBashAction(command=f"echo ${test_var_name}")) - assert test_var_value in obs.output, ( - f"Expected '{test_var_value}' in output, but got: {obs.output}" + assert test_var_value in obs.text, ( + f"Expected '{test_var_value}' in output, but got: {obs.text}" ) assert obs.metadata.exit_code == 0 @@ -176,7 +179,7 @@ def test_long_running_command_follow_by_execute(): ExecuteBashAction(command="echo 1; sleep 3; echo 2; sleep 3; echo 3") ) - assert "1" in obs.output # First number should appear before timeout + assert "1" in obs.text # First number should appear before timeout assert obs.metadata.exit_code == -1 # -1 indicates command is still running assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT assert obs.metadata.suffix == get_no_change_timeout_suffix(2) @@ -185,7 +188,7 @@ def test_long_running_command_follow_by_execute(): # Continue watching output obs = session.execute(ExecuteBashAction(command="", is_input=True)) - assert "2" in obs.output + assert "2" in obs.text assert obs.metadata.prefix == "[Below is the output of the previous command.]\n" assert obs.metadata.suffix == get_no_change_timeout_suffix(2) assert obs.metadata.exit_code == -1 # -1 indicates command is still running @@ -194,7 +197,7 @@ def test_long_running_command_follow_by_execute(): # Test command that produces no output obs = session.execute(ExecuteBashAction(command="sleep 15")) - assert "3" not in obs.output + assert "3" not in obs.text assert obs.metadata.prefix == "[Below is the output of the previous command.]\n" assert "The previous command is still running" in obs.metadata.suffix assert obs.metadata.exit_code == -1 # -1 indicates command is still running @@ -205,7 +208,7 @@ def test_long_running_command_follow_by_execute(): # Run it again, this time it should produce output and then start a new command obs = session.execute(ExecuteBashAction(command="sleep 15")) - assert "3" in obs.output # Should see the final output from the previous command + assert "3" in obs.text # Should see the final output from the previous command assert obs.metadata.exit_code == -1 # -1 indicates new command is still running assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT @@ -227,7 +230,7 @@ def test_interactive_command(terminal_type): ) ) - assert "Enter name:" in obs.output + assert "Enter name:" in obs.text assert obs.metadata.exit_code == -1 # -1 indicates command is still running assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT assert obs.metadata.suffix == get_no_change_timeout_suffix(3) @@ -236,7 +239,7 @@ def test_interactive_command(terminal_type): # Send input obs = session.execute(ExecuteBashAction(command="John", is_input=True)) - assert "Hello John" in obs.output + assert "Hello John" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" assert obs.metadata.prefix == "" @@ -266,7 +269,7 @@ def test_interactive_command(terminal_type): obs = session.execute(ExecuteBashAction(command="EOF", is_input=True)) - assert "line 1" in obs.output and "line 2" in obs.output + assert "line 1" in obs.text and "line 2" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" assert obs.metadata.prefix == "" @@ -287,7 +290,7 @@ def test_ctrl_c(terminal_type): ExecuteBashAction(command="while true; do echo 'looping'; sleep 3; done"), ) - assert "looping" in obs.output + assert "looping" in obs.text assert obs.metadata.suffix == get_no_change_timeout_suffix(2) assert obs.metadata.prefix == "" assert obs.metadata.exit_code == -1 # -1 indicates command is still running @@ -317,14 +320,15 @@ def test_empty_command_error(terminal_type): # Test empty command without previous command obs = session.execute(ExecuteBashAction(command="")) - assert obs.error is True - assert obs.output == "ERROR: No previous running command to retrieve logs from." - assert len(obs.to_llm_content) == 1 + assert obs.is_error is True + assert obs.text == "No previous running command to retrieve logs from." + assert len(obs.to_llm_content) == 2 assert isinstance(obs.to_llm_content[0], TextContent) - assert "There was an error during command execution." in obs.to_llm_content[0].text + assert obs.to_llm_content[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER + assert isinstance(obs.to_llm_content[1], TextContent) assert ( - "ERROR: No previous running command to retrieve logs from." - in obs.to_llm_content[0].text + "No previous running command to retrieve logs from." + == obs.to_llm_content[1].text ) assert obs.metadata.exit_code == -1 assert obs.metadata.prefix == "" @@ -356,22 +360,22 @@ def test_command_output_continuation(terminal_type): if session.prev_status == TerminalCommandStatus.COMPLETED: # If the command completed immediately, verify we got all the output logger.info("Command completed immediately", extra={"msg_type": "TEST_INFO"}) - assert "1" in obs.output - assert "2" in obs.output - assert "3" in obs.output - assert "4" in obs.output - assert "5" in obs.output + assert "1" in obs.text + assert "2" in obs.text + assert "3" in obs.text + assert "4" in obs.text + assert "5" in obs.text assert "[The command completed with exit code 0.]" in obs.metadata.suffix else: # If the command timed out, verify we got the timeout message assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT - assert "1" in obs.output + assert "1" in obs.text assert "[The command has no new output after 1 seconds." in obs.metadata.suffix # Continue getting output until we see all numbers numbers_seen = set() for i in range(1, 6): - if str(i) in obs.output: + if str(i) in obs.text: numbers_seen.add(i) # We need to see numbers 2-5 and then the command completion @@ -383,7 +387,7 @@ def test_command_output_continuation(terminal_type): # Check for numbers in the output for i in range(1, 6): - if str(i) in obs.output and i not in numbers_seen: + if str(i) in obs.text and i not in numbers_seen: numbers_seen.add(i) logger.info( f"Found number {i} in output", extra={"msg_type": "TEST_INFO"} @@ -423,8 +427,8 @@ def test_long_output(terminal_type): ExecuteBashAction(command='for i in {1..5000}; do echo "Line $i"; done') ) - assert "Line 1" in obs.output - assert "Line 5000" in obs.output + assert "Line 1" in obs.text + assert "Line 5000" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.prefix == "" assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" @@ -443,8 +447,8 @@ def test_long_output_exceed_history_limit(terminal_type): ) assert "Previous command outputs are truncated" in obs.metadata.prefix - assert "Line 40000" in obs.output - assert "Line 50000" in obs.output + assert "Line 40000" in obs.text + assert "Line 50000" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" @@ -464,7 +468,7 @@ def test_multiline_command(): ) ) - assert "inside if" in obs.output + assert "inside if" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.prefix == "" assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" @@ -488,21 +492,21 @@ def test_python_interactive_input(terminal_type): # Start Python with the interactive script obs = session.execute(ExecuteBashAction(command=f'python3 -c "{python_script}"')) - assert "Enter your name:" in obs.output + assert "Enter your name:" in obs.text assert obs.metadata.exit_code == -1 # -1 indicates command is still running assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT # Send first input (name) obs = session.execute(ExecuteBashAction(command="Alice", is_input=True)) - assert "Enter your age:" in obs.output + assert "Enter your age:" in obs.text assert obs.metadata.exit_code == -1 assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT # Send second input (age) obs = session.execute(ExecuteBashAction(command="25", is_input=True)) - assert "Hello Alice, you are 25 years old" in obs.output + assert "Hello Alice, you are 25 years old" in obs.text assert obs.metadata.exit_code == 0 assert obs.metadata.suffix == "\n[The command completed with exit code 0.]" assert session.prev_status == TerminalCommandStatus.COMPLETED @@ -515,7 +519,8 @@ def _run_bash_action(session, command: str, **kwargs): action = ExecuteBashAction(command=command, **kwargs) obs = session.execute(action) logger.info(f"Command: {command}") - logger.info(f"Output: {obs.output}") + output_text = obs.text if obs.content else "" + logger.info(f"Output: {output_text}") logger.info(f"Exit code: {obs.metadata.exit_code}") return obs @@ -535,12 +540,12 @@ def test_bash_server(terminal_type): session, "python -u -m http.server 8081", timeout=1.0 ) assert obs.metadata.exit_code == -1 - assert "Serving HTTP on" in obs.output + assert "Serving HTTP on" in obs.text # Send Ctrl+C to interrupt obs = _run_bash_action(session, "C-c", is_input=True) assert "CTRL+C was sent" in obs.metadata.suffix - assert "Keyboard interrupt received, exiting." in obs.output + assert "Keyboard interrupt received, exiting." in obs.text # Verify we can run commands after interrupt obs = _run_bash_action(session, "ls") @@ -551,7 +556,7 @@ def test_bash_server(terminal_type): session, "python -u -m http.server 8081", timeout=1.0 ) assert obs.metadata.exit_code == -1 - assert "Serving HTTP on" in obs.output + assert "Serving HTTP on" in obs.text finally: session.close() @@ -578,7 +583,7 @@ def test_bash_background_server(terminal_type): obs = _run_bash_action(session, f"curl http://localhost:{server_port}") assert obs.metadata.exit_code == 0 # Check for content typical of python http.server directory listing - assert "Directory listing for" in obs.output + assert "Directory listing for" in obs.text # Kill the server obs = _run_bash_action(session, 'pkill -f "http.server"') @@ -601,17 +606,17 @@ def test_multiline_commands(terminal_type): # single multiline command obs = _run_bash_action(session, 'echo \\\n -e "foo"') assert obs.metadata.exit_code == 0 - assert "foo" in obs.output + assert "foo" in obs.text # test multiline echo obs = _run_bash_action(session, 'echo -e "hello\nworld"') assert obs.metadata.exit_code == 0 - assert "hello\nworld" in obs.output + assert "hello\nworld" in obs.text # test whitespace obs = _run_bash_action(session, 'echo -e "a\\n\\n\\nz"') assert obs.metadata.exit_code == 0 - assert "\n\n\n" in obs.output + assert "\n\n\n" in obs.text finally: session.close() @@ -634,7 +639,7 @@ def test_complex_commands(terminal_type): try: obs = _run_bash_action(session, cmd) assert obs.metadata.exit_code == 0 - assert "Got 3 heads in a row after 3 flips!" in obs.output + assert "Got 3 heads in a row after 3 flips!" in obs.text finally: session.close() @@ -651,8 +656,8 @@ def test_no_ps2_in_output(terminal_type): obs = _run_bash_action(session, 'echo -e "hello\nworld"') assert obs.metadata.exit_code == 0 - assert "hello\nworld" in obs.output - assert ">" not in obs.output + assert "hello\nworld" in obs.text + assert ">" not in obs.text finally: session.close() @@ -682,11 +687,11 @@ def test_multiline_command_loop(terminal_type): try: obs = _run_bash_action(session, init_cmd) assert obs.metadata.exit_code == 0 - assert "created files" in obs.output + assert "created files" in obs.text obs = _run_bash_action(session, follow_up_cmd) assert obs.metadata.exit_code == 0 - assert "success" in obs.output + assert "success" in obs.text finally: session.close() @@ -715,15 +720,15 @@ def test_multiple_multiline_commands(terminal_type): # First test that running multiple commands at once fails obs = _run_bash_action(session, joined_cmds) - assert obs.error is True - assert "Cannot execute multiple commands at once" in obs.output + assert obs.is_error is True + assert "Cannot execute multiple commands at once" in obs.text # Now run each command individually and verify they work results = [] for cmd in cmds: obs = _run_bash_action(session, cmd) assert obs.metadata.exit_code == 0 - results.append(obs.output) + results.append(obs.text) # Verify all expected outputs are present assert "total 0" in results[0] # ls -l @@ -756,21 +761,21 @@ def test_cmd_run(terminal_type): obs = _run_bash_action(session, "ls -l") assert obs.metadata.exit_code == 0 - assert "total 0" in obs.output + assert "total 0" in obs.text obs = _run_bash_action(session, "mkdir test") assert obs.metadata.exit_code == 0 obs = _run_bash_action(session, "ls -l") assert obs.metadata.exit_code == 0 - assert "test" in obs.output + assert "test" in obs.text obs = _run_bash_action(session, "touch test/foo.txt") assert obs.metadata.exit_code == 0 obs = _run_bash_action(session, "ls -l test") assert obs.metadata.exit_code == 0 - assert "foo.txt" in obs.output + assert "foo.txt" in obs.text # clean up _run_bash_action(session, "rm -rf test") @@ -792,7 +797,7 @@ def test_run_as_user_correct_home_dir(terminal_type): obs = _run_bash_action(session, "cd ~ && pwd") assert obs.metadata.exit_code == 0 home = os.getenv("HOME") - assert home and home in obs.output + assert home and home in obs.text finally: session.close() @@ -807,8 +812,8 @@ def test_multi_cmd_run_in_single_line(terminal_type): # Original Linux version using && obs = _run_bash_action(session, "pwd && ls -l") assert obs.metadata.exit_code == 0 - assert temp_dir in obs.output - assert "total 0" in obs.output + assert temp_dir in obs.text + assert "total 0" in obs.text finally: session.close() @@ -831,7 +836,7 @@ def test_stateful_cmd(terminal_type): obs = _run_bash_action(session, "pwd") assert obs.metadata.exit_code == 0 - assert f"{temp_dir}/test" in obs.output.strip() + assert f"{temp_dir}/test" in obs.text.strip() finally: session.close() @@ -862,7 +867,7 @@ def test_python_version(terminal_type): try: obs = _run_bash_action(session, "python --version") assert obs.metadata.exit_code == 0 - assert "Python 3" in obs.output + assert "Python 3" in obs.text finally: session.close() @@ -882,7 +887,7 @@ def test_pwd_property(terminal_type): obs = _run_bash_action(session, "cd random_dir && pwd") assert obs.metadata.exit_code == 0 - assert "random_dir" in obs.output + assert "random_dir" in obs.text finally: session.close() @@ -911,10 +916,10 @@ def test_long_output_from_nested_directories(terminal_type): assert obs.metadata.exit_code == 0 # Verify output contains expected files - assert "folder_1" in obs.output - assert "file_1.txt" in obs.output - assert "folder_100" in obs.output - assert "file_100.txt" in obs.output + assert "folder_1" in obs.text + assert "file_1.txt" in obs.text + assert "folder_100" in obs.text + assert "file_100.txt" in obs.text finally: session.close() @@ -948,7 +953,7 @@ def test_command_backslash(terminal_type): ) obs = _run_bash_action(session, cmd) assert obs.metadata.exit_code == 0 - assert "/tmp/test_dir/file_1.txt" in obs.output + assert "/tmp/test_dir/file_1.txt" in obs.text finally: session.close() @@ -972,7 +977,7 @@ def test_bash_remove_prefix(terminal_type): # Check git remote - same for both platforms obs = _run_bash_action(session, "git remote -v") assert obs.metadata.exit_code == 0 - assert "https://github.com/OpenHands/OpenHands" in obs.output - assert "git remote -v" not in obs.output + assert "https://github.com/OpenHands/OpenHands" in obs.text + assert "git remote -v" not in obs.text finally: session.close() diff --git a/tests/tools/execute_bash/test_bash_tool.py b/tests/tools/execute_bash/test_bash_tool.py index 61aed7fee2..fd9e088693 100644 --- a/tests/tools/execute_bash/test_bash_tool.py +++ b/tests/tools/execute_bash/test_bash_tool.py @@ -69,7 +69,7 @@ def test_bash_tool_execution(): # Check the result assert result is not None assert isinstance(result, ExecuteBashObservation) - assert "Hello, World!" in result.output + assert "Hello, World!" in result.text def test_bash_tool_working_directory(): @@ -87,7 +87,7 @@ def test_bash_tool_working_directory(): # Check that the working directory is correct assert isinstance(result, ExecuteBashObservation) - assert temp_dir in result.output + assert temp_dir in result.text def test_bash_tool_to_openai_tool(): diff --git a/tests/tools/execute_bash/test_bash_tool_auto_detection.py b/tests/tools/execute_bash/test_bash_tool_auto_detection.py index 5dcb10427b..dbc1c36349 100644 --- a/tests/tools/execute_bash/test_bash_tool_auto_detection.py +++ b/tests/tools/execute_bash/test_bash_tool_auto_detection.py @@ -52,7 +52,7 @@ def test_default_auto_detection(): # Test that it works action = ExecuteBashAction(command="echo 'Auto-detection test'") obs = executor(action) - assert "Auto-detection test" in obs.output + assert "Auto-detection test" in obs.text def test_forced_terminal_types(): @@ -138,7 +138,7 @@ def test_backward_compatibility(): assert tool.executor is not None action = ExecuteBashAction(command="echo 'Backward compatibility test'") obs = tool.executor(action) - assert "Backward compatibility test" in obs.output + assert "Backward compatibility test" in obs.text assert obs.metadata.exit_code == 0 diff --git a/tests/tools/execute_bash/test_observation_truncation.py b/tests/tools/execute_bash/test_observation_truncation.py index 1be00839fc..9e61976310 100644 --- a/tests/tools/execute_bash/test_observation_truncation.py +++ b/tests/tools/execute_bash/test_observation_truncation.py @@ -18,9 +18,9 @@ def test_execute_bash_observation_truncation_under_limit(): ) observation = ExecuteBashObservation( - output="Short output", + command="echo test", + content=[TextContent(text="Short output")], metadata=metadata, - error=False, ) result = observation.to_llm_content @@ -52,9 +52,9 @@ def test_execute_bash_observation_truncation_over_limit(): long_output = "A" * (MAX_CMD_OUTPUT_SIZE + 1000) observation = ExecuteBashObservation( - output=long_output, + command="echo test", + content=[TextContent(text=long_output)], metadata=metadata, - error=False, ) result = observation.to_llm_content @@ -89,18 +89,21 @@ def test_execute_bash_observation_truncation_with_error(): long_output = "B" * (MAX_CMD_OUTPUT_SIZE + 500) observation = ExecuteBashObservation( - output=long_output, + command="false", + content=[TextContent(text=long_output)], metadata=metadata, - error=True, + is_error=True, ) result = observation.to_llm_content - assert len(result) == 1 + assert len(result) == 2 assert isinstance(result[0], TextContent) - result = result[0].text + assert result[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER - # The result should be truncated and have error prefix - assert result.startswith("[There was an error during command execution.]") + assert isinstance(result[1], TextContent) + result = result[1].text + + # The result should be truncated assert len(result) < len(long_output) + 300 # Account for metadata and error prefix # With head-and-tail truncation, should end with original content + metadata expected_end = ( @@ -132,9 +135,9 @@ def test_execute_bash_observation_truncation_exact_limit(): exact_output = "C" * exact_output_size observation = ExecuteBashObservation( - output=exact_output, + command="echo test", + content=[TextContent(text=exact_output)], metadata=metadata, - error=False, ) result = observation.to_llm_content @@ -162,9 +165,9 @@ def test_execute_bash_observation_truncation_with_prefix_suffix(): long_output = "D" * (MAX_CMD_OUTPUT_SIZE + 200) observation = ExecuteBashObservation( - output=long_output, + command="echo test", + content=[TextContent(text=long_output)], metadata=metadata, - error=False, ) result = observation.to_llm_content diff --git a/tests/tools/execute_bash/test_secrets_masking.py b/tests/tools/execute_bash/test_secrets_masking.py index 2fa56d2777..16d5917022 100644 --- a/tests/tools/execute_bash/test_secrets_masking.py +++ b/tests/tools/execute_bash/test_secrets_masking.py @@ -8,6 +8,7 @@ from openhands.sdk.agent import Agent from openhands.sdk.conversation import Conversation from openhands.sdk.llm import LLM +from openhands.sdk.tool.schema import TextContent from openhands.tools.execute_bash import ExecuteBashAction, ExecuteBashObservation from openhands.tools.execute_bash.impl import BashExecutor @@ -24,8 +25,8 @@ def test_bash_executor_without_conversation(): result = executor(action) # Check that the output is not masked (no conversation provided) - assert "secret-value-123" in result.output - assert "" not in result.output + assert "secret-value-123" in result.text + assert "" not in result.text finally: executor.close() @@ -62,7 +63,9 @@ def test_bash_executor_with_conversation_secrets(): mock_observation = ExecuteBashObservation( command="echo 'Token: $SECRET_TOKEN, Key: $API_KEY'", exit_code=0, - output="Token: secret-value-123, Key: another-secret-456", + content=[ + TextContent(text="Token: secret-value-123, Key: another-secret-456") + ], ) mock_session.execute.return_value = mock_observation mock_session._closed = False @@ -78,10 +81,10 @@ def test_bash_executor_with_conversation_secrets(): assert mock_session.execute.called # Check that both secrets were masked in the output - assert "secret-value-123" not in result.output - assert "another-secret-456" not in result.output + assert "secret-value-123" not in result.text + assert "another-secret-456" not in result.text # SecretsManager uses as the mask - assert "" in result.output + assert "" in result.text finally: executor.close() diff --git a/tests/tools/file_editor/conftest.py b/tests/tools/file_editor/conftest.py index d1f52f659e..2588541733 100644 --- a/tests/tools/file_editor/conftest.py +++ b/tests/tools/file_editor/conftest.py @@ -3,6 +3,7 @@ import pytest +from openhands.sdk.tool.schema import TextContent from openhands.tools.file_editor.definition import ( FileEditorObservation, ) @@ -56,7 +57,7 @@ def assert_successful_result( ): """Assert that a result is successful (no error).""" assert isinstance(result, FileEditorObservation) - assert result.error is None + assert not result.is_error if expected_path: assert result.path == expected_path @@ -66,9 +67,14 @@ def assert_error_result( ): """Assert that a result contains an error.""" assert isinstance(result, FileEditorObservation) - assert result.error is not None + assert result.is_error if expected_error_substring: - assert expected_error_substring in result.error + content_text = ( + result.content + if isinstance(result.content, str) + else "".join([c.text for c in result.content if isinstance(c, TextContent)]) + ) + assert expected_error_substring in content_text def create_test_file(path: Path, content: str): diff --git a/tests/tools/file_editor/test_basic_operations.py b/tests/tools/file_editor/test_basic_operations.py index 16624d890c..c26f6c1fcf 100644 --- a/tests/tools/file_editor/test_basic_operations.py +++ b/tests/tools/file_editor/test_basic_operations.py @@ -62,11 +62,11 @@ def test_file_editor_happy_path(temp_file): # Validate the result assert_successful_result(result, str(temp_file)) assert ( - result.output is not None - and "The file" in result.output - and "has been edited" in result.output + result.text is not None + and "The file" in result.text + and "has been edited" in result.text ) - assert result.output is not None and "This is a sample file." in result.output + assert result.text is not None and "This is a sample file." in result.text assert result.path == str(temp_file) assert result.prev_exist is True assert ( @@ -106,15 +106,15 @@ def test_file_editor_view_operation(temp_file): # Validate the result assert_successful_result(result, str(temp_file)) assert ( - result.output is not None - and "Here's the result of running `cat -n`" in result.output + result.text is not None + and "Here's the result of running `cat -n`" in result.text ) assert ( - result.output is not None - and "This is a file with XML tags parsing logic..." in result.output + result.text is not None + and "This is a file with XML tags parsing logic..." in result.text ) - assert result.output is not None and "match = re.search(" in result.output - assert result.output is not None and "...More text here." in result.output + assert result.text is not None and "match = re.search(" in result.text + assert result.text is not None and "...More text here." in result.text def test_successful_operations(temp_file): @@ -131,10 +131,10 @@ def test_successful_operations(temp_file): ) assert_successful_result(result) assert ( - result.output is not None - and "Here's the result of running `cat -n`" in result.output + result.text is not None + and "Here's the result of running `cat -n`" in result.text ) - assert result.output is not None and "line 1" in result.output + assert result.text is not None and "line 1" in result.text # Test str_replace result = file_editor( @@ -144,8 +144,8 @@ def test_successful_operations(temp_file): new_str="replaced line", ) assert_successful_result(result) - assert result.output is not None and "has been edited" in result.output - assert result.output is not None and "replaced line" in result.output + assert result.text is not None and "has been edited" in result.text + assert result.text is not None and "replaced line" in result.text # Test insert result = file_editor( @@ -155,8 +155,8 @@ def test_successful_operations(temp_file): new_str="inserted line", ) assert_successful_result(result) - assert result.output is not None and "has been edited" in result.output - assert result.output is not None and "inserted line" in result.output + assert result.text is not None and "has been edited" in result.text + assert result.text is not None and "inserted line" in result.text # Test undo result = file_editor( @@ -164,7 +164,7 @@ def test_successful_operations(temp_file): path=str(temp_file), ) assert_successful_result(result) - assert result.output is not None and "undone successfully" in result.output + assert result.text is not None and "undone successfully" in result.text def test_tab_expansion(temp_file): @@ -181,8 +181,8 @@ def test_tab_expansion(temp_file): ) assert_successful_result(result) # Tabs should be preserved in output - assert result.output is not None and "\tindented" in result.output - assert result.output is not None and "line\twith\ttabs" in result.output + assert result.text is not None and "\tindented" in result.text + assert result.text is not None and "line\twith\ttabs" in result.text # Test str_replace with tabs in old_str result = file_editor( @@ -192,7 +192,7 @@ def test_tab_expansion(temp_file): new_str="replaced line", ) assert_successful_result(result) - assert result.output is not None and "replaced line" in result.output + assert result.text is not None and "replaced line" in result.text # Test str_replace with tabs in new_str result = file_editor( @@ -202,7 +202,7 @@ def test_tab_expansion(temp_file): new_str="new\tline\twith\ttabs", ) assert_successful_result(result) - assert result.output is not None and "new\tline\twith\ttabs" in result.output + assert result.text is not None and "new\tline\twith\ttabs" in result.text # Test insert with tabs result = file_editor( @@ -212,7 +212,7 @@ def test_tab_expansion(temp_file): new_str="\tindented\tline", ) assert_successful_result(result) - assert result.output is not None and "\tindented\tline" in result.output + assert result.text is not None and "\tindented\tline" in result.text def test_create_operation(temp_file): @@ -229,7 +229,7 @@ def test_create_operation(temp_file): ) assert_successful_result(result, str(temp_file)) - assert result.output is not None and "created successfully" in result.output + assert result.text is not None and "created successfully" in result.text assert result.prev_exist is False assert result.new_content == content @@ -258,29 +258,29 @@ def test_view_operation_truncation(temp_file): ) assert_successful_result(result) - assert result.output is not None + assert result.text is not None # Check that truncation notice is present - assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.output + assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.text # The content should be truncated before line numbers are added # So the final output will be longer than MAX_RESPONSE_LEN_CHAR due to formatting # but the original content was truncated - assert "Here's the result of running `cat -n`" in result.output + assert "Here's the result of running `cat -n`" in result.text # With head-and-tail truncation, should contain both start and end content # The line numbers will show as " 1\tA..." at start and end with "A" - assert "\tA" in result.output # Should have A's with tab formatting + assert "\tA" in result.text # Should have A's with tab formatting def test_view_file(editor): editor, test_file = editor result = editor(command="view", path=str(test_file)) assert isinstance(result, FileEditorObservation) - assert f"Here's the result of running `cat -n` on {test_file}:" in result.output - assert "1\tThis is a test file." in result.output - assert "2\tThis file is for testing purposes." in result.output - assert "3\t" not in result.output # No extra line + assert f"Here's the result of running `cat -n` on {test_file}:" in result.text + assert "1\tThis is a test file." in result.text + assert "2\tThis file is for testing purposes." in result.text + assert "3\t" not in result.text # No extra line def test_view_directory(editor): @@ -288,7 +288,7 @@ def test_view_directory(editor): parent_dir = test_file.parent result = editor(command="view", path=str(parent_dir)) assert ( - result.output + result.text == f"""Here's the files and directories up to 2 levels deep in {parent_dir}, excluding hidden items: {parent_dir}/ {parent_dir}/test.txt""" # noqa: E501 @@ -315,11 +315,11 @@ def test_view_with_a_specific_range(editor): # View file in range 50-100 result = editor(command="view", path=str(test_file), view_range=[50, 100]) - assert f"Here's the result of running `cat -n` on {test_file}:" in result.output - assert " 49\tLine 49" not in result.output - assert " 50\tLine 50" in result.output - assert " 100\tLine 100" in result.output - assert "101" not in result.output + assert f"Here's the result of running `cat -n` on {test_file}:" in result.text + assert " 49\tLine 49" not in result.text + assert " 50\tLine 50" in result.text + assert " 100\tLine 100" in result.text + assert "101" not in result.text def test_create_file(editor): @@ -328,7 +328,7 @@ def test_create_file(editor): result = editor(command="create", path=str(new_file), file_text="New file content") assert new_file.exists() assert new_file.read_text() == "New file content" - assert "File created successfully" in result.output + assert "File created successfully" in result.text def test_create_with_empty_string(editor): @@ -337,12 +337,12 @@ def test_create_with_empty_string(editor): result = editor(command="create", path=str(new_file), file_text="") assert new_file.exists() assert new_file.read_text() == "" - assert "File created successfully" in result.output + assert "File created successfully" in result.text # Test the view command showing an empty line result = editor(command="view", path=str(new_file)) - assert f"Here's the result of running `cat -n` on {new_file}:" in result.output - assert "1\t" in result.output # Check for empty line + assert f"Here's the result of running `cat -n` on {new_file}:" in result.text + assert "1\t" in result.text # Check for empty line def test_create_with_none_file_text(editor): @@ -365,7 +365,7 @@ def test_str_replace_no_linting(editor): # Test str_replace command assert ( - result.output + result.text == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}: 1\tThis is a sample file. 2\tThis file is for testing purposes. @@ -388,7 +388,7 @@ def test_str_replace_multi_line_no_linting(editor): # Test str_replace command assert ( - result.output + result.text == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}: 1\tThis is a sample file. 2\tThis file is for testing purposes. @@ -407,7 +407,7 @@ def test_str_replace_multi_line_with_tabs_no_linting(editor_python_file_with_tab assert isinstance(result, FileEditorObservation) assert ( - result.output + result.text == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}: 1\tdef test(): 2\t\tprint("Hello, Universe!") @@ -510,7 +510,7 @@ def test_insert_no_linting(editor): assert isinstance(result, FileEditorObservation) assert "Inserted line" in test_file.read_text() assert ( - result.output + result.text == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file: 1\tThis is a test file. 2\tInserted line @@ -559,7 +559,7 @@ def test_insert_chinese_text_into_english_file(editor): assert isinstance(result, FileEditorObservation) assert "中文文本" in test_file.read_text() assert ( - result.output + result.text == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file: 1\t中文文本 2\tThis is a test file. @@ -592,7 +592,7 @@ def test_undo_edit(editor): # Undo the edit result = editor(command="undo_edit", path=str(test_file)) assert isinstance(result, FileEditorObservation) - assert "Last edit to" in result.output + assert "Last edit to" in result.text assert "test file" in test_file.read_text() # Original content restored @@ -615,13 +615,13 @@ def test_multiple_undo_edits(editor): # Undo the last edit result = editor(command="undo_edit", path=str(test_file)) assert isinstance(result, FileEditorObservation) - assert "Last edit to" in result.output + assert "Last edit to" in result.text assert "sample file v1" in test_file.read_text() # Previous content restored # Undo the first edit result = editor(command="undo_edit", path=str(test_file)) assert isinstance(result, FileEditorObservation) - assert "Last edit to" in result.output + assert "Last edit to" in result.text assert "test file" in test_file.read_text() # Original content restored @@ -697,16 +697,16 @@ def test_view_directory_with_hidden_files(tmp_path): # Verify output assert isinstance(result, FileEditorObservation) - assert str(test_dir) in result.output - assert "visible.txt" in result.output # Visible file is shown - assert "visible_dir" in result.output # Visible directory is shown - assert ".hidden1" not in result.output # Hidden files not shown - assert ".hidden2" not in result.output - assert ".hidden_dir" not in result.output + assert str(test_dir) in result.text + assert "visible.txt" in result.text # Visible file is shown + assert "visible_dir" in result.text # Visible directory is shown + assert ".hidden1" not in result.text # Hidden files not shown + assert ".hidden2" not in result.text + assert ".hidden_dir" not in result.text assert ( - "3 hidden files/directories in this directory are excluded" in result.output + "3 hidden files/directories in this directory are excluded" in result.text ) # Shows count of hidden items in current dir only - assert "ls -la" in result.output # Shows command to view hidden files + assert "ls -la" in result.text # Shows command to view hidden files def test_view_symlinked_directory(tmp_path): @@ -732,11 +732,11 @@ def test_view_symlinked_directory(tmp_path): # Verify that all files are listed through the symlink assert isinstance(result, FileEditorObservation) - assert str(symlink_dir) in result.output - assert "file1.txt" in result.output - assert "file2.txt" in result.output - assert "subdir" in result.output - assert "file3.txt" in result.output + assert str(symlink_dir) in result.text + assert "file1.txt" in result.text + assert "file2.txt" in result.text + assert "subdir" in result.text + assert "file3.txt" in result.text def test_view_large_directory_with_truncation(editor, tmp_path): @@ -749,7 +749,7 @@ def test_view_large_directory_with_truncation(editor, tmp_path): result = editor(command="view", path=str(large_dir)) assert isinstance(result, FileEditorObservation) - assert DIRECTORY_CONTENT_TRUNCATED_NOTICE in result.output + assert DIRECTORY_CONTENT_TRUNCATED_NOTICE in result.text def test_view_directory_on_hidden_path(tmp_path): @@ -791,22 +791,22 @@ def test_view_directory_on_hidden_path(tmp_path): # Verify output assert isinstance(result, FileEditorObservation) # Depth 1: Visible files/dirs shown, hidden files/dirs not shown - assert "visible1.txt" in result.output - assert "visible_dir" in result.output - assert ".hidden1" not in result.output - assert ".hidden_dir" not in result.output + assert "visible1.txt" in result.text + assert "visible_dir" in result.text + assert ".hidden1" not in result.text + assert ".hidden_dir" not in result.text # Depth 2: Files in visible_dir shown - assert "visible2.txt" in result.output - assert ".hidden2" not in result.output + assert "visible2.txt" in result.text + assert ".hidden2" not in result.text # Depth 2: Files in hidden_dir not shown - assert "visible3.txt" not in result.output - assert ".hidden3" not in result.output + assert "visible3.txt" not in result.text + assert ".hidden3" not in result.text # Hidden file count only includes depth 1 assert ( - "2 hidden files/directories in this directory are excluded" in result.output + "2 hidden files/directories in this directory are excluded" in result.text ) # Only .hidden1 and .hidden_dir at depth 1 @@ -819,7 +819,7 @@ def test_view_large_file_with_truncation(editor, tmp_path): result = editor(command="view", path=str(large_file)) assert isinstance(result, FileEditorObservation) - assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.output + assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.text def test_validate_path_suggests_absolute_path(editor, tmp_path): @@ -868,8 +868,8 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor): # View file result = editor(command="view", path=str(test_file)) - assert " 1\tLine 1" in result.output - assert " 500\tLine 500" in result.output + assert " 1\tLine 1" in result.text + assert " 500\tLine 500" in result.text # Replace line 500's content with '500 new' result = editor( @@ -878,14 +878,14 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor): old_str="Line 500", new_str="500 new", ) - assert " 500\t500 new" in result.output + assert " 500\t500 new" in result.text # Delete the line '500 new' result = editor( command="str_replace", path=str(test_file), old_str="500 new\n", new_str="" ) - assert " 499\tLine 499" in result.output - assert " 500\tLine 501" in result.output + assert " 499\tLine 499" in result.text + assert " 500\tLine 501" in result.text # Insert content at line 500 result = editor( @@ -894,4 +894,4 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor): insert_line=499, new_str="Inserted line at 500", ) - assert " 500\tInserted line at 500" in result.output + assert " 500\tInserted line at 500" in result.text diff --git a/tests/tools/file_editor/test_error_handling.py b/tests/tools/file_editor/test_error_handling.py index 02ced3bb61..f705a2a09d 100644 --- a/tests/tools/file_editor/test_error_handling.py +++ b/tests/tools/file_editor/test_error_handling.py @@ -12,7 +12,7 @@ def test_validation_error_formatting(): path="/nonexistent/file.txt", ) assert_error_result(result) - assert result.error is not None and "does not exist" in result.error + assert result.is_error and "does not exist" in result.text # Test directory validation for non-view commands result = file_editor( @@ -22,10 +22,7 @@ def test_validation_error_formatting(): new_str="new", ) assert_error_result(result) - assert ( - result.error is not None - and "directory and only the `view` command" in result.error - ) + assert result.is_error and "directory and only the `view` command" in result.text def test_str_replace_error_handling(temp_file): @@ -43,7 +40,7 @@ def test_str_replace_error_handling(temp_file): new_str="something", ) assert_error_result(result) - assert result.error is not None and "did not appear verbatim" in result.error + assert result.is_error and "did not appear verbatim" in result.text # Test multiple occurrences with open(temp_file, "w") as f: @@ -56,8 +53,8 @@ def test_str_replace_error_handling(temp_file): new_str="new_line", ) assert_error_result(result) - assert result.error is not None and "Multiple occurrences" in result.error - assert result.error is not None and "lines [1, 2]" in result.error + assert result.is_error and "Multiple occurrences" in result.text + assert result.is_error and "lines [1, 2]" in result.text def test_view_range_validation(temp_file): @@ -74,9 +71,7 @@ def test_view_range_validation(temp_file): view_range=[1], # Should be [start, end] ) assert_error_result(result) - assert ( - result.error is not None and "should be a list of two integers" in result.error - ) + assert result.is_error and "should be a list of two integers" in result.text # Test out of bounds range: should clamp to file end and show a warning result = file_editor( @@ -85,10 +80,10 @@ def test_view_range_validation(temp_file): view_range=[1, 10], # File only has 3 lines ) # This should succeed but show a warning - assert result.error is None + assert not result.is_error assert ( "NOTE: We only show up to 3 since there're only 3 lines in this file." - in result.output + in result.text ) # Test invalid range order @@ -98,10 +93,7 @@ def test_view_range_validation(temp_file): view_range=[3, 1], # End before start ) assert_error_result(result) - assert ( - result.error is not None - and "should be greater than or equal to" in result.error - ) + assert result.is_error and "should be greater than or equal to" in result.text def test_insert_validation(temp_file): @@ -119,7 +111,7 @@ def test_insert_validation(temp_file): new_str="new line", ) assert_error_result(result) - assert result.error is not None and "should be within the range" in result.error + assert result.is_error and "should be within the range" in result.text # Test insert beyond file length result = file_editor( @@ -129,7 +121,7 @@ def test_insert_validation(temp_file): new_str="new line", ) assert_error_result(result) - assert result.error is not None and "should be within the range" in result.error + assert result.is_error and "should be within the range" in result.text def test_undo_validation(temp_file): @@ -145,4 +137,4 @@ def test_undo_validation(temp_file): path=temp_file, ) assert_error_result(result) - assert result.error is not None and "No edit history found" in result.error + assert result.is_error and "No edit history found" in result.text diff --git a/tests/tools/file_editor/test_file_editor_tool.py b/tests/tools/file_editor/test_file_editor_tool.py index dafce1c4d3..f5963d34b7 100644 --- a/tests/tools/file_editor/test_file_editor_tool.py +++ b/tests/tools/file_editor/test_file_editor_tool.py @@ -63,7 +63,7 @@ def test_file_editor_tool_create_file(): # Check the result assert result is not None assert isinstance(result, FileEditorObservation) - assert not result.error + assert not result.is_error assert os.path.exists(test_file) # Check file contents @@ -94,10 +94,10 @@ def test_file_editor_tool_view_file(): # Check the result assert result is not None assert isinstance(result, FileEditorObservation) - assert not result.error - assert "Line 1" in result.output - assert "Line 2" in result.output - assert "Line 3" in result.output + assert not result.is_error + assert "Line 1" in result.text + assert "Line 2" in result.text + assert "Line 3" in result.text def test_file_editor_tool_str_replace(): @@ -127,7 +127,7 @@ def test_file_editor_tool_str_replace(): # Check the result assert result is not None assert isinstance(result, FileEditorObservation) - assert not result.error + assert not result.is_error # Check file contents with open(test_file) as f: @@ -177,9 +177,9 @@ def test_file_editor_tool_view_directory(): # Check the result assert result is not None assert isinstance(result, FileEditorObservation) - assert not result.error - assert "file1.txt" in result.output - assert "file2.txt" in result.output + assert not result.is_error + assert "file1.txt" in result.text + assert "file2.txt" in result.text def test_file_editor_tool_includes_working_directory_in_description(): diff --git a/tests/tools/file_editor/test_memory_usage.py b/tests/tools/file_editor/test_memory_usage.py index dd7c351f93..109d551f7d 100644 --- a/tests/tools/file_editor/test_memory_usage.py +++ b/tests/tools/file_editor/test_memory_usage.py @@ -71,7 +71,7 @@ def test_file_read_memory_usage(temp_file): # Pull output before measuring and drop references to encourage GC assert_successful_result(result) - content = result.output + content = result.text del result gc.collect() @@ -187,7 +187,8 @@ def test_file_editor_memory_leak(temp_file): new_str=new_content, ) if i == 0: - print(f"First edit result: {result.output[:200]}...") + content_str = result.text + print(f"First edit result: {content_str[:200]}...") except Exception as e: print(f"\nError during edit {i}:") print(f"File size: {os.path.getsize(temp_file) / (1024 * 1024):.2f} MB") diff --git a/tests/tools/file_editor/test_view_supported_binary_files.py b/tests/tools/file_editor/test_view_supported_binary_files.py index e29ae932a6..bbd0dc027c 100644 --- a/tests/tools/file_editor/test_view_supported_binary_files.py +++ b/tests/tools/file_editor/test_view_supported_binary_files.py @@ -74,12 +74,12 @@ def test_view_pdf_file(): assert isinstance(result, FileEditorObservation) assert_successful_result(result) - assert f"Here's the result of running `cat -n` on {test_file}" in result.output + assert f"Here's the result of running `cat -n` on {test_file}" in result.text # Check for specific content present in the PDF assert ( - result.output is not None - and "Printer-Friendly Caltrain Schedule" in result.output + result.text is not None + and "Printer-Friendly Caltrain Schedule" in result.text ) finally: # Clean up the temporary file diff --git a/tests/tools/file_editor/utils/test_encoding.py b/tests/tools/file_editor/utils/test_encoding.py index 9bd47dd477..361606fdf1 100644 --- a/tests/tools/file_editor/utils/test_encoding.py +++ b/tests/tools/file_editor/utils/test_encoding.py @@ -287,9 +287,9 @@ def test_view_non_utf8_file(temp_non_utf8_file): # Parse the result - now using direct access # Verify the content was read correctly - assert result.output is not None and "Привет, мир!" in result.output - assert result.output is not None and "Тестовый файл с кириллицей" in result.output - assert result.output is not None and "Это тестовая строка" in result.output + assert result.text is not None and "Привет, мир!" in result.text + assert result.text is not None and "Тестовый файл с кириллицей" in result.text + assert result.text is not None and "Это тестовая строка" in result.text def test_view_range_non_utf8_file(temp_non_utf8_file): @@ -305,11 +305,11 @@ def test_view_range_non_utf8_file(temp_non_utf8_file): # Parse the result - now using direct access # Verify the content was read correctly - assert result.output is not None and "Тестовый файл с кириллицей" in result.output - assert result.output is not None and "Привет, мир!" in result.output + assert result.text is not None and "Тестовый файл с кириллицей" in result.text + assert result.text is not None and "Привет, мир!" in result.text # Verify that line 6 is not included - assert result.output is not None and "Это тестовая строка" not in result.output + assert result.text is not None and "Это тестовая строка" not in result.text def test_str_replace_non_utf8_file(temp_non_utf8_file): @@ -326,8 +326,8 @@ def test_str_replace_non_utf8_file(temp_non_utf8_file): # Parse the result - now using direct access # Verify the replacement was successful - assert result.output is not None and "Здравствуй, мир!" in result.output - assert result.output is not None and "Привет, мир!" not in result.output + assert result.text is not None and "Здравствуй, мир!" in result.text + assert result.text is not None and "Привет, мир!" not in result.text # Verify the file was saved with the correct encoding with open(temp_non_utf8_file, "rb") as f: @@ -354,7 +354,7 @@ def test_insert_non_utf8_file(temp_non_utf8_file): # Parse the result - now using direct access # Verify the insertion was successful - assert result.output is not None and "Новая переменная" in result.output + assert result.text is not None and "Новая переменная" in result.text # Verify the file was saved with the correct encoding with open(temp_non_utf8_file, "rb") as f: @@ -391,9 +391,7 @@ def test_create_non_utf8_file(): # Parse the result - now using direct access # Verify the file was created successfully - assert ( - result.output is not None and "File created successfully" in result.output - ) + assert result.text is not None and "File created successfully" in result.text # Read the file with cp1251 encoding to verify content encoding_manager = EncodingManager() @@ -433,7 +431,7 @@ def test_undo_edit_non_utf8_file(temp_non_utf8_file): # Parse the result - now using direct access # Verify the undo was successful - assert result.output is not None and "undone successfully" in result.output + assert result.text is not None and "undone successfully" in result.text # Verify the original content was restored with the correct encoding with open(temp_non_utf8_file, "rb") as f: @@ -455,7 +453,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file): path=str(temp_non_utf8_file), ) # Parse the result - now using direct access - assert result.output is not None and "Привет, мир!" in result.output + assert result.text is not None and "Привет, мир!" in result.text # 2. Replace text result = file_editor( @@ -465,7 +463,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file): new_str="Здравствуй, мир!", ) # Parse the result - now using direct access - assert result.output is not None and "Здравствуй, мир!" in result.output + assert result.text is not None and "Здравствуй, мир!" in result.text # 3. Insert text result = file_editor( @@ -475,7 +473,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file): new_str="# Добавленная строка\nboolean_var = True", ) # Parse the result - now using direct access - assert result.output is not None and "Добавленная строка" in result.output + assert result.text is not None and "Добавленная строка" in result.text # 4. View specific range result = file_editor( @@ -484,8 +482,8 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file): view_range=[5, 7], ) # Parse the result - now using direct access - assert result.output is not None and "Добавленная строка" in result.output - assert result.output is not None and "boolean_var = True" in result.output + assert result.text is not None and "Добавленная строка" in result.text + assert result.text is not None and "boolean_var = True" in result.text # 5. Undo the last edit result = file_editor( @@ -493,7 +491,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file): path=str(temp_non_utf8_file), ) # Parse the result - now using direct access - assert result.output is not None and "undone successfully" in result.output + assert result.text is not None and "undone successfully" in result.text # 6. Verify the file content after all operations with open(temp_non_utf8_file, "rb") as f: @@ -532,7 +530,7 @@ def test_mixed_encoding_workflow(): path=path1, ) # Parse the result - now using direct access - assert "Текст в кодировке CP1251" in result1.output + assert "Текст в кодировке CP1251" in result1.text # 2. View the UTF-8 file result2 = file_editor( @@ -540,7 +538,7 @@ def test_mixed_encoding_workflow(): path=path2, ) # Parse the result - now using direct access - assert "Текст в кодировке UTF-8" in result2.output + assert "Текст в кодировке UTF-8" in result2.text # 3. Edit the cp1251 file result3 = file_editor( @@ -550,7 +548,7 @@ def test_mixed_encoding_workflow(): new_str="Измененный текст в CP1251", ) # Parse the result - now using direct access - assert "Измененный текст в CP1251" in result3.output + assert "Измененный текст в CP1251" in result3.text # 4. Edit the UTF-8 file result4 = file_editor( @@ -560,7 +558,7 @@ def test_mixed_encoding_workflow(): new_str="Измененный текст в UTF-8", ) # Parse the result - now using direct access - assert "Измененный текст в UTF-8" in result4.output + assert "Измененный текст в UTF-8" in result4.text # 5. Verify both files maintain their original encodings with open(path1, "rb") as f: diff --git a/tests/tools/glob/test_glob_executor.py b/tests/tools/glob/test_glob_executor.py index e14ed8ac9e..c0f215372e 100644 --- a/tests/tools/glob/test_glob_executor.py +++ b/tests/tools/glob/test_glob_executor.py @@ -26,7 +26,7 @@ def test_glob_executor_basic_pattern(): action = GlobAction(pattern="*.py") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 2 assert all(f.endswith(".py") for f in observation.files) assert observation.pattern == "*.py" @@ -49,7 +49,7 @@ def test_glob_executor_recursive_pattern(): action = GlobAction(pattern="**/*.py") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 2 assert all(f.endswith(".py") for f in observation.files) @@ -70,7 +70,7 @@ def test_glob_executor_custom_path(): action = GlobAction(pattern="*.txt", path=str(sub_dir)) observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 2 assert observation.search_path == str(sub_dir.resolve()) assert all(str(sub_dir) in f for f in observation.files) @@ -83,8 +83,8 @@ def test_glob_executor_invalid_path(): action = GlobAction(pattern="*.py", path="/nonexistent/path") observation = executor(action) - assert observation.error is not None - assert "is not a valid directory" in observation.error + assert observation.is_error is True + assert "is not a valid directory" in observation.text assert len(observation.files) == 0 @@ -99,7 +99,7 @@ def test_glob_executor_no_matches(): action = GlobAction(pattern="*.py") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 0 assert not observation.truncated @@ -116,7 +116,7 @@ def test_glob_executor_directories_excluded(): action = GlobAction(pattern="*") observation = executor(action) - assert observation.error is None + assert observation.is_error is False # Should only find the file, not directories assert len(observation.files) == 1 assert observation.files[0].endswith("file.txt") @@ -143,7 +143,7 @@ def test_glob_executor_sorting(): action = GlobAction(pattern="*.txt") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 3 # Files should be sorted by modification time (newest first) @@ -162,7 +162,7 @@ def test_glob_executor_truncation(): action = GlobAction(pattern="*.txt") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 100 assert observation.truncated is True @@ -189,7 +189,7 @@ def test_glob_executor_complex_patterns(): action = GlobAction(pattern="config.*") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 4 # All config files extensions = {Path(f).suffix for f in observation.files} assert extensions == {".json", ".yaml", ".yml", ".toml"} @@ -206,7 +206,7 @@ def test_glob_executor_exception_handling(): observation = executor(action) # Should not raise exception, even if there are no matches - assert observation.error is None or isinstance(observation.error, str) + assert observation.is_error is False or isinstance(observation.content, str) assert isinstance(observation.files, list) @@ -220,7 +220,7 @@ def test_glob_executor_absolute_paths(): action = GlobAction(pattern="*.py") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 1 # Check that returned path is absolute @@ -236,7 +236,7 @@ def test_glob_executor_empty_directory(): action = GlobAction(pattern="*") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 0 assert not observation.truncated diff --git a/tests/tools/glob/test_glob_tool.py b/tests/tools/glob/test_glob_tool.py index 3c3f69c374..854daeae9d 100644 --- a/tests/tools/glob/test_glob_tool.py +++ b/tests/tools/glob/test_glob_tool.py @@ -85,7 +85,7 @@ def test_glob_tool_find_files(): observation = tool.executor(action) assert isinstance(observation, GlobObservation) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 3 # test.py, src/app.py, tests/test_main.py assert observation.pattern == "**/*.py" assert observation.search_path == str(Path(temp_dir).resolve()) @@ -119,7 +119,7 @@ def test_glob_tool_specific_directory(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 2 # app.py, utils.py assert observation.search_path == str(src_dir.resolve()) @@ -143,7 +143,7 @@ def test_glob_tool_no_matches(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 0 assert observation.pattern == "**/*.py" assert not observation.truncated @@ -161,8 +161,8 @@ def test_glob_tool_invalid_directory(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is not None - assert "is not a valid directory" in observation.error + assert observation.is_error is True + assert "is not a valid directory" in observation.text assert len(observation.files) == 0 @@ -191,7 +191,7 @@ def test_glob_tool_complex_patterns(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 4 # All config files assert observation.pattern == "config.*" @@ -218,7 +218,7 @@ def test_glob_tool_directories_excluded(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False # Should find all files recursively, but not directories assert len(observation.files) == 2 # app.py and src/utils.py # Verify both files are present @@ -287,9 +287,9 @@ def test_glob_tool_to_llm_content_error(): observation = tool.executor(action) content = observation.to_llm_content - assert len(content) == 1 - text_content = content[0].text - assert "Error:" in text_content + assert len(content) == 2 + assert content[0].text == GlobObservation.ERROR_MESSAGE_HEADER + text_content = content[1].text assert "is not a valid directory" in text_content @@ -309,7 +309,7 @@ def test_glob_tool_truncation(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.files) == 100 # Truncated to 100 assert observation.truncated is True diff --git a/tests/tools/grep/test_consistency.py b/tests/tools/grep/test_consistency.py index 6c13ad361a..0c3b0a8eda 100644 --- a/tests/tools/grep/test_consistency.py +++ b/tests/tools/grep/test_consistency.py @@ -114,8 +114,8 @@ def test_basic_search_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets of matching files for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -143,8 +143,8 @@ def test_case_insensitive_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -172,8 +172,8 @@ def test_include_pattern_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -205,8 +205,8 @@ def test_no_matches_consistency(self, temp_dir_with_content): ) # Both should succeed with identical empty results - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -229,8 +229,8 @@ def test_regex_pattern_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -258,8 +258,8 @@ def test_todo_comments_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -287,8 +287,8 @@ def test_error_patterns_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -316,8 +316,8 @@ def test_import_statements_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -345,8 +345,8 @@ def test_class_definitions_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -374,8 +374,8 @@ def test_deep_nested_search_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -409,8 +409,8 @@ def test_config_file_search_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) @@ -439,8 +439,8 @@ def test_hidden_files_search_consistency(self, temp_dir_with_content): ) # Both should succeed - assert not ripgrep_result.error - assert not fallback_result.error + assert not ripgrep_result.is_error + assert not fallback_result.is_error # Convert to sets for exact comparison ripgrep_matches = set(ripgrep_result.matches) diff --git a/tests/tools/grep/test_grep_executor.py b/tests/tools/grep/test_grep_executor.py index 40ea9512f1..120f2b1da0 100644 --- a/tests/tools/grep/test_grep_executor.py +++ b/tests/tools/grep/test_grep_executor.py @@ -37,7 +37,7 @@ def test_grep_executor_basic_search(): action = GrepAction(pattern="print") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 2 # Two files containing "print" assert observation.pattern == "print" assert observation.search_path == str(Path(temp_dir).resolve()) @@ -59,7 +59,7 @@ def test_grep_executor_case_insensitive(): action = GrepAction(pattern="print") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 # File contains pattern (case-insensitive) assert "case_test.py" in observation.matches[0] @@ -75,7 +75,7 @@ def test_grep_executor_include_filter(): action = GrepAction(pattern="test", include="*.py") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert observation.matches[0].endswith(".py") @@ -92,7 +92,7 @@ def test_grep_executor_custom_path(): action = GrepAction(pattern="print", path=str(sub_dir)) observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert observation.search_path == str(sub_dir.resolve()) assert str(sub_dir) in str(observation.matches[0]) @@ -105,8 +105,8 @@ def test_grep_executor_invalid_path(): action = GrepAction(pattern="test", path="/nonexistent/path") observation = executor(action) - assert observation.error is not None - assert "not a valid directory" in observation.error + assert observation.is_error is True + assert "not a valid directory" in observation.text def test_grep_executor_no_matches(): @@ -118,7 +118,7 @@ def test_grep_executor_no_matches(): action = GrepAction(pattern="nonexistent") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 0 @@ -132,7 +132,7 @@ def test_grep_executor_hidden_files_excluded(): action = GrepAction(pattern="test") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert ".hidden" not in observation.matches[0] @@ -155,7 +155,7 @@ def test_grep_executor_sorting(): action = GrepAction(pattern="test") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 2 # Newest file should be first assert "new.py" in observation.matches[0] @@ -173,7 +173,7 @@ def test_grep_executor_truncation(): action = GrepAction(pattern="test") observation = executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 100 assert observation.truncated is True @@ -185,5 +185,5 @@ def test_grep_executor_invalid_regex(): action = GrepAction(pattern="[invalid") observation = executor(action) - assert observation.error is not None - assert "Invalid regex pattern" in observation.error + assert observation.is_error is True + assert "Invalid regex pattern" in observation.text diff --git a/tests/tools/grep/test_grep_tool.py b/tests/tools/grep/test_grep_tool.py index e5b6abc874..04cefd668a 100644 --- a/tests/tools/grep/test_grep_tool.py +++ b/tests/tools/grep/test_grep_tool.py @@ -63,7 +63,7 @@ def test_grep_tool_basic_search(): observation = tool.executor(action) assert isinstance(observation, GrepObservation) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 2 # Two files assert observation.pattern == "print" assert observation.search_path == str(Path(temp_dir).resolve()) @@ -89,7 +89,7 @@ def test_grep_tool_case_insensitive(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 @@ -107,7 +107,7 @@ def test_grep_tool_include_filter(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert observation.matches[0].endswith(".py") @@ -128,7 +128,7 @@ def test_grep_tool_specific_directory(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert observation.search_path == str(src_dir.resolve()) assert str(src_dir) in observation.matches[0] @@ -147,7 +147,7 @@ def test_grep_tool_no_matches(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 0 assert not observation.truncated @@ -163,8 +163,8 @@ def test_grep_tool_invalid_regex(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is not None - assert "Invalid regex pattern" in observation.error + assert observation.is_error is True + assert "Invalid regex pattern" in observation.text def test_grep_tool_invalid_directory(): @@ -178,8 +178,8 @@ def test_grep_tool_invalid_directory(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is not None - assert "not a valid directory" in observation.error + assert observation.is_error is True + assert "not a valid directory" in observation.text def test_grep_tool_hidden_files_excluded(): @@ -196,7 +196,7 @@ def test_grep_tool_hidden_files_excluded(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 1 assert ".hidden" not in observation.matches[0] @@ -269,8 +269,9 @@ def test_grep_tool_to_llm_content_error(): observation = tool.executor(action) content = observation.to_llm_content - text = content[0].text - assert "Error:" in text + assert len(content) == 2 + assert content[0].text == GrepObservation.ERROR_MESSAGE_HEADER + text = content[1].text assert "Invalid regex pattern" in text @@ -289,7 +290,7 @@ def test_grep_tool_truncation(): assert tool.executor is not None observation = tool.executor(action) - assert observation.error is None + assert observation.is_error is False assert len(observation.matches) == 100 assert observation.truncated is True