diff --git a/examples/01_standalone_sdk/02_custom_tools.py b/examples/01_standalone_sdk/02_custom_tools.py
index 4c399d467f..cb30ed0443 100644
--- a/examples/01_standalone_sdk/02_custom_tools.py
+++ b/examples/01_standalone_sdk/02_custom_tools.py
@@ -92,8 +92,10 @@ def __call__(self, action: GrepAction, conversation=None) -> GrepObservation:  #
         files: set[str] = set()
 
         # grep returns exit code 1 when no matches; treat as empty
-        if result.output.strip():
-            for line in result.output.strip().splitlines():
+        output_text = result.text
+
+        if output_text.strip():
+            for line in output_text.strip().splitlines():
                 matches.append(line)
                 # Expect "path:line:content" — take the file part before first ":"
                 file_path = line.split(":", 1)[0]
diff --git a/openhands-sdk/openhands/sdk/mcp/definition.py b/openhands-sdk/openhands/sdk/mcp/definition.py
index 8d4d544880..771729e11d 100644
--- a/openhands-sdk/openhands/sdk/mcp/definition.py
+++ b/openhands-sdk/openhands/sdk/mcp/definition.py
@@ -1,7 +1,6 @@
 """MCPTool definition and implementation."""
 
 import json
-from collections.abc import Sequence
 from typing import Any
 
 import mcp.types
@@ -51,14 +50,6 @@ def to_mcp_arguments(self) -> dict:
 class MCPToolObservation(Observation):
     """Observation from MCP tool execution."""
 
-    content: list[TextContent | ImageContent] = Field(
-        default_factory=list,
-        description="Content returned from the MCP tool converted "
-        "to LLM Ready TextContent or ImageContent",
-    )
-    is_error: bool = Field(
-        default=False, description="Whether the call resulted in an error"
-    )
     tool_name: str = Field(description="Name of the tool that was called")
 
     @classmethod
@@ -66,13 +57,16 @@ def from_call_tool_result(
         cls, tool_name: str, result: mcp.types.CallToolResult
     ) -> "MCPToolObservation":
         """Create an MCPToolObservation from a CallToolResult."""
-        content: list[mcp.types.ContentBlock] = result.content
-        convrted_content = []
-        for block in content:
+
+        native_content: list[mcp.types.ContentBlock] = result.content
+        content: list[TextContent | ImageContent] = [
+            TextContent(text=f"[Tool '{tool_name}' executed.]")
+        ]
+        for block in native_content:
             if isinstance(block, mcp.types.TextContent):
-                convrted_content.append(TextContent(text=block.text))
+                content.append(TextContent(text=block.text))
             elif isinstance(block, mcp.types.ImageContent):
-                convrted_content.append(
+                content.append(
                     ImageContent(
                         image_urls=[f"data:{block.mimeType};base64,{block.data}"],
                     )
@@ -81,36 +75,32 @@ def from_call_tool_result(
                 logger.warning(
                     f"Unsupported MCP content block type: {type(block)}. Ignoring."
                 )
+
         return cls(
-            content=convrted_content,
+            content=content,
             is_error=result.isError,
             tool_name=tool_name,
         )
 
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Format the observation for agent display."""
-        initial_message = f"[Tool '{self.tool_name}' executed.]\n"
-        if self.is_error:
-            initial_message += "[An error occurred during execution.]\n"
-        return [TextContent(text=initial_message)] + self.content
-
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation of this observation."""
-        content = Text()
-        content.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
+        text = Text()
+
         if self.is_error:
-            content.append("[Error during execution]\n", style="bold red")
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
+
+        text.append(f"[MCP Tool '{self.tool_name}' Observation]\n", style="bold")
         for block in self.content:
             if isinstance(block, TextContent):
                 # try to see if block.text is a JSON
                 try:
                     parsed = json.loads(block.text)
-                    content.append(display_dict(parsed))
+                    text.append(display_dict(parsed))
                     continue
                 except (json.JSONDecodeError, TypeError):
-                    content.append(block.text + "\n")
+                    text.append(block.text + "\n")
             elif isinstance(block, ImageContent):
-                content.append(f"[Image with {len(block.image_urls)} URLs]\n")
-        return content
+                text.append(f"[Image with {len(block.image_urls)} URLs]\n")
+        return text
diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py
index 0e28b9eda5..04b3f10b3b 100644
--- a/openhands-sdk/openhands/sdk/mcp/tool.py
+++ b/openhands-sdk/openhands/sdk/mcp/tool.py
@@ -12,7 +12,6 @@
 from litellm import ChatCompletionToolParam
 from pydantic import Field, ValidationError
 
-from openhands.sdk.llm import TextContent
 from openhands.sdk.logger import get_logger
 from openhands.sdk.mcp.client import MCPClient
 from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation
@@ -69,8 +68,8 @@ async def call_tool(self, action: MCPToolAction) -> MCPToolObservation:
             except Exception as e:
                 error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}"
                 logger.error(error_msg, exc_info=True)
-                return MCPToolObservation(
-                    content=[TextContent(text=error_msg)],
+                return MCPToolObservation.from_text(
+                    text=error_msg,
                     is_error=True,
                     tool_name=self.tool_name,
                 )
@@ -154,8 +153,8 @@ def __call__(
             # Surface validation errors as an observation instead of crashing
             error_msg = f"Validation error for MCP tool '{self.name}' args: {e}"
             logger.error(error_msg, exc_info=True)
-            return MCPToolObservation(
-                content=[TextContent(text=error_msg)],
+            return MCPToolObservation.from_text(
+                text=error_msg,
                 is_error=True,
                 tool_name=self.name,
             )
diff --git a/openhands-sdk/openhands/sdk/tool/builtins/finish.py b/openhands-sdk/openhands/sdk/tool/builtins/finish.py
index 3c40f3b586..0bb12f849b 100644
--- a/openhands-sdk/openhands/sdk/tool/builtins/finish.py
+++ b/openhands-sdk/openhands/sdk/tool/builtins/finish.py
@@ -4,7 +4,6 @@
 from pydantic import Field
 from rich.text import Text
 
-from openhands.sdk.llm.message import ImageContent, TextContent
 from openhands.sdk.tool.tool import (
     Action,
     Observation,
@@ -32,16 +31,15 @@ def visualize(self) -> Text:
 
 
 class FinishObservation(Observation):
-    message: str = Field(description="Final message sent to the user.")
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.message)]
+    """
+    Observation returned after finishing a task.
+    The FinishAction itself contains the message sent to the user so no
+    extra fields are needed here.
+    """
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation - empty since action shows the message."""
-        # Don't duplicate the finish message display - action already shows it
+        """Return an empty Text representation since the message is in the action."""
         return Text()
 
 
@@ -65,7 +63,7 @@ def __call__(
         action: FinishAction,
         conversation: "BaseConversation | None" = None,  # noqa: ARG002
     ) -> FinishObservation:
-        return FinishObservation(message=action.message)
+        return FinishObservation.from_text(text=action.message)
 
 
 class FinishTool(ToolDefinition[FinishAction, FinishObservation]):
diff --git a/openhands-sdk/openhands/sdk/tool/builtins/think.py b/openhands-sdk/openhands/sdk/tool/builtins/think.py
index b28418795b..ca641ce94d 100644
--- a/openhands-sdk/openhands/sdk/tool/builtins/think.py
+++ b/openhands-sdk/openhands/sdk/tool/builtins/think.py
@@ -4,7 +4,6 @@
 from pydantic import Field
 from rich.text import Text
 
-from openhands.sdk.llm.message import ImageContent, TextContent
 from openhands.sdk.tool.tool import (
     Action,
     Observation,
@@ -46,20 +45,15 @@ def visualize(self) -> Text:
 
 
 class ThinkObservation(Observation):
-    """Observation returned after logging a thought."""
-
-    content: str = Field(
-        default="Your thought has been logged.", description="Confirmation message."
-    )
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.content)]
+    """
+    Observation returned after logging a thought.
+    The ThinkAction itself contains the thought logged so no extra
+    fields are needed here.
+    """
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation - empty since action shows the thought."""
-        # Don't duplicate the thought display - action already shows it
+        """Return an empty Text representation since the thought is in the action."""
         return Text()
 
 
@@ -81,7 +75,7 @@ def __call__(
         _: ThinkAction,
         conversation: "BaseConversation | None" = None,  # noqa: ARG002
     ) -> ThinkObservation:
-        return ThinkObservation()
+        return ThinkObservation.from_text(text="Your thought has been logged.")
 
 
 class ThinkTool(ToolDefinition[ThinkAction, ThinkObservation]):
diff --git a/openhands-sdk/openhands/sdk/tool/schema.py b/openhands-sdk/openhands/sdk/tool/schema.py
index 1c041aa659..1102df7005 100644
--- a/openhands-sdk/openhands/sdk/tool/schema.py
+++ b/openhands-sdk/openhands/sdk/tool/schema.py
@@ -1,6 +1,6 @@
-from abc import ABC, abstractmethod
+from abc import ABC
 from collections.abc import Sequence
-from typing import Any, ClassVar, TypeVar
+from typing import TYPE_CHECKING, Any, ClassVar, TypeVar
 
 from pydantic import ConfigDict, Field, create_model
 from rich.text import Text
@@ -13,6 +13,9 @@
 from openhands.sdk.utils.visualize import display_dict
 
 
+if TYPE_CHECKING:
+    from typing import Self
+
 S = TypeVar("S", bound="Schema")
 
 
@@ -190,23 +193,84 @@ def visualize(self) -> Text:
 class Observation(Schema, ABC):
     """Base schema for output observation."""
 
+    ERROR_MESSAGE_HEADER: ClassVar[str] = "[An error occurred during execution.]\n"
+
+    content: list[TextContent | ImageContent] = Field(
+        default_factory=list,
+        description=(
+            "Content returned from the tool as a list of "
+            "TextContent/ImageContent objects. "
+            "When there is an error, it should be written in this field."
+        ),
+    )
+    is_error: bool = Field(
+        default=False, description="Whether the observation indicates an error"
+    )
+
+    @classmethod
+    def from_text(
+        cls,
+        text: str,
+        is_error: bool = False,
+        **kwargs: Any,
+    ) -> "Self":
+        """Utility to create an Observation from a simple text string.
+
+        Args:
+            text: The text content to include in the observation.
+            is_error: Whether this observation represents an error.
+            **kwargs: Additional fields for the observation subclass.
+
+        Returns:
+            An Observation instance with the text wrapped in a TextContent.
+        """
+        return cls(content=[TextContent(text=text)], is_error=is_error, **kwargs)
+
+    @property
+    def text(self) -> str:
+        """Extract all text content from the observation.
+
+        Returns:
+            Concatenated text from all TextContent items in content.
+        """
+        return "".join(
+            item.text for item in self.content if isinstance(item, TextContent)
+        )
+
     @property
-    @abstractmethod
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Get the observation string to show to the agent."""
+        """
+        Default content formatting for converting observation to LLM readable content.
+        Subclasses can override to provide richer content (e.g., images, diffs).
+        """
+        llm_content: list[TextContent | ImageContent] = []
+
+        # If is_error is true, prepend error message
+        if self.is_error:
+            llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
+
+        # Add content (now always a list)
+        llm_content.extend(self.content)
+
+        return llm_content
 
     @property
     def visualize(self) -> Text:
-        """Return Rich Text representation of this action.
+        """Return Rich Text representation of this observation.
 
-        This method can be overridden by subclasses to customize visualization.
-        The base implementation displays all action fields systematically.
+        Subclasses can override for custom visualization; by default we show the
+        same text that would be sent to the LLM.
         """
-        content = Text()
+        text = Text()
+
+        if self.is_error:
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
+
         text_parts = content_to_str(self.to_llm_content)
         if text_parts:
             full_content = "".join(text_parts)
-            content.append(full_content)
+            text.append(full_content)
         else:
-            content.append("[no text content]")
-        return content
+            text.append("[no text content]")
+        return text
diff --git a/openhands-tools/openhands/tools/browser_use/definition.py b/openhands-tools/openhands/tools/browser_use/definition.py
index a9a66cfbb7..6a02026b2b 100644
--- a/openhands-tools/openhands/tools/browser_use/definition.py
+++ b/openhands-tools/openhands/tools/browser_use/definition.py
@@ -28,20 +28,24 @@
 class BrowserObservation(Observation):
     """Base observation for browser operations."""
 
-    output: str = Field(description="The output message from the browser operation")
-    error: str | None = Field(default=None, description="Error message if any")
     screenshot_data: str | None = Field(
         default=None, description="Base64 screenshot data if available"
     )
 
     @property
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        if self.error:
-            return [TextContent(text=f"Error: {self.error}")]
+        llm_content: list[TextContent | ImageContent] = []
 
-        content: list[TextContent | ImageContent] = [
-            TextContent(text=maybe_truncate(self.output, MAX_BROWSER_OUTPUT_SIZE))
-        ]
+        # If is_error is true, prepend error message
+        if self.is_error:
+            llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
+
+        # Get text content and truncate if needed
+        content_text = self.text
+        if content_text:
+            llm_content.append(
+                TextContent(text=maybe_truncate(content_text, MAX_BROWSER_OUTPUT_SIZE))
+            )
 
         if self.screenshot_data:
             mime_type = "image/png"
@@ -55,9 +59,9 @@ def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
                 mime_type = "image/webp"
             # Convert base64 to data URL format for ImageContent
             data_url = f"data:{mime_type};base64,{self.screenshot_data}"
-            content.append(ImageContent(image_urls=[data_url]))
+            llm_content.append(ImageContent(image_urls=[data_url]))
 
-        return content
+        return llm_content
 
 
 # ============================================
diff --git a/openhands-tools/openhands/tools/browser_use/impl.py b/openhands-tools/openhands/tools/browser_use/impl.py
index cbe6aebf1b..19bdce7dc8 100644
--- a/openhands-tools/openhands/tools/browser_use/impl.py
+++ b/openhands-tools/openhands/tools/browser_use/impl.py
@@ -225,13 +225,13 @@ async def _execute_action(self, action):
                 result = await self.close_tab(action.tab_id)
             else:
                 error_msg = f"Unsupported action type: {type(action)}"
-                return BrowserObservation(output="", error=error_msg)
+                return BrowserObservation.from_text(text=error_msg, is_error=True)
 
-            return BrowserObservation(output=result)
+            return BrowserObservation.from_text(text=result)
         except Exception as e:
             error_msg = f"Browser operation failed: {str(e)}"
             logger.error(error_msg, exc_info=True)
-            return BrowserObservation(output="", error=error_msg)
+            return BrowserObservation.from_text(text=error_msg, is_error=True)
 
     async def _ensure_initialized(self):
         """Ensure browser session is initialized."""
@@ -281,14 +281,15 @@ async def get_state(self, include_screenshot: bool = False):
 
                 # Return clean JSON + separate screenshot data
                 clean_json = json.dumps(result_data, indent=2)
-                return BrowserObservation(
-                    output=clean_json, screenshot_data=screenshot_data
+                return BrowserObservation.from_text(
+                    text=clean_json,
+                    screenshot_data=screenshot_data,
                 )
             except json.JSONDecodeError:
                 # If JSON parsing fails, return as-is
                 pass
 
-        return BrowserObservation(output=result_json)
+        return BrowserObservation.from_text(text=result_json)
 
     # Tab Management
     async def list_tabs(self) -> str:
diff --git a/openhands-tools/openhands/tools/delegate/definition.py b/openhands-tools/openhands/tools/delegate/definition.py
index 36835e8c7b..0b5ee4fba0 100644
--- a/openhands-tools/openhands/tools/delegate/definition.py
+++ b/openhands-tools/openhands/tools/delegate/definition.py
@@ -5,7 +5,6 @@
 
 from pydantic import Field
 
-from openhands.sdk.llm.message import ImageContent, TextContent
 from openhands.sdk.tool.tool import (
     Action,
     Observation,
@@ -44,15 +43,7 @@ class DelegateAction(Action):
 class DelegateObservation(Observation):
     """Observation from delegation operations."""
 
-    command: CommandLiteral = Field(
-        description="The command that was executed. Either `spawn` or `delegate`."
-    )
-    message: str = Field(description="Result message from the operation")
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Get the observation content to show to the agent."""
-        return [TextContent(text=self.message)]
+    command: CommandLiteral = Field(description="The command that was executed")
 
 
 TOOL_DESCRIPTION = """Delegation tool for spawning sub-agents and delegating tasks to them.
diff --git a/openhands-tools/openhands/tools/delegate/impl.py b/openhands-tools/openhands/tools/delegate/impl.py
index 2b79797c16..da68f9400b 100644
--- a/openhands-tools/openhands/tools/delegate/impl.py
+++ b/openhands-tools/openhands/tools/delegate/impl.py
@@ -58,9 +58,13 @@ def __call__(  # type: ignore[override]
         elif action.command == "delegate":
             return self._delegate_tasks(action)
         else:
-            return DelegateObservation(
+            return DelegateObservation.from_text(
+                text=(
+                    f"Unsupported command: {action.command}. "
+                    "Available commands: spawn, delegate"
+                ),
                 command=action.command,
-                message=f"Unsupported command: {action.command}",
+                is_error=True,
             )
 
     def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation:
@@ -74,19 +78,21 @@ def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation:
             DelegateObservation indicating success/failure and which agents were spawned
         """
         if not action.ids:
-            return DelegateObservation(
-                command="spawn",
-                message="Error: at least one ID is required for spawn action",
+            return DelegateObservation.from_text(
+                text="At least one ID is required for spawn action",
+                command=action.command,
+                is_error=True,
             )
 
         if len(self._sub_agents) + len(action.ids) > self._max_children:
-            return DelegateObservation(
-                command="spawn",
-                message=(
+            return DelegateObservation.from_text(
+                text=(
                     f"Cannot spawn {len(action.ids)} agents. "
                     f"Already have {len(self._sub_agents)} agents, "
                     f"maximum is {self._max_children}"
                 ),
+                command=action.command,
+                is_error=True,
             )
 
         try:
@@ -115,16 +121,17 @@ def _spawn_agents(self, action: "DelegateAction") -> DelegateObservation:
 
             agent_list = ", ".join(action.ids)
             message = f"Successfully spawned {len(action.ids)} sub-agents: {agent_list}"
-            return DelegateObservation(
-                command="spawn",
-                message=message,
+            return DelegateObservation.from_text(
+                text=message,
+                command=action.command,
             )
 
         except Exception as e:
             logger.error(f"Error: failed to spawn agents: {e}", exc_info=True)
-            return DelegateObservation(
-                command="spawn",
-                message=f"Error: failed to spawn agents: {str(e)}",
+            return DelegateObservation.from_text(
+                text=f"failed to spawn agents: {str(e)}",
+                command=action.command,
+                is_error=True,
             )
 
     def _delegate_tasks(self, action: "DelegateAction") -> "DelegateObservation":
@@ -139,20 +146,22 @@ def _delegate_tasks(self, action: "DelegateAction") -> "DelegateObservation":
             DelegateObservation with consolidated results from all sub-agents
         """
         if not action.tasks:
-            return DelegateObservation(
-                command="delegate",
-                message="Error: at least one task is required for delegate action",
+            return DelegateObservation.from_text(
+                text="at least one task is required for delegate action",
+                command=action.command,
+                is_error=True,
             )
 
         # Check that all requested agent IDs exist
         missing_agents = set(action.tasks.keys()) - set(self._sub_agents.keys())
         if missing_agents:
-            return DelegateObservation(
-                command="delegate",
-                message=(
-                    f"Error: sub-agents not found: {', '.join(missing_agents)}. "
+            return DelegateObservation.from_text(
+                text=(
+                    f"sub-agents not found: {', '.join(missing_agents)}. "
                     f"Available agents: {', '.join(self._sub_agents.keys())}"
                 ),
+                command=action.command,
+                is_error=True,
             )
 
         try:
@@ -211,24 +220,25 @@ def run_task(agent_id: str, conversation: LocalConversation, task: str):
                     all_results.append(f"Agent {agent_id}: No result")
 
             # Create comprehensive message with results
-            message = f"Completed delegation of {len(action.tasks)} tasks"
+            output_text = f"Completed delegation of {len(action.tasks)} tasks"
             if errors:
-                message += f" with {len(errors)} errors"
+                output_text += f" with {len(errors)} errors"
 
             if all_results:
                 results_text = "\n".join(
                     f"{i}. {result}" for i, result in enumerate(all_results, 1)
                 )
-                message += f"\n\nResults:\n{results_text}"
+                output_text += f"\n\nResults:\n{results_text}"
 
-            return DelegateObservation(
-                command="delegate",
-                message=message,
+            return DelegateObservation.from_text(
+                text=output_text,
+                command=action.command,
             )
 
         except Exception as e:
             logger.error(f"Failed to delegate tasks: {e}", exc_info=True)
-            return DelegateObservation(
-                command="delegate",
-                message=f"Error: failed to delegate tasks: {str(e)}",
+            return DelegateObservation.from_text(
+                text=f"failed to delegate tasks: {str(e)}",
+                command=action.command,
+                is_error=True,
             )
diff --git a/openhands-tools/openhands/tools/execute_bash/definition.py b/openhands-tools/openhands/tools/execute_bash/definition.py
index 19721362ab..32c118691b 100644
--- a/openhands-tools/openhands/tools/execute_bash/definition.py
+++ b/openhands-tools/openhands/tools/execute_bash/definition.py
@@ -81,19 +81,13 @@ def visualize(self) -> Text:
 class ExecuteBashObservation(Observation):
     """A ToolResult that can be rendered as a CLI output."""
 
-    output: str = Field(description="The raw output from the tool.")
     command: str | None = Field(
-        default=None,
         description="The bash command that was executed. Can be empty string if the observation is from a previous command that hit soft timeout and is not yet finished.",  # noqa
     )
     exit_code: int | None = Field(
         default=None,
         description="The exit code of the command. -1 indicates the process hit the soft timeout and is not yet finished.",  # noqa
     )
-    error: bool = Field(
-        default=False,
-        description="Whether there was an error during command execution.",
-    )
     timeout: bool = Field(
         default=False, description="Whether the command execution timed out."
     )
@@ -109,31 +103,41 @@ def command_id(self) -> int | None:
 
     @property
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        ret = f"{self.metadata.prefix}{self.output}{self.metadata.suffix}"
+        llm_content: list[TextContent | ImageContent] = []
+
+        # If is_error is true, prepend error message
+        if self.is_error:
+            llm_content.append(TextContent(text=self.ERROR_MESSAGE_HEADER))
+
+        # ExecuteBashObservation always has content as a single TextContent
+        content_text = self.text
+
+        ret = f"{self.metadata.prefix}{content_text}{self.metadata.suffix}"
         if self.metadata.working_dir:
             ret += f"\n[Current working directory: {self.metadata.working_dir}]"
         if self.metadata.py_interpreter_path:
             ret += f"\n[Python interpreter: {self.metadata.py_interpreter_path}]"
         if self.metadata.exit_code != -1:
             ret += f"\n[Command finished with exit code {self.metadata.exit_code}]"
-        if self.error:
-            ret = f"[There was an error during command execution.]\n{ret}"
-        return [TextContent(text=maybe_truncate(ret, MAX_CMD_OUTPUT_SIZE))]
+        llm_content.append(TextContent(text=maybe_truncate(ret, MAX_CMD_OUTPUT_SIZE)))
+
+        return llm_content
 
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation with terminal-style output formatting."""
-        content = Text()
+        text = Text()
 
-        # Add error indicator if present
-        if self.error:
-            content.append("❌ ", style="red bold")
-            content.append("Command execution error\n", style="red")
+        if self.is_error:
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
 
-        # Add command output with proper styling
-        if self.output:
+        # ExecuteBashObservation always has content as a single TextContent
+        content_text = self.text
+
+        if content_text:
             # Style the output based on content
-            output_lines = self.output.split("\n")
+            output_lines = content_text.split("\n")
             for line in output_lines:
                 if line.strip():
                     # Color error-like lines differently
@@ -141,28 +145,28 @@ def visualize(self) -> Text:
                         keyword in line.lower()
                         for keyword in ["error", "failed", "exception", "traceback"]
                     ):
-                        content.append(line, style="red")
+                        text.append(line, style="red")
                     elif any(
                         keyword in line.lower() for keyword in ["warning", "warn"]
                     ):
-                        content.append(line, style="yellow")
+                        text.append(line, style="yellow")
                     elif line.startswith("+ "):  # bash -x output
-                        content.append(line, style="cyan")
+                        text.append(line, style="cyan")
                     else:
-                        content.append(line, style="white")
-                content.append("\n")
+                        text.append(line, style="white")
+                text.append("\n")
 
         # Add metadata with styling
         if hasattr(self, "metadata") and self.metadata:
             if self.metadata.working_dir:
-                content.append("\n📁 ", style="blue")
-                content.append(
+                text.append("\n📁 ", style="blue")
+                text.append(
                     f"Working directory: {self.metadata.working_dir}", style="blue"
                 )
 
             if self.metadata.py_interpreter_path:
-                content.append("\n🐍 ", style="green")
-                content.append(
+                text.append("\n🐍 ", style="green")
+                text.append(
                     f"Python interpreter: {self.metadata.py_interpreter_path}",
                     style="green",
                 )
@@ -172,20 +176,16 @@ def visualize(self) -> Text:
                 and self.metadata.exit_code is not None
             ):
                 if self.metadata.exit_code == 0:
-                    content.append("\n✅ ", style="green")
-                    content.append(
-                        f"Exit code: {self.metadata.exit_code}", style="green"
-                    )
+                    text.append("\n✅ ", style="green")
+                    text.append(f"Exit code: {self.metadata.exit_code}", style="green")
                 elif self.metadata.exit_code == -1:
-                    content.append("\n⏳ ", style="yellow")
-                    content.append(
-                        "Process still running (soft timeout)", style="yellow"
-                    )
+                    text.append("\n⏳ ", style="yellow")
+                    text.append("Process still running (soft timeout)", style="yellow")
                 else:
-                    content.append("\n❌ ", style="red")
-                    content.append(f"Exit code: {self.metadata.exit_code}", style="red")
+                    text.append("\n❌ ", style="red")
+                    text.append(f"Exit code: {self.metadata.exit_code}", style="red")
 
-        return content
+        return text
 
 
 TOOL_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
diff --git a/openhands-tools/openhands/tools/execute_bash/impl.py b/openhands-tools/openhands/tools/execute_bash/impl.py
index 911e544ec5..79c39d45f8 100644
--- a/openhands-tools/openhands/tools/execute_bash/impl.py
+++ b/openhands-tools/openhands/tools/execute_bash/impl.py
@@ -1,6 +1,7 @@
 import json
 from typing import TYPE_CHECKING, Literal
 
+from openhands.sdk.llm import TextContent
 from openhands.sdk.logger import get_logger
 from openhands.sdk.tool import ToolExecutor
 
@@ -111,8 +112,8 @@ def reset(self) -> ExecuteBashObservation:
             f"Terminal session reset successfully with working_dir: {original_work_dir}"
         )
 
-        return ExecuteBashObservation(
-            output=(
+        return ExecuteBashObservation.from_text(
+            text=(
                 "Terminal session has been reset. All previous environment "
                 "variables and session state have been cleared."
             ),
@@ -141,11 +142,16 @@ def __call__(
                 )
                 self._export_envs(command_action, conversation)
                 command_result = self.session.execute(command_action)
+
+                # Extract text from content
+                reset_text = reset_result.text
+                command_text = command_result.text
+
                 observation = command_result.model_copy(
                     update={
-                        "output": (
-                            reset_result.output + "\n\n" + command_result.output
-                        ),
+                        "content": [
+                            TextContent(text=f"{reset_text}\n\n{command_text}")
+                        ],
                         "command": f"[RESET] {action.command}",
                     }
                 )
@@ -158,15 +164,15 @@ def __call__(
             observation = self.session.execute(action)
 
         # Apply automatic secrets masking
-        if observation.output and conversation is not None:
+        content_text = observation.text
+
+        if content_text and conversation is not None:
             try:
                 secret_registry = conversation.state.secret_registry
-                masked_output = secret_registry.mask_secrets_in_output(
-                    observation.output
-                )
-                if masked_output:
-                    data = observation.model_dump(exclude={"output"})
-                    return ExecuteBashObservation(**data, output=masked_output)
+                masked_content = secret_registry.mask_secrets_in_output(content_text)
+                if masked_content:
+                    data = observation.model_dump(exclude={"content"})
+                    return ExecuteBashObservation.from_text(text=masked_content, **data)
             except Exception:
                 pass
 
diff --git a/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py b/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py
index 0d46638650..35042d35ef 100644
--- a/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py
+++ b/openhands-tools/openhands/tools/execute_bash/terminal/terminal_session.py
@@ -186,9 +186,9 @@ def _handle_completed_command(
         self.prev_status = TerminalCommandStatus.COMPLETED
         self.prev_output = ""  # Reset previous command output
         self._ready_for_next_command()
-        return ExecuteBashObservation(
-            output=command_output,
+        return ExecuteBashObservation.from_text(
             command=command,
+            text=command_output,
             metadata=metadata,
         )
 
@@ -220,9 +220,9 @@ def _handle_nochange_timeout_command(
             metadata,
             continue_prefix="[Below is the output of the previous command.]\n",
         )
-        return ExecuteBashObservation(
-            output=command_output,
+        return ExecuteBashObservation.from_text(
             command=command,
+            text=command_output,
             metadata=metadata,
         )
 
@@ -255,10 +255,9 @@ def _handle_hard_timeout_command(
             metadata,
             continue_prefix="[Below is the output of the previous command.]\n",
         )
-
-        return ExecuteBashObservation(
-            output=command_output,
+        return ExecuteBashObservation.from_text(
             command=command,
+            text=command_output,
             metadata=metadata,
         )
 
@@ -313,27 +312,32 @@ def execute(self, action: ExecuteBashAction) -> ExecuteBashObservation:
             TerminalCommandStatus.HARD_TIMEOUT,
         }:
             if command == "":
-                return ExecuteBashObservation(
-                    output="ERROR: No previous running command to retrieve logs from.",
-                    error=True,
+                return ExecuteBashObservation.from_text(
+                    text="No previous running command to retrieve logs from.",
+                    command=command,
+                    is_error=True,
                 )
             if is_input:
-                return ExecuteBashObservation(
-                    output="ERROR: No previous running command to interact with.",
-                    error=True,
+                return ExecuteBashObservation.from_text(
+                    text="No previous running command to interact with.",
+                    command=command,
+                    is_error=True,
                 )
 
         # Check if the command is a single command or multiple commands
         splited_commands = split_bash_commands(command)
         if len(splited_commands) > 1:
-            return ExecuteBashObservation(
-                output=(
-                    f"ERROR: Cannot execute multiple commands at once.\n"
-                    f"Please run each command separately OR chain them into a single "
-                    f"command via && or ;\nProvided commands:\n"
-                    f"{'\n'.join(f'({i + 1}) {cmd}' for i, cmd in enumerate(splited_commands))}"  # noqa: E501
+            commands_list = "\n".join(
+                f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_commands)
+            )
+            return ExecuteBashObservation.from_text(
+                text=(
+                    "Cannot execute multiple commands at once.\n"
+                    "Please run each command separately OR chain them into a single "
+                    f"command via && or ;\nProvided commands:\n{commands_list}"
                 ),
-                error=True,
+                command=command,
+                is_error=True,
             )
 
         # Get initial state before sending command
@@ -384,10 +388,11 @@ def execute(self, action: ExecuteBashAction) -> ExecuteBashObservation:
                 metadata,
                 continue_prefix="[Below is the output of the previous command.]\n",
             )
-            obs = ExecuteBashObservation(
-                output=command_output,
+            obs = ExecuteBashObservation.from_text(
                 command=command,
+                text=command_output,
                 metadata=metadata,
+                is_error=True,
             )
             logger.debug(f"RETURNING OBSERVATION (previous-command): {obs}")
             return obs
diff --git a/openhands-tools/openhands/tools/file_editor/definition.py b/openhands-tools/openhands/tools/file_editor/definition.py
index d2041687d3..ba6fdd7656 100644
--- a/openhands-tools/openhands/tools/file_editor/definition.py
+++ b/openhands-tools/openhands/tools/file_editor/definition.py
@@ -11,7 +11,6 @@
 
 from rich.text import Text
 
-from openhands.sdk.llm import ImageContent, TextContent
 from openhands.sdk.tool import (
     Action,
     Observation,
@@ -69,12 +68,12 @@ class FileEditorObservation(Observation):
     """A ToolResult that can be rendered as a CLI output."""
 
     command: CommandLiteral = Field(
-        description="The commands to run. Allowed options are: `view`, `create`, "
-        "`str_replace`, `insert`, `undo_edit`."
-    )
-    output: str = Field(
-        default="", description="The output message from the tool for the LLM to see."
+        description=(
+            "The command that was run: `view`, `create`, `str_replace`, "
+            "`insert`, or `undo_edit`."
+        )
     )
+
     path: str | None = Field(default=None, description="The file path that was edited.")
     prev_exist: bool = Field(
         default=True,
@@ -86,16 +85,9 @@ class FileEditorObservation(Observation):
     new_content: str | None = Field(
         default=None, description="The content of the file after the edit."
     )
-    error: str | None = Field(default=None, description="Error message if any.")
 
     _diff_cache: Text | None = PrivateAttr(default=None)
 
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        if self.error:
-            return [TextContent(text=self.error)]
-        return [TextContent(text=self.output)]
-
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation of this observation.
@@ -103,6 +95,11 @@ def visualize(self) -> Text:
         Shows diff visualization for meaningful changes (file creation, successful
         edits), otherwise falls back to agent observation.
         """
+        text = Text()
+
+        if self.is_error:
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
 
         if not self._has_meaningful_diff:
             return super().visualize
@@ -110,7 +107,7 @@ def visualize(self) -> Text:
         assert self.path is not None, "path should be set for meaningful diff"
         # Generate and cache diff visualization
         if not self._diff_cache:
-            change_applied = self.command != "view" and not self.error
+            change_applied = self.command != "view" and not self.is_error
             self._diff_cache = visualize_diff(
                 self.path,
                 self.old_content,
@@ -119,12 +116,14 @@ def visualize(self) -> Text:
                 change_applied=change_applied,
             )
 
-        return self._diff_cache
+        # Combine error prefix with diff visualization
+        text.append(self._diff_cache)
+        return text
 
     @property
     def _has_meaningful_diff(self) -> bool:
         """Check if there's a meaningful diff to display."""
-        if self.error:
+        if self.is_error:
             return False
 
         if not self.path:
diff --git a/openhands-tools/openhands/tools/file_editor/editor.py b/openhands-tools/openhands/tools/file_editor/editor.py
index 509bba6323..99adb7a409 100644
--- a/openhands-tools/openhands/tools/file_editor/editor.py
+++ b/openhands-tools/openhands/tools/file_editor/editor.py
@@ -108,12 +108,12 @@ def __call__(
                 raise EditorToolParameterMissingError(command, "file_text")
             self.write_file(_path, file_text)
             self._history_manager.add_history(_path, file_text)
-            return FileEditorObservation(
+            return FileEditorObservation.from_text(
+                text=f"File created successfully at: {_path}",
                 command=command,
                 path=str(_path),
                 new_content=file_text,
                 prev_exist=False,
-                output=f"File created successfully at: {_path}",
             )
         elif command == "str_replace":
             if old_str is None:
@@ -253,9 +253,9 @@ def str_replace(
             "Review the changes and make sure they are as expected. Edit the "
             "file again if necessary."
         )
-        return FileEditorObservation(
+        return FileEditorObservation.from_text(
+            text=success_message,
             command="str_replace",
-            output=success_message,
             prev_exist=True,
             path=str(path),
             old_content=file_content,
@@ -293,30 +293,36 @@ def view(
                 rf"-path '{path}/*/\.*' \) | sort",
                 truncate_notice=DIRECTORY_CONTENT_TRUNCATED_NOTICE,
             )
-            if not stderr:
-                # Add trailing slashes to directories
-                paths = stdout.strip().split("\n") if stdout.strip() else []
-                formatted_paths = []
-                for p in paths:
-                    if Path(p).is_dir():
-                        formatted_paths.append(f"{p}/")
-                    else:
-                        formatted_paths.append(p)
-
-                msg = [
-                    f"Here's the files and directories up to 2 levels deep in {path}, "
-                    "excluding hidden items:\n" + "\n".join(formatted_paths)
-                ]
-                if hidden_count > 0:
-                    msg.append(
-                        f"\n{hidden_count} hidden files/directories in this directory "
-                        f"are excluded. You can use 'ls -la {path}' to see them."
-                    )
-                stdout = "\n".join(msg)
-            return FileEditorObservation(
+            if stderr:
+                return FileEditorObservation.from_text(
+                    text=stderr,
+                    command="view",
+                    is_error=True,
+                    path=str(path),
+                    prev_exist=True,
+                )
+            # Add trailing slashes to directories
+            paths = stdout.strip().split("\n") if stdout.strip() else []
+            formatted_paths = []
+            for p in paths:
+                if Path(p).is_dir():
+                    formatted_paths.append(f"{p}/")
+                else:
+                    formatted_paths.append(p)
+
+            msg = [
+                f"Here's the files and directories up to 2 levels deep in {path}, "
+                "excluding hidden items:\n" + "\n".join(formatted_paths)
+            ]
+            if hidden_count > 0:
+                msg.append(
+                    f"\n{hidden_count} hidden files/directories in this directory "
+                    f"are excluded. You can use 'ls -la {path}' to see them."
+                )
+            stdout = "\n".join(msg)
+            return FileEditorObservation.from_text(
+                text=stdout,
                 command="view",
-                output=stdout,
-                error=stderr,
                 path=str(path),
                 prev_exist=True,
             )
@@ -330,9 +336,9 @@ def view(
             file_content = self.read_file(path)
             output = self._make_output(file_content, str(path), start_line)
 
-            return FileEditorObservation(
+            return FileEditorObservation.from_text(
+                text=output,
                 command="view",
-                output=output,
                 path=str(path),
                 prev_exist=True,
             )
@@ -383,10 +389,10 @@ def view(
         if warning_message:
             output = f"NOTE: {warning_message}\n{output}"
 
-        return FileEditorObservation(
+        return FileEditorObservation.from_text(
+            text=output,
             command="view",
             path=str(path),
-            output=output,
             prev_exist=True,
         )
 
@@ -496,9 +502,9 @@ def insert(
             "Review the changes and make sure they are as expected (correct "
             "indentation, no duplicate lines, etc). Edit the file again if necessary."
         )
-        return FileEditorObservation(
+        return FileEditorObservation.from_text(
+            text=success_message,
             command="insert",
-            output=success_message,
             prev_exist=True,
             path=str(path),
             old_content=file_text,
@@ -565,12 +571,12 @@ def undo_edit(self, path: Path) -> FileEditorObservation:
 
         self.write_file(path, old_text)
 
-        return FileEditorObservation(
-            command="undo_edit",
-            output=(
+        return FileEditorObservation.from_text(
+            text=(
                 f"Last edit to {path} undone successfully. "
                 f"{self._make_output(old_text, str(path))}"
             ),
+            command="undo_edit",
             path=str(path),
             prev_exist=True,
             old_content=current_text,
diff --git a/openhands-tools/openhands/tools/file_editor/impl.py b/openhands-tools/openhands/tools/file_editor/impl.py
index afc1f7906d..a653a6a24a 100644
--- a/openhands-tools/openhands/tools/file_editor/impl.py
+++ b/openhands-tools/openhands/tools/file_editor/impl.py
@@ -43,12 +43,15 @@ def __call__(
         if self.allowed_edits_files is not None and action.command != "view":
             action_path = Path(action.path).resolve()
             if action_path not in self.allowed_edits_files:
-                return FileEditorObservation(
+                return FileEditorObservation.from_text(
+                    text=(
+                        f"Operation '{action.command}' is not allowed "
+                        f"on file '{action_path}'. "
+                        f"Only the following files can be edited: "
+                        f"{sorted(str(p) for p in self.allowed_edits_files)}"
+                    ),
                     command=action.command,
-                    error=f"Operation '{action.command}' is not allowed "
-                    f"on file '{action_path}'. "
-                    f"Only the following files can be edited: "
-                    f"{sorted(str(p) for p in self.allowed_edits_files)}",
+                    is_error=True,
                 )
 
         result: FileEditorObservation | None = None
@@ -63,7 +66,9 @@ def __call__(
                 insert_line=action.insert_line,
             )
         except ToolError as e:
-            result = FileEditorObservation(command=action.command, error=e.message)
+            result = FileEditorObservation.from_text(
+                text=e.message, command=action.command, is_error=True
+            )
         assert result is not None, "file_editor should always return a result"
         return result
 
@@ -95,6 +100,8 @@ def file_editor(
             insert_line=insert_line,
         )
     except ToolError as e:
-        result = FileEditorObservation(command=command, error=e.message)
+        result = FileEditorObservation.from_text(
+            text=e.message, command=command, is_error=True
+        )
     assert result is not None, "file_editor should always return a result"
     return result
diff --git a/openhands-tools/openhands/tools/glob/definition.py b/openhands-tools/openhands/tools/glob/definition.py
index 4e76ca9e20..a0c64b5005 100644
--- a/openhands-tools/openhands/tools/glob/definition.py
+++ b/openhands-tools/openhands/tools/glob/definition.py
@@ -6,11 +6,6 @@
 
 from pydantic import Field
 
-
-if TYPE_CHECKING:
-    from openhands.sdk.conversation.state import ConversationState
-
-from openhands.sdk.llm import ImageContent, TextContent
 from openhands.sdk.tool import (
     Action,
     Observation,
@@ -20,6 +15,10 @@
 )
 
 
+if TYPE_CHECKING:
+    from openhands.sdk.conversation.state import ConversationState
+
+
 class GlobAction(Action):
     """Schema for glob pattern matching operations."""
 
@@ -46,32 +45,6 @@ class GlobObservation(Observation):
     truncated: bool = Field(
         default=False, description="Whether results were truncated to 100 files"
     )
-    error: str | None = Field(default=None, description="Error message if any")
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Convert observation to LLM content."""
-        if self.error:
-            return [TextContent(text=f"Error: {self.error}")]
-
-        if not self.files:
-            content = (
-                f"No files found matching pattern '{self.pattern}' "
-                f"in directory '{self.search_path}'"
-            )
-        else:
-            file_list = "\n".join(self.files)
-            content = (
-                f"Found {len(self.files)} file(s) matching pattern "
-                f"'{self.pattern}' in '{self.search_path}':\n{file_list}"
-            )
-            if self.truncated:
-                content += (
-                    "\n\n[Results truncated to first 100 files. "
-                    "Consider using a more specific pattern.]"
-                )
-
-        return [TextContent(text=content)]
 
 
 TOOL_DESCRIPTION = """Fast file pattern matching tool.
diff --git a/openhands-tools/openhands/tools/glob/impl.py b/openhands-tools/openhands/tools/glob/impl.py
index 1b566d5d4c..45261cb970 100644
--- a/openhands-tools/openhands/tools/glob/impl.py
+++ b/openhands-tools/openhands/tools/glob/impl.py
@@ -67,11 +67,12 @@ def __call__(
                 )
 
             if not search_path.is_dir():
-                return GlobObservation(
+                return GlobObservation.from_text(
+                    text=f"Search path '{search_path}' is not a valid directory",
                     files=[],
                     pattern=original_pattern,
                     search_path=str(search_path),
-                    error=f"Search path '{search_path}' is not a valid directory",
+                    is_error=True,
                 )
 
             if self._ripgrep_available:
@@ -79,7 +80,26 @@ def __call__(
             else:
                 files, truncated = self._execute_with_glob(pattern, search_path)
 
-            return GlobObservation(
+            # Format content message
+            if not files:
+                content = (
+                    f"No files found matching pattern '{original_pattern}' "
+                    f"in directory '{search_path}'"
+                )
+            else:
+                file_list = "\n".join(files)
+                content = (
+                    f"Found {len(files)} file(s) matching pattern "
+                    f"'{original_pattern}' in '{search_path}':\n{file_list}"
+                )
+                if truncated:
+                    content += (
+                        "\n\n[Results truncated to first 100 files. "
+                        "Consider using a more specific pattern.]"
+                    )
+
+            return GlobObservation.from_text(
+                text=content,
                 files=files,
                 pattern=original_pattern,
                 search_path=str(search_path),
@@ -96,11 +116,12 @@ def __call__(
             except Exception:
                 error_search_path = "unknown"
 
-            return GlobObservation(
+            return GlobObservation.from_text(
+                text=str(e),
                 files=[],
                 pattern=action.pattern,
                 search_path=error_search_path,
-                error=str(e),
+                is_error=True,
             )
 
     def _execute_with_ripgrep(
diff --git a/openhands-tools/openhands/tools/grep/definition.py b/openhands-tools/openhands/tools/grep/definition.py
index 4f378424e9..c603208fe0 100644
--- a/openhands-tools/openhands/tools/grep/definition.py
+++ b/openhands-tools/openhands/tools/grep/definition.py
@@ -6,11 +6,6 @@
 
 from pydantic import Field
 
-
-if TYPE_CHECKING:
-    from openhands.sdk.conversation.state import ConversationState
-
-from openhands.sdk.llm import ImageContent, TextContent
 from openhands.sdk.tool import (
     Action,
     Observation,
@@ -20,6 +15,10 @@
 )
 
 
+if TYPE_CHECKING:
+    from openhands.sdk.conversation.state import ConversationState
+
+
 class GrepAction(Action):
     """Schema for grep content search operations."""
 
@@ -52,43 +51,6 @@ class GrepObservation(Observation):
     truncated: bool = Field(
         default=False, description="Whether results were truncated to 100 files"
     )
-    error: str | None = Field(default=None, description="Error message if any")
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        """Convert observation to LLM content."""
-        if self.error:
-            return [TextContent(text=f"Error: {self.error}")]
-
-        if not self.matches:
-            include_info = (
-                f" (filtered by '{self.include_pattern}')"
-                if self.include_pattern
-                else ""
-            )
-            content = (
-                f"No files found containing pattern '{self.pattern}' "
-                f"in directory '{self.search_path}'{include_info}"
-            )
-        else:
-            include_info = (
-                f" (filtered by '{self.include_pattern}')"
-                if self.include_pattern
-                else ""
-            )
-            file_list = "\n".join(self.matches)
-            content = (
-                f"Found {len(self.matches)} file(s) containing pattern "
-                f"'{self.pattern}' in '{self.search_path}'{include_info}:\n"
-                f"{file_list}"
-            )
-            if self.truncated:
-                content += (
-                    "\n\n[Results truncated to first 100 files. "
-                    "Consider using a more specific pattern.]"
-                )
-
-        return [TextContent(text=content)]
 
 
 TOOL_DESCRIPTION = """Fast content search tool.
diff --git a/openhands-tools/openhands/tools/grep/impl.py b/openhands-tools/openhands/tools/grep/impl.py
index ef99434221..e08baf1ee4 100644
--- a/openhands-tools/openhands/tools/grep/impl.py
+++ b/openhands-tools/openhands/tools/grep/impl.py
@@ -55,12 +55,13 @@ def __call__(
             if action.path:
                 search_path = Path(action.path).resolve()
                 if not search_path.is_dir():
-                    return GrepObservation(
+                    return GrepObservation.from_text(
+                        text=f"Search path '{action.path}' is not a valid directory",
                         matches=[],
                         pattern=action.pattern,
                         search_path=str(search_path),
                         include_pattern=action.include,
-                        error=f"Search path '{action.path}' is not a valid directory",
+                        is_error=True,
                     )
             else:
                 search_path = self.working_dir
@@ -69,12 +70,13 @@ def __call__(
             try:
                 re.compile(action.pattern)
             except re.error as e:
-                return GrepObservation(
+                return GrepObservation.from_text(
+                    text=f"Invalid regex pattern: {e}",
                     matches=[],
                     pattern=action.pattern,
                     search_path=str(search_path),
                     include_pattern=action.include,
-                    error=f"Invalid regex pattern: {e}",
+                    is_error=True,
                 )
 
             if self._ripgrep_available:
@@ -92,14 +94,46 @@ def __call__(
             except Exception:
                 error_search_path = "unknown"
 
-            return GrepObservation(
+            return GrepObservation.from_text(
+                text=str(e),
                 matches=[],
                 pattern=action.pattern,
                 search_path=error_search_path,
                 include_pattern=action.include,
-                error=str(e),
+                is_error=True,
             )
 
+    def _format_output(
+        self,
+        matches: list[str],
+        pattern: str,
+        search_path: str,
+        include_pattern: str | None,
+        truncated: bool,
+    ) -> str:
+        """Format the grep observation output message."""
+        if not matches:
+            include_info = (
+                f" (filtered by '{include_pattern}')" if include_pattern else ""
+            )
+            return (
+                f"No files found containing pattern '{pattern}' "
+                f"in directory '{search_path}'{include_info}"
+            )
+
+        include_info = f" (filtered by '{include_pattern}')" if include_pattern else ""
+        file_list = "\n".join(matches)
+        output = (
+            f"Found {len(matches)} file(s) containing pattern "
+            f"'{pattern}' in '{search_path}'{include_info}:\n{file_list}"
+        )
+        if truncated:
+            output += (
+                "\n\n[Results truncated to first 100 files. "
+                "Consider using a more specific pattern.]"
+            )
+        return output
+
     def _execute_with_ripgrep(
         self, action: GrepAction, search_path: Path
     ) -> GrepObservation:
@@ -135,7 +169,16 @@ def _execute_with_ripgrep(
 
         truncated = len(matches) >= 100
 
-        return GrepObservation(
+        output = self._format_output(
+            matches=matches,
+            pattern=action.pattern,
+            search_path=str(search_path),
+            include_pattern=action.include,
+            truncated=truncated,
+        )
+
+        return GrepObservation.from_text(
+            text=output,
             matches=matches,
             pattern=action.pattern,
             search_path=str(search_path),
@@ -189,7 +232,16 @@ def _execute_with_grep(
 
         truncated = len(matches) >= 100
 
-        return GrepObservation(
+        output = self._format_output(
+            matches=matches,
+            pattern=action.pattern,
+            search_path=str(search_path),
+            include_pattern=action.include,
+            truncated=truncated,
+        )
+
+        return GrepObservation.from_text(
+            text=output,
             matches=matches,
             pattern=action.pattern,
             search_path=str(search_path),
diff --git a/openhands-tools/openhands/tools/planning_file_editor/impl.py b/openhands-tools/openhands/tools/planning_file_editor/impl.py
index 1b2f45a43c..20ba166e9d 100644
--- a/openhands-tools/openhands/tools/planning_file_editor/impl.py
+++ b/openhands-tools/openhands/tools/planning_file_editor/impl.py
@@ -60,6 +60,7 @@ def __call__(
         # Convert FileEditorObservation to PlanningFileEditorObservation
         return PlanningFileEditorObservation(
             command=action.command,
-            output=file_editor_obs.output,
-            error=file_editor_obs.error,
+            content=file_editor_obs.content,
+            is_error=file_editor_obs.is_error,
+            path=file_editor_obs.path,
         )
diff --git a/openhands-tools/openhands/tools/task_tracker/definition.py b/openhands-tools/openhands/tools/task_tracker/definition.py
index 350f276fa2..3829fd9924 100644
--- a/openhands-tools/openhands/tools/task_tracker/definition.py
+++ b/openhands-tools/openhands/tools/task_tracker/definition.py
@@ -12,7 +12,6 @@
 
 from rich.text import Text
 
-from openhands.sdk import ImageContent, TextContent
 from openhands.sdk.logger import get_logger
 from openhands.sdk.tool import (
     Action,
@@ -72,22 +71,21 @@ def visualize(self) -> Text:
 class TaskTrackerObservation(Observation):
     """This data class represents the result of a task tracking operation."""
 
-    content: str = Field(
-        default="", description="The formatted task list or status message"
+    command: Literal["view", "plan"] = Field(
+        description='The command that was executed: "view" or "plan".'
     )
-    command: str = Field(default="", description="The command that was executed")
     task_list: list[TaskItem] = Field(
         default_factory=list, description="The current task list"
     )
 
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.content)]
-
     @property
     def visualize(self) -> Text:
         """Return Rich Text representation with task list formatting."""
-        content = Text()
+        text = Text()
+
+        if self.is_error:
+            text.append("❌ ", style="red bold")
+            text.append(self.ERROR_MESSAGE_HEADER, style="bold red")
 
         if self.task_list:
             # Count tasks by status
@@ -99,11 +97,11 @@ def visualize(self) -> Text:
 
             # Show status summary
             if self.command == "plan":
-                content.append("✅ ", style="green")
-                content.append("Task list updated: ", style="green")
+                text.append("✅ ", style="green")
+                text.append("Task list updated: ", style="green")
             else:  # view command
-                content.append("📋 ", style="blue")
-                content.append("Current task list: ", style="blue")
+                text.append("📋 ", style="blue")
+                text.append("Current task list: ", style="blue")
 
             # Status counts
             status_parts = []
@@ -115,33 +113,33 @@ def visualize(self) -> Text:
                 status_parts.append(f"{done_count} done")
 
             if status_parts:
-                content.append(", ".join(status_parts), style="white")
-                content.append("\n\n")
+                text.append(", ".join(status_parts), style="white")
+                text.append("\n\n")
 
             # Show the actual task list
             for i, task in enumerate(self.task_list, 1):
                 # Status icon
                 if task.status == "done":
-                    content.append("✅ ", style="green")
+                    text.append("✅ ", style="green")
                 elif task.status == "in_progress":
-                    content.append("🔄 ", style="yellow")
+                    text.append("🔄 ", style="yellow")
                 else:  # todo
-                    content.append("⏳ ", style="blue")
+                    text.append("⏳ ", style="blue")
 
                 # Task title
-                content.append(f"{i}. {task.title}", style="white")
+                text.append(f"{i}. {task.title}", style="white")
 
                 # NEW: show notes under the title if present
                 if task.notes:
-                    content.append("\n   Notes: " + task.notes, style="italic")
+                    text.append("\n   Notes: " + task.notes, style="italic")
 
                 if i < len(self.task_list):
-                    content.append("\n")
+                    text.append("\n")
         else:
-            content.append("📝 ", style="blue")
-            content.append("Task list is empty")
+            text.append("📝 ", style="blue")
+            text.append("Task list is empty")
 
-        return content
+        return text
 
 
 class TaskTrackerExecutor(ToolExecutor[TaskTrackerAction, TaskTrackerObservation]):
@@ -176,28 +174,34 @@ def __call__(
             # Save to file if save_dir is provided
             if self.save_dir:
                 self._save_tasks()
-            return TaskTrackerObservation(
-                content="Task list has been updated with "
-                + f"{len(self._task_list)} item(s).",
+            return TaskTrackerObservation.from_text(
+                text=(
+                    f"Task list has been updated with {len(self._task_list)} item(s)."
+                ),
                 command=action.command,
                 task_list=self._task_list,
             )
         elif action.command == "view":
             # Return the current task list
             if not self._task_list:
-                return TaskTrackerObservation(
-                    content='No task list found. Use the "plan" command to create one.',
+                return TaskTrackerObservation.from_text(
+                    text=('No task list found. Use the "plan" command to create one.'),
                     command=action.command,
                     task_list=[],
                 )
             content = self._format_task_list(self._task_list)
-            return TaskTrackerObservation(
-                content=content, command=action.command, task_list=self._task_list
+            return TaskTrackerObservation.from_text(
+                text=content,
+                command=action.command,
+                task_list=self._task_list,
             )
         else:
-            return TaskTrackerObservation(
-                content=f"Unknown command: {action.command}. "
-                + 'Supported commands are "view" and "plan".',
+            return TaskTrackerObservation.from_text(
+                text=(
+                    f"Unknown command: {action.command}. "
+                    'Supported commands are "view" and "plan".'
+                ),
+                is_error=True,
                 command=action.command,
                 task_list=[],
             )
diff --git a/tests/cross/test_agent_secrets_integration.py b/tests/cross/test_agent_secrets_integration.py
index 3d9fdcf3c9..c6c3894b30 100644
--- a/tests/cross/test_agent_secrets_integration.py
+++ b/tests/cross/test_agent_secrets_integration.py
@@ -234,13 +234,13 @@ def get_value(self):
     try:
         action = ExecuteBashAction(command="echo $API_KEY")
         result = bash_executor(action, conversation=conversation)
-        assert "test-api-key" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "test-api-key" not in result.text
+        assert "<secret-hidden>" in result.text
 
         action = ExecuteBashAction(command="echo $DB_PASSWORD")
         result = bash_executor(action, conversation=conversation)
-        assert "dynamic-secret" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "dynamic-secret" not in result.text
+        assert "<secret-hidden>" in result.text
 
     finally:
         bash_executor.close()
@@ -265,13 +265,13 @@ def get_value(self):
     try:
         action = ExecuteBashAction(command="echo $DB_PASSWORD")
         result = bash_executor(action, conversation=conversation)
-        assert "changing-secret" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "changing-secret" not in result.text
+        assert "<secret-hidden>" in result.text
 
         action = ExecuteBashAction(command="echo $DB_PASSWORD")
         result = bash_executor(action, conversation=conversation)
-        assert "changing-secret" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "changing-secret" not in result.text
+        assert "<secret-hidden>" in result.text
 
     finally:
         bash_executor.close()
@@ -303,13 +303,13 @@ def get_value(self):
         action = ExecuteBashAction(command="echo $DB_PASSWORD")
         result = bash_executor(action, conversation=conversation)
         print(result)
-        assert "changing-secret" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "changing-secret" not in result.text
+        assert "<secret-hidden>" in result.text
 
         action = ExecuteBashAction(command="echo $DB_PASSWORD")
         result = bash_executor(action, conversation=conversation)
-        assert "changing-secret" not in result.output
-        assert "<secret-hidden>" in result.output
+        assert "changing-secret" not in result.text
+        assert "<secret-hidden>" in result.text
         assert dynamic_secret.raised_on_second
 
     finally:
diff --git a/tests/cross/test_stuck_detector.py b/tests/cross/test_stuck_detector.py
index 8ce47b9c09..f7e75afa79 100644
--- a/tests/cross/test_stuck_detector.py
+++ b/tests/cross/test_stuck_detector.py
@@ -58,8 +58,10 @@ def test_history_too_short():
 
     observation = ObservationEvent(
         source="environment",
-        observation=ExecuteBashObservation(
-            output="file1.txt\nfile2.txt", command="ls", exit_code=0
+        observation=ExecuteBashObservation.from_text(
+            text="file1.txt\nfile2.txt",
+            command="ls",
+            exit_code=0,
         ),
         action_id=action.id,
         tool_name="bash",
@@ -107,8 +109,10 @@ def test_repeating_action_observation_not_stuck_less_than_4_repeats():
 
         observation = ObservationEvent(
             source="environment",
-            observation=ExecuteBashObservation(
-                output="file1.txt\nfile2.txt", command="ls", exit_code=0
+            observation=ExecuteBashObservation.from_text(
+                text="file1.txt\nfile2.txt",
+                command="ls",
+                exit_code=0,
             ),
             action_id=action.id,
             tool_name="bash",
@@ -156,8 +160,10 @@ def test_repeating_action_observation_stuck():
 
         observation = ObservationEvent(
             source="environment",
-            observation=ExecuteBashObservation(
-                output="file1.txt\nfile2.txt", command="ls", exit_code=0
+            observation=ExecuteBashObservation.from_text(
+                text="file1.txt\nfile2.txt",
+                command="ls",
+                exit_code=0,
             ),
             action_id=action.id,
             tool_name="bash",
@@ -297,8 +303,10 @@ def test_not_stuck_with_different_actions():
 
         observation = ObservationEvent(
             source="environment",
-            observation=ExecuteBashObservation(
-                output=f"output from {cmd}", command=cmd, exit_code=0
+            observation=ExecuteBashObservation.from_text(
+                text=f"output from {cmd}",
+                command=cmd,
+                exit_code=0,
             ),
             action_id=action.id,
             tool_name="bash",
@@ -346,8 +354,10 @@ def test_reset_after_user_message():
 
         observation = ObservationEvent(
             source="environment",
-            observation=ExecuteBashObservation(
-                output="file1.txt\nfile2.txt", command="ls", exit_code=0
+            observation=ExecuteBashObservation.from_text(
+                text="file1.txt\nfile2.txt",
+                command="ls",
+                exit_code=0,
             ),
             action_id=action.id,
             tool_name="bash",
@@ -389,8 +399,8 @@ def test_reset_after_user_message():
 
     observation = ObservationEvent(
         source="environment",
-        observation=ExecuteBashObservation(
-            output="/home/user", command="pwd", exit_code=0
+        observation=ExecuteBashObservation.from_text(
+            text="/home/user", command="pwd", exit_code=0
         ),
         action_id=action.id,
         tool_name="bash",
diff --git a/tests/sdk/context/test_view_batch_atomicity.py b/tests/sdk/context/test_view_batch_atomicity.py
index ac37464cef..0060b351ce 100644
--- a/tests/sdk/context/test_view_batch_atomicity.py
+++ b/tests/sdk/context/test_view_batch_atomicity.py
@@ -55,8 +55,8 @@ def create_observation_event(
     tool_call_id: str, content: str = "Success", tool_name: str = "test_tool"
 ) -> ObservationEvent:
     """Helper to create an ObservationEvent."""
-    observation = MCPToolObservation(
-        content=[TextContent(text=content)],
+    observation = MCPToolObservation.from_text(
+        text=content,
         tool_name=tool_name,
     )
     return ObservationEvent(
diff --git a/tests/sdk/conversation/local/test_confirmation_mode.py b/tests/sdk/conversation/local/test_confirmation_mode.py
index bd9ecbe064..0176aee01d 100644
--- a/tests/sdk/conversation/local/test_confirmation_mode.py
+++ b/tests/sdk/conversation/local/test_confirmation_mode.py
@@ -552,7 +552,8 @@ def test_single_finish_action_skips_confirmation_entirely(self):
             e for e in self.conversation.state.events if isinstance(e, ObservationEvent)
         ]
         assert len(obs_events) == 1
-        assert obs_events[0].observation.message == "Task completed successfully!"  # type: ignore[attr-defined]
+        # FinishObservation should contain the finish message in content
+        assert obs_events[0].observation.text == "Task completed successfully!"
 
     def test_think_and_finish_action_skips_confirmation_entirely(self):
         """First step: ThinkAction (skips confirmation). Second step: FinishAction."""
@@ -594,13 +595,13 @@ def test_think_and_finish_action_skips_confirmation_entirely(self):
         ]
         assert len(obs_events) == 2
 
-        # 1) ThinkAction observation
+        # 1) ThinkAction observation - should contain the standard message
         assert hasattr(obs_events[0].observation, "content")
-        assert obs_events[0].observation.content == "Your thought has been logged."  # type: ignore[attr-defined]
+        assert obs_events[0].observation.text == "Your thought has been logged."
 
-        # 2) FinishAction observation
-        assert hasattr(obs_events[1].observation, "message")
-        assert obs_events[1].observation.message == "Analysis complete"  # type: ignore[attr-defined]
+        # 2) FinishAction observation - should contain the finish message
+        assert hasattr(obs_events[1].observation, "content")
+        assert obs_events[1].observation.text == "Analysis complete"
 
     def test_pause_during_confirmation_preserves_waiting_status(self):
         """Test that pausing during WAITING_FOR_CONFIRMATION preserves the status.
diff --git a/tests/sdk/conversation/test_visualizer.py b/tests/sdk/conversation/test_visualizer.py
index fcac0c8a33..040c9c00f9 100644
--- a/tests/sdk/conversation/test_visualizer.py
+++ b/tests/sdk/conversation/test_visualizer.py
@@ -1,7 +1,6 @@
 """Tests for the conversation visualizer and event visualization."""
 
 import json
-from collections.abc import Sequence
 
 from rich.text import Text
 
@@ -19,7 +18,6 @@
     UserRejectObservation,
 )
 from openhands.sdk.llm import (
-    ImageContent,
     Message,
     MessageToolCall,
     TextContent,
@@ -153,14 +151,10 @@ def test_observation_event_visualize():
     from openhands.sdk.tool import Observation
 
     class VisualizerMockObservation(Observation):
-        content: str = "Command output"
-
-        @property
-        def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-            return [TextContent(text=self.content)]
+        pass
 
     observation = VisualizerMockObservation(
-        content="total 4\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 ."
+        content=[TextContent(text="total 4\ndrwxr-xr-x 2 user user 4096 Jan 1 12:00 .")]
     )
     event = ObservationEvent(
         observation=observation,
diff --git a/tests/sdk/event/test_event_serialization.py b/tests/sdk/event/test_event_serialization.py
index fd3a3d17ee..e922bfe8ca 100644
--- a/tests/sdk/event/test_event_serialization.py
+++ b/tests/sdk/event/test_event_serialization.py
@@ -1,7 +1,5 @@
 """Comprehensive tests for event serialization and deserialization."""
 
-from collections.abc import Sequence
-
 import pytest
 from pydantic import ValidationError
 
@@ -16,7 +14,6 @@
     SystemPromptEvent,
 )
 from openhands.sdk.llm import (
-    ImageContent,
     Message,
     MessageToolCall,
     TextContent,
@@ -32,17 +29,15 @@ class EventsSerializationMockAction(Action):
     """Mock action for testing."""
 
     def execute(self) -> "EventsSerializationMockObservation":
-        return EventsSerializationMockObservation(content="mock result")
+        return EventsSerializationMockObservation(
+            content=[TextContent(text="mock result")]
+        )
 
 
 class EventsSerializationMockObservation(Observation):
     """Mock observation for testing."""
 
-    content: str
-
-    @property
-    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
-        return [TextContent(text=self.content)]
+    pass
 
 
 def test_event_base_serialization() -> None:
@@ -100,7 +95,9 @@ def test_action_event_serialization() -> None:
 
 def test_observation_event_serialization() -> None:
     """Test ObservationEvent serialization/deserialization."""
-    observation = EventsSerializationMockObservation(content="test result")
+    observation = EventsSerializationMockObservation(
+        content=[TextContent(text="test result")]
+    )
     event = ObservationEvent(
         observation=observation,
         action_id="action_123",
diff --git a/tests/sdk/mcp/test_mcp_tool.py b/tests/sdk/mcp/test_mcp_tool.py
index 3ca4f9c8bf..74c075e7a1 100644
--- a/tests/sdk/mcp/test_mcp_tool.py
+++ b/tests/sdk/mcp/test_mcp_tool.py
@@ -5,7 +5,7 @@
 
 import mcp.types
 
-from openhands.sdk.llm import TextContent
+from openhands.sdk.llm import ImageContent, TextContent
 from openhands.sdk.mcp.client import MCPClient
 from openhands.sdk.mcp.definition import MCPToolObservation
 from openhands.sdk.mcp.tool import MCPToolDefinition, MCPToolExecutor
@@ -37,9 +37,12 @@ def test_from_call_tool_result_success(self):
         )
 
         assert observation.tool_name == "test_tool"
-        assert len(observation.content) == 1
+        assert observation.content is not None
+        assert len(observation.content) == 2
         assert isinstance(observation.content[0], TextContent)
-        assert observation.content[0].text == "Operation completed successfully"
+        assert observation.content[0].text == "[Tool 'test_tool' executed.]"
+        assert isinstance(observation.content[1], TextContent)
+        assert observation.content[1].text == "Operation completed successfully"
         assert observation.is_error is False
 
     def test_from_call_tool_result_error(self):
@@ -54,10 +57,12 @@ def test_from_call_tool_result_error(self):
         )
 
         assert observation.tool_name == "test_tool"
-        assert len(observation.content) == 1
-        assert isinstance(observation.content[0], TextContent)
-        assert observation.content[0].text == "Operation failed"
         assert observation.is_error is True
+        assert len(observation.content) == 2
+        assert isinstance(observation.content[0], TextContent)
+        assert observation.content[0].text == "[Tool 'test_tool' executed.]"
+        assert isinstance(observation.content[1], TextContent)
+        assert observation.content[1].text == "Operation failed"
 
     def test_from_call_tool_result_with_image(self):
         """Test creating observation from MCP result with image content."""
@@ -76,44 +81,53 @@ def test_from_call_tool_result_with_image(self):
         )
 
         assert observation.tool_name == "test_tool"
-        assert len(observation.content) == 2
+        assert observation.content is not None
+        assert len(observation.content) == 3
+        # First item is header
         assert isinstance(observation.content[0], TextContent)
-        assert observation.content[0].text == "Here's the image:"
-        # Second content should be ImageContent
-        assert hasattr(observation.content[1], "image_urls")
+        assert observation.content[0].text == "[Tool 'test_tool' executed.]"
+        # Second item is text
+        assert isinstance(observation.content[1], TextContent)
+        assert observation.content[1].text == "Here's the image:"
+        # Third item is image
+        assert isinstance(observation.content[2], ImageContent)
+        assert hasattr(observation.content[2], "image_urls")
         assert observation.is_error is False
 
     def test_to_llm_content_success(self):
         """Test agent observation formatting for success."""
-        observation = MCPToolObservation(
+        observation = MCPToolObservation.from_text(
+            text="[Tool 'test_tool' executed.]\nSuccess result",
             tool_name="test_tool",
-            content=[TextContent(text="Success result")],
-            is_error=False,
         )
 
         agent_obs = observation.to_llm_content
-        assert len(agent_obs) == 2
+        assert len(agent_obs) == 1
         assert isinstance(agent_obs[0], TextContent)
         assert "[Tool 'test_tool' executed.]" in agent_obs[0].text
-        assert "[An error occurred during execution.]" not in agent_obs[0].text
-        assert isinstance(agent_obs[1], TextContent)
-        assert agent_obs[1].text == "Success result"
+        assert "Success result" in agent_obs[0].text
+        assert MCPToolObservation.ERROR_MESSAGE_HEADER not in agent_obs[0].text
 
     def test_to_llm_content_error(self):
         """Test agent observation formatting for error."""
-        observation = MCPToolObservation(
+        observation = MCPToolObservation.from_text(
+            text=(
+                "[Tool 'test_tool' executed.]\n"
+                "[An error occurred during execution.]\n"
+                "Error occurred"
+            ),
             tool_name="test_tool",
-            content=[TextContent(text="Error occurred")],
             is_error=True,
         )
 
         agent_obs = observation.to_llm_content
         assert len(agent_obs) == 2
         assert isinstance(agent_obs[0], TextContent)
+        assert agent_obs[0].text == MCPToolObservation.ERROR_MESSAGE_HEADER
         assert isinstance(agent_obs[1], TextContent)
-        assert "[Tool 'test_tool' executed.]" in agent_obs[0].text
-        assert "[An error occurred during execution.]" in agent_obs[0].text
-        assert agent_obs[1].text == "Error occurred"
+        assert "[Tool 'test_tool' executed.]" in agent_obs[1].text
+        assert "[An error occurred during execution.]" in agent_obs[1].text
+        assert "Error occurred" in agent_obs[1].text
 
 
 class TestMCPToolExecutor:
@@ -188,14 +202,10 @@ def test_call_tool_exception(self):
 
         # Mock call_async_from_sync to return an error observation
         def mock_call_async_from_sync(coro_func, **kwargs):
-            return MCPToolObservation(
-                content=[
-                    TextContent(
-                        text="Error calling MCP tool test_tool: Connection failed"
-                    )
-                ],
-                is_error=True,
+            return MCPToolObservation.from_text(
+                text="Error calling MCP tool test_tool: Connection failed",
                 tool_name="test_tool",
+                is_error=True,
             )
 
         self.mock_client.call_async_from_sync = mock_call_async_from_sync
@@ -203,10 +213,10 @@ def mock_call_async_from_sync(coro_func, **kwargs):
         observation = self.executor(mock_action)
 
         assert isinstance(observation, MCPToolObservation)
-        assert isinstance(observation.content[0], TextContent)
         assert observation.tool_name == "test_tool"
         assert observation.is_error is True
-        assert "Connection failed" in observation.content[0].text
+        assert observation.is_error is True
+        assert "Connection failed" in observation.text
 
 
 class TestMCPTool:
diff --git a/tests/sdk/mcp/test_mcp_tool_kind_field.py b/tests/sdk/mcp/test_mcp_tool_kind_field.py
index a62444615f..af67777c57 100644
--- a/tests/sdk/mcp/test_mcp_tool_kind_field.py
+++ b/tests/sdk/mcp/test_mcp_tool_kind_field.py
@@ -85,11 +85,17 @@ def test_real_mcp_tool_execution_without_kind_field(fetch_tool):
     observation = fetch_tool(action)
 
     # Verify we got a valid response (not an error about 'kind')
+    # Check output if no error, otherwise check error message
+    from openhands.sdk.llm import TextContent
+
     assert observation.content is not None
-    assert len(observation.content) > 0
+    # Extract text from content blocks (content is always a list now)
+    text_parts = [
+        block.text for block in observation.content if isinstance(block, TextContent)
+    ]
+    content_str = " ".join(text_parts)
 
     # Check that the response doesn't contain validation error about 'kind'
-    content_str = str(observation.content)
     if "error" in content_str.lower():
         # If there's an error, make sure it's not about 'kind' field
         assert "kind" not in content_str.lower(), (
diff --git a/tests/sdk/tool/test_registry.py b/tests/sdk/tool/test_registry.py
index 49213bd87a..40d219913e 100644
--- a/tests/sdk/tool/test_registry.py
+++ b/tests/sdk/tool/test_registry.py
@@ -26,7 +26,7 @@ class _HelloAction(Action):
 
 
 class _HelloObservation(Observation):
-    message: str
+    message: str = ""
 
     @property
     def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
diff --git a/tests/tools/browser_use/conftest.py b/tests/tools/browser_use/conftest.py
index 0cce6b37dd..d30f51faf6 100644
--- a/tests/tools/browser_use/conftest.py
+++ b/tests/tools/browser_use/conftest.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from openhands.sdk.tool.schema import TextContent
 from openhands.tools.browser_use.definition import BrowserObservation
 from openhands.tools.browser_use.impl import BrowserToolExecutor
 
@@ -30,9 +31,11 @@ def create_mock_browser_response(
     screenshot_data: str | None = None,
 ):
     """Helper to create mock browser responses."""
-    return BrowserObservation(
-        output=output, error=error, screenshot_data=screenshot_data
-    )
+    if error:
+        return BrowserObservation.from_text(
+            text=error, is_error=True, screenshot_data=screenshot_data
+        )
+    return BrowserObservation.from_text(text=output, screenshot_data=screenshot_data)
 
 
 def assert_browser_observation_success(
@@ -40,9 +43,15 @@ def assert_browser_observation_success(
 ):
     """Assert that a browser observation indicates success."""
     assert isinstance(observation, BrowserObservation)
-    assert observation.error is None
+    assert observation.is_error is False
     if expected_output:
-        assert expected_output in observation.output
+        if isinstance(observation.content, str):
+            output_text = observation.content
+        else:
+            output_text = "".join(
+                [c.text for c in observation.content if isinstance(c, TextContent)]
+            )
+        assert expected_output in output_text
 
 
 def assert_browser_observation_error(
@@ -50,6 +59,6 @@ def assert_browser_observation_error(
 ):
     """Assert that a browser observation contains an error."""
     assert isinstance(observation, BrowserObservation)
-    assert observation.error is not None
+    assert observation.is_error is True
     if expected_error:
-        assert expected_error in observation.error
+        assert expected_error in observation.text
diff --git a/tests/tools/browser_use/test_browser_executor.py b/tests/tools/browser_use/test_browser_executor.py
index d200f1e727..25377b26da 100644
--- a/tests/tools/browser_use/test_browser_executor.py
+++ b/tests/tools/browser_use/test_browser_executor.py
@@ -72,8 +72,8 @@ async def test_browser_executor_action_routing_get_state(
     mock_get_state, mock_browser_executor
 ):
     """Test that get_state actions are routed correctly and return directly."""
-    expected_observation = BrowserObservation(
-        output="State retrieved", screenshot_data="base64data"
+    expected_observation = BrowserObservation.from_text(
+        text="State retrieved", screenshot_data="base64data"
     )
     mock_get_state.return_value = expected_observation
 
@@ -105,7 +105,7 @@ async def test_browser_executor_error_wrapping(mock_navigate, mock_browser_execu
     result = await mock_browser_executor._execute_action(action)
 
     assert_browser_observation_error(result, "Browser operation failed")
-    assert "Browser error occurred" in result.error
+    assert "Browser error occurred" in result.text
 
 
 def test_browser_executor_async_execution(mock_browser_executor):
@@ -113,7 +113,7 @@ def test_browser_executor_async_execution(mock_browser_executor):
     with patch.object(
         mock_browser_executor, "_execute_action", new_callable=AsyncMock
     ) as mock_execute:
-        expected_result = BrowserObservation(output="Test result")
+        expected_result = BrowserObservation.from_text(text="Test result")
         mock_execute.return_value = expected_result
 
         action = BrowserNavigateAction(url="https://example.com")
diff --git a/tests/tools/browser_use/test_browser_executor_e2e.py b/tests/tools/browser_use/test_browser_executor_e2e.py
index 1c2cd9e4dd..551b5d4bef 100644
--- a/tests/tools/browser_use/test_browser_executor_e2e.py
+++ b/tests/tools/browser_use/test_browser_executor_e2e.py
@@ -171,11 +171,9 @@ def test_navigate_action(
         result = browser_executor(action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
-        assert (
-            "successfully" in result.output.lower()
-            or "navigated" in result.output.lower()
-        )
+        assert not result.is_error
+        output_text = result.text.lower()
+        assert "successfully" in output_text or "navigated" in output_text
 
     def test_get_state_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -190,8 +188,8 @@ def test_get_state_action(
         result = browser_executor(action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
-        assert "Browser Test Page" in result.output
+        assert not result.is_error
+        assert "Browser Test Page" in result.text
 
     def test_get_state_with_screenshot(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -206,7 +204,7 @@ def test_get_state_with_screenshot(
         result = browser_executor(action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
         assert result.screenshot_data is not None
         assert len(result.screenshot_data) > 0
 
@@ -224,14 +222,14 @@ def test_click_action(
 
         # Parse the state to find button index
         # The test button should be indexed in the interactive elements
-        assert "Click Me" in state_result.output
+        assert "Click Me" in state_result.text
 
         # Try to click the first interactive element (likely the button)
         click_action = BrowserClickAction(index=0)
         result = browser_executor(click_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
     def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: str):
         """Test typing text into an input field."""
@@ -244,7 +242,8 @@ def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: s
         state_result = browser_executor(get_state_action)
 
         # Look for input field in the state
-        assert "test-input" in state_result.output or "Type here" in state_result.output
+        state_output = state_result.text
+        assert "test-input" in state_output or "Type here" in state_output
 
         # Find the input field index and type into it
         # This assumes the input field is one of the interactive elements
@@ -252,7 +251,7 @@ def test_type_action(self, browser_executor: BrowserToolExecutor, test_server: s
         result = browser_executor(type_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
     def test_scroll_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -267,14 +266,14 @@ def test_scroll_action(
         result = browser_executor(scroll_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
         # Scroll back up
         scroll_up_action = BrowserScrollAction(direction="up")
         result = browser_executor(scroll_up_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
     def test_get_content_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -289,8 +288,8 @@ def test_get_content_action(
         result = browser_executor(content_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
-        assert "Browser Test Page" in result.output
+        assert not result.is_error
+        assert "Browser Test Page" in result.text
 
         # Get content with links
         content_with_links_action = BrowserGetContentAction(
@@ -299,8 +298,8 @@ def test_get_content_action(
         result = browser_executor(content_with_links_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
-        assert "Browser Test Page" in result.output
+        assert not result.is_error
+        assert "Browser Test Page" in result.text
 
     def test_navigate_new_tab(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -311,7 +310,7 @@ def test_navigate_new_tab(
         result = browser_executor(action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
     def test_list_tabs_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -326,9 +325,9 @@ def test_list_tabs_action(
         result = browser_executor(list_tabs_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
         # Should contain tab information
-        assert len(result.output) > 0
+        assert len(result.text) > 0
 
     def test_go_back_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -348,7 +347,7 @@ def test_go_back_action(
         result = browser_executor(back_action)
 
         assert isinstance(result, BrowserObservation)
-        assert result.error is None
+        assert not result.is_error
 
     def test_switch_tab_action(
         self, browser_executor: BrowserToolExecutor, test_server: str
@@ -370,7 +369,7 @@ def test_switch_tab_action(
 
         # Parse tab information to get a tab ID
         # This is a simplified approach - in practice you'd parse the JSON response
-        if "tab" in tabs_result.output.lower():
+        if "tab" in tabs_result.text.lower():
             # Try to switch to first tab (assuming tab ID format)
             switch_action = BrowserSwitchTabAction(tab_id="0")
             result = browser_executor(switch_action)
@@ -428,24 +427,24 @@ def test_concurrent_actions(
         """Test that multiple actions can be executed in sequence."""
         # Navigate
         navigate_result = browser_executor(BrowserNavigateAction(url=test_server))
-        assert navigate_result.error is None
+        assert not navigate_result.is_error
 
         # Get state
         state_result = browser_executor(BrowserGetStateAction(include_screenshot=False))
-        assert state_result.error is None
+        assert not state_result.is_error
 
         # Scroll
         scroll_result = browser_executor(BrowserScrollAction(direction="down"))
-        assert scroll_result.error is None
+        assert not scroll_result.is_error
 
         # Get content
         content_result = browser_executor(
             BrowserGetContentAction(extract_links=False, start_from_char=0)
         )
-        assert content_result.error is None
+        assert not content_result.is_error
 
         # All actions should complete successfully
         assert all(
-            result.error is None
+            not result.is_error
             for result in [navigate_result, state_result, scroll_result, content_result]
         )
diff --git a/tests/tools/browser_use/test_browser_observation.py b/tests/tools/browser_use/test_browser_observation.py
index b8ae22e000..09992781ee 100644
--- a/tests/tools/browser_use/test_browser_observation.py
+++ b/tests/tools/browser_use/test_browser_observation.py
@@ -6,37 +6,37 @@
 
 def test_browser_observation_basic_output():
     """Test basic BrowserObservation creation with output."""
-    observation = BrowserObservation(output="Test output")
+    observation = BrowserObservation.from_text(text="Test output")
 
-    assert observation.output == "Test output"
-    assert observation.error is None
+    assert observation.text == "Test output"
+    assert observation.is_error is False
     assert observation.screenshot_data is None
 
 
 def test_browser_observation_with_error():
     """Test BrowserObservation with error."""
-    observation = BrowserObservation(output="", error="Test error")
+    observation = BrowserObservation.from_text(text="Test error", is_error=True)
 
-    assert observation.output == ""
-    assert observation.error == "Test error"
+    assert observation.text == "Test error"
+    assert observation.is_error is True
     assert observation.screenshot_data is None
 
 
 def test_browser_observation_with_screenshot():
     """Test BrowserObservation with screenshot data."""
     screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg=="  # noqa: E501
-    observation = BrowserObservation(
-        output="Screenshot taken", screenshot_data=screenshot_data
+    observation = BrowserObservation.from_text(
+        text="Screenshot taken", screenshot_data=screenshot_data
     )
 
-    assert observation.output == "Screenshot taken"
-    assert observation.error is None
+    assert observation.text == "Screenshot taken"
+    assert observation.is_error is False
     assert observation.screenshot_data == screenshot_data
 
 
 def test_browser_observation_to_llm_content_text_only():
     """Test to_llm_content property with text only."""
-    observation = BrowserObservation(output="Test output")
+    observation = BrowserObservation.from_text(text="Test output")
     agent_obs = observation.to_llm_content
 
     assert len(agent_obs) == 1
@@ -47,8 +47,8 @@ def test_browser_observation_to_llm_content_text_only():
 def test_browser_observation_to_llm_content_with_screenshot():
     """Test to_llm_content property with screenshot."""
     screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg=="  # noqa: E501
-    observation = BrowserObservation(
-        output="Screenshot taken", screenshot_data=screenshot_data
+    observation = BrowserObservation.from_text(
+        text="Screenshot taken", screenshot_data=screenshot_data
     )
     agent_obs = observation.to_llm_content
 
@@ -63,19 +63,21 @@ def test_browser_observation_to_llm_content_with_screenshot():
 
 def test_browser_observation_to_llm_content_with_error():
     """Test to_llm_content property with error."""
-    observation = BrowserObservation(output="", error="Test error")
+    observation = BrowserObservation.from_text(text="Test error", is_error=True)
     agent_obs = observation.to_llm_content
 
-    assert len(agent_obs) == 1
+    assert len(agent_obs) == 2
     assert isinstance(agent_obs[0], TextContent)
-    assert agent_obs[0].text == "Error: Test error"
+    assert agent_obs[0].text == BrowserObservation.ERROR_MESSAGE_HEADER
+    assert isinstance(agent_obs[1], TextContent)
+    assert "Test error" in agent_obs[1].text
 
 
 def test_browser_observation_output_truncation():
     """Test output truncation for very long outputs."""
     # Create a very long output string
     long_output = "x" * 100000  # 100k characters
-    observation = BrowserObservation(output=long_output)
+    observation = BrowserObservation.from_text(text=long_output)
 
     agent_obs = observation.to_llm_content
 
@@ -89,7 +91,9 @@ def test_browser_observation_output_truncation():
 def test_browser_observation_screenshot_data_url_conversion():
     """Test that screenshot data is properly converted to data URL."""
     screenshot_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU77zgAAAABJRU5ErkJggg=="  # noqa: E501
-    observation = BrowserObservation(output="Test", screenshot_data=screenshot_data)
+    observation = BrowserObservation.from_text(
+        text="Test", screenshot_data=screenshot_data
+    )
 
     agent_obs = observation.to_llm_content
     expected_data_url = f"data:image/png;base64,{screenshot_data}"
@@ -101,10 +105,10 @@ def test_browser_observation_screenshot_data_url_conversion():
 
 def test_browser_observation_empty_screenshot_handling():
     """Test handling of empty or None screenshot data."""
-    observation = BrowserObservation(output="Test", screenshot_data="")
+    observation = BrowserObservation.from_text(text="Test", screenshot_data="")
     agent_obs = observation.to_llm_content
     assert len(agent_obs) == 1  # Only text content, no image
 
-    observation = BrowserObservation(output="Test", screenshot_data=None)
+    observation = BrowserObservation.from_text(text="Test", screenshot_data=None)
     agent_obs = observation.to_llm_content
     assert len(agent_obs) == 1  # Only text content, no image
diff --git a/tests/tools/delegation/test_delegation.py b/tests/tools/delegation/test_delegation.py
index ee1a26a465..37d808a6b9 100644
--- a/tests/tools/delegation/test_delegation.py
+++ b/tests/tools/delegation/test_delegation.py
@@ -6,7 +6,7 @@
 from pydantic import SecretStr
 
 from openhands.sdk.conversation.state import ConversationExecutionStatus
-from openhands.sdk.llm import LLM
+from openhands.sdk.llm import LLM, TextContent
 from openhands.tools.delegate import (
     DelegateAction,
     DelegateExecutor,
@@ -65,24 +65,36 @@ def test_delegate_action_creation():
 
 def test_delegate_observation_creation():
     """Test creating DelegateObservation instances."""
-    # Test spawn observation
-    spawn_observation = DelegateObservation(
+    # Test spawn observation with string output
+    spawn_observation = DelegateObservation.from_text(
+        text="spawn: Sub-agents created successfully",
         command="spawn",
-        message="Sub-agents created successfully",
     )
-    assert spawn_observation.command == "spawn"
-    assert spawn_observation.message == "Sub-agents created successfully"
-    # spawn observation doesn't have results field anymore
-
-    # Test delegate observation
-    delegate_observation = DelegateObservation(
+    assert isinstance(spawn_observation.content, list)
+    assert spawn_observation.text == "spawn: Sub-agents created successfully"
+    # Verify to_llm_content returns TextContent
+    llm_content = spawn_observation.to_llm_content
+    assert len(llm_content) == 1
+    assert isinstance(llm_content[0], TextContent)
+    assert llm_content[0].text == "spawn: Sub-agents created successfully"
+
+    # Test delegate observation with string output
+    delegate_observation = DelegateObservation.from_text(
+        text=(
+            "delegate: Tasks completed successfully\n\nResults:\n"
+            "1. Result 1\n2. Result 2"
+        ),
         command="delegate",
-        message="Tasks completed successfully\n\nResults:\n1. Result 1\n2. Result 2",
     )
-    assert delegate_observation.command == "delegate"
-    assert "Tasks completed successfully" in delegate_observation.message
-    assert "Result 1" in delegate_observation.message
-    assert "Result 2" in delegate_observation.message
+    assert isinstance(delegate_observation.content, list)
+    assert "Tasks completed successfully" in delegate_observation.text
+    assert "Result 1" in delegate_observation.text
+    assert "Result 2" in delegate_observation.text
+    # Verify to_llm_content
+    llm_content = delegate_observation.to_llm_content
+    assert len(llm_content) == 1
+    assert isinstance(llm_content[0], TextContent)
+    assert "Tasks completed successfully" in llm_content[0].text
 
 
 def test_delegate_executor_delegate():
@@ -92,7 +104,8 @@ def test_delegate_executor_delegate():
     # First spawn some agents
     spawn_action = DelegateAction(command="spawn", ids=["agent1", "agent2"])
     spawn_observation = executor(spawn_action, parent_conversation)
-    assert "Successfully spawned" in spawn_observation.message
+    assert isinstance(spawn_observation.content, list)
+    assert "Successfully spawned" in spawn_observation.text
 
     # Then delegate tasks to them
     delegate_action = DelegateAction(
@@ -101,22 +114,23 @@ def test_delegate_executor_delegate():
     )
 
     with patch.object(executor, "_delegate_tasks") as mock_delegate:
-        mock_observation = DelegateObservation(
-            command="delegate",
-            message=(
-                "Tasks completed successfully\n\nResults:\n"
+        mock_observation = DelegateObservation.from_text(
+            text=(
+                "delegate: Tasks completed successfully\n\nResults:\n"
                 "1. Agent agent1: Code analysis complete\n"
                 "2. Agent agent2: Tests written"
             ),
+            command="delegate",
         )
         mock_delegate.return_value = mock_observation
 
         observation = executor(delegate_action, parent_conversation)
 
     assert isinstance(observation, DelegateObservation)
-    assert observation.command == "delegate"
-    assert "Agent agent1: Code analysis complete" in observation.message
-    assert "Agent agent2: Tests written" in observation.message
+    assert isinstance(observation.content, list)
+    text_content = observation.text
+    assert "Agent agent1: Code analysis complete" in text_content
+    assert "Agent agent2: Tests written" in text_content
 
 
 def test_delegate_executor_missing_task():
@@ -129,10 +143,13 @@ def test_delegate_executor_missing_task():
     observation = executor(action, parent_conversation)
 
     assert isinstance(observation, DelegateObservation)
-    assert observation.command == "delegate"
+    # Error message should be in the error field
+    assert observation.is_error
+    assert observation.is_error is True
+    content_text = observation.text
     assert (
-        "task is required" in observation.message.lower()
-        or "at least one task" in observation.message.lower()
+        "task is required" in content_text.lower()
+        or "at least one task" in content_text.lower()
     )
 
 
diff --git a/tests/tools/execute_bash/test_bash_ps1_metadata.py b/tests/tools/execute_bash/test_bash_ps1_metadata.py
index df5cdc6b74..9dd94b54e3 100644
--- a/tests/tools/execute_bash/test_bash_ps1_metadata.py
+++ b/tests/tools/execute_bash/test_bash_ps1_metadata.py
@@ -1,6 +1,5 @@
 import json
 
-from openhands.sdk import TextContent
 from openhands.tools.execute_bash.constants import (
     CMD_OUTPUT_METADATA_PS1_REGEX,
     CMD_OUTPUT_PS1_BEGIN,
@@ -270,14 +269,19 @@ def test_ps1_metadata_regex_pattern():
 
 def test_cmd_output_observation_properties():
     """Test ExecuteBashObservation class properties"""
+    from openhands.sdk.tool.schema import TextContent
+
     # Test with successful command
     metadata = CmdOutputMetadata(exit_code=0, pid=123)
-    obs = ExecuteBashObservation(
-        command="ls", output="file1\nfile2", exit_code=0, metadata=metadata
+    obs = ExecuteBashObservation.from_text(
+        text="file1\nfile2",
+        command="ls",
+        exit_code=0,
+        metadata=metadata,
     )
     assert obs.command_id == 123
     assert obs.exit_code == 0
-    assert not obs.error
+    assert not obs.is_error
     assert len(obs.to_llm_content) == 1
     assert isinstance(obs.to_llm_content[0], TextContent)
     assert "exit code 0" in obs.to_llm_content[0].text
@@ -287,16 +291,21 @@ def test_cmd_output_observation_properties():
 
     # Test with failed command
     metadata = CmdOutputMetadata(exit_code=1, pid=456)
-    obs = ExecuteBashObservation(
-        command="invalid", output="error", exit_code=1, error=True, metadata=metadata
+    obs = ExecuteBashObservation.from_text(
+        text="Command failed",
+        command="invalid",
+        exit_code=1,
+        is_error=True,
+        metadata=metadata,
     )
     assert obs.command_id == 456
     assert obs.exit_code == 1
-    assert obs.error
-    assert len(obs.to_llm_content) == 1
+    assert obs.is_error
+    assert len(obs.to_llm_content) == 2
     assert isinstance(obs.to_llm_content[0], TextContent)
-    assert "exit code 1" in obs.to_llm_content[0].text
-    assert obs.error
+    assert obs.to_llm_content[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER
+    assert isinstance(obs.to_llm_content[1], TextContent)
+    assert "Command failed" in obs.to_llm_content[1].text
 
 
 def test_ps1_metadata_empty_fields():
diff --git a/tests/tools/execute_bash/test_bash_reset.py b/tests/tools/execute_bash/test_bash_reset.py
index e19e7d2286..8335bd0556 100644
--- a/tests/tools/execute_bash/test_bash_reset.py
+++ b/tests/tools/execute_bash/test_bash_reset.py
@@ -43,13 +43,13 @@ def test_bash_reset_basic():
         action = ExecuteBashAction(command="echo $TEST_VAR")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert "hello" in result.output
+        assert "hello" in result.text
 
         # Reset the terminal
         reset_action = ExecuteBashAction(command="", reset=True)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
         assert reset_result.command == "[RESET]"
 
         # Verify the variable is no longer set after reset
@@ -57,7 +57,7 @@ def test_bash_reset_basic():
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
         # The variable should be empty after reset
-        assert result.output.strip() == ""
+        assert result.text.strip() == ""
 
 
 def test_bash_reset_with_command():
@@ -78,15 +78,15 @@ def test_bash_reset_with_command():
         )
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
-        assert "hello from fresh terminal" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
+        assert "hello from fresh terminal" in reset_result.text
         assert reset_result.command == "[RESET] echo 'hello from fresh terminal'"
 
         # Verify the variable is no longer set (confirming reset worked)
         action = ExecuteBashAction(command="echo $TEST_VAR")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert result.output.strip() == ""
+        assert result.text.strip() == ""
 
 
 def test_bash_reset_working_directory():
@@ -99,7 +99,7 @@ def test_bash_reset_working_directory():
         action = ExecuteBashAction(command="pwd")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert temp_dir in result.output
+        assert temp_dir in result.text
 
         # Change directory
         action = ExecuteBashAction(command="cd /home")
@@ -110,19 +110,19 @@ def test_bash_reset_working_directory():
         action = ExecuteBashAction(command="pwd")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert "/home" in result.output
+        assert "/home" in result.text
 
         # Reset the terminal
         reset_action = ExecuteBashAction(command="", reset=True)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
 
         # Verify working directory is back to original
         action = ExecuteBashAction(command="pwd")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert temp_dir in result.output
+        assert temp_dir in result.text
 
 
 def test_bash_reset_multiple_times():
@@ -135,25 +135,25 @@ def test_bash_reset_multiple_times():
         reset_action = ExecuteBashAction(command="", reset=True)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
 
         # Execute a command after first reset
         action = ExecuteBashAction(command="echo 'after first reset'")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert "after first reset" in result.output
+        assert "after first reset" in result.text
 
         # Second reset
         reset_action = ExecuteBashAction(command="", reset=True)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
 
         # Execute a command after second reset
         action = ExecuteBashAction(command="echo 'after second reset'")
         result = tool(action)
         assert isinstance(result, ExecuteBashObservation)
-        assert "after second reset" in result.output
+        assert "after second reset" in result.text
 
 
 def test_bash_reset_with_timeout():
@@ -166,7 +166,7 @@ def test_bash_reset_with_timeout():
         reset_action = ExecuteBashAction(command="", reset=True, timeout=5.0)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
         assert reset_result.command == "[RESET]"
 
 
@@ -196,5 +196,5 @@ def test_bash_reset_only_with_empty_command():
         reset_action = ExecuteBashAction(command="", reset=True)
         reset_result = tool(reset_action)
         assert isinstance(reset_result, ExecuteBashObservation)
-        assert "Terminal session has been reset" in reset_result.output
+        assert "Terminal session has been reset" in reset_result.text
         assert reset_result.command == "[RESET]"
diff --git a/tests/tools/execute_bash/test_bash_session.py b/tests/tools/execute_bash/test_bash_session.py
index e9afd067af..ee827c9a34 100644
--- a/tests/tools/execute_bash/test_bash_session.py
+++ b/tests/tools/execute_bash/test_bash_session.py
@@ -17,7 +17,10 @@
 
 from openhands.sdk import TextContent
 from openhands.sdk.logger import get_logger
-from openhands.tools.execute_bash.definition import ExecuteBashAction
+from openhands.tools.execute_bash.definition import (
+    ExecuteBashAction,
+    ExecuteBashObservation,
+)
 from openhands.tools.execute_bash.terminal import (
     TerminalCommandStatus,
     create_terminal_session,
@@ -43,7 +46,7 @@ def test_session_initialization(terminal_type):
         session.initialize()
         obs = session.execute(ExecuteBashAction(command="pwd"))
 
-        assert temp_dir in obs.output
+        assert temp_dir in obs.text
         assert "[The command completed with exit code 0.]" in obs.metadata.suffix
         session.close()
 
@@ -66,7 +69,7 @@ def test_cwd_property(tmp_path, terminal_type):
 
     # For other implementations, just verify the command executed successfully
     obs = session.execute(ExecuteBashAction(command="pwd"))
-    assert str(random_dir) in obs.output
+    assert str(random_dir) in obs.text
 
     # Note: CWD tracking may vary between terminal implementations
     # For tmux, it should track properly. For subprocess, it may not.
@@ -84,7 +87,7 @@ def test_basic_command(terminal_type):
     # Test simple command
     obs = session.execute(ExecuteBashAction(command="echo 'hello world'"))
 
-    assert "hello world" in obs.output
+    assert "hello world" in obs.text
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
     # Note: prefix may vary between terminal implementations
     assert obs.metadata.exit_code == 0
@@ -95,16 +98,16 @@ def test_basic_command(terminal_type):
 
     # Note: Exit code handling may vary between terminal implementations
     # The important thing is that the error message is captured
-    assert "nonexistent_command: command not found" in obs.output
+    assert "nonexistent_command: command not found" in obs.text
     assert session.prev_status == TerminalCommandStatus.COMPLETED
 
     # Test multiple commands in sequence
     obs = session.execute(
         ExecuteBashAction(command='echo "first" && echo "second" && echo "third"')
     )
-    assert "first" in obs.output
-    assert "second" in obs.output
-    assert "third" in obs.output
+    assert "first" in obs.text
+    assert "second" in obs.text
+    assert "third" in obs.text
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
     # Note: prefix may vary between terminal implementations
     assert obs.metadata.exit_code == 0
@@ -125,7 +128,7 @@ def test_environment_variable_persistence(terminal_type):
 
     # Use the environment variable in a subsequent command
     obs = session.execute(ExecuteBashAction(command="echo $TEST_VAR"))
-    assert "hello world" in obs.output
+    assert "hello world" in obs.text
     assert obs.metadata.exit_code == 0
 
     session.close()
@@ -151,8 +154,8 @@ def test_environment_variable_inheritance_from_parent(terminal_type):
 
         # Check if the environment variable is available in the terminal
         obs = session.execute(ExecuteBashAction(command=f"echo ${test_var_name}"))
-        assert test_var_value in obs.output, (
-            f"Expected '{test_var_value}' in output, but got: {obs.output}"
+        assert test_var_value in obs.text, (
+            f"Expected '{test_var_value}' in output, but got: {obs.text}"
         )
         assert obs.metadata.exit_code == 0
 
@@ -176,7 +179,7 @@ def test_long_running_command_follow_by_execute():
         ExecuteBashAction(command="echo 1; sleep 3; echo 2; sleep 3; echo 3")
     )
 
-    assert "1" in obs.output  # First number should appear before timeout
+    assert "1" in obs.text  # First number should appear before timeout
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
     assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
     assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
@@ -185,7 +188,7 @@ def test_long_running_command_follow_by_execute():
     # Continue watching output
     obs = session.execute(ExecuteBashAction(command="", is_input=True))
 
-    assert "2" in obs.output
+    assert "2" in obs.text
     assert obs.metadata.prefix == "[Below is the output of the previous command.]\n"
     assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
@@ -194,7 +197,7 @@ def test_long_running_command_follow_by_execute():
     # Test command that produces no output
     obs = session.execute(ExecuteBashAction(command="sleep 15"))
 
-    assert "3" not in obs.output
+    assert "3" not in obs.text
     assert obs.metadata.prefix == "[Below is the output of the previous command.]\n"
     assert "The previous command is still running" in obs.metadata.suffix
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
@@ -205,7 +208,7 @@ def test_long_running_command_follow_by_execute():
     # Run it again, this time it should produce output and then start a new command
     obs = session.execute(ExecuteBashAction(command="sleep 15"))
 
-    assert "3" in obs.output  # Should see the final output from the previous command
+    assert "3" in obs.text  # Should see the final output from the previous command
     assert obs.metadata.exit_code == -1  # -1 indicates new command is still running
     assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
 
@@ -227,7 +230,7 @@ def test_interactive_command(terminal_type):
         )
     )
 
-    assert "Enter name:" in obs.output
+    assert "Enter name:" in obs.text
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
     assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
     assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
@@ -236,7 +239,7 @@ def test_interactive_command(terminal_type):
     # Send input
     obs = session.execute(ExecuteBashAction(command="John", is_input=True))
 
-    assert "Hello John" in obs.output
+    assert "Hello John" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
     assert obs.metadata.prefix == ""
@@ -266,7 +269,7 @@ def test_interactive_command(terminal_type):
 
     obs = session.execute(ExecuteBashAction(command="EOF", is_input=True))
 
-    assert "line 1" in obs.output and "line 2" in obs.output
+    assert "line 1" in obs.text and "line 2" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
     assert obs.metadata.prefix == ""
@@ -287,7 +290,7 @@ def test_ctrl_c(terminal_type):
         ExecuteBashAction(command="while true; do echo 'looping'; sleep 3; done"),
     )
 
-    assert "looping" in obs.output
+    assert "looping" in obs.text
     assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
     assert obs.metadata.prefix == ""
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
@@ -317,14 +320,15 @@ def test_empty_command_error(terminal_type):
     # Test empty command without previous command
     obs = session.execute(ExecuteBashAction(command=""))
 
-    assert obs.error is True
-    assert obs.output == "ERROR: No previous running command to retrieve logs from."
-    assert len(obs.to_llm_content) == 1
+    assert obs.is_error is True
+    assert obs.text == "No previous running command to retrieve logs from."
+    assert len(obs.to_llm_content) == 2
     assert isinstance(obs.to_llm_content[0], TextContent)
-    assert "There was an error during command execution." in obs.to_llm_content[0].text
+    assert obs.to_llm_content[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER
+    assert isinstance(obs.to_llm_content[1], TextContent)
     assert (
-        "ERROR: No previous running command to retrieve logs from."
-        in obs.to_llm_content[0].text
+        "No previous running command to retrieve logs from."
+        == obs.to_llm_content[1].text
     )
     assert obs.metadata.exit_code == -1
     assert obs.metadata.prefix == ""
@@ -356,22 +360,22 @@ def test_command_output_continuation(terminal_type):
     if session.prev_status == TerminalCommandStatus.COMPLETED:
         # If the command completed immediately, verify we got all the output
         logger.info("Command completed immediately", extra={"msg_type": "TEST_INFO"})
-        assert "1" in obs.output
-        assert "2" in obs.output
-        assert "3" in obs.output
-        assert "4" in obs.output
-        assert "5" in obs.output
+        assert "1" in obs.text
+        assert "2" in obs.text
+        assert "3" in obs.text
+        assert "4" in obs.text
+        assert "5" in obs.text
         assert "[The command completed with exit code 0.]" in obs.metadata.suffix
     else:
         # If the command timed out, verify we got the timeout message
         assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
-        assert "1" in obs.output
+        assert "1" in obs.text
         assert "[The command has no new output after 1 seconds." in obs.metadata.suffix
 
         # Continue getting output until we see all numbers
         numbers_seen = set()
         for i in range(1, 6):
-            if str(i) in obs.output:
+            if str(i) in obs.text:
                 numbers_seen.add(i)
 
         # We need to see numbers 2-5 and then the command completion
@@ -383,7 +387,7 @@ def test_command_output_continuation(terminal_type):
 
             # Check for numbers in the output
             for i in range(1, 6):
-                if str(i) in obs.output and i not in numbers_seen:
+                if str(i) in obs.text and i not in numbers_seen:
                     numbers_seen.add(i)
                     logger.info(
                         f"Found number {i} in output", extra={"msg_type": "TEST_INFO"}
@@ -423,8 +427,8 @@ def test_long_output(terminal_type):
         ExecuteBashAction(command='for i in {1..5000}; do echo "Line $i"; done')
     )
 
-    assert "Line 1" in obs.output
-    assert "Line 5000" in obs.output
+    assert "Line 1" in obs.text
+    assert "Line 5000" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.prefix == ""
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
@@ -443,8 +447,8 @@ def test_long_output_exceed_history_limit(terminal_type):
     )
 
     assert "Previous command outputs are truncated" in obs.metadata.prefix
-    assert "Line 40000" in obs.output
-    assert "Line 50000" in obs.output
+    assert "Line 40000" in obs.text
+    assert "Line 50000" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
 
@@ -464,7 +468,7 @@ def test_multiline_command():
         )
     )
 
-    assert "inside if" in obs.output
+    assert "inside if" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.prefix == ""
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
@@ -488,21 +492,21 @@ def test_python_interactive_input(terminal_type):
     # Start Python with the interactive script
     obs = session.execute(ExecuteBashAction(command=f'python3 -c "{python_script}"'))
 
-    assert "Enter your name:" in obs.output
+    assert "Enter your name:" in obs.text
     assert obs.metadata.exit_code == -1  # -1 indicates command is still running
     assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
 
     # Send first input (name)
     obs = session.execute(ExecuteBashAction(command="Alice", is_input=True))
 
-    assert "Enter your age:" in obs.output
+    assert "Enter your age:" in obs.text
     assert obs.metadata.exit_code == -1
     assert session.prev_status == TerminalCommandStatus.NO_CHANGE_TIMEOUT
 
     # Send second input (age)
     obs = session.execute(ExecuteBashAction(command="25", is_input=True))
 
-    assert "Hello Alice, you are 25 years old" in obs.output
+    assert "Hello Alice, you are 25 years old" in obs.text
     assert obs.metadata.exit_code == 0
     assert obs.metadata.suffix == "\n[The command completed with exit code 0.]"
     assert session.prev_status == TerminalCommandStatus.COMPLETED
@@ -515,7 +519,8 @@ def _run_bash_action(session, command: str, **kwargs):
     action = ExecuteBashAction(command=command, **kwargs)
     obs = session.execute(action)
     logger.info(f"Command: {command}")
-    logger.info(f"Output: {obs.output}")
+    output_text = obs.text if obs.content else ""
+    logger.info(f"Output: {output_text}")
     logger.info(f"Exit code: {obs.metadata.exit_code}")
     return obs
 
@@ -535,12 +540,12 @@ def test_bash_server(terminal_type):
                 session, "python -u -m http.server 8081", timeout=1.0
             )
             assert obs.metadata.exit_code == -1
-            assert "Serving HTTP on" in obs.output
+            assert "Serving HTTP on" in obs.text
 
             # Send Ctrl+C to interrupt
             obs = _run_bash_action(session, "C-c", is_input=True)
             assert "CTRL+C was sent" in obs.metadata.suffix
-            assert "Keyboard interrupt received, exiting." in obs.output
+            assert "Keyboard interrupt received, exiting." in obs.text
 
             # Verify we can run commands after interrupt
             obs = _run_bash_action(session, "ls")
@@ -551,7 +556,7 @@ def test_bash_server(terminal_type):
                 session, "python -u -m http.server 8081", timeout=1.0
             )
             assert obs.metadata.exit_code == -1
-            assert "Serving HTTP on" in obs.output
+            assert "Serving HTTP on" in obs.text
 
         finally:
             session.close()
@@ -578,7 +583,7 @@ def test_bash_background_server(terminal_type):
             obs = _run_bash_action(session, f"curl http://localhost:{server_port}")
             assert obs.metadata.exit_code == 0
             # Check for content typical of python http.server directory listing
-            assert "Directory listing for" in obs.output
+            assert "Directory listing for" in obs.text
 
             # Kill the server
             obs = _run_bash_action(session, 'pkill -f "http.server"')
@@ -601,17 +606,17 @@ def test_multiline_commands(terminal_type):
             # single multiline command
             obs = _run_bash_action(session, 'echo \\\n -e "foo"')
             assert obs.metadata.exit_code == 0
-            assert "foo" in obs.output
+            assert "foo" in obs.text
 
             # test multiline echo
             obs = _run_bash_action(session, 'echo -e "hello\nworld"')
             assert obs.metadata.exit_code == 0
-            assert "hello\nworld" in obs.output
+            assert "hello\nworld" in obs.text
 
             # test whitespace
             obs = _run_bash_action(session, 'echo -e "a\\n\\n\\nz"')
             assert obs.metadata.exit_code == 0
-            assert "\n\n\n" in obs.output
+            assert "\n\n\n" in obs.text
         finally:
             session.close()
 
@@ -634,7 +639,7 @@ def test_complex_commands(terminal_type):
         try:
             obs = _run_bash_action(session, cmd)
             assert obs.metadata.exit_code == 0
-            assert "Got 3 heads in a row after 3 flips!" in obs.output
+            assert "Got 3 heads in a row after 3 flips!" in obs.text
         finally:
             session.close()
 
@@ -651,8 +656,8 @@ def test_no_ps2_in_output(terminal_type):
             obs = _run_bash_action(session, 'echo -e "hello\nworld"')
             assert obs.metadata.exit_code == 0
 
-            assert "hello\nworld" in obs.output
-            assert ">" not in obs.output
+            assert "hello\nworld" in obs.text
+            assert ">" not in obs.text
         finally:
             session.close()
 
@@ -682,11 +687,11 @@ def test_multiline_command_loop(terminal_type):
         try:
             obs = _run_bash_action(session, init_cmd)
             assert obs.metadata.exit_code == 0
-            assert "created files" in obs.output
+            assert "created files" in obs.text
 
             obs = _run_bash_action(session, follow_up_cmd)
             assert obs.metadata.exit_code == 0
-            assert "success" in obs.output
+            assert "success" in obs.text
         finally:
             session.close()
 
@@ -715,15 +720,15 @@ def test_multiple_multiline_commands(terminal_type):
 
             # First test that running multiple commands at once fails
             obs = _run_bash_action(session, joined_cmds)
-            assert obs.error is True
-            assert "Cannot execute multiple commands at once" in obs.output
+            assert obs.is_error is True
+            assert "Cannot execute multiple commands at once" in obs.text
 
             # Now run each command individually and verify they work
             results = []
             for cmd in cmds:
                 obs = _run_bash_action(session, cmd)
                 assert obs.metadata.exit_code == 0
-                results.append(obs.output)
+                results.append(obs.text)
 
             # Verify all expected outputs are present
             assert "total 0" in results[0]  # ls -l
@@ -756,21 +761,21 @@ def test_cmd_run(terminal_type):
 
             obs = _run_bash_action(session, "ls -l")
             assert obs.metadata.exit_code == 0
-            assert "total 0" in obs.output
+            assert "total 0" in obs.text
 
             obs = _run_bash_action(session, "mkdir test")
             assert obs.metadata.exit_code == 0
 
             obs = _run_bash_action(session, "ls -l")
             assert obs.metadata.exit_code == 0
-            assert "test" in obs.output
+            assert "test" in obs.text
 
             obs = _run_bash_action(session, "touch test/foo.txt")
             assert obs.metadata.exit_code == 0
 
             obs = _run_bash_action(session, "ls -l test")
             assert obs.metadata.exit_code == 0
-            assert "foo.txt" in obs.output
+            assert "foo.txt" in obs.text
 
             # clean up
             _run_bash_action(session, "rm -rf test")
@@ -792,7 +797,7 @@ def test_run_as_user_correct_home_dir(terminal_type):
             obs = _run_bash_action(session, "cd ~ && pwd")
             assert obs.metadata.exit_code == 0
             home = os.getenv("HOME")
-            assert home and home in obs.output
+            assert home and home in obs.text
         finally:
             session.close()
 
@@ -807,8 +812,8 @@ def test_multi_cmd_run_in_single_line(terminal_type):
             # Original Linux version using &&
             obs = _run_bash_action(session, "pwd && ls -l")
             assert obs.metadata.exit_code == 0
-            assert temp_dir in obs.output
-            assert "total 0" in obs.output
+            assert temp_dir in obs.text
+            assert "total 0" in obs.text
         finally:
             session.close()
 
@@ -831,7 +836,7 @@ def test_stateful_cmd(terminal_type):
 
             obs = _run_bash_action(session, "pwd")
             assert obs.metadata.exit_code == 0
-            assert f"{temp_dir}/test" in obs.output.strip()
+            assert f"{temp_dir}/test" in obs.text.strip()
         finally:
             session.close()
 
@@ -862,7 +867,7 @@ def test_python_version(terminal_type):
         try:
             obs = _run_bash_action(session, "python --version")
             assert obs.metadata.exit_code == 0
-            assert "Python 3" in obs.output
+            assert "Python 3" in obs.text
         finally:
             session.close()
 
@@ -882,7 +887,7 @@ def test_pwd_property(terminal_type):
 
             obs = _run_bash_action(session, "cd random_dir && pwd")
             assert obs.metadata.exit_code == 0
-            assert "random_dir" in obs.output
+            assert "random_dir" in obs.text
         finally:
             session.close()
 
@@ -911,10 +916,10 @@ def test_long_output_from_nested_directories(terminal_type):
             assert obs.metadata.exit_code == 0
 
             # Verify output contains expected files
-            assert "folder_1" in obs.output
-            assert "file_1.txt" in obs.output
-            assert "folder_100" in obs.output
-            assert "file_100.txt" in obs.output
+            assert "folder_1" in obs.text
+            assert "file_1.txt" in obs.text
+            assert "folder_100" in obs.text
+            assert "file_100.txt" in obs.text
         finally:
             session.close()
 
@@ -948,7 +953,7 @@ def test_command_backslash(terminal_type):
             )
             obs = _run_bash_action(session, cmd)
             assert obs.metadata.exit_code == 0
-            assert "/tmp/test_dir/file_1.txt" in obs.output
+            assert "/tmp/test_dir/file_1.txt" in obs.text
         finally:
             session.close()
 
@@ -972,7 +977,7 @@ def test_bash_remove_prefix(terminal_type):
             # Check git remote - same for both platforms
             obs = _run_bash_action(session, "git remote -v")
             assert obs.metadata.exit_code == 0
-            assert "https://github.com/OpenHands/OpenHands" in obs.output
-            assert "git remote -v" not in obs.output
+            assert "https://github.com/OpenHands/OpenHands" in obs.text
+            assert "git remote -v" not in obs.text
         finally:
             session.close()
diff --git a/tests/tools/execute_bash/test_bash_tool.py b/tests/tools/execute_bash/test_bash_tool.py
index 61aed7fee2..fd9e088693 100644
--- a/tests/tools/execute_bash/test_bash_tool.py
+++ b/tests/tools/execute_bash/test_bash_tool.py
@@ -69,7 +69,7 @@ def test_bash_tool_execution():
         # Check the result
         assert result is not None
         assert isinstance(result, ExecuteBashObservation)
-        assert "Hello, World!" in result.output
+        assert "Hello, World!" in result.text
 
 
 def test_bash_tool_working_directory():
@@ -87,7 +87,7 @@ def test_bash_tool_working_directory():
 
         # Check that the working directory is correct
         assert isinstance(result, ExecuteBashObservation)
-        assert temp_dir in result.output
+        assert temp_dir in result.text
 
 
 def test_bash_tool_to_openai_tool():
diff --git a/tests/tools/execute_bash/test_bash_tool_auto_detection.py b/tests/tools/execute_bash/test_bash_tool_auto_detection.py
index 5dcb10427b..dbc1c36349 100644
--- a/tests/tools/execute_bash/test_bash_tool_auto_detection.py
+++ b/tests/tools/execute_bash/test_bash_tool_auto_detection.py
@@ -52,7 +52,7 @@ def test_default_auto_detection():
         # Test that it works
         action = ExecuteBashAction(command="echo 'Auto-detection test'")
         obs = executor(action)
-        assert "Auto-detection test" in obs.output
+        assert "Auto-detection test" in obs.text
 
 
 def test_forced_terminal_types():
@@ -138,7 +138,7 @@ def test_backward_compatibility():
         assert tool.executor is not None
         action = ExecuteBashAction(command="echo 'Backward compatibility test'")
         obs = tool.executor(action)
-        assert "Backward compatibility test" in obs.output
+        assert "Backward compatibility test" in obs.text
         assert obs.metadata.exit_code == 0
 
 
diff --git a/tests/tools/execute_bash/test_observation_truncation.py b/tests/tools/execute_bash/test_observation_truncation.py
index 1be00839fc..9e61976310 100644
--- a/tests/tools/execute_bash/test_observation_truncation.py
+++ b/tests/tools/execute_bash/test_observation_truncation.py
@@ -18,9 +18,9 @@ def test_execute_bash_observation_truncation_under_limit():
     )
 
     observation = ExecuteBashObservation(
-        output="Short output",
+        command="echo test",
+        content=[TextContent(text="Short output")],
         metadata=metadata,
-        error=False,
     )
 
     result = observation.to_llm_content
@@ -52,9 +52,9 @@ def test_execute_bash_observation_truncation_over_limit():
     long_output = "A" * (MAX_CMD_OUTPUT_SIZE + 1000)
 
     observation = ExecuteBashObservation(
-        output=long_output,
+        command="echo test",
+        content=[TextContent(text=long_output)],
         metadata=metadata,
-        error=False,
     )
 
     result = observation.to_llm_content
@@ -89,18 +89,21 @@ def test_execute_bash_observation_truncation_with_error():
     long_output = "B" * (MAX_CMD_OUTPUT_SIZE + 500)
 
     observation = ExecuteBashObservation(
-        output=long_output,
+        command="false",
+        content=[TextContent(text=long_output)],
         metadata=metadata,
-        error=True,
+        is_error=True,
     )
 
     result = observation.to_llm_content
-    assert len(result) == 1
+    assert len(result) == 2
     assert isinstance(result[0], TextContent)
-    result = result[0].text
+    assert result[0].text == ExecuteBashObservation.ERROR_MESSAGE_HEADER
 
-    # The result should be truncated and have error prefix
-    assert result.startswith("[There was an error during command execution.]")
+    assert isinstance(result[1], TextContent)
+    result = result[1].text
+
+    # The result should be truncated
     assert len(result) < len(long_output) + 300  # Account for metadata and error prefix
     # With head-and-tail truncation, should end with original content + metadata
     expected_end = (
@@ -132,9 +135,9 @@ def test_execute_bash_observation_truncation_exact_limit():
     exact_output = "C" * exact_output_size
 
     observation = ExecuteBashObservation(
-        output=exact_output,
+        command="echo test",
+        content=[TextContent(text=exact_output)],
         metadata=metadata,
-        error=False,
     )
 
     result = observation.to_llm_content
@@ -162,9 +165,9 @@ def test_execute_bash_observation_truncation_with_prefix_suffix():
     long_output = "D" * (MAX_CMD_OUTPUT_SIZE + 200)
 
     observation = ExecuteBashObservation(
-        output=long_output,
+        command="echo test",
+        content=[TextContent(text=long_output)],
         metadata=metadata,
-        error=False,
     )
 
     result = observation.to_llm_content
diff --git a/tests/tools/execute_bash/test_secrets_masking.py b/tests/tools/execute_bash/test_secrets_masking.py
index 2fa56d2777..16d5917022 100644
--- a/tests/tools/execute_bash/test_secrets_masking.py
+++ b/tests/tools/execute_bash/test_secrets_masking.py
@@ -8,6 +8,7 @@
 from openhands.sdk.agent import Agent
 from openhands.sdk.conversation import Conversation
 from openhands.sdk.llm import LLM
+from openhands.sdk.tool.schema import TextContent
 from openhands.tools.execute_bash import ExecuteBashAction, ExecuteBashObservation
 from openhands.tools.execute_bash.impl import BashExecutor
 
@@ -24,8 +25,8 @@ def test_bash_executor_without_conversation():
             result = executor(action)
 
             # Check that the output is not masked (no conversation provided)
-            assert "secret-value-123" in result.output
-            assert "<secret-hidden>" not in result.output
+            assert "secret-value-123" in result.text
+            assert "<secret-hidden>" not in result.text
 
         finally:
             executor.close()
@@ -62,7 +63,9 @@ def test_bash_executor_with_conversation_secrets():
             mock_observation = ExecuteBashObservation(
                 command="echo 'Token: $SECRET_TOKEN, Key: $API_KEY'",
                 exit_code=0,
-                output="Token: secret-value-123, Key: another-secret-456",
+                content=[
+                    TextContent(text="Token: secret-value-123, Key: another-secret-456")
+                ],
             )
             mock_session.execute.return_value = mock_observation
             mock_session._closed = False
@@ -78,10 +81,10 @@ def test_bash_executor_with_conversation_secrets():
             assert mock_session.execute.called
 
             # Check that both secrets were masked in the output
-            assert "secret-value-123" not in result.output
-            assert "another-secret-456" not in result.output
+            assert "secret-value-123" not in result.text
+            assert "another-secret-456" not in result.text
             # SecretsManager uses <secret-hidden> as the mask
-            assert "<secret-hidden>" in result.output
+            assert "<secret-hidden>" in result.text
 
         finally:
             executor.close()
diff --git a/tests/tools/file_editor/conftest.py b/tests/tools/file_editor/conftest.py
index d1f52f659e..2588541733 100644
--- a/tests/tools/file_editor/conftest.py
+++ b/tests/tools/file_editor/conftest.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from openhands.sdk.tool.schema import TextContent
 from openhands.tools.file_editor.definition import (
     FileEditorObservation,
 )
@@ -56,7 +57,7 @@ def assert_successful_result(
 ):
     """Assert that a result is successful (no error)."""
     assert isinstance(result, FileEditorObservation)
-    assert result.error is None
+    assert not result.is_error
     if expected_path:
         assert result.path == expected_path
 
@@ -66,9 +67,14 @@ def assert_error_result(
 ):
     """Assert that a result contains an error."""
     assert isinstance(result, FileEditorObservation)
-    assert result.error is not None
+    assert result.is_error
     if expected_error_substring:
-        assert expected_error_substring in result.error
+        content_text = (
+            result.content
+            if isinstance(result.content, str)
+            else "".join([c.text for c in result.content if isinstance(c, TextContent)])
+        )
+        assert expected_error_substring in content_text
 
 
 def create_test_file(path: Path, content: str):
diff --git a/tests/tools/file_editor/test_basic_operations.py b/tests/tools/file_editor/test_basic_operations.py
index 16624d890c..c26f6c1fcf 100644
--- a/tests/tools/file_editor/test_basic_operations.py
+++ b/tests/tools/file_editor/test_basic_operations.py
@@ -62,11 +62,11 @@ def test_file_editor_happy_path(temp_file):
     # Validate the result
     assert_successful_result(result, str(temp_file))
     assert (
-        result.output is not None
-        and "The file" in result.output
-        and "has been edited" in result.output
+        result.text is not None
+        and "The file" in result.text
+        and "has been edited" in result.text
     )
-    assert result.output is not None and "This is a sample file." in result.output
+    assert result.text is not None and "This is a sample file." in result.text
     assert result.path == str(temp_file)
     assert result.prev_exist is True
     assert (
@@ -106,15 +106,15 @@ def test_file_editor_view_operation(temp_file):
     # Validate the result
     assert_successful_result(result, str(temp_file))
     assert (
-        result.output is not None
-        and "Here's the result of running `cat -n`" in result.output
+        result.text is not None
+        and "Here's the result of running `cat -n`" in result.text
     )
     assert (
-        result.output is not None
-        and "This is a file with XML tags parsing logic..." in result.output
+        result.text is not None
+        and "This is a file with XML tags parsing logic..." in result.text
     )
-    assert result.output is not None and "match = re.search(" in result.output
-    assert result.output is not None and "...More text here." in result.output
+    assert result.text is not None and "match = re.search(" in result.text
+    assert result.text is not None and "...More text here." in result.text
 
 
 def test_successful_operations(temp_file):
@@ -131,10 +131,10 @@ def test_successful_operations(temp_file):
     )
     assert_successful_result(result)
     assert (
-        result.output is not None
-        and "Here's the result of running `cat -n`" in result.output
+        result.text is not None
+        and "Here's the result of running `cat -n`" in result.text
     )
-    assert result.output is not None and "line 1" in result.output
+    assert result.text is not None and "line 1" in result.text
 
     # Test str_replace
     result = file_editor(
@@ -144,8 +144,8 @@ def test_successful_operations(temp_file):
         new_str="replaced line",
     )
     assert_successful_result(result)
-    assert result.output is not None and "has been edited" in result.output
-    assert result.output is not None and "replaced line" in result.output
+    assert result.text is not None and "has been edited" in result.text
+    assert result.text is not None and "replaced line" in result.text
 
     # Test insert
     result = file_editor(
@@ -155,8 +155,8 @@ def test_successful_operations(temp_file):
         new_str="inserted line",
     )
     assert_successful_result(result)
-    assert result.output is not None and "has been edited" in result.output
-    assert result.output is not None and "inserted line" in result.output
+    assert result.text is not None and "has been edited" in result.text
+    assert result.text is not None and "inserted line" in result.text
 
     # Test undo
     result = file_editor(
@@ -164,7 +164,7 @@ def test_successful_operations(temp_file):
         path=str(temp_file),
     )
     assert_successful_result(result)
-    assert result.output is not None and "undone successfully" in result.output
+    assert result.text is not None and "undone successfully" in result.text
 
 
 def test_tab_expansion(temp_file):
@@ -181,8 +181,8 @@ def test_tab_expansion(temp_file):
     )
     assert_successful_result(result)
     # Tabs should be preserved in output
-    assert result.output is not None and "\tindented" in result.output
-    assert result.output is not None and "line\twith\ttabs" in result.output
+    assert result.text is not None and "\tindented" in result.text
+    assert result.text is not None and "line\twith\ttabs" in result.text
 
     # Test str_replace with tabs in old_str
     result = file_editor(
@@ -192,7 +192,7 @@ def test_tab_expansion(temp_file):
         new_str="replaced line",
     )
     assert_successful_result(result)
-    assert result.output is not None and "replaced line" in result.output
+    assert result.text is not None and "replaced line" in result.text
 
     # Test str_replace with tabs in new_str
     result = file_editor(
@@ -202,7 +202,7 @@ def test_tab_expansion(temp_file):
         new_str="new\tline\twith\ttabs",
     )
     assert_successful_result(result)
-    assert result.output is not None and "new\tline\twith\ttabs" in result.output
+    assert result.text is not None and "new\tline\twith\ttabs" in result.text
 
     # Test insert with tabs
     result = file_editor(
@@ -212,7 +212,7 @@ def test_tab_expansion(temp_file):
         new_str="\tindented\tline",
     )
     assert_successful_result(result)
-    assert result.output is not None and "\tindented\tline" in result.output
+    assert result.text is not None and "\tindented\tline" in result.text
 
 
 def test_create_operation(temp_file):
@@ -229,7 +229,7 @@ def test_create_operation(temp_file):
     )
 
     assert_successful_result(result, str(temp_file))
-    assert result.output is not None and "created successfully" in result.output
+    assert result.text is not None and "created successfully" in result.text
     assert result.prev_exist is False
     assert result.new_content == content
 
@@ -258,29 +258,29 @@ def test_view_operation_truncation(temp_file):
     )
 
     assert_successful_result(result)
-    assert result.output is not None
+    assert result.text is not None
 
     # Check that truncation notice is present
-    assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.output
+    assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.text
 
     # The content should be truncated before line numbers are added
     # So the final output will be longer than MAX_RESPONSE_LEN_CHAR due to formatting
     # but the original content was truncated
-    assert "Here's the result of running `cat -n`" in result.output
+    assert "Here's the result of running `cat -n`" in result.text
 
     # With head-and-tail truncation, should contain both start and end content
     # The line numbers will show as "     1\tA..." at start and end with "A"
-    assert "\tA" in result.output  # Should have A's with tab formatting
+    assert "\tA" in result.text  # Should have A's with tab formatting
 
 
 def test_view_file(editor):
     editor, test_file = editor
     result = editor(command="view", path=str(test_file))
     assert isinstance(result, FileEditorObservation)
-    assert f"Here's the result of running `cat -n` on {test_file}:" in result.output
-    assert "1\tThis is a test file." in result.output
-    assert "2\tThis file is for testing purposes." in result.output
-    assert "3\t" not in result.output  # No extra line
+    assert f"Here's the result of running `cat -n` on {test_file}:" in result.text
+    assert "1\tThis is a test file." in result.text
+    assert "2\tThis file is for testing purposes." in result.text
+    assert "3\t" not in result.text  # No extra line
 
 
 def test_view_directory(editor):
@@ -288,7 +288,7 @@ def test_view_directory(editor):
     parent_dir = test_file.parent
     result = editor(command="view", path=str(parent_dir))
     assert (
-        result.output
+        result.text
         == f"""Here's the files and directories up to 2 levels deep in {parent_dir}, excluding hidden items:
 {parent_dir}/
 {parent_dir}/test.txt"""  # noqa: E501
@@ -315,11 +315,11 @@ def test_view_with_a_specific_range(editor):
 
     # View file in range 50-100
     result = editor(command="view", path=str(test_file), view_range=[50, 100])
-    assert f"Here's the result of running `cat -n` on {test_file}:" in result.output
-    assert "    49\tLine 49" not in result.output
-    assert "    50\tLine 50" in result.output
-    assert "   100\tLine 100" in result.output
-    assert "101" not in result.output
+    assert f"Here's the result of running `cat -n` on {test_file}:" in result.text
+    assert "    49\tLine 49" not in result.text
+    assert "    50\tLine 50" in result.text
+    assert "   100\tLine 100" in result.text
+    assert "101" not in result.text
 
 
 def test_create_file(editor):
@@ -328,7 +328,7 @@ def test_create_file(editor):
     result = editor(command="create", path=str(new_file), file_text="New file content")
     assert new_file.exists()
     assert new_file.read_text() == "New file content"
-    assert "File created successfully" in result.output
+    assert "File created successfully" in result.text
 
 
 def test_create_with_empty_string(editor):
@@ -337,12 +337,12 @@ def test_create_with_empty_string(editor):
     result = editor(command="create", path=str(new_file), file_text="")
     assert new_file.exists()
     assert new_file.read_text() == ""
-    assert "File created successfully" in result.output
+    assert "File created successfully" in result.text
 
     # Test the view command showing an empty line
     result = editor(command="view", path=str(new_file))
-    assert f"Here's the result of running `cat -n` on {new_file}:" in result.output
-    assert "1\t" in result.output  # Check for empty line
+    assert f"Here's the result of running `cat -n` on {new_file}:" in result.text
+    assert "1\t" in result.text  # Check for empty line
 
 
 def test_create_with_none_file_text(editor):
@@ -365,7 +365,7 @@ def test_str_replace_no_linting(editor):
 
     # Test str_replace command
     assert (
-        result.output
+        result.text
         == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}:
      1\tThis is a sample file.
      2\tThis file is for testing purposes.
@@ -388,7 +388,7 @@ def test_str_replace_multi_line_no_linting(editor):
 
     # Test str_replace command
     assert (
-        result.output
+        result.text
         == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}:
      1\tThis is a sample file.
      2\tThis file is for testing purposes.
@@ -407,7 +407,7 @@ def test_str_replace_multi_line_with_tabs_no_linting(editor_python_file_with_tab
     assert isinstance(result, FileEditorObservation)
 
     assert (
-        result.output
+        result.text
         == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}:
      1\tdef test():
      2\t\tprint("Hello, Universe!")
@@ -510,7 +510,7 @@ def test_insert_no_linting(editor):
     assert isinstance(result, FileEditorObservation)
     assert "Inserted line" in test_file.read_text()
     assert (
-        result.output
+        result.text
         == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file:
      1\tThis is a test file.
      2\tInserted line
@@ -559,7 +559,7 @@ def test_insert_chinese_text_into_english_file(editor):
     assert isinstance(result, FileEditorObservation)
     assert "中文文本" in test_file.read_text()
     assert (
-        result.output
+        result.text
         == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of the edited file:
      1\t中文文本
      2\tThis is a test file.
@@ -592,7 +592,7 @@ def test_undo_edit(editor):
     # Undo the edit
     result = editor(command="undo_edit", path=str(test_file))
     assert isinstance(result, FileEditorObservation)
-    assert "Last edit to" in result.output
+    assert "Last edit to" in result.text
     assert "test file" in test_file.read_text()  # Original content restored
 
 
@@ -615,13 +615,13 @@ def test_multiple_undo_edits(editor):
     # Undo the last edit
     result = editor(command="undo_edit", path=str(test_file))
     assert isinstance(result, FileEditorObservation)
-    assert "Last edit to" in result.output
+    assert "Last edit to" in result.text
     assert "sample file v1" in test_file.read_text()  # Previous content restored
 
     # Undo the first edit
     result = editor(command="undo_edit", path=str(test_file))
     assert isinstance(result, FileEditorObservation)
-    assert "Last edit to" in result.output
+    assert "Last edit to" in result.text
     assert "test file" in test_file.read_text()  # Original content restored
 
 
@@ -697,16 +697,16 @@ def test_view_directory_with_hidden_files(tmp_path):
 
     # Verify output
     assert isinstance(result, FileEditorObservation)
-    assert str(test_dir) in result.output
-    assert "visible.txt" in result.output  # Visible file is shown
-    assert "visible_dir" in result.output  # Visible directory is shown
-    assert ".hidden1" not in result.output  # Hidden files not shown
-    assert ".hidden2" not in result.output
-    assert ".hidden_dir" not in result.output
+    assert str(test_dir) in result.text
+    assert "visible.txt" in result.text  # Visible file is shown
+    assert "visible_dir" in result.text  # Visible directory is shown
+    assert ".hidden1" not in result.text  # Hidden files not shown
+    assert ".hidden2" not in result.text
+    assert ".hidden_dir" not in result.text
     assert (
-        "3 hidden files/directories in this directory are excluded" in result.output
+        "3 hidden files/directories in this directory are excluded" in result.text
     )  # Shows count of hidden items in current dir only
-    assert "ls -la" in result.output  # Shows command to view hidden files
+    assert "ls -la" in result.text  # Shows command to view hidden files
 
 
 def test_view_symlinked_directory(tmp_path):
@@ -732,11 +732,11 @@ def test_view_symlinked_directory(tmp_path):
 
     # Verify that all files are listed through the symlink
     assert isinstance(result, FileEditorObservation)
-    assert str(symlink_dir) in result.output
-    assert "file1.txt" in result.output
-    assert "file2.txt" in result.output
-    assert "subdir" in result.output
-    assert "file3.txt" in result.output
+    assert str(symlink_dir) in result.text
+    assert "file1.txt" in result.text
+    assert "file2.txt" in result.text
+    assert "subdir" in result.text
+    assert "file3.txt" in result.text
 
 
 def test_view_large_directory_with_truncation(editor, tmp_path):
@@ -749,7 +749,7 @@ def test_view_large_directory_with_truncation(editor, tmp_path):
 
     result = editor(command="view", path=str(large_dir))
     assert isinstance(result, FileEditorObservation)
-    assert DIRECTORY_CONTENT_TRUNCATED_NOTICE in result.output
+    assert DIRECTORY_CONTENT_TRUNCATED_NOTICE in result.text
 
 
 def test_view_directory_on_hidden_path(tmp_path):
@@ -791,22 +791,22 @@ def test_view_directory_on_hidden_path(tmp_path):
     # Verify output
     assert isinstance(result, FileEditorObservation)
     # Depth 1: Visible files/dirs shown, hidden files/dirs not shown
-    assert "visible1.txt" in result.output
-    assert "visible_dir" in result.output
-    assert ".hidden1" not in result.output
-    assert ".hidden_dir" not in result.output
+    assert "visible1.txt" in result.text
+    assert "visible_dir" in result.text
+    assert ".hidden1" not in result.text
+    assert ".hidden_dir" not in result.text
 
     # Depth 2: Files in visible_dir shown
-    assert "visible2.txt" in result.output
-    assert ".hidden2" not in result.output
+    assert "visible2.txt" in result.text
+    assert ".hidden2" not in result.text
 
     # Depth 2: Files in hidden_dir not shown
-    assert "visible3.txt" not in result.output
-    assert ".hidden3" not in result.output
+    assert "visible3.txt" not in result.text
+    assert ".hidden3" not in result.text
 
     # Hidden file count only includes depth 1
     assert (
-        "2 hidden files/directories in this directory are excluded" in result.output
+        "2 hidden files/directories in this directory are excluded" in result.text
     )  # Only .hidden1 and .hidden_dir at depth 1
 
 
@@ -819,7 +819,7 @@ def test_view_large_file_with_truncation(editor, tmp_path):
 
     result = editor(command="view", path=str(large_file))
     assert isinstance(result, FileEditorObservation)
-    assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.output
+    assert TEXT_FILE_CONTENT_TRUNCATED_NOTICE in result.text
 
 
 def test_validate_path_suggests_absolute_path(editor, tmp_path):
@@ -868,8 +868,8 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor):
 
     # View file
     result = editor(command="view", path=str(test_file))
-    assert "     1\tLine 1" in result.output
-    assert "   500\tLine 500" in result.output
+    assert "     1\tLine 1" in result.text
+    assert "   500\tLine 500" in result.text
 
     # Replace line 500's content with '500 new'
     result = editor(
@@ -878,14 +878,14 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor):
         old_str="Line 500",
         new_str="500 new",
     )
-    assert "   500\t500 new" in result.output
+    assert "   500\t500 new" in result.text
 
     # Delete the line '500 new'
     result = editor(
         command="str_replace", path=str(test_file), old_str="500 new\n", new_str=""
     )
-    assert "   499\tLine 499" in result.output
-    assert "   500\tLine 501" in result.output
+    assert "   499\tLine 499" in result.text
+    assert "   500\tLine 501" in result.text
 
     # Insert content at line 500
     result = editor(
@@ -894,4 +894,4 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor):
         insert_line=499,
         new_str="Inserted line at 500",
     )
-    assert "   500\tInserted line at 500" in result.output
+    assert "   500\tInserted line at 500" in result.text
diff --git a/tests/tools/file_editor/test_error_handling.py b/tests/tools/file_editor/test_error_handling.py
index 02ced3bb61..f705a2a09d 100644
--- a/tests/tools/file_editor/test_error_handling.py
+++ b/tests/tools/file_editor/test_error_handling.py
@@ -12,7 +12,7 @@ def test_validation_error_formatting():
         path="/nonexistent/file.txt",
     )
     assert_error_result(result)
-    assert result.error is not None and "does not exist" in result.error
+    assert result.is_error and "does not exist" in result.text
 
     # Test directory validation for non-view commands
     result = file_editor(
@@ -22,10 +22,7 @@ def test_validation_error_formatting():
         new_str="new",
     )
     assert_error_result(result)
-    assert (
-        result.error is not None
-        and "directory and only the `view` command" in result.error
-    )
+    assert result.is_error and "directory and only the `view` command" in result.text
 
 
 def test_str_replace_error_handling(temp_file):
@@ -43,7 +40,7 @@ def test_str_replace_error_handling(temp_file):
         new_str="something",
     )
     assert_error_result(result)
-    assert result.error is not None and "did not appear verbatim" in result.error
+    assert result.is_error and "did not appear verbatim" in result.text
 
     # Test multiple occurrences
     with open(temp_file, "w") as f:
@@ -56,8 +53,8 @@ def test_str_replace_error_handling(temp_file):
         new_str="new_line",
     )
     assert_error_result(result)
-    assert result.error is not None and "Multiple occurrences" in result.error
-    assert result.error is not None and "lines [1, 2]" in result.error
+    assert result.is_error and "Multiple occurrences" in result.text
+    assert result.is_error and "lines [1, 2]" in result.text
 
 
 def test_view_range_validation(temp_file):
@@ -74,9 +71,7 @@ def test_view_range_validation(temp_file):
         view_range=[1],  # Should be [start, end]
     )
     assert_error_result(result)
-    assert (
-        result.error is not None and "should be a list of two integers" in result.error
-    )
+    assert result.is_error and "should be a list of two integers" in result.text
 
     # Test out of bounds range: should clamp to file end and show a warning
     result = file_editor(
@@ -85,10 +80,10 @@ def test_view_range_validation(temp_file):
         view_range=[1, 10],  # File only has 3 lines
     )
     # This should succeed but show a warning
-    assert result.error is None
+    assert not result.is_error
     assert (
         "NOTE: We only show up to 3 since there're only 3 lines in this file."
-        in result.output
+        in result.text
     )
 
     # Test invalid range order
@@ -98,10 +93,7 @@ def test_view_range_validation(temp_file):
         view_range=[3, 1],  # End before start
     )
     assert_error_result(result)
-    assert (
-        result.error is not None
-        and "should be greater than or equal to" in result.error
-    )
+    assert result.is_error and "should be greater than or equal to" in result.text
 
 
 def test_insert_validation(temp_file):
@@ -119,7 +111,7 @@ def test_insert_validation(temp_file):
         new_str="new line",
     )
     assert_error_result(result)
-    assert result.error is not None and "should be within the range" in result.error
+    assert result.is_error and "should be within the range" in result.text
 
     # Test insert beyond file length
     result = file_editor(
@@ -129,7 +121,7 @@ def test_insert_validation(temp_file):
         new_str="new line",
     )
     assert_error_result(result)
-    assert result.error is not None and "should be within the range" in result.error
+    assert result.is_error and "should be within the range" in result.text
 
 
 def test_undo_validation(temp_file):
@@ -145,4 +137,4 @@ def test_undo_validation(temp_file):
         path=temp_file,
     )
     assert_error_result(result)
-    assert result.error is not None and "No edit history found" in result.error
+    assert result.is_error and "No edit history found" in result.text
diff --git a/tests/tools/file_editor/test_file_editor_tool.py b/tests/tools/file_editor/test_file_editor_tool.py
index dafce1c4d3..f5963d34b7 100644
--- a/tests/tools/file_editor/test_file_editor_tool.py
+++ b/tests/tools/file_editor/test_file_editor_tool.py
@@ -63,7 +63,7 @@ def test_file_editor_tool_create_file():
         # Check the result
         assert result is not None
         assert isinstance(result, FileEditorObservation)
-        assert not result.error
+        assert not result.is_error
         assert os.path.exists(test_file)
 
         # Check file contents
@@ -94,10 +94,10 @@ def test_file_editor_tool_view_file():
         # Check the result
         assert result is not None
         assert isinstance(result, FileEditorObservation)
-        assert not result.error
-        assert "Line 1" in result.output
-        assert "Line 2" in result.output
-        assert "Line 3" in result.output
+        assert not result.is_error
+        assert "Line 1" in result.text
+        assert "Line 2" in result.text
+        assert "Line 3" in result.text
 
 
 def test_file_editor_tool_str_replace():
@@ -127,7 +127,7 @@ def test_file_editor_tool_str_replace():
         # Check the result
         assert result is not None
         assert isinstance(result, FileEditorObservation)
-        assert not result.error
+        assert not result.is_error
 
         # Check file contents
         with open(test_file) as f:
@@ -177,9 +177,9 @@ def test_file_editor_tool_view_directory():
         # Check the result
         assert result is not None
         assert isinstance(result, FileEditorObservation)
-        assert not result.error
-        assert "file1.txt" in result.output
-        assert "file2.txt" in result.output
+        assert not result.is_error
+        assert "file1.txt" in result.text
+        assert "file2.txt" in result.text
 
 
 def test_file_editor_tool_includes_working_directory_in_description():
diff --git a/tests/tools/file_editor/test_memory_usage.py b/tests/tools/file_editor/test_memory_usage.py
index dd7c351f93..109d551f7d 100644
--- a/tests/tools/file_editor/test_memory_usage.py
+++ b/tests/tools/file_editor/test_memory_usage.py
@@ -71,7 +71,7 @@ def test_file_read_memory_usage(temp_file):
 
     # Pull output before measuring and drop references to encourage GC
     assert_successful_result(result)
-    content = result.output
+    content = result.text
     del result
     gc.collect()
 
@@ -187,7 +187,8 @@ def test_file_editor_memory_leak(temp_file):
                     new_str=new_content,
                 )
                 if i == 0:
-                    print(f"First edit result: {result.output[:200]}...")
+                    content_str = result.text
+                    print(f"First edit result: {content_str[:200]}...")
             except Exception as e:
                 print(f"\nError during edit {i}:")
                 print(f"File size: {os.path.getsize(temp_file) / (1024 * 1024):.2f} MB")
diff --git a/tests/tools/file_editor/test_view_supported_binary_files.py b/tests/tools/file_editor/test_view_supported_binary_files.py
index e29ae932a6..bbd0dc027c 100644
--- a/tests/tools/file_editor/test_view_supported_binary_files.py
+++ b/tests/tools/file_editor/test_view_supported_binary_files.py
@@ -74,12 +74,12 @@ def test_view_pdf_file():
 
         assert isinstance(result, FileEditorObservation)
         assert_successful_result(result)
-        assert f"Here's the result of running `cat -n` on {test_file}" in result.output
+        assert f"Here's the result of running `cat -n` on {test_file}" in result.text
 
         # Check for specific content present in the PDF
         assert (
-            result.output is not None
-            and "Printer-Friendly Caltrain Schedule" in result.output
+            result.text is not None
+            and "Printer-Friendly Caltrain Schedule" in result.text
         )
     finally:
         # Clean up the temporary file
diff --git a/tests/tools/file_editor/utils/test_encoding.py b/tests/tools/file_editor/utils/test_encoding.py
index 9bd47dd477..361606fdf1 100644
--- a/tests/tools/file_editor/utils/test_encoding.py
+++ b/tests/tools/file_editor/utils/test_encoding.py
@@ -287,9 +287,9 @@ def test_view_non_utf8_file(temp_non_utf8_file):
     # Parse the result - now using direct access
 
     # Verify the content was read correctly
-    assert result.output is not None and "Привет, мир!" in result.output
-    assert result.output is not None and "Тестовый файл с кириллицей" in result.output
-    assert result.output is not None and "Это тестовая строка" in result.output
+    assert result.text is not None and "Привет, мир!" in result.text
+    assert result.text is not None and "Тестовый файл с кириллицей" in result.text
+    assert result.text is not None and "Это тестовая строка" in result.text
 
 
 def test_view_range_non_utf8_file(temp_non_utf8_file):
@@ -305,11 +305,11 @@ def test_view_range_non_utf8_file(temp_non_utf8_file):
     # Parse the result - now using direct access
 
     # Verify the content was read correctly
-    assert result.output is not None and "Тестовый файл с кириллицей" in result.output
-    assert result.output is not None and "Привет, мир!" in result.output
+    assert result.text is not None and "Тестовый файл с кириллицей" in result.text
+    assert result.text is not None and "Привет, мир!" in result.text
 
     # Verify that line 6 is not included
-    assert result.output is not None and "Это тестовая строка" not in result.output
+    assert result.text is not None and "Это тестовая строка" not in result.text
 
 
 def test_str_replace_non_utf8_file(temp_non_utf8_file):
@@ -326,8 +326,8 @@ def test_str_replace_non_utf8_file(temp_non_utf8_file):
     # Parse the result - now using direct access
 
     # Verify the replacement was successful
-    assert result.output is not None and "Здравствуй, мир!" in result.output
-    assert result.output is not None and "Привет, мир!" not in result.output
+    assert result.text is not None and "Здравствуй, мир!" in result.text
+    assert result.text is not None and "Привет, мир!" not in result.text
 
     # Verify the file was saved with the correct encoding
     with open(temp_non_utf8_file, "rb") as f:
@@ -354,7 +354,7 @@ def test_insert_non_utf8_file(temp_non_utf8_file):
     # Parse the result - now using direct access
 
     # Verify the insertion was successful
-    assert result.output is not None and "Новая переменная" in result.output
+    assert result.text is not None and "Новая переменная" in result.text
 
     # Verify the file was saved with the correct encoding
     with open(temp_non_utf8_file, "rb") as f:
@@ -391,9 +391,7 @@ def test_create_non_utf8_file():
         # Parse the result - now using direct access
 
         # Verify the file was created successfully
-        assert (
-            result.output is not None and "File created successfully" in result.output
-        )
+        assert result.text is not None and "File created successfully" in result.text
 
         # Read the file with cp1251 encoding to verify content
         encoding_manager = EncodingManager()
@@ -433,7 +431,7 @@ def test_undo_edit_non_utf8_file(temp_non_utf8_file):
     # Parse the result - now using direct access
 
     # Verify the undo was successful
-    assert result.output is not None and "undone successfully" in result.output
+    assert result.text is not None and "undone successfully" in result.text
 
     # Verify the original content was restored with the correct encoding
     with open(temp_non_utf8_file, "rb") as f:
@@ -455,7 +453,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file):
         path=str(temp_non_utf8_file),
     )
     # Parse the result - now using direct access
-    assert result.output is not None and "Привет, мир!" in result.output
+    assert result.text is not None and "Привет, мир!" in result.text
 
     # 2. Replace text
     result = file_editor(
@@ -465,7 +463,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file):
         new_str="Здравствуй, мир!",
     )
     # Parse the result - now using direct access
-    assert result.output is not None and "Здравствуй, мир!" in result.output
+    assert result.text is not None and "Здравствуй, мир!" in result.text
 
     # 3. Insert text
     result = file_editor(
@@ -475,7 +473,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file):
         new_str="# Добавленная строка\nboolean_var = True",
     )
     # Parse the result - now using direct access
-    assert result.output is not None and "Добавленная строка" in result.output
+    assert result.text is not None and "Добавленная строка" in result.text
 
     # 4. View specific range
     result = file_editor(
@@ -484,8 +482,8 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file):
         view_range=[5, 7],
     )
     # Parse the result - now using direct access
-    assert result.output is not None and "Добавленная строка" in result.output
-    assert result.output is not None and "boolean_var = True" in result.output
+    assert result.text is not None and "Добавленная строка" in result.text
+    assert result.text is not None and "boolean_var = True" in result.text
 
     # 5. Undo the last edit
     result = file_editor(
@@ -493,7 +491,7 @@ def test_complex_workflow_non_utf8_file(temp_non_utf8_file):
         path=str(temp_non_utf8_file),
     )
     # Parse the result - now using direct access
-    assert result.output is not None and "undone successfully" in result.output
+    assert result.text is not None and "undone successfully" in result.text
 
     # 6. Verify the file content after all operations
     with open(temp_non_utf8_file, "rb") as f:
@@ -532,7 +530,7 @@ def test_mixed_encoding_workflow():
             path=path1,
         )
         # Parse the result - now using direct access
-        assert "Текст в кодировке CP1251" in result1.output
+        assert "Текст в кодировке CP1251" in result1.text
 
         # 2. View the UTF-8 file
         result2 = file_editor(
@@ -540,7 +538,7 @@ def test_mixed_encoding_workflow():
             path=path2,
         )
         # Parse the result - now using direct access
-        assert "Текст в кодировке UTF-8" in result2.output
+        assert "Текст в кодировке UTF-8" in result2.text
 
         # 3. Edit the cp1251 file
         result3 = file_editor(
@@ -550,7 +548,7 @@ def test_mixed_encoding_workflow():
             new_str="Измененный текст в CP1251",
         )
         # Parse the result - now using direct access
-        assert "Измененный текст в CP1251" in result3.output
+        assert "Измененный текст в CP1251" in result3.text
 
         # 4. Edit the UTF-8 file
         result4 = file_editor(
@@ -560,7 +558,7 @@ def test_mixed_encoding_workflow():
             new_str="Измененный текст в UTF-8",
         )
         # Parse the result - now using direct access
-        assert "Измененный текст в UTF-8" in result4.output
+        assert "Измененный текст в UTF-8" in result4.text
 
         # 5. Verify both files maintain their original encodings
         with open(path1, "rb") as f:
diff --git a/tests/tools/glob/test_glob_executor.py b/tests/tools/glob/test_glob_executor.py
index e14ed8ac9e..c0f215372e 100644
--- a/tests/tools/glob/test_glob_executor.py
+++ b/tests/tools/glob/test_glob_executor.py
@@ -26,7 +26,7 @@ def test_glob_executor_basic_pattern():
         action = GlobAction(pattern="*.py")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 2
         assert all(f.endswith(".py") for f in observation.files)
         assert observation.pattern == "*.py"
@@ -49,7 +49,7 @@ def test_glob_executor_recursive_pattern():
         action = GlobAction(pattern="**/*.py")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 2
         assert all(f.endswith(".py") for f in observation.files)
 
@@ -70,7 +70,7 @@ def test_glob_executor_custom_path():
         action = GlobAction(pattern="*.txt", path=str(sub_dir))
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 2
         assert observation.search_path == str(sub_dir.resolve())
         assert all(str(sub_dir) in f for f in observation.files)
@@ -83,8 +83,8 @@ def test_glob_executor_invalid_path():
         action = GlobAction(pattern="*.py", path="/nonexistent/path")
         observation = executor(action)
 
-        assert observation.error is not None
-        assert "is not a valid directory" in observation.error
+        assert observation.is_error is True
+        assert "is not a valid directory" in observation.text
         assert len(observation.files) == 0
 
 
@@ -99,7 +99,7 @@ def test_glob_executor_no_matches():
         action = GlobAction(pattern="*.py")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 0
         assert not observation.truncated
 
@@ -116,7 +116,7 @@ def test_glob_executor_directories_excluded():
         action = GlobAction(pattern="*")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         # Should only find the file, not directories
         assert len(observation.files) == 1
         assert observation.files[0].endswith("file.txt")
@@ -143,7 +143,7 @@ def test_glob_executor_sorting():
         action = GlobAction(pattern="*.txt")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 3
 
         # Files should be sorted by modification time (newest first)
@@ -162,7 +162,7 @@ def test_glob_executor_truncation():
         action = GlobAction(pattern="*.txt")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 100
         assert observation.truncated is True
 
@@ -189,7 +189,7 @@ def test_glob_executor_complex_patterns():
         action = GlobAction(pattern="config.*")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 4  # All config files
         extensions = {Path(f).suffix for f in observation.files}
         assert extensions == {".json", ".yaml", ".yml", ".toml"}
@@ -206,7 +206,7 @@ def test_glob_executor_exception_handling():
         observation = executor(action)
 
         # Should not raise exception, even if there are no matches
-        assert observation.error is None or isinstance(observation.error, str)
+        assert observation.is_error is False or isinstance(observation.content, str)
         assert isinstance(observation.files, list)
 
 
@@ -220,7 +220,7 @@ def test_glob_executor_absolute_paths():
         action = GlobAction(pattern="*.py")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 1
 
         # Check that returned path is absolute
@@ -236,7 +236,7 @@ def test_glob_executor_empty_directory():
         action = GlobAction(pattern="*")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 0
         assert not observation.truncated
 
diff --git a/tests/tools/glob/test_glob_tool.py b/tests/tools/glob/test_glob_tool.py
index 3c3f69c374..854daeae9d 100644
--- a/tests/tools/glob/test_glob_tool.py
+++ b/tests/tools/glob/test_glob_tool.py
@@ -85,7 +85,7 @@ def test_glob_tool_find_files():
         observation = tool.executor(action)
 
         assert isinstance(observation, GlobObservation)
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 3  # test.py, src/app.py, tests/test_main.py
         assert observation.pattern == "**/*.py"
         assert observation.search_path == str(Path(temp_dir).resolve())
@@ -119,7 +119,7 @@ def test_glob_tool_specific_directory():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 2  # app.py, utils.py
         assert observation.search_path == str(src_dir.resolve())
 
@@ -143,7 +143,7 @@ def test_glob_tool_no_matches():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 0
         assert observation.pattern == "**/*.py"
         assert not observation.truncated
@@ -161,8 +161,8 @@ def test_glob_tool_invalid_directory():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is not None
-        assert "is not a valid directory" in observation.error
+        assert observation.is_error is True
+        assert "is not a valid directory" in observation.text
         assert len(observation.files) == 0
 
 
@@ -191,7 +191,7 @@ def test_glob_tool_complex_patterns():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 4  # All config files
         assert observation.pattern == "config.*"
 
@@ -218,7 +218,7 @@ def test_glob_tool_directories_excluded():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         # Should find all files recursively, but not directories
         assert len(observation.files) == 2  # app.py and src/utils.py
         # Verify both files are present
@@ -287,9 +287,9 @@ def test_glob_tool_to_llm_content_error():
         observation = tool.executor(action)
 
         content = observation.to_llm_content
-        assert len(content) == 1
-        text_content = content[0].text
-        assert "Error:" in text_content
+        assert len(content) == 2
+        assert content[0].text == GlobObservation.ERROR_MESSAGE_HEADER
+        text_content = content[1].text
         assert "is not a valid directory" in text_content
 
 
@@ -309,7 +309,7 @@ def test_glob_tool_truncation():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.files) == 100  # Truncated to 100
         assert observation.truncated is True
 
diff --git a/tests/tools/grep/test_consistency.py b/tests/tools/grep/test_consistency.py
index 6c13ad361a..0c3b0a8eda 100644
--- a/tests/tools/grep/test_consistency.py
+++ b/tests/tools/grep/test_consistency.py
@@ -114,8 +114,8 @@ def test_basic_search_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets of matching files for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -143,8 +143,8 @@ def test_case_insensitive_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -172,8 +172,8 @@ def test_include_pattern_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -205,8 +205,8 @@ def test_no_matches_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed with identical empty results
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -229,8 +229,8 @@ def test_regex_pattern_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -258,8 +258,8 @@ def test_todo_comments_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -287,8 +287,8 @@ def test_error_patterns_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -316,8 +316,8 @@ def test_import_statements_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -345,8 +345,8 @@ def test_class_definitions_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -374,8 +374,8 @@ def test_deep_nested_search_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
@@ -409,8 +409,8 @@ def test_config_file_search_consistency(self, temp_dir_with_content):
             )
 
             # Both should succeed
-            assert not ripgrep_result.error
-            assert not fallback_result.error
+            assert not ripgrep_result.is_error
+            assert not fallback_result.is_error
 
             # Convert to sets for exact comparison
             ripgrep_matches = set(ripgrep_result.matches)
@@ -439,8 +439,8 @@ def test_hidden_files_search_consistency(self, temp_dir_with_content):
         )
 
         # Both should succeed
-        assert not ripgrep_result.error
-        assert not fallback_result.error
+        assert not ripgrep_result.is_error
+        assert not fallback_result.is_error
 
         # Convert to sets for exact comparison
         ripgrep_matches = set(ripgrep_result.matches)
diff --git a/tests/tools/grep/test_grep_executor.py b/tests/tools/grep/test_grep_executor.py
index 40ea9512f1..120f2b1da0 100644
--- a/tests/tools/grep/test_grep_executor.py
+++ b/tests/tools/grep/test_grep_executor.py
@@ -37,7 +37,7 @@ def test_grep_executor_basic_search():
         action = GrepAction(pattern="print")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 2  # Two files containing "print"
         assert observation.pattern == "print"
         assert observation.search_path == str(Path(temp_dir).resolve())
@@ -59,7 +59,7 @@ def test_grep_executor_case_insensitive():
         action = GrepAction(pattern="print")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1  # File contains pattern (case-insensitive)
         assert "case_test.py" in observation.matches[0]
 
@@ -75,7 +75,7 @@ def test_grep_executor_include_filter():
         action = GrepAction(pattern="test", include="*.py")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert observation.matches[0].endswith(".py")
 
@@ -92,7 +92,7 @@ def test_grep_executor_custom_path():
         action = GrepAction(pattern="print", path=str(sub_dir))
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert observation.search_path == str(sub_dir.resolve())
         assert str(sub_dir) in str(observation.matches[0])
@@ -105,8 +105,8 @@ def test_grep_executor_invalid_path():
         action = GrepAction(pattern="test", path="/nonexistent/path")
         observation = executor(action)
 
-        assert observation.error is not None
-        assert "not a valid directory" in observation.error
+        assert observation.is_error is True
+        assert "not a valid directory" in observation.text
 
 
 def test_grep_executor_no_matches():
@@ -118,7 +118,7 @@ def test_grep_executor_no_matches():
         action = GrepAction(pattern="nonexistent")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 0
 
 
@@ -132,7 +132,7 @@ def test_grep_executor_hidden_files_excluded():
         action = GrepAction(pattern="test")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert ".hidden" not in observation.matches[0]
 
@@ -155,7 +155,7 @@ def test_grep_executor_sorting():
         action = GrepAction(pattern="test")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 2
         # Newest file should be first
         assert "new.py" in observation.matches[0]
@@ -173,7 +173,7 @@ def test_grep_executor_truncation():
         action = GrepAction(pattern="test")
         observation = executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 100
         assert observation.truncated is True
 
@@ -185,5 +185,5 @@ def test_grep_executor_invalid_regex():
         action = GrepAction(pattern="[invalid")
         observation = executor(action)
 
-        assert observation.error is not None
-        assert "Invalid regex pattern" in observation.error
+        assert observation.is_error is True
+        assert "Invalid regex pattern" in observation.text
diff --git a/tests/tools/grep/test_grep_tool.py b/tests/tools/grep/test_grep_tool.py
index e5b6abc874..04cefd668a 100644
--- a/tests/tools/grep/test_grep_tool.py
+++ b/tests/tools/grep/test_grep_tool.py
@@ -63,7 +63,7 @@ def test_grep_tool_basic_search():
         observation = tool.executor(action)
 
         assert isinstance(observation, GrepObservation)
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 2  # Two files
         assert observation.pattern == "print"
         assert observation.search_path == str(Path(temp_dir).resolve())
@@ -89,7 +89,7 @@ def test_grep_tool_case_insensitive():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
 
 
@@ -107,7 +107,7 @@ def test_grep_tool_include_filter():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert observation.matches[0].endswith(".py")
 
@@ -128,7 +128,7 @@ def test_grep_tool_specific_directory():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert observation.search_path == str(src_dir.resolve())
         assert str(src_dir) in observation.matches[0]
@@ -147,7 +147,7 @@ def test_grep_tool_no_matches():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 0
         assert not observation.truncated
 
@@ -163,8 +163,8 @@ def test_grep_tool_invalid_regex():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is not None
-        assert "Invalid regex pattern" in observation.error
+        assert observation.is_error is True
+        assert "Invalid regex pattern" in observation.text
 
 
 def test_grep_tool_invalid_directory():
@@ -178,8 +178,8 @@ def test_grep_tool_invalid_directory():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is not None
-        assert "not a valid directory" in observation.error
+        assert observation.is_error is True
+        assert "not a valid directory" in observation.text
 
 
 def test_grep_tool_hidden_files_excluded():
@@ -196,7 +196,7 @@ def test_grep_tool_hidden_files_excluded():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 1
         assert ".hidden" not in observation.matches[0]
 
@@ -269,8 +269,9 @@ def test_grep_tool_to_llm_content_error():
         observation = tool.executor(action)
 
         content = observation.to_llm_content
-        text = content[0].text
-        assert "Error:" in text
+        assert len(content) == 2
+        assert content[0].text == GrepObservation.ERROR_MESSAGE_HEADER
+        text = content[1].text
         assert "Invalid regex pattern" in text
 
 
@@ -289,7 +290,7 @@ def test_grep_tool_truncation():
         assert tool.executor is not None
         observation = tool.executor(action)
 
-        assert observation.error is None
+        assert observation.is_error is False
         assert len(observation.matches) == 100
         assert observation.truncated is True