fix: handoff from reasoning model to non-reasoning model

serialx · serialx · commit 8ab93577855f · 2025-09-16T14:37:01.000+09:00
Be more selective about when reasoning messages are preserved.
When we hand off from Claude 4 Sonnet Thinking to a non-thinking agent, we get errors because non-thinking models expect no thinking blocks in the request. This fixes that edge case by preserving thinking blocks only when the model settings specify a reasoning effort (i.e. `model_settings.reasoning` and `model_settings.reasoning.effort` are both not None).
diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py
@@ -257,7 +257,15 @@ async def _fetch_response(
         stream: bool = False,
         prompt: Any | None = None,
     ) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
-        converted_messages = Converter.items_to_messages(input)
+        # Preserve reasoning messages for tool calls when reasoning is on
+        # This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
+        preserve_reasoning_message = (
+            model_settings.reasoning is not None and model_settings.reasoning.effort is not None
+        )
+
+        converted_messages = Converter.items_to_messages(
+            input, preserve_reasoning_message=preserve_reasoning_message
+        )
 
         if system_instructions:
             converted_messages.insert(
diff --git a/src/agents/models/chatcmpl_converter.py b/src/agents/models/chatcmpl_converter.py
@@ -315,10 +315,18 @@ def extract_all_content(
     def items_to_messages(
         cls,
         items: str | Iterable[TResponseInputItem],
+        preserve_reasoning_message: bool = False,
     ) -> list[ChatCompletionMessageParam]:
         """
         Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
 
+        Args:
+            items: A string or iterable of response input items to convert
+            preserve_reasoning_message: Whether to preserve reasoning messages (thinking blocks)
+                in tool calls for reasoning models like Claude 4 Sonnet/Opus which support
+                interleaved thinking. When True, thinking blocks are reconstructed and
+                included in assistant messages with tool calls.
+
         Rules:
         - EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
         - EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
@@ -512,7 +520,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
                 content_items = reasoning_item.get("content", [])
                 signature = reasoning_item.get("encrypted_content")
 
-                if content_items:
+                if content_items and preserve_reasoning_message:
                     # Reconstruct thinking blocks from content and signature
                     pending_thinking_blocks = []
                     for content_item in content_items:
diff --git a/tests/test_anthropic_thinking_blocks.py b/tests/test_anthropic_thinking_blocks.py
@@ -176,7 +176,7 @@ def test_anthropic_thinking_blocks_with_tool_calls():
         else:
             items_as_dicts.append(cast(dict[str, Any], item))
 
-    messages = Converter.items_to_messages(items_as_dicts)  # type: ignore[arg-type]
+    messages = Converter.items_to_messages(items_as_dicts, preserve_reasoning_message=True)  # type: ignore[arg-type]
 
     # Find the assistant message with tool calls
     assistant_messages = [