From c5e4e51a0392fb921a280a8891de40398927fe98 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 30 Jul 2025 16:31:46 -0400 Subject: [PATCH 1/6] fix(event_loop): raise dedicated exception when encountering max tokens stop reason --- src/strands/event_loop/event_loop.py | 15 ++++++- src/strands/types/exceptions.py | 11 +++++ tests/strands/event_loop/test_event_loop.py | 48 ++++++++++++++++++++- tests_integ/test_max_tokens_reached.py | 18 ++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 tests_integ/test_max_tokens_reached.py diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index ffcb6a5c9..5b96dfc92 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -28,7 +28,12 @@ from ..telemetry.tracer import get_tracer from ..tools.executor import run_tools, validate_and_prepare_tools from ..types.content import Message -from ..types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException +from ..types.exceptions import ( + ContextWindowOverflowException, + EventLoopException, + EventLoopMaxTokensReachedException, + ModelThrottledException, +) from ..types.streaming import Metrics, StopReason from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse from .streaming import stream_messages @@ -216,6 +221,14 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) -> yield event return + elif stop_reason == "max_tokens": + raise EventLoopMaxTokensReachedException( + ( + "Agent has reached an unrecoverable state due to max_tokens limit. 
" + "For more information see: " + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" + ) + ) # End the cycle and return results agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes) diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py index 4bd3fd88e..14f76e945 100644 --- a/src/strands/types/exceptions.py +++ b/src/strands/types/exceptions.py @@ -18,6 +18,17 @@ def __init__(self, original_exception: Exception, request_state: Any = None) -> super().__init__(str(original_exception)) +class EventLoopMaxTokensReachedException(EventLoopException): + """Exception raised when the model reaches its maximum token generation limit. + + This exception is raised when the model stops generating tokens because it has reached the maximum number of + tokens allowed for output generation. This can occur when the model's max_tokens parameter is set too low for + the complexity of the response, or when the model naturally reaches its configured output limit during generation. + """ + + pass + + class ContextWindowOverflowException(Exception): """Exception raised when the context window is exceeded. 
diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py index 1ac2f8258..3303b7282 100644 --- a/tests/strands/event_loop/test_event_loop.py +++ b/tests/strands/event_loop/test_event_loop.py @@ -19,7 +19,12 @@ ) from strands.telemetry.metrics import EventLoopMetrics from strands.tools.registry import ToolRegistry -from strands.types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException +from strands.types.exceptions import ( + ContextWindowOverflowException, + EventLoopException, + EventLoopMaxTokensReachedException, + ModelThrottledException, +) from tests.fixtures.mock_hook_provider import MockHookProvider @@ -556,6 +561,47 @@ async def test_event_loop_tracing_with_model_error( mock_tracer.end_span_with_error.assert_called_once_with(model_span, "Input too long", model.stream.side_effect) +@pytest.mark.asyncio +async def test_event_loop_cycle_max_tokens_exception( + agent, + model, + agenerator, + alist, +): + """Test that max_tokens stop reason raises MaxTokensReachedException.""" + + # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495 + model.stream.return_value = agenerator( + [ + { + "contentBlockStart": { + "start": { + "toolUse": {}, + }, + }, + }, + {"contentBlockStop": {}}, + {"messageStop": {"stopReason": "max_tokens"}}, + ] + ) + + # Call event_loop_cycle, expecting it to raise MaxTokensReachedException + with pytest.raises(EventLoopMaxTokensReachedException) as exc_info: + stream = strands.event_loop.event_loop.event_loop_cycle( + agent=agent, + invocation_state={}, + ) + await alist(stream) + + # Verify the exception message contains the expected content + expected_message = ( + "Agent has reached an unrecoverable state due to max_tokens limit. 
" + "For more information see: " + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" + ) + assert str(exc_info.value) == expected_message + + @patch("strands.event_loop.event_loop.get_tracer") @pytest.mark.asyncio async def test_event_loop_tracing_with_tool_execution( diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py new file mode 100644 index 000000000..b6f6b2857 --- /dev/null +++ b/tests_integ/test_max_tokens_reached.py @@ -0,0 +1,18 @@ +import pytest + +from strands import Agent, tool +from strands.models.bedrock import BedrockModel +from strands.types.exceptions import EventLoopMaxTokensReachedException + + +@tool +def story_tool(story: str) -> str: + return story + + +def test_context_window_overflow(): + model = BedrockModel(max_tokens=1) + agent = Agent(model=model, tools=[story_tool]) + + with pytest.raises(EventLoopMaxTokensReachedException): + agent("Tell me a story!") From 6703819d6b6cdedb7b08d92e028bb3deca6c4e78 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Wed, 30 Jul 2025 17:02:03 -0400 Subject: [PATCH 2/6] fix: update integ tests --- src/strands/event_loop/event_loop.py | 2 +- src/strands/models/anthropic.py | 2 +- src/strands/models/bedrock.py | 2 +- src/strands/types/exceptions.py | 2 +- tests/strands/event_loop/test_event_loop.py | 9 ++++----- tests_integ/test_max_tokens_reached.py | 7 ++++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index 5b96dfc92..16fefa5ac 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -226,7 +226,7 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) -> ( "Agent has reached an unrecoverable state due to max_tokens limit. 
" "For more information see: " - "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception" ) ) diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py index 0d734b762..975fca3e9 100644 --- a/src/strands/models/anthropic.py +++ b/src/strands/models/anthropic.py @@ -414,7 +414,7 @@ async def structured_output( stop_reason, messages, _, _ = event["stop"] if stop_reason != "tool_use": - raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".") + raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".') content = messages["content"] output_response: dict[str, Any] | None = None diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py index 9b36b4244..4ea1453a4 100644 --- a/src/strands/models/bedrock.py +++ b/src/strands/models/bedrock.py @@ -631,7 +631,7 @@ async def structured_output( stop_reason, messages, _, _ = event["stop"] if stop_reason != "tool_use": - raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".") + raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".') content = messages["content"] output_response: dict[str, Any] | None = None diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py index 14f76e945..7d9f1c6dc 100644 --- a/src/strands/types/exceptions.py +++ b/src/strands/types/exceptions.py @@ -18,7 +18,7 @@ def __init__(self, original_exception: Exception, request_state: Any = None) -> super().__init__(str(original_exception)) -class EventLoopMaxTokensReachedException(EventLoopException): +class EventLoopMaxTokensReachedException(Exception): """Exception raised when the model reaches its maximum token generation limit. 
This exception is raised when the model stops generating tokens because it has reached the maximum number of diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py index 3303b7282..05b20ba01 100644 --- a/tests/strands/event_loop/test_event_loop.py +++ b/tests/strands/event_loop/test_event_loop.py @@ -22,7 +22,6 @@ from strands.types.exceptions import ( ContextWindowOverflowException, EventLoopException, - EventLoopMaxTokensReachedException, ModelThrottledException, ) from tests.fixtures.mock_hook_provider import MockHookProvider @@ -568,7 +567,7 @@ async def test_event_loop_cycle_max_tokens_exception( agenerator, alist, ): - """Test that max_tokens stop reason raises MaxTokensReachedException.""" + """Test that max_tokens stop reason raises EventLoopMaxTokensReachedException.""" # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495 model.stream.return_value = agenerator( @@ -585,8 +584,8 @@ async def test_event_loop_cycle_max_tokens_exception( ] ) - # Call event_loop_cycle, expecting it to raise MaxTokensReachedException - with pytest.raises(EventLoopMaxTokensReachedException) as exc_info: + # Call event_loop_cycle, expecting it to raise EventLoopMaxTokensReachedException + with pytest.raises(EventLoopException) as exc_info: stream = strands.event_loop.event_loop.event_loop_cycle( agent=agent, invocation_state={}, @@ -597,7 +596,7 @@ async def test_event_loop_cycle_max_tokens_exception( expected_message = ( "Agent has reached an unrecoverable state due to max_tokens limit. 
" "For more information see: " - "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception" ) assert str(exc_info.value) == expected_message diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py index b6f6b2857..1bf75f136 100644 --- a/tests_integ/test_max_tokens_reached.py +++ b/tests_integ/test_max_tokens_reached.py @@ -1,8 +1,7 @@ -import pytest from strands import Agent, tool from strands.models.bedrock import BedrockModel -from strands.types.exceptions import EventLoopMaxTokensReachedException +from strands.types.exceptions import EventLoopException, EventLoopMaxTokensReachedException @tool @@ -14,5 +13,7 @@ def test_context_window_overflow(): model = BedrockModel(max_tokens=1) agent = Agent(model=model, tools=[story_tool]) - with pytest.raises(EventLoopMaxTokensReachedException): + try: agent("Tell me a story!") + except EventLoopException as e: + assert isinstance(e.original_exception, EventLoopMaxTokensReachedException) From c94b74e75236dcbac0ffdb438f3a4a9ff59cda5f Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Thu, 31 Jul 2025 10:50:40 -0400 Subject: [PATCH 3/6] fix: rename exception message, add to exception, move earlier in cycle --- src/strands/event_loop/event_loop.py | 29 ++++++++++++++------- src/strands/types/exceptions.py | 14 ++++++++-- tests/strands/event_loop/test_event_loop.py | 13 ++++++--- tests_integ/test_max_tokens_reached.py | 7 +++-- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index 16fefa5ac..ae21d4c6d 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -31,7 +31,7 @@ from ..types.exceptions import ( ContextWindowOverflowException, EventLoopException, - EventLoopMaxTokensReachedException, + 
MaxTokensReachedException, ModelThrottledException, ) from ..types.streaming import Metrics, StopReason @@ -192,6 +192,22 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) -> raise e try: + if stop_reason == "max_tokens": + """ + Handle max_tokens limit reached by the model. + + When the model reaches its maximum token limit, this represents a potentially unrecoverable + state where the model's response was truncated. By default, Strands fails hard with a + MaxTokensReachedException to maintain consistency with other failure types. + """ + raise MaxTokensReachedException( + message=( + "Agent has reached an unrecoverable state due to max_tokens limit. " + "For more information see: " + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" + ), + incomplete_message=message, + ) # Add message in trace and mark the end of the stream messages trace stream_trace.add_message(message) stream_trace.end() @@ -221,14 +237,6 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) -> yield event return - elif stop_reason == "max_tokens": - raise EventLoopMaxTokensReachedException( - ( - "Agent has reached an unrecoverable state due to max_tokens limit. " - "For more information see: " - "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception" - ) - ) # End the cycle and return results agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes) @@ -244,7 +252,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) -> # Don't yield or log the exception - we already did it when we # raised the exception and we don't need that duplication. 
raise - except ContextWindowOverflowException as e: + except (ContextWindowOverflowException, MaxTokensReachedException) as e: + # Special cased exceptions which we want to bubble up rather than get wrapped in an EventLoopException if cycle_span: tracer.end_span_with_error(cycle_span, str(e), e) raise e diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py index 7d9f1c6dc..71ea28b9f 100644 --- a/src/strands/types/exceptions.py +++ b/src/strands/types/exceptions.py @@ -2,6 +2,8 @@ from typing import Any +from strands.types.content import Message + class EventLoopException(Exception): """Exception raised by the event loop.""" @@ -18,7 +20,7 @@ def __init__(self, original_exception: Exception, request_state: Any = None) -> super().__init__(str(original_exception)) -class EventLoopMaxTokensReachedException(Exception): +class MaxTokensReachedException(Exception): """Exception raised when the model reaches its maximum token generation limit. This exception is raised when the model stops generating tokens because it has reached the maximum number of @@ -26,7 +28,15 @@ class EventLoopMaxTokensReachedException(Exception): the complexity of the response, or when the model naturally reaches its configured output limit during generation. """ - pass + def __init__(self, message: str, incomplete_message: Message): + """Initialize the exception with an error message and the incomplete message object. 
+ + Args: + message: The error message describing the token limit issue + incomplete_message: The valid Message object with incomplete content due to token limits + """ + self.incomplete_message = incomplete_message + super().__init__(message) class ContextWindowOverflowException(Exception): diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py index 05b20ba01..3886df8b9 100644 --- a/tests/strands/event_loop/test_event_loop.py +++ b/tests/strands/event_loop/test_event_loop.py @@ -22,6 +22,7 @@ from strands.types.exceptions import ( ContextWindowOverflowException, EventLoopException, + MaxTokensReachedException, ModelThrottledException, ) from tests.fixtures.mock_hook_provider import MockHookProvider @@ -567,7 +568,7 @@ async def test_event_loop_cycle_max_tokens_exception( agenerator, alist, ): - """Test that max_tokens stop reason raises EventLoopMaxTokensReachedException.""" + """Test that max_tokens stop reason raises MaxTokensReachedException.""" # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495 model.stream.return_value = agenerator( @@ -584,8 +585,8 @@ async def test_event_loop_cycle_max_tokens_exception( ] ) - # Call event_loop_cycle, expecting it to raise EventLoopMaxTokensReachedException - with pytest.raises(EventLoopException) as exc_info: + # Call event_loop_cycle, expecting it to raise MaxTokensReachedException + with pytest.raises(MaxTokensReachedException) as exc_info: stream = strands.event_loop.event_loop.event_loop_cycle( agent=agent, invocation_state={}, @@ -596,10 +597,14 @@ async def test_event_loop_cycle_max_tokens_exception( expected_message = ( "Agent has reached an unrecoverable state due to max_tokens limit. 
" "For more information see: " - "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception" + "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" ) assert str(exc_info.value) == expected_message + # Verify that the message has not been appended to the messages array + assert len(agent.messages) == 1 + assert exc_info.value.incomplete_message not in agent.messages + @patch("strands.event_loop.event_loop.get_tracer") @pytest.mark.asyncio diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py index 1bf75f136..519cf62c2 100644 --- a/tests_integ/test_max_tokens_reached.py +++ b/tests_integ/test_max_tokens_reached.py @@ -1,7 +1,8 @@ +import pytest from strands import Agent, tool from strands.models.bedrock import BedrockModel -from strands.types.exceptions import EventLoopException, EventLoopMaxTokensReachedException +from strands.types.exceptions import MaxTokensReachedException @tool @@ -13,7 +14,5 @@ def test_context_window_overflow(): model = BedrockModel(max_tokens=1) agent = Agent(model=model, tools=[story_tool]) - try: + with pytest.raises(MaxTokensReachedException): agent("Tell me a story!") - except EventLoopException as e: - assert isinstance(e.original_exception, EventLoopMaxTokensReachedException) From 36dd0f9304ba0daa4fceffef614ff91400fcb23a Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Thu, 31 Jul 2025 14:53:04 -0400 Subject: [PATCH 4/6] Update tests_integ/test_max_tokens_reached.py Co-authored-by: Nick Clegg --- tests_integ/test_max_tokens_reached.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py index 519cf62c2..1b822dcba 100644 --- a/tests_integ/test_max_tokens_reached.py +++ b/tests_integ/test_max_tokens_reached.py @@ -11,7 +11,7 @@ def story_tool(story: str) -> str: def test_context_window_overflow(): - model = 
BedrockModel(max_tokens=1) + model = BedrockModel(max_tokens=100) agent = Agent(model=model, tools=[story_tool]) with pytest.raises(MaxTokensReachedException): From e04c73d85d86dde5d9e415ae2ef693aa9a55da56 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Thu, 31 Jul 2025 14:53:11 -0400 Subject: [PATCH 5/6] Update tests_integ/test_max_tokens_reached.py Co-authored-by: Nick Clegg --- tests_integ/test_max_tokens_reached.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py index 1b822dcba..5f7e5584c 100644 --- a/tests_integ/test_max_tokens_reached.py +++ b/tests_integ/test_max_tokens_reached.py @@ -16,3 +16,5 @@ def test_context_window_overflow(): with pytest.raises(MaxTokensReachedException): agent("Tell me a story!") + + assert len(agent.messages) == 1 From cca2f86a3f7a1d22cfa8cf59ffa0029943a0efa7 Mon Sep 17 00:00:00 2001 From: Dean Schmigelski Date: Thu, 31 Jul 2025 14:57:19 -0400 Subject: [PATCH 6/6] linting --- tests_integ/test_max_tokens_reached.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py index 5f7e5584c..d9c2817b3 100644 --- a/tests_integ/test_max_tokens_reached.py +++ b/tests_integ/test_max_tokens_reached.py @@ -16,5 +16,5 @@ def test_context_window_overflow(): with pytest.raises(MaxTokensReachedException): agent("Tell me a story!") - + assert len(agent.messages) == 1