From c5e4e51a0392fb921a280a8891de40398927fe98 Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Wed, 30 Jul 2025 16:31:46 -0400
Subject: [PATCH 1/6] fix(event_loop): raise dedicated exception when
 encountering max tokens stop reason

---
 src/strands/event_loop/event_loop.py        | 15 ++++++-
 src/strands/types/exceptions.py             | 11 +++++
 tests/strands/event_loop/test_event_loop.py | 48 ++++++++++++++++++++-
 tests_integ/test_max_tokens_reached.py      | 18 ++++++++
 4 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 tests_integ/test_max_tokens_reached.py

diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py
index ffcb6a5c9..5b96dfc92 100644
--- a/src/strands/event_loop/event_loop.py
+++ b/src/strands/event_loop/event_loop.py
@@ -28,7 +28,12 @@
 from ..telemetry.tracer import get_tracer
 from ..tools.executor import run_tools, validate_and_prepare_tools
 from ..types.content import Message
-from ..types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException
+from ..types.exceptions import (
+    ContextWindowOverflowException,
+    EventLoopException,
+    EventLoopMaxTokensReachedException,
+    ModelThrottledException,
+)
 from ..types.streaming import Metrics, StopReason
 from ..types.tools import ToolChoice, ToolChoiceAuto, ToolConfig, ToolGenerator, ToolResult, ToolUse
 from .streaming import stream_messages
@@ -216,6 +221,14 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
                 yield event
 
             return
+        elif stop_reason == "max_tokens":
+            raise EventLoopMaxTokensReachedException(
+                (
+                    "Agent has reached an unrecoverable state due to max_tokens limit. "
+                    "For more information see: "
+                    "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
+                )
+            )
 
         # End the cycle and return results
         agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes)
diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py
index 4bd3fd88e..14f76e945 100644
--- a/src/strands/types/exceptions.py
+++ b/src/strands/types/exceptions.py
@@ -18,6 +18,17 @@ def __init__(self, original_exception: Exception, request_state: Any = None) ->
         super().__init__(str(original_exception))
 
 
+class EventLoopMaxTokensReachedException(EventLoopException):
+    """Exception raised when the model reaches its maximum token generation limit.
+
+    This exception is raised when the model stops generating tokens because it has reached the maximum number of
+    tokens allowed for output generation. This can occur when the model's max_tokens parameter is set too low for
+    the complexity of the response, or when the model naturally reaches its configured output limit during generation.
+    """
+
+    pass
+
+
 class ContextWindowOverflowException(Exception):
     """Exception raised when the context window is exceeded.
 
diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py
index 1ac2f8258..3303b7282 100644
--- a/tests/strands/event_loop/test_event_loop.py
+++ b/tests/strands/event_loop/test_event_loop.py
@@ -19,7 +19,12 @@
 )
 from strands.telemetry.metrics import EventLoopMetrics
 from strands.tools.registry import ToolRegistry
-from strands.types.exceptions import ContextWindowOverflowException, EventLoopException, ModelThrottledException
+from strands.types.exceptions import (
+    ContextWindowOverflowException,
+    EventLoopException,
+    EventLoopMaxTokensReachedException,
+    ModelThrottledException,
+)
 from tests.fixtures.mock_hook_provider import MockHookProvider
 
 
@@ -556,6 +561,47 @@ async def test_event_loop_tracing_with_model_error(
     mock_tracer.end_span_with_error.assert_called_once_with(model_span, "Input too long", model.stream.side_effect)
 
 
+@pytest.mark.asyncio
+async def test_event_loop_cycle_max_tokens_exception(
+    agent,
+    model,
+    agenerator,
+    alist,
+):
+    """Test that max_tokens stop reason raises MaxTokensReachedException."""
+
+    # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495
+    model.stream.return_value = agenerator(
+        [
+            {
+                "contentBlockStart": {
+                    "start": {
+                        "toolUse": {},
+                    },
+                },
+            },
+            {"contentBlockStop": {}},
+            {"messageStop": {"stopReason": "max_tokens"}},
+        ]
+    )
+
+    # Call event_loop_cycle, expecting it to raise MaxTokensReachedException
+    with pytest.raises(EventLoopMaxTokensReachedException) as exc_info:
+        stream = strands.event_loop.event_loop.event_loop_cycle(
+            agent=agent,
+            invocation_state={},
+        )
+        await alist(stream)
+
+    # Verify the exception message contains the expected content
+    expected_message = (
+        "Agent has reached an unrecoverable state due to max_tokens limit. "
+        "For more information see: "
+        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
+    )
+    assert str(exc_info.value) == expected_message
+
+
 @patch("strands.event_loop.event_loop.get_tracer")
 @pytest.mark.asyncio
 async def test_event_loop_tracing_with_tool_execution(
diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
new file mode 100644
index 000000000..b6f6b2857
--- /dev/null
+++ b/tests_integ/test_max_tokens_reached.py
@@ -0,0 +1,18 @@
+import pytest
+
+from strands import Agent, tool
+from strands.models.bedrock import BedrockModel
+from strands.types.exceptions import EventLoopMaxTokensReachedException
+
+
+@tool
+def story_tool(story: str) -> str:
+    return story
+
+
+def test_context_window_overflow():
+    model = BedrockModel(max_tokens=1)
+    agent = Agent(model=model, tools=[story_tool])
+
+    with pytest.raises(EventLoopMaxTokensReachedException):
+        agent("Tell me a story!")

From 6703819d6b6cdedb7b08d92e028bb3deca6c4e78 Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Wed, 30 Jul 2025 17:02:03 -0400
Subject: [PATCH 2/6] fix: update integ tests

---
 src/strands/event_loop/event_loop.py        | 2 +-
 src/strands/models/anthropic.py             | 2 +-
 src/strands/models/bedrock.py               | 2 +-
 src/strands/types/exceptions.py             | 2 +-
 tests/strands/event_loop/test_event_loop.py | 9 ++++-----
 tests_integ/test_max_tokens_reached.py      | 7 ++++---
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py
index 5b96dfc92..16fefa5ac 100644
--- a/src/strands/event_loop/event_loop.py
+++ b/src/strands/event_loop/event_loop.py
@@ -226,7 +226,7 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
                 (
                     "Agent has reached an unrecoverable state due to max_tokens limit. "
                     "For more information see: "
-                    "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
+                    "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception"
                 )
             )
 
diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py
index 0d734b762..975fca3e9 100644
--- a/src/strands/models/anthropic.py
+++ b/src/strands/models/anthropic.py
@@ -414,7 +414,7 @@ async def structured_output(
         stop_reason, messages, _, _ = event["stop"]
 
         if stop_reason != "tool_use":
-            raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".")
+            raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".')
 
         content = messages["content"]
         output_response: dict[str, Any] | None = None
diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py
index 9b36b4244..4ea1453a4 100644
--- a/src/strands/models/bedrock.py
+++ b/src/strands/models/bedrock.py
@@ -631,7 +631,7 @@ async def structured_output(
         stop_reason, messages, _, _ = event["stop"]
 
         if stop_reason != "tool_use":
-            raise ValueError(f"Model returned stop_reason: {stop_reason} instead of \"tool_use\".")
+            raise ValueError(f'Model returned stop_reason: {stop_reason} instead of "tool_use".')
 
         content = messages["content"]
         output_response: dict[str, Any] | None = None
diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py
index 14f76e945..7d9f1c6dc 100644
--- a/src/strands/types/exceptions.py
+++ b/src/strands/types/exceptions.py
@@ -18,7 +18,7 @@ def __init__(self, original_exception: Exception, request_state: Any = None) ->
         super().__init__(str(original_exception))
 
 
-class EventLoopMaxTokensReachedException(EventLoopException):
+class EventLoopMaxTokensReachedException(Exception):
     """Exception raised when the model reaches its maximum token generation limit.
 
     This exception is raised when the model stops generating tokens because it has reached the maximum number of
diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py
index 3303b7282..05b20ba01 100644
--- a/tests/strands/event_loop/test_event_loop.py
+++ b/tests/strands/event_loop/test_event_loop.py
@@ -22,7 +22,6 @@
 from strands.types.exceptions import (
     ContextWindowOverflowException,
     EventLoopException,
-    EventLoopMaxTokensReachedException,
     ModelThrottledException,
 )
 from tests.fixtures.mock_hook_provider import MockHookProvider
@@ -568,7 +567,7 @@ async def test_event_loop_cycle_max_tokens_exception(
     agenerator,
     alist,
 ):
-    """Test that max_tokens stop reason raises MaxTokensReachedException."""
+    """Test that max_tokens stop reason raises EventLoopMaxTokensReachedException."""
 
     # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495
     model.stream.return_value = agenerator(
@@ -585,8 +584,8 @@ async def test_event_loop_cycle_max_tokens_exception(
         ]
     )
 
-    # Call event_loop_cycle, expecting it to raise MaxTokensReachedException
-    with pytest.raises(EventLoopMaxTokensReachedException) as exc_info:
+    # Call event_loop_cycle, expecting it to raise EventLoopMaxTokensReachedException
+    with pytest.raises(EventLoopException) as exc_info:
         stream = strands.event_loop.event_loop.event_loop_cycle(
             agent=agent,
             invocation_state={},
@@ -597,7 +596,7 @@ async def test_event_loop_cycle_max_tokens_exception(
     expected_message = (
         "Agent has reached an unrecoverable state due to max_tokens limit. "
         "For more information see: "
-        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
+        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception"
     )
     assert str(exc_info.value) == expected_message
 
diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
index b6f6b2857..1bf75f136 100644
--- a/tests_integ/test_max_tokens_reached.py
+++ b/tests_integ/test_max_tokens_reached.py
@@ -1,8 +1,7 @@
-import pytest
 
 from strands import Agent, tool
 from strands.models.bedrock import BedrockModel
-from strands.types.exceptions import EventLoopMaxTokensReachedException
+from strands.types.exceptions import EventLoopException, EventLoopMaxTokensReachedException
 
 
 @tool
@@ -14,5 +13,7 @@ def test_context_window_overflow():
     model = BedrockModel(max_tokens=1)
     agent = Agent(model=model, tools=[story_tool])
 
-    with pytest.raises(EventLoopMaxTokensReachedException):
+    try:
         agent("Tell me a story!")
+    except EventLoopException as e:
+        assert isinstance(e.original_exception, EventLoopMaxTokensReachedException)

From c94b74e75236dcbac0ffdb438f3a4a9ff59cda5f Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Thu, 31 Jul 2025 10:50:40 -0400
Subject: [PATCH 3/6] fix: rename exception, attach incomplete message, raise
 earlier in cycle

---
 src/strands/event_loop/event_loop.py        | 29 ++++++++++++++-------
 src/strands/types/exceptions.py             | 14 ++++++++--
 tests/strands/event_loop/test_event_loop.py | 13 ++++++---
 tests_integ/test_max_tokens_reached.py      |  7 +++--
 4 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py
index 16fefa5ac..ae21d4c6d 100644
--- a/src/strands/event_loop/event_loop.py
+++ b/src/strands/event_loop/event_loop.py
@@ -31,7 +31,7 @@
 from ..types.exceptions import (
     ContextWindowOverflowException,
     EventLoopException,
-    EventLoopMaxTokensReachedException,
+    MaxTokensReachedException,
     ModelThrottledException,
 )
 from ..types.streaming import Metrics, StopReason
@@ -192,6 +192,22 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
                     raise e
 
     try:
+        if stop_reason == "max_tokens":
+            """
+            Handle max_tokens limit reached by the model.
+
+            When the model reaches its maximum token limit, this represents a potentially unrecoverable
+            state where the model's response was truncated. By default, Strands fails hard with an
+            a MaxTokensReachedException to maintain consistency with other failure types.
+            """
+            raise MaxTokensReachedException(
+                message=(
+                    "Agent has reached an unrecoverable state due to max_tokens limit. "
+                    "For more information see: "
+                    "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
+                ),
+                incomplete_message=message,
+            )
         # Add message in trace and mark the end of the stream messages trace
         stream_trace.add_message(message)
         stream_trace.end()
@@ -221,14 +237,6 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
                 yield event
 
             return
-        elif stop_reason == "max_tokens":
-            raise EventLoopMaxTokensReachedException(
-                (
-                    "Agent has reached an unrecoverable state due to max_tokens limit. "
-                    "For more information see: "
-                    "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception"
-                )
-            )
 
         # End the cycle and return results
         agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes)
@@ -244,7 +252,8 @@ async def event_loop_cycle(agent: "Agent", invocation_state: dict[str, Any]) ->
         # Don't yield or log the exception - we already did it when we
         # raised the exception and we don't need that duplication.
         raise
-    except ContextWindowOverflowException as e:
+    except (ContextWindowOverflowException, MaxTokensReachedException) as e:
+        # Special cased exceptions which we want to bubble up rather than get wrapped in an EventLoopException
         if cycle_span:
             tracer.end_span_with_error(cycle_span, str(e), e)
         raise e
diff --git a/src/strands/types/exceptions.py b/src/strands/types/exceptions.py
index 7d9f1c6dc..71ea28b9f 100644
--- a/src/strands/types/exceptions.py
+++ b/src/strands/types/exceptions.py
@@ -2,6 +2,8 @@
 
 from typing import Any
 
+from strands.types.content import Message
+
 
 class EventLoopException(Exception):
     """Exception raised by the event loop."""
@@ -18,7 +20,7 @@ def __init__(self, original_exception: Exception, request_state: Any = None) ->
         super().__init__(str(original_exception))
 
 
-class EventLoopMaxTokensReachedException(Exception):
+class MaxTokensReachedException(Exception):
     """Exception raised when the model reaches its maximum token generation limit.
 
     This exception is raised when the model stops generating tokens because it has reached the maximum number of
@@ -26,7 +28,15 @@ class EventLoopMaxTokensReachedException(Exception):
     the complexity of the response, or when the model naturally reaches its configured output limit during generation.
     """
 
-    pass
+    def __init__(self, message: str, incomplete_message: Message) -> None:
+        """Initialize the exception with an error message and the incomplete message object.
+
+        Args:
+            message: The error message describing the token limit issue
+            incomplete_message: The valid Message object with incomplete content due to token limits
+        """
+        self.incomplete_message = incomplete_message
+        super().__init__(message)
 
 
 class ContextWindowOverflowException(Exception):
diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py
index 05b20ba01..3886df8b9 100644
--- a/tests/strands/event_loop/test_event_loop.py
+++ b/tests/strands/event_loop/test_event_loop.py
@@ -22,6 +22,7 @@
 from strands.types.exceptions import (
     ContextWindowOverflowException,
     EventLoopException,
+    MaxTokensReachedException,
     ModelThrottledException,
 )
 from tests.fixtures.mock_hook_provider import MockHookProvider
@@ -567,7 +568,7 @@ async def test_event_loop_cycle_max_tokens_exception(
     agenerator,
     alist,
 ):
-    """Test that max_tokens stop reason raises EventLoopMaxTokensReachedException."""
+    """Test that max_tokens stop reason raises MaxTokensReachedException."""
 
     # Note the empty toolUse to handle case raised in https://github.com/strands-agents/sdk-python/issues/495
     model.stream.return_value = agenerator(
@@ -584,8 +585,8 @@ async def test_event_loop_cycle_max_tokens_exception(
         ]
     )
 
-    # Call event_loop_cycle, expecting it to raise EventLoopMaxTokensReachedException
-    with pytest.raises(EventLoopException) as exc_info:
+    # Call event_loop_cycle, expecting it to raise MaxTokensReachedException
+    with pytest.raises(MaxTokensReachedException) as exc_info:
         stream = strands.event_loop.event_loop.event_loop_cycle(
             agent=agent,
             invocation_state={},
@@ -596,10 +597,14 @@ async def test_event_loop_cycle_max_tokens_exception(
     expected_message = (
         "Agent has reached an unrecoverable state due to max_tokens limit. "
         "For more information see: "
-        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#eventloopmaxtokensreachedexception"
+        "https://strandsagents.com/latest/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
     )
     assert str(exc_info.value) == expected_message
 
+    # Verify that the message has not been appended to the messages array
+    assert len(agent.messages) == 1
+    assert exc_info.value.incomplete_message not in agent.messages
+
 
 @patch("strands.event_loop.event_loop.get_tracer")
 @pytest.mark.asyncio
diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
index 1bf75f136..519cf62c2 100644
--- a/tests_integ/test_max_tokens_reached.py
+++ b/tests_integ/test_max_tokens_reached.py
@@ -1,7 +1,8 @@
+import pytest
 
 from strands import Agent, tool
 from strands.models.bedrock import BedrockModel
-from strands.types.exceptions import EventLoopException, EventLoopMaxTokensReachedException
+from strands.types.exceptions import MaxTokensReachedException
 
 
 @tool
@@ -13,7 +14,5 @@ def test_context_window_overflow():
     model = BedrockModel(max_tokens=1)
     agent = Agent(model=model, tools=[story_tool])
 
-    try:
+    with pytest.raises(MaxTokensReachedException):
         agent("Tell me a story!")
-    except EventLoopException as e:
-        assert isinstance(e.original_exception, EventLoopMaxTokensReachedException)

From 36dd0f9304ba0daa4fceffef614ff91400fcb23a Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Thu, 31 Jul 2025 14:53:04 -0400
Subject: [PATCH 4/6] Update tests_integ/test_max_tokens_reached.py

Co-authored-by: Nick Clegg <nac542@gmail.com>
---
 tests_integ/test_max_tokens_reached.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
index 519cf62c2..1b822dcba 100644
--- a/tests_integ/test_max_tokens_reached.py
+++ b/tests_integ/test_max_tokens_reached.py
@@ -11,7 +11,7 @@ def story_tool(story: str) -> str:
 
 
 def test_context_window_overflow():
-    model = BedrockModel(max_tokens=1)
+    model = BedrockModel(max_tokens=100)
     agent = Agent(model=model, tools=[story_tool])
 
     with pytest.raises(MaxTokensReachedException):

From e04c73d85d86dde5d9e415ae2ef693aa9a55da56 Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Thu, 31 Jul 2025 14:53:11 -0400
Subject: [PATCH 5/6] Update tests_integ/test_max_tokens_reached.py

Co-authored-by: Nick Clegg <nac542@gmail.com>
---
 tests_integ/test_max_tokens_reached.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
index 1b822dcba..5f7e5584c 100644
--- a/tests_integ/test_max_tokens_reached.py
+++ b/tests_integ/test_max_tokens_reached.py
@@ -16,3 +16,5 @@ def test_context_window_overflow():
 
     with pytest.raises(MaxTokensReachedException):
         agent("Tell me a story!")
+    
+    assert len(agent.messages) == 1

From cca2f86a3f7a1d22cfa8cf59ffa0029943a0efa7 Mon Sep 17 00:00:00 2001
From: Dean Schmigelski <dbschmigelski+github@gmail.com>
Date: Thu, 31 Jul 2025 14:57:19 -0400
Subject: [PATCH 6/6] linting

---
 tests_integ/test_max_tokens_reached.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests_integ/test_max_tokens_reached.py b/tests_integ/test_max_tokens_reached.py
index 5f7e5584c..d9c2817b3 100644
--- a/tests_integ/test_max_tokens_reached.py
+++ b/tests_integ/test_max_tokens_reached.py
@@ -16,5 +16,5 @@ def test_context_window_overflow():
 
     with pytest.raises(MaxTokensReachedException):
         agent("Tell me a story!")
-    
+
     assert len(agent.messages) == 1