From 312533ac9db9bd4b0f859b999c7920d3368ca2b2 Mon Sep 17 00:00:00 2001 From: Murat Kaan Meral Date: Wed, 24 Sep 2025 11:17:39 +0200 Subject: [PATCH 1/3] fix(openai): Improve error handling for OpenAI model provider --- src/strands/models/openai.py | 47 +++++++-- tests/strands/models/test_openai.py | 151 ++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 6 deletions(-) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index 7af81be84..fc2e9c778 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -15,6 +15,7 @@ from typing_extensions import Unpack, override from ..types.content import ContentBlock, Messages +from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException from ..types.streaming import StreamEvent from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse from ._validation import validate_config_keys @@ -372,6 +373,10 @@ async def stream( Yields: Formatted message chunks from the model. + + Raises: + ContextWindowOverflowException: If the input exceeds the model's context window. + ModelThrottledException: If the request is throttled by OpenAI (rate limits). """ logger.debug("formatting request") request = self.format_request(messages, tool_specs, system_prompt, tool_choice) @@ -383,7 +388,20 @@ async def stream( # client. The asyncio event loop does not allow connections to be shared. For more details, please refer to # https://github.com/encode/httpx/discussions/2959. async with openai.AsyncOpenAI(**self.client_args) as client: - response = await client.chat.completions.create(**request) + try: + response = await client.chat.completions.create(**request) + except openai.BadRequestError as e: + # Check if this is a context length exceeded error + if hasattr(e, "code") and e.code == "context_length_exceeded": + logger.warning("OpenAI threw context window overflow error") + raise ContextWindowOverflowException(str(e)) from e + # Re-raise other BadRequestError exceptions + raise + except openai.RateLimitError as e: + # All rate limit errors should be treated as throttling, not context overflow + # Rate limits (including TPM) require waiting/retrying, not context reduction + logger.warning("OpenAI threw rate limit error") + raise ModelThrottledException(str(e)) from e logger.debug("got response from model") yield self.format_chunk({"chunk_type": "message_start"}) @@ -452,16 +470,33 @@ async def structured_output( Yields: Model events with the last being the structured output. + + Raises: + ContextWindowOverflowException: If the input exceeds the model's context window. + ModelThrottledException: If the request is throttled by OpenAI (rate limits). """ # We initialize an OpenAI context on every request so as to avoid connection sharing in the underlying httpx # client. The asyncio event loop does not allow connections to be shared. For more details, please refer to # https://github.com/encode/httpx/discussions/2959. 
async with openai.AsyncOpenAI(**self.client_args) as client: - response: ParsedChatCompletion = await client.beta.chat.completions.parse( - model=self.get_config()["model_id"], - messages=self.format_request(prompt, system_prompt=system_prompt)["messages"], - response_format=output_model, - ) + try: + response: ParsedChatCompletion = await client.beta.chat.completions.parse( + model=self.get_config()["model_id"], + messages=self.format_request(prompt, system_prompt=system_prompt)["messages"], + response_format=output_model, + ) + except openai.BadRequestError as e: + # Check if this is a context length exceeded error + if hasattr(e, "code") and e.code == "context_length_exceeded": + logger.warning("OpenAI threw context window overflow error") + raise ContextWindowOverflowException(str(e)) from e + # Re-raise other BadRequestError exceptions + raise + except openai.RateLimitError as e: + # All rate limit errors should be treated as throttling, not context overflow + # Rate limits (including TPM) require waiting/retrying, not context reduction + logger.warning("OpenAI threw rate limit error") + raise ModelThrottledException(str(e)) from e parsed: T | None = None # Find the first choice with tool_calls diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py index 5979ec628..668542c4f 100644 --- a/tests/strands/models/test_openai.py +++ b/tests/strands/models/test_openai.py @@ -1,10 +1,12 @@ import unittest.mock +import openai import pydantic import pytest import strands from strands.models.openai import OpenAIModel +from strands.types.exceptions import ContextWindowOverflowException @pytest.fixture @@ -752,3 +754,152 @@ def test_tool_choice_none_no_warning(model, messages, captured_warnings): model.format_request(messages, tool_choice=None) assert len(captured_warnings) == 0 + + +@pytest.mark.asyncio +async def test_stream_context_overflow_exception(openai_client, model, messages): + """Test that OpenAI context overflow errors are properly converted to ContextWindowOverflowException.""" + # Create a mock OpenAI BadRequestError with context_length_exceeded code + mock_error = openai.BadRequestError( + message="This model's maximum context length is 4096 tokens. 
However, your messages resulted in 5000 tokens.", + response=unittest.mock.MagicMock(), + body={"error": {"code": "context_length_exceeded"}}, + ) + mock_error.code = "context_length_exceeded" + + # Configure the mock client to raise the context overflow error + openai_client.chat.completions.create.side_effect = mock_error + + # Test that the stream method converts the error properly + with pytest.raises(ContextWindowOverflowException) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the exception message contains the original error + assert "maximum context length" in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +async def test_stream_other_bad_request_errors_passthrough(openai_client, model, messages): + """Test that other BadRequestError exceptions are not converted to ContextWindowOverflowException.""" + # Create a mock OpenAI BadRequestError with a different error code + mock_error = openai.BadRequestError( + message="Invalid parameter value", + response=unittest.mock.MagicMock(), + body={"error": {"code": "invalid_parameter"}}, + ) + mock_error.code = "invalid_parameter" + + # Configure the mock client to raise the non-context error + openai_client.chat.completions.create.side_effect = mock_error + + # Test that other BadRequestError exceptions pass through unchanged + with pytest.raises(openai.BadRequestError) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the original exception is raised, not ContextWindowOverflowException + assert exc_info.value == mock_error + + +@pytest.mark.asyncio +async def test_structured_output_context_overflow_exception(openai_client, model, messages, test_output_model_cls): + """Test that structured output also handles context overflow properly.""" + # Create a mock OpenAI BadRequestError with context_length_exceeded code + mock_error = openai.BadRequestError( + message="This model's maximum context length is 4096 tokens. 
However, your messages resulted in 5000 tokens.", + response=unittest.mock.MagicMock(), + body={"error": {"code": "context_length_exceeded"}}, + ) + mock_error.code = "context_length_exceeded" + + # Configure the mock client to raise the context overflow error + openai_client.beta.chat.completions.parse.side_effect = mock_error + + # Test that the structured_output method converts the error properly + with pytest.raises(ContextWindowOverflowException) as exc_info: + async for _ in model.structured_output(test_output_model_cls, messages): + pass + + # Verify the exception message contains the original error + assert "maximum context length" in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +async def test_stream_rate_limit_as_throttle(openai_client, model, messages): + """Test that all rate limit errors are converted to ModelThrottledException.""" + from strands.types.exceptions import ModelThrottledException + + # Create a mock OpenAI RateLimitError (any type of rate limit) + mock_error = openai.RateLimitError( + message="Request too large for gpt-4o on tokens per min (TPM): Limit 30000, Requested 117505.", + response=unittest.mock.MagicMock(), + body={"error": {"code": "rate_limit_exceeded"}}, + ) + mock_error.code = "rate_limit_exceeded" + + # Configure the mock client to raise the rate limit error + openai_client.chat.completions.create.side_effect = mock_error + + # Test that the stream method converts the error properly + with pytest.raises(ModelThrottledException) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the exception message contains the original error + assert "tokens per min" in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +async def test_stream_request_rate_limit_as_throttle(openai_client, model, messages): + """Test that request-based rate limit errors are converted to ModelThrottledException.""" + from strands.types.exceptions import ModelThrottledException + + # Create a mock OpenAI RateLimitError for request-based rate limiting + mock_error = openai.RateLimitError( + message="Rate limit reached for requests per minute.", + response=unittest.mock.MagicMock(), + body={"error": {"code": "rate_limit_exceeded"}}, + ) + mock_error.code = "rate_limit_exceeded" + + # Configure the mock client to raise the request rate limit error + openai_client.chat.completions.create.side_effect = mock_error + + # Test that the stream method converts the error properly + with pytest.raises(ModelThrottledException) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the exception message contains the original error + assert "Rate limit reached" in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +async def test_structured_output_rate_limit_as_throttle(openai_client, model, messages, test_output_model_cls): + """Test that structured output handles rate limit errors properly.""" + from strands.types.exceptions import ModelThrottledException + + # Create a mock OpenAI RateLimitError + mock_error = openai.RateLimitError( + message="Request too large for gpt-4o on tokens per min (TPM): Limit 30000, Requested 117505.", + response=unittest.mock.MagicMock(), + body={"error": {"code": "rate_limit_exceeded"}}, + ) + mock_error.code = "rate_limit_exceeded" + + # Configure the mock client to raise the rate limit error + openai_client.beta.chat.completions.parse.side_effect = mock_error + + # Test that the 
structured_output method converts the error properly + with pytest.raises(ModelThrottledException) as exc_info: + async for _ in model.structured_output(test_output_model_cls, messages): + pass + + # Verify the exception message contains the original error + assert "tokens per min" in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error From 35f2eb213ad04547e4be142e0c01f46bf7a1059e Mon Sep 17 00:00:00 2001 From: Murat Kaan Meral Date: Wed, 24 Sep 2025 13:35:18 +0200 Subject: [PATCH 2/3] move imports to top --- tests/strands/models/test_openai.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py index 668542c4f..f8c8568fe 100644 --- a/tests/strands/models/test_openai.py +++ b/tests/strands/models/test_openai.py @@ -6,7 +6,7 @@ import strands from strands.models.openai import OpenAIModel -from strands.types.exceptions import ContextWindowOverflowException +from strands.types.exceptions import ContextWindowOverflowException, ModelThrottledException @pytest.fixture @@ -830,7 +830,6 @@ async def test_structured_output_context_overflow_exception(openai_client, model @pytest.mark.asyncio async def test_stream_rate_limit_as_throttle(openai_client, model, messages): """Test that all rate limit errors are converted to ModelThrottledException.""" - from strands.types.exceptions import ModelThrottledException # Create a mock OpenAI RateLimitError (any type of rate limit) mock_error = openai.RateLimitError( @@ -856,7 +855,6 @@ async def test_stream_rate_limit_as_throttle(openai_client, model, messages): @pytest.mark.asyncio async def test_stream_request_rate_limit_as_throttle(openai_client, model, messages): """Test that request-based rate limit errors are converted to ModelThrottledException.""" - from strands.types.exceptions import ModelThrottledException # Create a mock OpenAI RateLimitError for request-based rate limiting mock_error = openai.RateLimitError( @@ -882,7 +880,6 @@ async def test_stream_request_rate_limit_as_throttle(openai_client, model, messa @pytest.mark.asyncio async def test_structured_output_rate_limit_as_throttle(openai_client, model, messages, test_output_model_cls): """Test that structured output handles rate limit errors properly.""" - from strands.types.exceptions import ModelThrottledException # Create a mock OpenAI RateLimitError mock_error = openai.RateLimitError( From 27955a3660c541db96fdc70989c8437749641a5e Mon Sep 17 00:00:00 2001 From: Murat Kaan Meral Date: Thu, 25 Sep 2025 11:58:47 +0200 Subject: [PATCH 3/3] fix(openai): Add integ tests for error handling --- tests_integ/models/test_model_openai.py | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests_integ/models/test_model_openai.py b/tests_integ/models/test_model_openai.py index 7054b222a..115a0819d 100644 --- a/tests_integ/models/test_model_openai.py +++ b/tests_integ/models/test_model_openai.py @@ -1,4 +1,5 @@ import os +import unittest.mock import pydantic import pytest @@ -6,6 +7,7 @@ import strands from strands import Agent, tool from strands.models.openai import OpenAIModel +from strands.types.exceptions import ContextWindowOverflowException, ModelThrottledException from tests_integ.models import providers # these tests only run if we have the openai api key @@ -167,3 +169,55 @@ def tool_with_image_return(): # 'user', but this message with role 'tool' contains an image URL." 
# See https://github.com/strands-agents/sdk-python/issues/320 for additional details agent("Run the the tool and analyze the image") + + +def test_context_window_overflow_integration(): + """Integration test for context window overflow with OpenAI. + + This test verifies that when a request exceeds the model's context window, + the OpenAI model properly raises a ContextWindowOverflowException. + """ + # Use gpt-4o-mini which has a smaller context window to make this test more reliable + mini_model = OpenAIModel( + model_id="gpt-4o-mini-2024-07-18", + client_args={ + "api_key": os.getenv("OPENAI_API_KEY"), + }, + ) + + agent = Agent(model=mini_model) + + # Create a very long text that should exceed context window + # This text is designed to be long enough to exceed context but not hit token rate limits + long_text = ( + "This text is longer than context window, but short enough to not get caught in token rate limit. " * 6800 + ) + + # This should raise ContextWindowOverflowException which gets handled by conversation manager + # The agent should attempt to reduce context and retry + with pytest.raises(ContextWindowOverflowException): + agent(long_text) + + +def test_rate_limit_throttling_integration_no_retries(model): + """Integration test for rate limit handling with retries disabled. + + This test verifies that when a request exceeds OpenAI's rate limits, + the model properly raises a ModelThrottledException. We disable retries + to avoid waiting for the exponential backoff during testing. + """ + # Patch the event loop constants to disable retries for this test + with unittest.mock.patch("strands.event_loop.event_loop.MAX_ATTEMPTS", 1): + agent = Agent(model=model) + + # Create a message that's very long to trigger token-per-minute rate limits + # This should be large enough to exceed TPM limits immediately + very_long_text = "Really long text " * 20000 + + # This should raise ModelThrottledException without retries + with pytest.raises(ModelThrottledException) as exc_info: + agent(very_long_text) + + # Verify it's a rate limit error + error_message = str(exc_info.value).lower() + assert "rate limit" in error_message or "tokens per min" in error_message
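
A minimal caller-side sketch of how the new exception mapping might be consumed, assuming the
patched OpenAIModel plus the ContextWindowOverflowException / ModelThrottledException types
imported above. The trim-and-backoff policy shown is illustrative only, not the SDK's built-in
conversation manager or event-loop retry behavior.

    import asyncio

    from strands.models.openai import OpenAIModel
    from strands.types.exceptions import ContextWindowOverflowException, ModelThrottledException


    async def run_with_recovery(model: OpenAIModel, messages, max_attempts: int = 3) -> list:
        """Stream a response, trimming context on overflow and backing off on throttles."""
        for attempt in range(max_attempts):
            try:
                # Collect all formatted chunks from the model stream.
                return [chunk async for chunk in model.stream(messages)]
            except ContextWindowOverflowException:
                # Context too large: drop the oldest message and retry with less input.
                messages = messages[1:]
            except ModelThrottledException:
                # Rate limited (RPM or TPM): wait and retry rather than shrinking the context.
                await asyncio.sleep(2**attempt)
        raise RuntimeError("retries exhausted")

The split mirrors the comments in the patch: rate limits (including TPM) call for waiting and
retrying, while a context_length_exceeded error calls for reducing the input before retrying.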