diff --git a/examples/realtime/twilio_sip/README.md b/examples/realtime/twilio_sip/README.md
index a96e5d379..d74a5960b 100644
--- a/examples/realtime/twilio_sip/README.md
+++ b/examples/realtime/twilio_sip/README.md
@@ -25,7 +25,7 @@ This example shows how to handle OpenAI Realtime SIP calls with the Agents SDK.
 1. Install dependencies:
    ```bash
-   uv pip install -r examples/realtime/twilio-sip/requirements.txt
+   uv pip install -r examples/realtime/twilio_sip/requirements.txt
    ```
 2. Export required environment variables:
    ```bash
diff --git a/src/agents/realtime/openai_realtime.py b/src/agents/realtime/openai_realtime.py
index 3aad14c8a..2df33cb64 100644
--- a/src/agents/realtime/openai_realtime.py
+++ b/src/agents/realtime/openai_realtime.py
@@ -198,6 +198,7 @@ def __init__(self) -> None:
         self._playback_tracker: RealtimePlaybackTracker | None = None
         self._created_session: OpenAISessionCreateRequest | None = None
         self._server_event_type_adapter = get_server_event_type_adapter()
+        self._call_id: str | None = None
 
     async def connect(self, options: RealtimeModelConfig) -> None:
         """Establish a connection to the model and keep it alive."""
@@ -220,6 +221,7 @@ async def connect(self, options: RealtimeModelConfig) -> None:
         if model_name:
             self.model = model_name
 
+        self._call_id = call_id
         api_key = await get_api_key(options.get("api_key"))
 
         if "tracing" in model_settings:
@@ -833,10 +835,13 @@ def _get_session_config(
         speed = model_settings.get("speed")
         modalities = model_settings.get("modalities", DEFAULT_MODEL_SETTINGS.get("modalities"))
-        input_audio_format = model_settings.get(
-            "input_audio_format",
-            DEFAULT_MODEL_SETTINGS.get("input_audio_format"),
-        )
+        if self._call_id:
+            input_audio_format = model_settings.get("input_audio_format")
+        else:
+            input_audio_format = model_settings.get(
+                "input_audio_format",
+                DEFAULT_MODEL_SETTINGS.get("input_audio_format"),
+            )
         input_audio_transcription = model_settings.get(
             "input_audio_transcription",
             DEFAULT_MODEL_SETTINGS.get("input_audio_transcription"),
@@ -845,10 +850,13 @@ def _get_session_config(
             "turn_detection",
             DEFAULT_MODEL_SETTINGS.get("turn_detection"),
         )
-        output_audio_format = model_settings.get(
-            "output_audio_format",
-            DEFAULT_MODEL_SETTINGS.get("output_audio_format"),
-        )
+        if self._call_id:
+            output_audio_format = model_settings.get("output_audio_format")
+        else:
+            output_audio_format = model_settings.get(
+                "output_audio_format",
+                DEFAULT_MODEL_SETTINGS.get("output_audio_format"),
+            )
         input_audio_noise_reduction = model_settings.get(
             "input_audio_noise_reduction",
             DEFAULT_MODEL_SETTINGS.get("input_audio_noise_reduction"),
diff --git a/tests/realtime/test_openai_realtime.py b/tests/realtime/test_openai_realtime.py
index 08c45e5d7..f8eb725ff 100644
--- a/tests/realtime/test_openai_realtime.py
+++ b/tests/realtime/test_openai_realtime.py
@@ -606,6 +606,29 @@ def test_get_and_update_session_config(self, model):
         assert cfg.audio is not None and cfg.audio.output is not None
         assert cfg.audio.output.voice == "verse"
 
+    def test_session_config_defaults_audio_formats_when_not_call(self, model):
+        settings: dict[str, Any] = {}
+        cfg = model._get_session_config(settings)
+        assert cfg.audio is not None
+        assert cfg.audio.input is not None
+        assert cfg.audio.input.format is not None
+        assert cfg.audio.input.format.type == "audio/pcm"
+        assert cfg.audio.output is not None
+        assert cfg.audio.output.format is not None
+        assert cfg.audio.output.format.type == "audio/pcm"
+
+    def test_session_config_preserves_sip_audio_formats(self, model):
+        model._call_id = "call-123"
+        settings = {
+            "turn_detection": {"type": "semantic_vad", "interrupt_response": True},
+        }
+        cfg = model._get_session_config(settings)
+        assert cfg.audio is not None
+        assert cfg.audio.input is not None
+        assert cfg.audio.input.format is None
+        assert cfg.audio.output is not None
+        assert cfg.audio.output.format is None
+
     @pytest.mark.asyncio
     async def test_handle_error_event_success(self, model):
         """Test successful handling of error events."""