Skip to content

Commit ce97c1d

Browse files
authored
Merge pull request #3 from mehtarac/realtime_mp
(feat)bidirectional_streaming: add openai realtime model provider
2 parents 6d81109 + 9cd3aca commit ce97c1d

File tree

10 files changed

+862
-17
lines changed

10 files changed

+862
-17
lines changed

pyproject.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,24 @@ sagemaker = [
5454
"boto3-stubs[sagemaker-runtime]>=1.26.0,<2.0.0",
5555
"openai>=1.68.0,<2.0.0", # SageMaker uses OpenAI-compatible interface
5656
]
57+
bidirectional-streaming-nova = [
58+
"pyaudio>=0.2.13",
59+
"rx>=3.2.0",
60+
"smithy-aws-core>=0.0.1",
61+
"pytz",
62+
"aws_sdk_bedrock_runtime",
63+
]
64+
bidirectional-streaming-openai = [
65+
"pyaudio>=0.2.13",
66+
"websockets>=12.0,<14.0",
67+
]
5768
bidirectional-streaming = [
5869
"pyaudio>=0.2.13",
5970
"rx>=3.2.0",
6071
"smithy-aws-core>=0.0.1",
6172
"pytz",
6273
"aws_sdk_bedrock_runtime",
74+
"websockets>=12.0,<14.0",
6375
]
6476
otel = ["opentelemetry-exporter-otlp-proto-http>=1.30.0,<2.0.0"]
6577
docs = [

src/strands/experimental/bidirectional_streaming/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
# Model providers - What users need to create models
1010
from .models.novasonic import NovaSonicBidirectionalModel
11+
from .models.openai import OpenAIRealtimeBidirectionalModel
1112

1213
# Event types - For type hints and event handling
1314
from .types.bidirectional_streaming import (
@@ -17,20 +18,26 @@
1718
InterruptionDetectedEvent,
1819
TextOutputEvent,
1920
UsageMetricsEvent,
21+
VoiceActivityEvent,
2022
)
2123

2224
__all__ = [
2325
# Main interface
2426
"BidirectionalAgent",
27+
2528
# Model providers
2629
"NovaSonicBidirectionalModel",
30+
"OpenAIRealtimeBidirectionalModel",
31+
2732
# Event types
2833
"AudioInputEvent",
29-
"AudioOutputEvent",
34+
"AudioOutputEvent",
3035
"TextOutputEvent",
3136
"InterruptionDetectedEvent",
3237
"BidirectionalStreamEvent",
38+
"VoiceActivityEvent",
3339
"UsageMetricsEvent",
40+
3441
# Model interface
3542
"BidirectionalModel",
3643
"BidirectionalModelSession",

src/strands/experimental/bidirectional_streaming/agent/agent.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from ..models.bidirectional_model import BidirectionalModel
3434
from ..types.bidirectional_streaming import AudioInputEvent, BidirectionalStreamEvent
3535

36-
3736
logger = logging.getLogger(__name__)
3837

3938
_DEFAULT_AGENT_NAME = "Strands Agents"

src/strands/experimental/bidirectional_streaming/event_loop/bidirectional_event_loop.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from ....types.tools import ToolResult, ToolUse
2424
from ..models.bidirectional_model import BidirectionalModelSession
2525

26-
2726
logger = logging.getLogger(__name__)
2827

2928
# Session constants

src/strands/experimental/bidirectional_streaming/models/__init__.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
from .bidirectional_model import BidirectionalModel, BidirectionalModelSession
44
from .novasonic import NovaSonicBidirectionalModel, NovaSonicSession
5+
from .openai import OpenAIRealtimeBidirectionalModel, OpenAIRealtimeSession
56

67
__all__ = [
7-
"BidirectionalModel",
8-
"BidirectionalModelSession",
9-
"NovaSonicBidirectionalModel",
8+
"BidirectionalModel",
9+
"BidirectionalModelSession",
10+
"NovaSonicBidirectionalModel",
1011
"NovaSonicSession",
12+
"OpenAIRealtimeBidirectionalModel",
13+
"OpenAIRealtimeSession"
1114
]

src/strands/experimental/bidirectional_streaming/models/novasonic.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
from aws_sdk_bedrock_runtime.client import BedrockRuntimeClient, InvokeModelWithBidirectionalStreamOperationInput
2525
from aws_sdk_bedrock_runtime.config import Config, HTTPAuthSchemeResolver, SigV4AuthScheme
26-
from aws_sdk_bedrock_runtime.models import BidirectionalInputPayloadPart, InvokeModelWithBidirectionalStreamInputChunk
26+
from aws_sdk_bedrock_runtime.models import BidirectionalInputPayloadPart, InvokeModelWithBidirectionalStreamInputChunk, InvokeModelWithBidirectionalStreamOperationOutput
2727
from smithy_aws_core.identity.environment import EnvironmentCredentialsResolver
2828

2929
from ....types.content import Messages
@@ -35,9 +35,8 @@
3535
BidirectionalConnectionStartEvent,
3636
InterruptionDetectedEvent,
3737
TextOutputEvent,
38-
UsageMetricsEvent
38+
UsageMetricsEvent,
3939
)
40-
4140
from .bidirectional_model import BidirectionalModel, BidirectionalModelSession
4241

4342
logger = logging.getLogger(__name__)
@@ -81,11 +80,11 @@ class NovaSonicSession(BidirectionalModelSession):
8180
interface.
8281
"""
8382

84-
def __init__(self, stream: any, config: dict[str, any]) -> None:
83+
def __init__(self, stream: InvokeModelWithBidirectionalStreamOperationOutput, config: dict[str, any]) -> None:
8584
"""Initialize Nova Sonic connection.
8685
8786
Args:
88-
stream: Nova Sonic bidirectional stream.
87+
stream: Nova Sonic bidirectional stream operation output from AWS SDK.
8988
config: Model configuration.
9089
"""
9190
self.stream = stream
@@ -487,14 +486,14 @@ def _convert_nova_event(self, nova_event: dict[str, any]) -> dict[str, any] | No
487486

488487
return {"interruptionDetected": interruption}
489488

490-
# Handle usage events (ignore)
489+
# Handle usage events - convert to standardized format
491490
elif "usageEvent" in nova_event:
492491
usage_data = nova_event["usageEvent"]
493492
usage_metrics: UsageMetricsEvent = {
494-
"totalTokens": usage_data.get("totalTokens"),
495-
"inputTokens": usage_data.get("totalInputTokens"),
496-
"outputTokens": usage_data.get("totalOutputTokens"),
497-
"audioTokens": usage_data.get("details", {}).get("total", {}).get("output", {}).get("speechTokens"),
493+
"totalTokens": usage_data.get("totalTokens", 0),
494+
"inputTokens": usage_data.get("totalInputTokens", 0),
495+
"outputTokens": usage_data.get("totalOutputTokens", 0),
496+
"audioTokens": usage_data.get("details", {}).get("total", {}).get("output", {}).get("speechTokens", 0)
498497
}
499498
return {"usageMetrics": usage_metrics}
500499

0 commit comments

Comments
 (0)