diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index bb45358a0..e837d30f2 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -104,7 +104,7 @@ def event_loop_cycle( # Create tracer span for this event loop cycle tracer = get_tracer() cycle_span = tracer.start_event_loop_cycle_span( - event_loop_kwargs=kwargs, parent_span=event_loop_parent_span, messages=messages + event_loop_kwargs=kwargs, messages=messages, parent_span=event_loop_parent_span ) kwargs["event_loop_cycle_span"] = cycle_span @@ -126,8 +126,8 @@ def event_loop_cycle( for attempt in range(MAX_ATTEMPTS): model_id = model.config.get("model_id") if hasattr(model, "config") else None model_invoke_span = tracer.start_model_invoke_span( - parent_span=cycle_span, messages=messages, + parent_span=cycle_span, model_id=model_id, ) @@ -142,7 +142,7 @@ def event_loop_cycle( kwargs.setdefault("request_state", {}) if model_invoke_span: - tracer.end_model_invoke_span(model_invoke_span, message, usage) + tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason) break # Success! Break out of retry loop except ContextWindowOverflowException as e: diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py index b17960fbc..15d7751aa 100644 --- a/src/strands/telemetry/tracer.py +++ b/src/strands/telemetry/tracer.py @@ -14,7 +14,7 @@ from ..agent.agent_result import AgentResult from ..types.content import Message, Messages -from ..types.streaming import Usage +from ..types.streaming import StopReason, Usage from ..types.tools import ToolResult, ToolUse from ..types.traces import AttributeValue @@ -196,20 +196,31 @@ def end_span_with_error(self, span: Span, error_message: str, exception: Optiona error = exception or Exception(error_message) self._end_span(span, error=error) + def _add_event(self, span: Optional[Span], event_name: str, event_attributes: Dict[str, AttributeValue]) -> None: + """Add an event with attributes to a span. + + Args: + span: The span to add the event to + event_name: Name of the event + event_attributes: Dictionary of attributes to set on the event + """ + if not span: + return + + span.add_event(event_name, attributes=event_attributes) + def start_model_invoke_span( self, + messages: Messages, parent_span: Optional[Span] = None, - agent_name: str = "Strands Agent", - messages: Optional[Messages] = None, model_id: Optional[str] = None, **kwargs: Any, ) -> Optional[Span]: """Start a new span for a model invocation. Args: + messages: Messages being sent to the model. parent_span: Optional parent span to link this span to. - agent_name: Name of the agent making the model call. - messages: Optional messages being sent to the model. model_id: Optional identifier for the model being invoked. **kwargs: Additional attributes to add to the span. @@ -219,8 +230,6 @@ def start_model_invoke_span( attributes: Dict[str, AttributeValue] = { "gen_ai.system": "strands-agents", "gen_ai.operation.name": "chat", - "gen_ai.agent.name": agent_name, - "gen_ai.prompt": serialize(messages), } if model_id: @@ -229,10 +238,17 @@ def start_model_invoke_span( # Add additional kwargs as attributes attributes.update({k: v for k, v in kwargs.items() if isinstance(v, (str, int, float, bool))}) - return self._start_span("Model invoke", parent_span, attributes, span_kind=trace_api.SpanKind.CLIENT) + span = self._start_span("Model invoke", parent_span, attributes, span_kind=trace_api.SpanKind.CLIENT) + for message in messages: + self._add_event( + span, + f"gen_ai.{message['role']}.message", + {"content": serialize(message["content"])}, + ) + return span def end_model_invoke_span( - self, span: Span, message: Message, usage: Usage, error: Optional[Exception] = None + self, span: Span, message: Message, usage: Usage, stop_reason: StopReason, error: Optional[Exception] = None ) -> None: """End a model invocation span with results and metrics. @@ -240,10 +256,10 @@ def end_model_invoke_span( span: The span to end. message: The message response from the model. usage: Token usage information from the model call. + stop_reason (StopReason): The reason the model stopped generating. error: Optional exception if the model call failed. """ attributes: Dict[str, AttributeValue] = { - "gen_ai.completion": serialize(message["content"]), "gen_ai.usage.prompt_tokens": usage["inputTokens"], "gen_ai.usage.input_tokens": usage["inputTokens"], "gen_ai.usage.completion_tokens": usage["outputTokens"], @@ -251,6 +267,12 @@ def end_model_invoke_span( "gen_ai.usage.total_tokens": usage["totalTokens"], } + self._add_event( + span, + "gen_ai.choice", + event_attributes={"finish_reason": str(stop_reason), "message": serialize(message["content"])}, + ) + self._end_span(span, attributes, error) def start_tool_call_span(self, tool: ToolUse, parent_span: Optional[Span] = None, **kwargs: Any) -> Optional[Span]: @@ -265,18 +287,29 @@ def start_tool_call_span(self, tool: ToolUse, parent_span: Optional[Span] = None The created span, or None if tracing is not enabled. """ attributes: Dict[str, AttributeValue] = { - "gen_ai.prompt": serialize(tool), + "gen_ai.operation.name": "execute_tool", "gen_ai.system": "strands-agents", - "tool.name": tool["name"], - "tool.id": tool["toolUseId"], - "tool.parameters": serialize(tool["input"]), + "gen_ai.tool.name": tool["name"], + "gen_ai.tool.call.id": tool["toolUseId"], } # Add additional kwargs as attributes attributes.update(kwargs) span_name = f"Tool: {tool['name']}" - return self._start_span(span_name, parent_span, attributes, span_kind=trace_api.SpanKind.INTERNAL) + span = self._start_span(span_name, parent_span, attributes, span_kind=trace_api.SpanKind.INTERNAL) + + self._add_event( + span, + "gen_ai.tool.message", + event_attributes={ + "role": "tool", + "content": serialize(tool["input"]), + "id": tool["toolUseId"], + }, + ) + + return span def end_tool_call_span( self, span: Span, tool_result: Optional[ToolResult], error: Optional[Exception] = None @@ -293,22 +326,28 @@ def end_tool_call_span( status = tool_result.get("status") status_str = str(status) if status is not None else "" - tool_result_content_json = serialize(tool_result.get("content")) attributes.update( { - "tool.result": tool_result_content_json, - "gen_ai.completion": tool_result_content_json, "tool.status": status_str, } ) + self._add_event( + span, + "gen_ai.choice", + event_attributes={ + "message": serialize(tool_result.get("content")), + "id": tool_result.get("toolUseId", ""), + }, + ) + self._end_span(span, attributes, error) def start_event_loop_cycle_span( self, event_loop_kwargs: Any, + messages: Messages, parent_span: Optional[Span] = None, - messages: Optional[Messages] = None, **kwargs: Any, ) -> Optional[Span]: """Start a new span for an event loop cycle. @@ -316,7 +355,7 @@ def start_event_loop_cycle_span( Args: event_loop_kwargs: Arguments for the event loop cycle. parent_span: Optional parent span to link this span to. - messages: Optional messages being processed in this cycle. + messages: Messages being processed in this cycle. **kwargs: Additional attributes to add to the span. Returns: @@ -326,7 +365,6 @@ def start_event_loop_cycle_span( parent_span = parent_span if parent_span else event_loop_kwargs.get("event_loop_parent_span") attributes: Dict[str, AttributeValue] = { - "gen_ai.prompt": serialize(messages), "event_loop.cycle_id": event_loop_cycle_id, } @@ -337,7 +375,15 @@ def start_event_loop_cycle_span( attributes.update({k: v for k, v in kwargs.items() if isinstance(v, (str, int, float, bool))}) span_name = f"Cycle {event_loop_cycle_id}" - return self._start_span(span_name, parent_span, attributes, span_kind=trace_api.SpanKind.INTERNAL) + span = self._start_span(span_name, parent_span, attributes) + for message in messages or []: + self._add_event( + span, + f"gen_ai.{message['role']}.message", + {"content": serialize(message["content"])}, + ) + + return span def end_event_loop_cycle_span( self, @@ -354,13 +400,12 @@ def end_event_loop_cycle_span( tool_result_message: Optional tool result message if a tool was called. error: Optional exception if the cycle failed. """ - attributes: Dict[str, AttributeValue] = { - "gen_ai.completion": serialize(message["content"]), - } + attributes: Dict[str, AttributeValue] = {} + event_attributes: Dict[str, AttributeValue] = {"message": serialize(message["content"])} if tool_result_message: - attributes["tool.result"] = serialize(tool_result_message["content"]) - + event_attributes["tool.result"] = serialize(tool_result_message["content"]) + self._add_event(span, "gen_ai.choice", event_attributes=event_attributes) self._end_span(span, attributes, error) def start_agent_span( @@ -387,9 +432,8 @@ def start_agent_span( """ attributes: Dict[str, AttributeValue] = { "gen_ai.system": "strands-agents", - "agent.name": agent_name, "gen_ai.agent.name": agent_name, - "gen_ai.prompt": prompt, + "gen_ai.operation.name": "invoke_agent", } if model_id: @@ -397,7 +441,6 @@ def start_agent_span( if tools: tools_json = serialize(tools) - attributes["agent.tools"] = tools_json attributes["gen_ai.agent.tools"] = tools_json # Add custom trace attributes if provided @@ -407,7 +450,18 @@ def start_agent_span( # Add additional kwargs as attributes attributes.update({k: v for k, v in kwargs.items() if isinstance(v, (str, int, float, bool))}) - return self._start_span(agent_name, attributes=attributes, span_kind=trace_api.SpanKind.CLIENT) + span = self._start_span( + f"invoke_agent {agent_name}", attributes=attributes, span_kind=trace_api.SpanKind.CLIENT + ) + self._add_event( + span, + "gen_ai.user.message", + event_attributes={ + "content": prompt, + }, + ) + + return span def end_agent_span( self, @@ -426,10 +480,10 @@ def end_agent_span( attributes: Dict[str, AttributeValue] = {} if response: - attributes.update( - { - "gen_ai.completion": str(response), - } + self._add_event( + span, + "gen_ai.choice", + event_attributes={"message": str(response), "finish_reason": str(response.stop_reason)}, ) if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"): diff --git a/tests/strands/telemetry/test_tracer.py b/tests/strands/telemetry/test_tracer.py index 63ffda0d5..61ad2494e 100644 --- a/tests/strands/telemetry/test_tracer.py +++ b/tests/strands/telemetry/test_tracer.py @@ -10,7 +10,7 @@ ) from strands.telemetry.tracer import JSONEncoder, Tracer, get_tracer, serialize -from strands.types.streaming import Usage +from strands.types.streaming import StopReason, Usage @pytest.fixture(autouse=True) @@ -148,15 +148,17 @@ def test_start_model_invoke_span(mock_tracer): messages = [{"role": "user", "content": [{"text": "Hello"}]}] model_id = "test-model" - span = tracer.start_model_invoke_span(agent_name="TestAgent", messages=messages, model_id=model_id) + span = tracer.start_model_invoke_span(messages=messages, agent_name="TestAgent", model_id=model_id) mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "Model invoke" assert mock_tracer.start_span.call_args[1]["kind"] == SpanKind.CLIENT mock_span.set_attribute.assert_any_call("gen_ai.system", "strands-agents") mock_span.set_attribute.assert_any_call("gen_ai.operation.name", "chat") - mock_span.set_attribute.assert_any_call("gen_ai.agent.name", "TestAgent") mock_span.set_attribute.assert_any_call("gen_ai.request.model", model_id) + mock_span.add_event.assert_called_with( + "gen_ai.user.message", attributes={"content": json.dumps(messages[0]["content"])} + ) assert span is not None @@ -165,15 +167,19 @@ def test_end_model_invoke_span(mock_span): tracer = Tracer() message = {"role": "assistant", "content": [{"text": "Response"}]} usage = Usage(inputTokens=10, outputTokens=20, totalTokens=30) + stop_reason: StopReason = "end_turn" - tracer.end_model_invoke_span(mock_span, message, usage) + tracer.end_model_invoke_span(mock_span, message, usage, stop_reason) - mock_span.set_attribute.assert_any_call("gen_ai.completion", json.dumps(message["content"])) mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 10) mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 10) mock_span.set_attribute.assert_any_call("gen_ai.usage.completion_tokens", 20) mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 20) mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 30) + mock_span.add_event.assert_called_with( + "gen_ai.choice", + attributes={"message": json.dumps(message["content"]), "finish_reason": "end_turn"}, + ) mock_span.set_status.assert_called_once_with(StatusCode.OK) mock_span.end.assert_called_once() @@ -193,12 +199,13 @@ def test_start_tool_call_span(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "Tool: test-tool" - mock_span.set_attribute.assert_any_call( - "gen_ai.prompt", json.dumps({"name": "test-tool", "toolUseId": "123", "input": {"param": "value"}}) + mock_span.set_attribute.assert_any_call("gen_ai.tool.name", "test-tool") + mock_span.set_attribute.assert_any_call("gen_ai.system", "strands-agents") + mock_span.set_attribute.assert_any_call("gen_ai.operation.name", "execute_tool") + mock_span.set_attribute.assert_any_call("gen_ai.tool.call.id", "123") + mock_span.add_event.assert_any_call( + "gen_ai.tool.message", attributes={"role": "tool", "content": json.dumps({"param": "value"}), "id": "123"} ) - mock_span.set_attribute.assert_any_call("tool.name", "test-tool") - mock_span.set_attribute.assert_any_call("tool.id", "123") - mock_span.set_attribute.assert_any_call("tool.parameters", json.dumps({"param": "value"})) assert span is not None @@ -209,9 +216,11 @@ def test_end_tool_call_span(mock_span): tracer.end_tool_call_span(mock_span, tool_result) - mock_span.set_attribute.assert_any_call("tool.result", json.dumps(tool_result.get("content"))) - mock_span.set_attribute.assert_any_call("gen_ai.completion", json.dumps(tool_result.get("content"))) mock_span.set_attribute.assert_any_call("tool.status", "success") + mock_span.add_event.assert_called_with( + "gen_ai.choice", + attributes={"message": json.dumps(tool_result.get("content")), "id": ""}, + ) mock_span.set_status.assert_called_once_with(StatusCode.OK) mock_span.end.assert_called_once() @@ -232,8 +241,10 @@ def test_start_event_loop_cycle_span(mock_tracer): mock_tracer.start_span.assert_called_once() assert mock_tracer.start_span.call_args[1]["name"] == "Cycle cycle-123" - mock_span.set_attribute.assert_any_call("gen_ai.prompt", json.dumps(messages)) mock_span.set_attribute.assert_any_call("event_loop.cycle_id", "cycle-123") + mock_span.add_event.assert_any_call( + "gen_ai.user.message", attributes={"content": json.dumps([{"text": "Hello"}])} + ) assert span is not None @@ -245,8 +256,13 @@ def test_end_event_loop_cycle_span(mock_span): tracer.end_event_loop_cycle_span(mock_span, message, tool_result_message) - mock_span.set_attribute.assert_any_call("gen_ai.completion", json.dumps(message["content"])) - mock_span.set_attribute.assert_any_call("tool.result", json.dumps(tool_result_message["content"])) + mock_span.add_event.assert_called_with( + "gen_ai.choice", + attributes={ + "message": json.dumps(message["content"]), + "tool.result": json.dumps(tool_result_message["content"]), + }, + ) mock_span.set_status.assert_called_once_with(StatusCode.OK) mock_span.end.assert_called_once() @@ -274,12 +290,12 @@ def test_start_agent_span(mock_tracer): ) mock_tracer.start_span.assert_called_once() - assert mock_tracer.start_span.call_args[1]["name"] == "WeatherAgent" + assert mock_tracer.start_span.call_args[1]["name"] == "invoke_agent WeatherAgent" mock_span.set_attribute.assert_any_call("gen_ai.system", "strands-agents") - mock_span.set_attribute.assert_any_call("agent.name", "WeatherAgent") - mock_span.set_attribute.assert_any_call("gen_ai.prompt", prompt) + mock_span.set_attribute.assert_any_call("gen_ai.agent.name", "WeatherAgent") mock_span.set_attribute.assert_any_call("gen_ai.request.model", model_id) mock_span.set_attribute.assert_any_call("custom_attr", "value") + mock_span.add_event.assert_any_call("gen_ai.user.message", attributes={"content": prompt}) assert span is not None @@ -293,16 +309,20 @@ def test_end_agent_span(mock_span): mock_response = mock.MagicMock() mock_response.metrics = mock_metrics + mock_response.stop_reason = "end_turn" mock_response.__str__ = mock.MagicMock(return_value="Agent response") tracer.end_agent_span(mock_span, mock_response) - mock_span.set_attribute.assert_any_call("gen_ai.completion", "Agent response") mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 50) mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 50) mock_span.set_attribute.assert_any_call("gen_ai.usage.completion_tokens", 100) mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 100) mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 150) + mock_span.add_event.assert_any_call( + "gen_ai.choice", + attributes={"message": "Agent response", "finish_reason": "end_turn"}, + ) mock_span.set_status.assert_called_once_with(StatusCode.OK) mock_span.end.assert_called_once() @@ -401,7 +421,9 @@ def test_start_model_invoke_span_with_parent(mock_tracer): parent_span = mock.MagicMock() mock_tracer.start_span.return_value = mock_span - span = tracer.start_model_invoke_span(parent_span=parent_span, agent_name="TestAgent", model_id="test-model") + span = tracer.start_model_invoke_span( + messages=[], parent_span=parent_span, agent_name="TestAgent", model_id="test-model" + ) # Verify trace.set_span_in_context was called with parent span mock_tracer.start_span.assert_called_once()