From 84468d2284c7e9dd731a63dc7a2a75d2fb63bc7a Mon Sep 17 00:00:00 2001 From: Vamil Gandhi Date: Mon, 15 Sep 2025 18:31:12 +0000 Subject: [PATCH] fix: correctly label tool result messages in OpenTelemetry events - Label messages containing tool results as 'gen_ai.tool.message' regardless of role - Add OpenTelemetry semantic conventions v1.36.0 reference - Note that GenAI namespace is experimental - Add documentation link to semantic conventions - Convert tests to data-driven parametrized format for better maintainability - Improve test coverage with comprehensive edge cases including mixed content and multiple tool results --- src/strands/telemetry/tracer.py | 30 +++- tests/strands/telemetry/test_tracer.py | 201 +++++++++++++++++++++++++ 2 files changed, 228 insertions(+), 3 deletions(-) diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py index 9e170571a..d1862b859 100644 --- a/src/strands/telemetry/tracer.py +++ b/src/strands/telemetry/tracer.py @@ -207,6 +207,30 @@ def _add_event(self, span: Optional[Span], event_name: str, event_attributes: Di span.add_event(event_name, attributes=event_attributes) + def _get_event_name_for_message(self, message: Message) -> str: + """Determine the appropriate OpenTelemetry event name for a message. + + According to OpenTelemetry semantic conventions v1.36.0, messages containing tool results + should be labeled as 'gen_ai.tool.message' regardless of their role field. + This ensures proper categorization of tool responses in traces. + + Note: The GenAI namespace is experimental and may change in future versions. + + Reference: https://github.com/open-telemetry/semantic-conventions/blob/v1.36.0/docs/gen-ai/gen-ai-events.md#event-gen_aitoolmessage + + Args: + message: The message to determine the event name for + + Returns: + The OpenTelemetry event name (e.g., 'gen_ai.user.message', 'gen_ai.tool.message') + """ + # Check if the message contains a tool result + for content_block in message.get("content", []): + if "toolResult" in content_block: + return "gen_ai.tool.message" + + return f"gen_ai.{message['role']}.message" + def start_model_invoke_span( self, messages: Messages, @@ -240,7 +264,7 @@ def start_model_invoke_span( for message in messages: self._add_event( span, - f"gen_ai.{message['role']}.message", + self._get_event_name_for_message(message), {"content": serialize(message["content"])}, ) return span @@ -379,7 +403,7 @@ def start_event_loop_cycle_span( for message in messages or []: self._add_event( span, - f"gen_ai.{message['role']}.message", + self._get_event_name_for_message(message), {"content": serialize(message["content"])}, ) @@ -456,7 +480,7 @@ def start_agent_span( for message in messages: self._add_event( span, - f"gen_ai.{message['role']}.message", + self._get_event_name_for_message(message), {"content": serialize(message["content"])}, ) diff --git a/tests/strands/telemetry/test_tracer.py b/tests/strands/telemetry/test_tracer.py index 568fff130..8c4f9ae20 100644 --- a/tests/strands/telemetry/test_tracer.py +++ b/tests/strands/telemetry/test_tracer.py @@ -746,3 +746,204 @@ def test_serialize_vs_json_dumps(): custom_result = serialize({"text": japanese_text}) assert japanese_text in custom_result assert "\\u" not in custom_result + + +@pytest.mark.parametrize( + "message, expected_event_name, description", + [ + # Regular role-based messages + ( + {"role": "user", "content": [{"text": "Hello"}]}, + "gen_ai.user.message", + "regular user message", + ), + ( + {"role": "assistant", "content": [{"text": "Hello"}]}, + "gen_ai.assistant.message", + "regular assistant message", + ), + ( + {"role": "system", "content": [{"text": "You are a helpful assistant"}]}, + "gen_ai.system.message", + "regular system message", + ), + # Messages with tool results should always be labeled as tool messages + ( + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": "123", + "status": "success", + "content": [{"text": "Tool response"}], + } + } + ], + }, + "gen_ai.tool.message", + "user message containing tool result", + ), + ( + { + "role": "assistant", + "content": [ + { + "toolResult": { + "toolUseId": "123", + "status": "success", + "content": [{"text": "Tool response"}], + } + } + ], + }, + "gen_ai.tool.message", + "assistant message containing tool result", + ), + # Mixed content with tool results + ( + { + "role": "user", + "content": [ + {"text": "Here are the results:"}, + { + "toolResult": { + "toolUseId": "123", + "status": "success", + "content": [{"text": "Tool response"}], + } + }, + ], + }, + "gen_ai.tool.message", + "message with both text and tool result", + ), + # Multiple tool results + ( + { + "role": "user", + "content": [ + { + "toolResult": { + "toolUseId": "123", + "status": "success", + "content": [{"text": "First tool"}], + } + }, + { + "toolResult": { + "toolUseId": "456", + "status": "success", + "content": [{"text": "Second tool"}], + } + }, + ], + }, + "gen_ai.tool.message", + "message with multiple tool results", + ), + # Edge cases + ( + {"role": "user", "content": []}, + "gen_ai.user.message", + "message with empty content", + ), + ( + {"role": "assistant"}, + "gen_ai.assistant.message", + "message with no content key", + ), + ], +) +def test_get_event_name_for_message(message, expected_event_name, description): + """Test getting event name for various message types using data-driven approach.""" + tracer = Tracer() + + event_name = tracer._get_event_name_for_message(message) + + assert event_name == expected_event_name, f"Failed for {description}" + + +def test_start_model_invoke_span_with_tool_result_message(mock_tracer): + """Test that start_model_invoke_span correctly labels tool result messages.""" + with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer): + tracer = Tracer() + tracer.tracer = mock_tracer + + mock_span = mock.MagicMock() + mock_tracer.start_span.return_value = mock_span + + # Message that contains a tool result + messages = [ + { + "role": "user", + "content": [ + {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}} + ], + } + ] + + span = tracer.start_model_invoke_span(messages=messages, model_id="test-model") + + # Should use gen_ai.tool.message event name instead of gen_ai.user.message + mock_span.add_event.assert_called_with( + "gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])} + ) + assert span is not None + + +def test_start_agent_span_with_tool_result_message(mock_tracer): + """Test that start_agent_span correctly labels tool result messages.""" + with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer): + tracer = Tracer() + tracer.tracer = mock_tracer + + mock_span = mock.MagicMock() + mock_tracer.start_span.return_value = mock_span + + # Message that contains a tool result + messages = [ + { + "role": "user", + "content": [ + {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}} + ], + } + ] + + span = tracer.start_agent_span(messages=messages, agent_name="WeatherAgent", model_id="test-model") + + # Should use gen_ai.tool.message event name instead of gen_ai.user.message + mock_span.add_event.assert_called_with( + "gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])} + ) + assert span is not None + + +def test_start_event_loop_cycle_span_with_tool_result_message(mock_tracer): + """Test that start_event_loop_cycle_span correctly labels tool result messages.""" + with mock.patch("strands.telemetry.tracer.trace_api.get_tracer", return_value=mock_tracer): + tracer = Tracer() + tracer.tracer = mock_tracer + + mock_span = mock.MagicMock() + mock_tracer.start_span.return_value = mock_span + + # Message that contains a tool result + messages = [ + { + "role": "user", + "content": [ + {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}} + ], + } + ] + + event_loop_kwargs = {"event_loop_cycle_id": "cycle-123"} + span = tracer.start_event_loop_cycle_span(event_loop_kwargs, messages=messages) + + # Should use gen_ai.tool.message event name instead of gen_ai.user.message + mock_span.add_event.assert_called_with( + "gen_ai.tool.message", attributes={"content": json.dumps(messages[0]["content"])} + ) + assert span is not None