 from functools import wraps
+import json
 from typing import TYPE_CHECKING

 import sentry_sdk
 from sentry_sdk.ai.monitoring import record_token_usage
+from sentry_sdk.ai.utils import set_data_normalized
 from sentry_sdk.consts import OP, SPANDATA
 from sentry_sdk.integrations import _check_minimum_version, DidNotEnable, Integration
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.utils import (
     capture_internal_exceptions,
     event_from_exception,
     package_version,
+    safe_serialize,
 )

 try:
+    try:
+        from anthropic import NOT_GIVEN
+    except ImportError:
+        NOT_GIVEN = None
+
     from anthropic.resources import AsyncMessages, Messages

     if TYPE_CHECKING:
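
Note: the prompt/response capture gated by `should_send_default_pii()` and `include_prompts` throughout this patch is opt-in. A minimal setup sketch (the DSN is a placeholder):

```python
import sentry_sdk
from sentry_sdk.integrations.anthropic import AnthropicIntegration

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    traces_sample_rate=1.0,  # record spans for the instrumented calls
    send_default_pii=True,  # makes should_send_default_pii() return True
    integrations=[AnthropicIntegration(include_prompts=True)],
)
```
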
@@ -53,8 +61,11 @@ def _capture_exception(exc):
     sentry_sdk.capture_event(event, hint=hint)


-def _calculate_token_usage(result, span):
-    # type: (Messages, Span) -> None
+def _get_token_usage(result):
+    # type: (Messages) -> tuple[int, int]
+    """
+    Get token usage from the Anthropic response.
+    """
     input_tokens = 0
     output_tokens = 0
     if hasattr(result, "usage"):
@@ -64,44 +75,21 @@ def _calculate_token_usage(result, span):
         if hasattr(usage, "output_tokens") and isinstance(usage.output_tokens, int):
             output_tokens = usage.output_tokens

-    total_tokens = input_tokens + output_tokens
+    return input_tokens, output_tokens

-    record_token_usage(
-        span,
-        input_tokens=input_tokens,
-        output_tokens=output_tokens,
-        total_tokens=total_tokens,
-    )

-
-def _get_responses(content):
-    # type: (list[Any]) -> list[dict[str, Any]]
+def _collect_ai_data(event, model, input_tokens, output_tokens, content_blocks):
+    # type: (MessageStreamEvent, str | None, int, int, list[str]) -> tuple[str | None, int, int, list[str]]
     """
-    Get JSON of a Anthropic responses.
-    """
-    responses = []
-    for item in content:
-        if hasattr(item, "text"):
-            responses.append(
-                {
-                    "type": item.type,
-                    "text": item.text,
-                }
-            )
-    return responses
-
-
-def _collect_ai_data(event, input_tokens, output_tokens, content_blocks):
-    # type: (MessageStreamEvent, int, int, list[str]) -> tuple[int, int, list[str]]
-    """
-    Count token usage and collect content blocks from the AI streaming response.
+    Collect model information, token usage, and content blocks from the AI streaming response.
     """
     with capture_internal_exceptions():
         if hasattr(event, "type"):
             if event.type == "message_start":
                 usage = event.message.usage
                 input_tokens += usage.input_tokens
                 output_tokens += usage.output_tokens
+                model = event.message.model or model
             elif event.type == "content_block_start":
                 pass
             elif event.type == "content_block_delta":
@@ -114,31 +102,80 @@ def _collect_ai_data(event, input_tokens, output_tokens, content_blocks):
             elif event.type == "message_delta":
                 output_tokens += event.usage.output_tokens

-    return input_tokens, output_tokens, content_blocks
+    return model, input_tokens, output_tokens, content_blocks


-def _add_ai_data_to_span(
-    span, integration, input_tokens, output_tokens, content_blocks
-):
-    # type: (Span, AnthropicIntegration, int, int, list[str]) -> None
+def _set_input_data(span, kwargs, integration):
+    # type: (Span, dict[str, Any], AnthropicIntegration) -> None
     """
-    Add token usage and content blocks from the AI streaming response to the span.
+    Set input data for the span based on the provided keyword arguments for the Anthropic message creation.
     """
-    with capture_internal_exceptions():
-        if should_send_default_pii() and integration.include_prompts:
-            complete_message = "".join(content_blocks)
-            span.set_data(
-                SPANDATA.AI_RESPONSES,
-                [{"type": "text", "text": complete_message}],
-            )
-        total_tokens = input_tokens + output_tokens
-        record_token_usage(
+    messages = kwargs.get("messages")
+    if (
+        messages is not None
+        and len(messages) > 0
+        and should_send_default_pii()
+        and integration.include_prompts
+    ):
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, safe_serialize(messages)
+        )
+
+    set_data_normalized(
+        span, SPANDATA.GEN_AI_RESPONSE_STREAMING, kwargs.get("stream", False)
+    )
+
+    kwargs_keys_to_attributes = {
+        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
+        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
+        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
+        "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K,
+        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
+    }
+    for key, attribute in kwargs_keys_to_attributes.items():
+        value = kwargs.get(key)
+        if value is not NOT_GIVEN and value is not None:
+            set_data_normalized(span, attribute, value)
+
+    # Input attributes: Tools
+    tools = kwargs.get("tools")
+    if tools is not NOT_GIVEN and tools is not None and len(tools) > 0:
+        set_data_normalized(
+            span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools)
+        )
+
+
+def _set_output_data(
+    span,
+    integration,
+    model,
+    input_tokens,
+    output_tokens,
+    content_blocks,
+    finish_span=False,
+):
+    # type: (Span, AnthropicIntegration, str | None, int | None, int | None, list[Any], bool) -> None
+    """
+    Set output data for the span based on the AI response."""
+    span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
+    if should_send_default_pii() and integration.include_prompts:
+        set_data_normalized(
             span,
-            input_tokens=input_tokens,
-            output_tokens=output_tokens,
-            total_tokens=total_tokens,
+            SPANDATA.GEN_AI_RESPONSE_TEXT,
+            json.dumps(content_blocks),
+            unpack=False,
         )
-        span.set_data(SPANDATA.AI_STREAMING, True)
+
+    record_token_usage(
+        span,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+    )
+
+    # TODO: GEN_AI_RESPONSE_TOOL_CALLS ?
+
+    if finish_span:
+        span.__exit__(None, None, None)


 def _sentry_patched_create_common(f, *args, **kwargs):
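
The `is not NOT_GIVEN and is not None` checks in `_set_input_data` mirror how anthropic's client marks omitted parameters. A toy illustration of the same filtering with a plain dict (assumes `anthropic` is installed so `NOT_GIVEN` is the real sentinel):

```python
from anthropic import NOT_GIVEN

kwargs = {"model": "claude-3-5-sonnet", "temperature": NOT_GIVEN, "top_k": None}

# Only parameters the caller actually supplied survive the filter.
recorded = {
    key: value
    for key, value in kwargs.items()
    if value is not NOT_GIVEN and value is not None
}
assert recorded == {"model": "claude-3-5-sonnet"}
```
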
@@ -155,69 +192,95 @@ def _sentry_patched_create_common(f, *args, **kwargs):
     except TypeError:
         return f(*args, **kwargs)

+    model = kwargs.get("model", "")
+
     span = sentry_sdk.start_span(
-        op=OP.ANTHROPIC_MESSAGES_CREATE,
-        description="Anthropic messages create",
+        op=OP.GEN_AI_CHAT,
+        name=f"chat {model}".strip(),
         origin=AnthropicIntegration.origin,
     )
     span.__enter__()

-    result = yield f, args, kwargs
+    _set_input_data(span, kwargs, integration)

-    # add data to span and finish it
-    messages = list(kwargs["messages"])
-    model = kwargs.get("model")
+    result = yield f, args, kwargs

     with capture_internal_exceptions():
-        span.set_data(SPANDATA.AI_MODEL_ID, model)
-        span.set_data(SPANDATA.AI_STREAMING, False)
-
-        if should_send_default_pii() and integration.include_prompts:
-            span.set_data(SPANDATA.AI_INPUT_MESSAGES, messages)
-
         if hasattr(result, "content"):
-            if should_send_default_pii() and integration.include_prompts:
-                span.set_data(SPANDATA.AI_RESPONSES, _get_responses(result.content))
-            _calculate_token_usage(result, span)
-            span.__exit__(None, None, None)
+            input_tokens, output_tokens = _get_token_usage(result)
+
+            content_blocks = []
+            for content_block in result.content:
+                if hasattr(content_block, "to_dict"):
+                    content_blocks.append(content_block.to_dict())
+                elif hasattr(content_block, "model_dump"):
+                    content_blocks.append(content_block.model_dump())
+                elif hasattr(content_block, "text"):
+                    content_blocks.append({"type": "text", "text": content_block.text})
+
+            _set_output_data(
+                span=span,
+                integration=integration,
+                model=getattr(result, "model", None),
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                content_blocks=content_blocks,
+                finish_span=True,
+            )

         # Streaming response
         elif hasattr(result, "_iterator"):
             old_iterator = result._iterator

             def new_iterator():
                 # type: () -> Iterator[MessageStreamEvent]
+                model = None
                 input_tokens = 0
                 output_tokens = 0
                 content_blocks = []  # type: list[str]

                 for event in old_iterator:
-                    input_tokens, output_tokens, content_blocks = _collect_ai_data(
-                        event, input_tokens, output_tokens, content_blocks
+                    model, input_tokens, output_tokens, content_blocks = (
+                        _collect_ai_data(
+                            event, model, input_tokens, output_tokens, content_blocks
+                        )
                     )
                     yield event

-                _add_ai_data_to_span(
-                    span, integration, input_tokens, output_tokens, content_blocks
+                _set_output_data(
+                    span=span,
+                    integration=integration,
+                    model=model,
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
+                    finish_span=True,
                 )
-                span.__exit__(None, None, None)

             async def new_iterator_async():
                 # type: () -> AsyncIterator[MessageStreamEvent]
+                model = None
                 input_tokens = 0
                 output_tokens = 0
                 content_blocks = []  # type: list[str]

                 async for event in old_iterator:
-                    input_tokens, output_tokens, content_blocks = _collect_ai_data(
-                        event, input_tokens, output_tokens, content_blocks
+                    model, input_tokens, output_tokens, content_blocks = (
+                        _collect_ai_data(
+                            event, model, input_tokens, output_tokens, content_blocks
+                        )
                     )
                     yield event

-                _add_ai_data_to_span(
-                    span, integration, input_tokens, output_tokens, content_blocks
+                _set_output_data(
+                    span=span,
+                    integration=integration,
+                    model=model,
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
+                    finish_span=True,
                 )
-                span.__exit__(None, None, None)

             if str(type(result._iterator)) == "<class 'async_generator'>":
                 result._iterator = new_iterator_async()
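
End to end, once this integration is enabled, an ordinary client call like the sketch below runs inside the `OP.GEN_AI_CHAT` span created by `_sentry_patched_create_common` (model ID and prompt are illustrative):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
message = client.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=128,
    messages=[{"role": "user", "content": "Say hello"}],
)
# _set_input_data records the gen_ai.request.* attributes from the kwargs;
# _set_output_data records the response model, token usage, and, when PII
# capture is allowed, the serialized response text.
```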