From 48e2a6ff8a32c9b1910552eba719e918e62a75a5 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 24 Sep 2025 16:58:48 -0400 Subject: [PATCH] Add OpenTelemetrySpeechToTextClient and friends This is basically the chat client OpenTelemetry client copy/pasted/tweaked to compile. The otel spec doesn't have anything specific to this modality yet, so this is making best guesses on what things should be and also being minimal in what's tracked. --- ...gingSpeechToTextClientBuilderExtensions.cs | 2 +- .../OpenTelemetrySpeechToTextClient.cs | 367 ++++++++++++++++++ ...etrySpeechToTextClientBuilderExtensions.cs | 42 ++ .../SpeechToText/SpeechToTextClientBuilder.cs | 4 +- .../OpenTelemetrySpeechToTextClientTests.cs | 150 +++++++ 5 files changed, 562 insertions(+), 3 deletions(-) create mode 100644 src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs create mode 100644 src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs create mode 100644 test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs index 92a67189982..54ed411bd35 100644 --- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs @@ -14,7 +14,7 @@ namespace Microsoft.Extensions.AI; [Experimental("MEAI001")] public static class LoggingSpeechToTextClientBuilderExtensions { - /// Adds logging to the audio transcription client pipeline. + /// Adds logging to the speech-to-text client pipeline. /// The . /// /// An optional used to create a logger with which logging should be performed. diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs new file mode 100644 index 00000000000..40461ebe457 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClient.cs @@ -0,0 +1,367 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using System.Diagnostics.Metrics; +using System.IO; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.Diagnostics; + +#pragma warning disable S3358 // Ternary operators should not be nested +#pragma warning disable SA1111 // Closing parenthesis should be on line of last parameter +#pragma warning disable SA1113 // Comma should be on the same line as previous parameter + +namespace Microsoft.Extensions.AI; + +/// Represents a delegating speech-to-text client that implements the OpenTelemetry Semantic Conventions for Generative AI systems. +/// +/// This class provides an implementation of the Semantic Conventions for Generative AI systems v1.37, defined at . +/// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change. +/// +[Experimental("MEAI001")] +public sealed class OpenTelemetrySpeechToTextClient : DelegatingSpeechToTextClient +{ + private readonly ActivitySource _activitySource; + private readonly Meter _meter; + + private readonly Histogram _tokenUsageHistogram; + private readonly Histogram _operationDurationHistogram; + + private readonly string? _defaultModelId; + private readonly string? _providerName; + private readonly string? _serverAddress; + private readonly int _serverPort; + + /// Initializes a new instance of the class. + /// The underlying . + /// The to use for emitting any logging data from the client. + /// An optional source name that will be used on the telemetry data. +#pragma warning disable IDE0060 // Remove unused parameter; it exists for consistency with IChatClient and future use + public OpenTelemetrySpeechToTextClient(ISpeechToTextClient innerClient, ILogger? logger = null, string? sourceName = null) +#pragma warning restore IDE0060 + : base(innerClient) + { + Debug.Assert(innerClient is not null, "Should have been validated by the base ctor"); + + if (innerClient!.GetService() is SpeechToTextClientMetadata metadata) + { + _defaultModelId = metadata.DefaultModelId; + _providerName = metadata.ProviderName; + _serverAddress = metadata.ProviderUri?.Host; + _serverPort = metadata.ProviderUri?.Port ?? 0; + } + + string name = string.IsNullOrEmpty(sourceName) ? OpenTelemetryConsts.DefaultSourceName : sourceName!; + _activitySource = new(name); + _meter = new(name); + + _tokenUsageHistogram = _meter.CreateHistogram( + OpenTelemetryConsts.GenAI.Client.TokenUsage.Name, + OpenTelemetryConsts.TokensUnit, + OpenTelemetryConsts.GenAI.Client.TokenUsage.Description +#if NET9_0_OR_GREATER + , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.TokenUsage.ExplicitBucketBoundaries } +#endif + ); + + _operationDurationHistogram = _meter.CreateHistogram( + OpenTelemetryConsts.GenAI.Client.OperationDuration.Name, + OpenTelemetryConsts.SecondsUnit, + OpenTelemetryConsts.GenAI.Client.OperationDuration.Description +#if NET9_0_OR_GREATER + , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.OperationDuration.ExplicitBucketBoundaries } +#endif + ); + } + + /// + protected override void Dispose(bool disposing) + { + if (disposing) + { + _activitySource.Dispose(); + _meter.Dispose(); + } + + base.Dispose(disposing); + } + + /// + /// Gets or sets a value indicating whether potentially sensitive information should be included in telemetry. + /// + /// + /// if potentially sensitive information should be included in telemetry; + /// if telemetry shouldn't include raw inputs and outputs. + /// The default value is , unless the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT + /// environment variable is set to "true" (case-insensitive). + /// + /// + /// By default, telemetry includes metadata, such as token counts, but not raw inputs + /// and outputs, such as message content, function call arguments, and function call results. + /// The default value can be overridden by setting the OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT + /// environment variable to "true". Explicitly setting this property will override the environment variable. + /// + public bool EnableSensitiveData { get; set; } = TelemetryHelpers.EnableSensitiveDataDefault; + + /// + public override object? GetService(Type serviceType, object? serviceKey = null) => + serviceType == typeof(ActivitySource) ? _activitySource : + base.GetService(serviceType, serviceKey); + + /// + public override async Task GetTextAsync(Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(audioSpeechStream); + + using Activity? activity = CreateAndConfigureActivity(options); + Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null; + string? requestModelId = options?.ModelId ?? _defaultModelId; + + SpeechToTextResponse? response = null; + Exception? error = null; + try + { + response = await base.GetTextAsync(audioSpeechStream, options, cancellationToken); + return response; + } + catch (Exception ex) + { + error = ex; + throw; + } + finally + { + TraceResponse(activity, requestModelId, response, error, stopwatch); + } + } + + /// + public override async IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(audioSpeechStream); + + using Activity? activity = CreateAndConfigureActivity(options); + Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null; + string? requestModelId = options?.ModelId ?? _defaultModelId; + + IAsyncEnumerable updates; + try + { + updates = base.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken); + } + catch (Exception ex) + { + TraceResponse(activity, requestModelId, response: null, ex, stopwatch); + throw; + } + + var responseEnumerator = updates.GetAsyncEnumerator(cancellationToken); + List trackedUpdates = []; + Exception? error = null; + try + { + while (true) + { + SpeechToTextResponseUpdate update; + try + { + if (!await responseEnumerator.MoveNextAsync()) + { + break; + } + + update = responseEnumerator.Current; + } + catch (Exception ex) + { + error = ex; + throw; + } + + trackedUpdates.Add(update); + yield return update; + Activity.Current = activity; // workaround for https://github.com/dotnet/runtime/issues/47802 + } + } + finally + { + TraceResponse(activity, requestModelId, trackedUpdates.ToSpeechToTextResponse(), error, stopwatch); + + await responseEnumerator.DisposeAsync(); + } + } + + /// Creates an activity for a speech-to-text request, or returns if not enabled. + private Activity? CreateAndConfigureActivity(SpeechToTextOptions? options) + { + Activity? activity = null; + if (_activitySource.HasListeners()) + { + string? modelId = options?.ModelId ?? _defaultModelId; + + activity = _activitySource.StartActivity( + string.IsNullOrWhiteSpace(modelId) ? OpenTelemetryConsts.GenAI.GenerateContentName : $"{OpenTelemetryConsts.GenAI.GenerateContentName} {modelId}", + ActivityKind.Client); + + if (activity is { IsAllDataRequested: true }) + { + _ = activity + .AddTag(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName) + .AddTag(OpenTelemetryConsts.GenAI.Request.Model, modelId) + .AddTag(OpenTelemetryConsts.GenAI.Provider.Name, _providerName) + .AddTag(OpenTelemetryConsts.GenAI.Output.Type, OpenTelemetryConsts.TypeText); + + if (_serverAddress is not null) + { + _ = activity + .AddTag(OpenTelemetryConsts.Server.Address, _serverAddress) + .AddTag(OpenTelemetryConsts.Server.Port, _serverPort); + } + + if (options is not null) + { + if (EnableSensitiveData) + { + // Log all additional request options as raw values on the span. + // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data. + if (options.AdditionalProperties is { } props) + { + foreach (KeyValuePair prop in props) + { + _ = activity.AddTag(prop.Key, prop.Value); + } + } + } + } + } + } + + return activity; + } + + /// Adds speech-to-text response information to the activity. + private void TraceResponse( + Activity? activity, + string? requestModelId, + SpeechToTextResponse? response, + Exception? error, + Stopwatch? stopwatch) + { + if (_operationDurationHistogram.Enabled && stopwatch is not null) + { + TagList tags = default; + + AddMetricTags(ref tags, requestModelId, response); + if (error is not null) + { + tags.Add(OpenTelemetryConsts.Error.Type, error.GetType().FullName); + } + + _operationDurationHistogram.Record(stopwatch.Elapsed.TotalSeconds, tags); + } + + if (_tokenUsageHistogram.Enabled && response?.Usage is { } usage) + { + if (usage.InputTokenCount is long inputTokens) + { + TagList tags = default; + tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeInput); + AddMetricTags(ref tags, requestModelId, response); + _tokenUsageHistogram.Record((int)inputTokens, tags); + } + + if (usage.OutputTokenCount is long outputTokens) + { + TagList tags = default; + tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeOutput); + AddMetricTags(ref tags, requestModelId, response); + _tokenUsageHistogram.Record((int)outputTokens, tags); + } + } + + if (error is not null) + { + _ = activity? + .AddTag(OpenTelemetryConsts.Error.Type, error.GetType().FullName) + .SetStatus(ActivityStatusCode.Error, error.Message); + } + + if (response is not null) + { + AddOutputMessagesTags(response, activity); + + if (activity is not null) + { + if (!string.IsNullOrWhiteSpace(response.ResponseId)) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Id, response.ResponseId); + } + + if (response.ModelId is not null) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Model, response.ModelId); + } + + if (response.Usage?.InputTokenCount is long inputTokens) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.InputTokens, (int)inputTokens); + } + + if (response.Usage?.OutputTokenCount is long outputTokens) + { + _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.OutputTokens, (int)outputTokens); + } + + // Log all additional response properties as raw values on the span. + // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data. + if (EnableSensitiveData && response.AdditionalProperties is { } props) + { + foreach (KeyValuePair prop in props) + { + _ = activity.AddTag(prop.Key, prop.Value); + } + } + } + } + + void AddMetricTags(ref TagList tags, string? requestModelId, SpeechToTextResponse? response) + { + tags.Add(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName); + + if (requestModelId is not null) + { + tags.Add(OpenTelemetryConsts.GenAI.Request.Model, requestModelId); + } + + tags.Add(OpenTelemetryConsts.GenAI.Provider.Name, _providerName); + + if (_serverAddress is string endpointAddress) + { + tags.Add(OpenTelemetryConsts.Server.Address, endpointAddress); + tags.Add(OpenTelemetryConsts.Server.Port, _serverPort); + } + + if (response?.ModelId is string responseModel) + { + tags.Add(OpenTelemetryConsts.GenAI.Response.Model, responseModel); + } + } + } + + private void AddOutputMessagesTags(SpeechToTextResponse response, Activity? activity) + { + if (EnableSensitiveData && activity is { IsAllDataRequested: true }) + { + _ = activity.AddTag( + OpenTelemetryConsts.GenAI.Output.Messages, + OpenTelemetryChatClient.SerializeChatMessages([new(ChatRole.Assistant, response.Contents)])); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs new file mode 100644 index 00000000000..5e23a41358e --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/OpenTelemetrySpeechToTextClientBuilderExtensions.cs @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental("MEAI001")] +public static class OpenTelemetrySpeechToTextClientBuilderExtensions +{ + /// + /// Adds OpenTelemetry support to the speech-to-text client pipeline, following the OpenTelemetry Semantic Conventions for Generative AI systems. + /// + /// + /// The draft specification this follows is available at . + /// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change. + /// + /// The . + /// An optional to use to create a logger for logging events. + /// An optional source name that will be used on the telemetry data. + /// An optional callback that can be used to configure the instance. + /// The . + public static SpeechToTextClientBuilder UseOpenTelemetry( + this SpeechToTextClientBuilder builder, + ILoggerFactory? loggerFactory = null, + string? sourceName = null, + Action? configure = null) => + Throw.IfNull(builder).Use((innerClient, services) => + { + loggerFactory ??= services.GetService(); + + var client = new OpenTelemetrySpeechToTextClient(innerClient, loggerFactory?.CreateLogger(typeof(OpenTelemetrySpeechToTextClient)), sourceName); + configure?.Invoke(client); + + return client; + }); +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs index dae4224a94d..1945a140762 100644 --- a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs @@ -58,7 +58,7 @@ public ISpeechToTextClient Build(IServiceProvider? services = null) return audioClient; } - /// Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline. + /// Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline. /// The client factory function. /// The updated instance. public SpeechToTextClientBuilder Use(Func clientFactory) @@ -68,7 +68,7 @@ public SpeechToTextClientBuilder Use(Func clientFactory(innerClient)); } - /// Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline. + /// Adds a factory for an intermediate speech-to-text client to the speech-to-text client pipeline. /// The client factory function. /// The updated instance. public SpeechToTextClientBuilder Use(Func clientFactory) diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs new file mode 100644 index 00000000000..c243bf2bf12 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/OpenTelemetrySpeechToTextClientTests.cs @@ -0,0 +1,150 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; +using OpenTelemetry.Trace; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class OpenTelemetrySpeechToTextClientTests +{ + [Fact] + public void InvalidArgs_Throws() + { + Assert.Throws("innerClient", () => new OpenTelemetrySpeechToTextClient(null!)); + } + + [Theory] + [InlineData(false, false)] + [InlineData(false, true)] + [InlineData(true, false)] + [InlineData(true, true)] + public async Task ExpectedInformationLogged_Async(bool streaming, bool enableSensitiveData) + { + var sourceName = Guid.NewGuid().ToString(); + var activities = new List(); + using var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() + .AddSource(sourceName) + .AddInMemoryExporter(activities) + .Build(); + + using var innerClient = new TestSpeechToTextClient + { + GetTextAsyncCallback = async (request, options, cancellationToken) => + { + await Task.Yield(); + return new("This is the recognized text.") + { + Usage = new() + { + InputTokenCount = 10, + OutputTokenCount = 20, + TotalTokenCount = 30, + }, + }; + }, + + GetStreamingTextAsyncCallback = TestClientStreamAsync, + + GetServiceCallback = (serviceType, serviceKey) => + serviceType == typeof(SpeechToTextClientMetadata) ? new SpeechToTextClientMetadata("testservice", new Uri("http://localhost:12345/something"), "amazingmodel") : + null, + }; + + static async IAsyncEnumerable TestClientStreamAsync( + Stream request, SpeechToTextOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await Task.Yield(); + yield return new("This is"); + yield return new(" the recognized"); + yield return new() + { + Contents = + [ + new TextContent(" text."), + new UsageContent(new() + { + InputTokenCount = 10, + OutputTokenCount = 20, + TotalTokenCount = 30, + }), + ] + }; + } + + using var client = innerClient + .AsBuilder() + .UseOpenTelemetry(null, sourceName, configure: instance => + { + instance.EnableSensitiveData = enableSensitiveData; + }) + .Build(); + + SpeechToTextOptions options = new() + { + ModelId = "mycoolspeechmodel", + AdditionalProperties = new() + { + ["service_tier"] = "value1", + ["SomethingElse"] = "value2", + }, + }; + + var response = streaming ? + await client.GetStreamingTextAsync(Stream.Null, options).ToSpeechToTextResponseAsync() : + await client.GetTextAsync(Stream.Null, options); + + var activity = Assert.Single(activities); + + Assert.NotNull(activity.Id); + Assert.NotEmpty(activity.Id); + + Assert.Equal("localhost", activity.GetTagItem("server.address")); + Assert.Equal(12345, (int)activity.GetTagItem("server.port")!); + + Assert.Equal("generate_content mycoolspeechmodel", activity.DisplayName); + Assert.Equal("testservice", activity.GetTagItem("gen_ai.provider.name")); + + Assert.Equal("mycoolspeechmodel", activity.GetTagItem("gen_ai.request.model")); + Assert.Equal(enableSensitiveData ? "value1" : null, activity.GetTagItem("service_tier")); + Assert.Equal(enableSensitiveData ? "value2" : null, activity.GetTagItem("SomethingElse")); + + Assert.Equal(10, activity.GetTagItem("gen_ai.usage.input_tokens")); + Assert.Equal(20, activity.GetTagItem("gen_ai.usage.output_tokens")); + + Assert.True(activity.Duration.TotalMilliseconds > 0); + + var tags = activity.Tags.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); + if (enableSensitiveData) + { + Assert.Equal(ReplaceWhitespace(""" + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "This is the recognized text." + } + ] + } + ] + """), ReplaceWhitespace(tags["gen_ai.output.messages"])); + } + else + { + Assert.False(tags.ContainsKey("gen_ai.output.messages")); + } + + static string ReplaceWhitespace(string? input) => Regex.Replace(input ?? "", @"\s+", " ").Trim(); + } +}