From 8c2ea71380010906381099a8b82a8c7d58ee5994 Mon Sep 17 00:00:00 2001 From: Cynthia J Date: Wed, 27 Aug 2025 10:22:06 -0700 Subject: [PATCH 1/2] init commit --- firebaseai/src/LiveClientRealtimeInput.cs | 74 +++++++++++++++++++ .../src/LiveClientRealtimeInput.cs.meta | 9 +++ firebaseai/src/LiveSession.cs | 68 +++++++++++++---- 3 files changed, 138 insertions(+), 13 deletions(-) create mode 100644 firebaseai/src/LiveClientRealtimeInput.cs create mode 100644 firebaseai/src/LiveClientRealtimeInput.cs.meta diff --git a/firebaseai/src/LiveClientRealtimeInput.cs b/firebaseai/src/LiveClientRealtimeInput.cs new file mode 100644 index 00000000..1266c375 --- /dev/null +++ b/firebaseai/src/LiveClientRealtimeInput.cs @@ -0,0 +1,74 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Firebase.AI { + +/// +/// A type that represents a realtime input to the model. +/// +public class LiveClientRealtimeInput { + /// + /// The list of media chunks. + /// + [Obsolete("Use audio, video, or text instead")] + public List MediaChunks { get; set; } + + /// + /// Audio data. + /// + public ModelContent.InlineDataPart? Audio { get; set; } + + /// + /// Video data. + /// + public ModelContent.InlineDataPart? Video { get; set; } + + /// + /// Text data. + /// + public string Text { get; set; } + + /// + /// Creates a [LiveClientRealtimeInput] instance. + /// + public LiveClientRealtimeInput() { } + + /// + /// Creates a [LiveClientRealtimeInput] with audio data. + /// + public LiveClientRealtimeInput(ModelContent.InlineDataPart audio) { + Audio = audio; + } + + /// + /// Creates a [LiveClientRealtimeInput] with video data. + /// + public LiveClientRealtimeInput(ModelContent.InlineDataPart video) { + Video = video; + } + + /// + /// Creates a [LiveClientRealtimeInput] with text data. + /// + public LiveClientRealtimeInput(string text) { + Text = text; + } +} + +} diff --git a/firebaseai/src/LiveClientRealtimeInput.cs.meta b/firebaseai/src/LiveClientRealtimeInput.cs.meta new file mode 100644 index 00000000..d93f9e8b --- /dev/null +++ b/firebaseai/src/LiveClientRealtimeInput.cs.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 1a8f7a2b8c7d6e54b9a6d5c4b3a2e1f0 +DefaultImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + plugins: [] + assetBundleName: + assetBundleVariant: diff --git a/firebaseai/src/LiveSession.cs b/firebaseai/src/LiveSession.cs index 52030a7d..83b23fce 100644 --- a/firebaseai/src/LiveSession.cs +++ b/firebaseai/src/LiveSession.cs @@ -141,29 +141,71 @@ public async Task SendAsync( } /// - /// Send realtime input to the server. + /// Sends realtime input (media chunks) to the server. /// - /// A list of media chunks to send. - /// A token to cancel the send operation. - public async Task SendMediaChunksAsync( + /// The list of media chunks to send. + [Obsolete("Use SendAudio, SendVideo, or SendText instead")] + public Task SendMediaChunksAsync( List mediaChunks, CancellationToken cancellationToken = default) { - if (mediaChunks == null) return; + if (mediaChunks == null) return Task.CompletedTask; + + return SendRealtimeInputAsync(new LiveClientRealtimeInput() { MediaChunks = mediaChunks }, cancellationToken); + } + + /// + /// Sends audio data to the server. + /// + /// The audio data to send. + public Task SendAudioAsync(ModelContent.InlineDataPart audio, CancellationToken cancellationToken = default) { + return SendRealtimeInputAsync(new LiveClientRealtimeInput(audio), cancellationToken); + } + + /// + /// Sends video data to the server. + /// + /// The video data to send. + public Task SendVideoAsync(ModelContent.InlineDataPart video, CancellationToken cancellationToken = default) { + return SendRealtimeInputAsync(new LiveClientRealtimeInput(video), cancellationToken); + } + + /// + /// Sends text data to the server. + /// + /// The text data to send. + public Task SendTextAsync(string text, CancellationToken cancellationToken = default) { + return SendRealtimeInputAsync(new LiveClientRealtimeInput(text), cancellationToken); + } + private Task SendRealtimeInputAsync(LiveClientRealtimeInput input, CancellationToken cancellationToken = default) { // Prepare the message payload. Dictionary jsonDict = new() { { - "realtimeInput", new Dictionary() { - { - // InlineDataPart inherits from Part, so this conversion should be safe. - "mediaChunks", mediaChunks.Select(mc => (mc as ModelContent.Part).ToJson()["inlineData"]).ToList() - } - } + "realtimeInput", new Dictionary() } }; + + var realtimeInputDict = (Dictionary)jsonDict["realtimeInput"]; + + if (input.MediaChunks != null) { + realtimeInputDict["mediaChunks"] = input.MediaChunks.Select(mc => (mc as ModelContent.Part).ToJson()["inlineData"]).ToList(); + } + + if (input.Audio.HasValue) { + realtimeInputDict["audio"] = (input.Audio.Value as ModelContent.Part).ToJson()["inlineData"]; + } + + if (input.Video.HasValue) { + realtimeInputDict["video"] = (input.Video.Value as ModelContent.Part).ToJson()["inlineData"]; + } + + if (!string.IsNullOrEmpty(input.Text)) { + realtimeInputDict["text"] = input.Text; + } + var byteArray = Encoding.UTF8.GetBytes(Json.Serialize(jsonDict)); - await InternalSendBytesAsync(new ArraySegment(byteArray), cancellationToken); + return InternalSendBytesAsync(new ArraySegment(byteArray), cancellationToken); } private static byte[] ConvertTo16BitPCM(float[] samples) { @@ -189,7 +231,7 @@ private static byte[] ConvertTo16BitPCM(float[] samples) { /// A token to cancel the send operation. public Task SendAudioAsync(float[] audioData, CancellationToken cancellationToken = default) { ModelContent.InlineDataPart inlineDataPart = new("audio/pcm", ConvertTo16BitPCM(audioData)); - return SendMediaChunksAsync(new List(new []{inlineDataPart}), cancellationToken); + return SendAudioAsync(inlineDataPart, cancellationToken); } /// From 2a59cbb0f9f7e3da6ac59381ab96788a08e2afa8 Mon Sep 17 00:00:00 2001 From: Cynthia J Date: Wed, 27 Aug 2025 10:57:47 -0700 Subject: [PATCH 2/2] move all the logic into LiveSession --- firebaseai/src/LiveClientRealtimeInput.cs | 74 ------------------- .../src/LiveClientRealtimeInput.cs.meta | 9 --- firebaseai/src/LiveSession.cs | 30 ++++---- 3 files changed, 15 insertions(+), 98 deletions(-) delete mode 100644 firebaseai/src/LiveClientRealtimeInput.cs delete mode 100644 firebaseai/src/LiveClientRealtimeInput.cs.meta diff --git a/firebaseai/src/LiveClientRealtimeInput.cs b/firebaseai/src/LiveClientRealtimeInput.cs deleted file mode 100644 index 1266c375..00000000 --- a/firebaseai/src/LiveClientRealtimeInput.cs +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Collections.Generic; - -namespace Firebase.AI { - -/// -/// A type that represents a realtime input to the model. -/// -public class LiveClientRealtimeInput { - /// - /// The list of media chunks. - /// - [Obsolete("Use audio, video, or text instead")] - public List MediaChunks { get; set; } - - /// - /// Audio data. - /// - public ModelContent.InlineDataPart? Audio { get; set; } - - /// - /// Video data. - /// - public ModelContent.InlineDataPart? Video { get; set; } - - /// - /// Text data. - /// - public string Text { get; set; } - - /// - /// Creates a [LiveClientRealtimeInput] instance. - /// - public LiveClientRealtimeInput() { } - - /// - /// Creates a [LiveClientRealtimeInput] with audio data. - /// - public LiveClientRealtimeInput(ModelContent.InlineDataPart audio) { - Audio = audio; - } - - /// - /// Creates a [LiveClientRealtimeInput] with video data. - /// - public LiveClientRealtimeInput(ModelContent.InlineDataPart video) { - Video = video; - } - - /// - /// Creates a [LiveClientRealtimeInput] with text data. - /// - public LiveClientRealtimeInput(string text) { - Text = text; - } -} - -} diff --git a/firebaseai/src/LiveClientRealtimeInput.cs.meta b/firebaseai/src/LiveClientRealtimeInput.cs.meta deleted file mode 100644 index d93f9e8b..00000000 --- a/firebaseai/src/LiveClientRealtimeInput.cs.meta +++ /dev/null @@ -1,9 +0,0 @@ -fileFormatVersion: 2 -guid: 1a8f7a2b8c7d6e54b9a6d5c4b3a2e1f0 -DefaultImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - plugins: [] - assetBundleName: - assetBundleVariant: diff --git a/firebaseai/src/LiveSession.cs b/firebaseai/src/LiveSession.cs index 83b23fce..38719b4d 100644 --- a/firebaseai/src/LiveSession.cs +++ b/firebaseai/src/LiveSession.cs @@ -150,7 +150,7 @@ public Task SendMediaChunksAsync( CancellationToken cancellationToken = default) { if (mediaChunks == null) return Task.CompletedTask; - return SendRealtimeInputAsync(new LiveClientRealtimeInput() { MediaChunks = mediaChunks }, cancellationToken); + return SendRealtimeInputAsync(cancellationToken: cancellationToken); } /// @@ -158,7 +158,7 @@ public Task SendMediaChunksAsync( /// /// The audio data to send. public Task SendAudioAsync(ModelContent.InlineDataPart audio, CancellationToken cancellationToken = default) { - return SendRealtimeInputAsync(new LiveClientRealtimeInput(audio), cancellationToken); + return SendRealtimeInputAsync(audio: audio, cancellationToken: cancellationToken); } /// @@ -166,7 +166,7 @@ public Task SendAudioAsync(ModelContent.InlineDataPart audio, CancellationToken /// /// The video data to send. public Task SendVideoAsync(ModelContent.InlineDataPart video, CancellationToken cancellationToken = default) { - return SendRealtimeInputAsync(new LiveClientRealtimeInput(video), cancellationToken); + return SendRealtimeInputAsync(video: video, cancellationToken: cancellationToken); } /// @@ -174,10 +174,14 @@ public Task SendVideoAsync(ModelContent.InlineDataPart video, CancellationToken /// /// The text data to send. public Task SendTextAsync(string text, CancellationToken cancellationToken = default) { - return SendRealtimeInputAsync(new LiveClientRealtimeInput(text), cancellationToken); + return SendRealtimeInputAsync(text: text, cancellationToken: cancellationToken); } - private Task SendRealtimeInputAsync(LiveClientRealtimeInput input, CancellationToken cancellationToken = default) { + private Task SendRealtimeInputAsync( + ModelContent.InlineDataPart? audio = null, + ModelContent.InlineDataPart? video = null, + string text = null, + CancellationToken cancellationToken = default) { // Prepare the message payload. Dictionary jsonDict = new() { { @@ -187,20 +191,16 @@ private Task SendRealtimeInputAsync(LiveClientRealtimeInput input, CancellationT var realtimeInputDict = (Dictionary)jsonDict["realtimeInput"]; - if (input.MediaChunks != null) { - realtimeInputDict["mediaChunks"] = input.MediaChunks.Select(mc => (mc as ModelContent.Part).ToJson()["inlineData"]).ToList(); - } - - if (input.Audio.HasValue) { - realtimeInputDict["audio"] = (input.Audio.Value as ModelContent.Part).ToJson()["inlineData"]; + if (audio.HasValue) { + realtimeInputDict["audio"] = (audio.Value as ModelContent.Part).ToJson()["inlineData"]; } - if (input.Video.HasValue) { - realtimeInputDict["video"] = (input.Video.Value as ModelContent.Part).ToJson()["inlineData"]; + if (video.HasValue) { + realtimeInputDict["video"] = (video.Value as ModelContent.Part).ToJson()["inlineData"]; } - if (!string.IsNullOrEmpty(input.Text)) { - realtimeInputDict["text"] = input.Text; + if (!string.IsNullOrEmpty(text)) { + realtimeInputDict["text"] = text; } var byteArray = Encoding.UTF8.GetBytes(Json.Serialize(jsonDict));