From d068e9b92f85c7149abb511cbf3a2d4965e452f3 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Mon, 13 Oct 2025 08:54:53 -0400 Subject: [PATCH 01/18] [ALF] Implement LiveAPI realtime methods The new methods, with specific endpoints for audio, video, and text, are the recommended way of contacting the LiveAPI service. The `sendMediaStream` method is now deprecated. --- firebase-ai/gradle.properties | 2 +- .../type/BidiGenerateContentRealtimeInput.kt | 135 ++++++++++++++++++ .../google/firebase/ai/type/LiveSession.kt | 43 ++++++ 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index c1ee825b2cb..c4acd5b3aae 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=17.4.1 +version=17.5.0 latestReleasedVersion=17.4.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt new file mode 100644 index 00000000000..dbe4824fbcc --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt @@ -0,0 +1,135 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@file:OptIn(PublicPreviewAPI::class) + +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable +import kotlinx.serialization.json.JsonObject + +/** + * Represents a single message in a live, bidirectional generate content stream. + * + * See the [API reference](https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput) for more + * details. + * + * @property audio These form the realtime audio input stream. + * @property audioStreamEnd Indicates that the audio stream has ended, e.g. because the microphone + * was turned off. This should only be sent when automatic activity detection is enabled (which is + * the default). The client can reopen the stream by sending an audio message. + * @property video These form the realtime video input stream. + * @property text These form the realtime text input stream. + * @property activityStart Marks the start of user activity. This can only be sent if automatic + * (i.e. server-side) activity detection is disabled. + * @property activityEnd Marks the end of user activity. This can only be sent if automatic (i.e. + * server-side) activity detection is disabled. + */ +@PublicPreviewAPI +public class BidiGenerateContentRealtimeInput +private constructor( + public val audio: InlineDataPart?, + public val audioStreamEnd: Boolean?, + public val video: InlineDataPart?, + public val text: String?, + public val activityStart: ActivityStart?, + public val activityEnd: ActivityEnd?, +) { + + /** Builder for creating a [BidiGenerateContentRealtimeInput]. */ + public class Builder { + @JvmField public var audio: InlineDataPart? = null + @JvmField public var audioStreamEnd: Boolean? = null + @JvmField public var video: InlineDataPart? = null + @JvmField public var text: String? = null + @JvmField public var activityStart: ActivityStart? = null + @JvmField public var activityEnd: ActivityEnd? 
= null + + public fun setAudio(audio: InlineDataPart?): Builder = apply { this.audio = audio } + public fun setAudioStreamEnd(audioStreamEnd: Boolean?): Builder = apply { + this.audioStreamEnd = audioStreamEnd + } + public fun setVideo(video: InlineDataPart?): Builder = apply { this.video = video } + public fun setText(text: String?): Builder = apply { this.text = text } + public fun setActivityStart(activityStart: ActivityStart?): Builder = apply { + this.activityStart = activityStart + } + public fun setActivityEnd(activityEnd: ActivityEnd?): Builder = apply { + this.activityEnd = activityEnd + } + + public fun build(): BidiGenerateContentRealtimeInput = + BidiGenerateContentRealtimeInput( + audio, + audioStreamEnd, + video, + text, + activityStart, + activityEnd, + ) + } + + /** Marks the start of user activity. */ + @PublicPreviewAPI public object ActivityStart + + /** Marks the end of user activity. */ + @PublicPreviewAPI public object ActivityEnd + + @Serializable + internal data class Internal( + val audio: InlineDataPart.Internal.InlineData? = null, + val audioStreamEnd: Boolean? = null, + val video: InlineDataPart.Internal.InlineData? = null, + val text: String? = null, + val activityStart: JsonObject? = null, + val activityEnd: JsonObject? = null, + ) + + internal fun toInternal(): Internal { + return Internal( + audio = audio?.let { (it.toInternal() as InlineDataPart.Internal).inlineData }, + audioStreamEnd = audioStreamEnd, + video = video?.let { (it.toInternal() as InlineDataPart.Internal).inlineData }, + text = text, + activityStart = if (activityStart != null) JsonObject(emptyMap()) else null, + activityEnd = if (activityEnd != null) JsonObject(emptyMap()) else null, + ) + } + + public companion object { + /** Returns a new [Builder] for constructing a [BidiGenerateContentRealtimeInput]. */ + @JvmStatic public fun builder(): Builder = Builder() + } +} + +/** + * DSL for building a [BidiGenerateContentRealtimeInput]. 
+ * + * Example: + * ``` + * bidiGenerateContentRealtimeInput { + * text = "Hello" + * } + * ``` + */ +@PublicPreviewAPI +public fun bidiGenerateContentRealtimeInput( + init: BidiGenerateContentRealtimeInput.Builder.() -> Unit +): BidiGenerateContentRealtimeInput { + val builder = BidiGenerateContentRealtimeInput.builder() + builder.init() + return builder.build() +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index c703cd959c3..6f72304d361 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -255,6 +255,48 @@ internal constructor( } } + /** + * Sends audio data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. + * @param audio The audio data to send. + */ + public suspend fun sendAudioRealtime(audio: InlineDataPart) { + val msg = bidiGenerateContentRealtimeInput { this.audio = audio } + FirebaseAIException.catchAsync { + val jsonString = Json.encodeToString(msg.toInternal()) + session.send(Frame.Text(jsonString)) + } + } + + /** + * Sends video data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. + * @param video The video data to send. Video MIME type could be either video or image. + */ + public suspend fun sendVideoRealtime(video: InlineDataPart) { + val msg = bidiGenerateContentRealtimeInput { this.video = video } + FirebaseAIException.catchAsync { + val jsonString = Json.encodeToString(msg.toInternal()) + session.send(Frame.Text(jsonString)) + } + } + + /** + * Sends text data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. 
+ * @param text The text data to send. + */ + public suspend fun sendTextRealtime(text: String) { + val msg = bidiGenerateContentRealtimeInput { this.text = text } + FirebaseAIException.catchAsync { + val jsonString = Json.encodeToString(msg.toInternal()) + session.send(Frame.Text(jsonString)) + } + } + /** * Streams client data to the model. * @@ -262,6 +304,7 @@ internal constructor( * * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. */ + @Deprecated("Use sendAudioRealtime, sendVideoRealtime, or sendTextRealtime instead") public suspend fun sendMediaStream( mediaChunks: List, ) { From be3f45fe9a95760ed438f46b8af6d80c08b0bf1b Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Tue, 14 Oct 2025 23:03:00 -0400 Subject: [PATCH 02/18] Use the right format --- .../google/firebase/ai/type/LiveSession.kt | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 6f72304d361..8da0b5257fa 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -28,6 +28,7 @@ import com.google.firebase.ai.common.JSON import com.google.firebase.ai.common.util.CancelledCoroutineScope import com.google.firebase.ai.common.util.accumulateUntil import com.google.firebase.ai.common.util.childJob +import com.google.firebase.ai.type.MediaData.Internal import com.google.firebase.annotations.concurrent.Blocking import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession import io.ktor.websocket.Frame @@ -195,6 +196,8 @@ internal constructor( response .getOrNull() ?.let { + var x = it.readBytes().toString(Charsets.UTF_8) + Log.d(TAG, x) JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) @@ -262,11 +265,15 @@ internal constructor( * @param audio The 
audio data to send. */ public suspend fun sendAudioRealtime(audio: InlineDataPart) { - val msg = bidiGenerateContentRealtimeInput { this.audio = audio } + Log.d(TAG, "sendAudioRealtime called with audio data") + val msg = BidiGenerateContentRealtimeInputSetup(audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal()) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) + Log.d(TAG, jsonString) + Log.d(TAG, "sendAudioRealtime sent audio data size: ${jsonString.length}") } + Log.d(TAG, "finish sending audio data") } /** @@ -313,6 +320,7 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) + Log.d(TAG, jsonString) session.send(Frame.Text(jsonString)) } } @@ -362,12 +370,13 @@ internal constructor( /** Listen to the user's microphone and send the data to the model. */ private fun recordUserAudio() { + // ?.onEach { sendAudioRealtime(InlineDataPart (it, "audio/pcm")) } // Buffer the recording so we can keep recording while data is sent to the server audioHelper ?.listenToRecording() ?.buffer(UNLIMITED) ?.accumulateUntil(MIN_BUFFER_SIZE) - ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } + ?.onEach { sendAudioRealtime(InlineDataPart(it, "audio/pcm")) } ?.catch { throw FirebaseAIException.from(it) } ?.launchIn(scope) } @@ -507,15 +516,24 @@ internal constructor( * * End of turn is derived from user activity (eg; end of speech). */ - internal class BidiGenerateContentRealtimeInputSetup(val mediaChunks: List) { + internal class BidiGenerateContentRealtimeInputSetup( + val mediaChunks: List? = null, + val audio: MediaData.Internal? = null, + val video: MediaData.Internal? = null, + val text: String? 
= null + ) { @Serializable internal class Internal(val realtimeInput: BidiGenerateContentRealtimeInput) { @Serializable internal data class BidiGenerateContentRealtimeInput( - val mediaChunks: List + val mediaChunks: List?, + val audio: MediaData.Internal?, + val video: MediaData.Internal?, + val text: String? ) } - fun toInternal() = Internal(Internal.BidiGenerateContentRealtimeInput(mediaChunks)) + fun toInternal() = + Internal(Internal.BidiGenerateContentRealtimeInput(mediaChunks, audio, video, text)) } private companion object { From f6204a14d649f7745117780f32a8ea6ae14bed2a Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Wed, 15 Oct 2025 12:16:17 -0400 Subject: [PATCH 03/18] Support text --- .../kotlin/com/google/firebase/ai/type/LiveSession.kt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 8da0b5257fa..f661928c4c5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -265,12 +265,10 @@ internal constructor( * @param audio The audio data to send. */ public suspend fun sendAudioRealtime(audio: InlineDataPart) { - Log.d(TAG, "sendAudioRealtime called with audio data") val msg = BidiGenerateContentRealtimeInputSetup(audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal()) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) - Log.d(TAG, jsonString) Log.d(TAG, "sendAudioRealtime sent audio data size: ${jsonString.length}") } Log.d(TAG, "finish sending audio data") @@ -283,11 +281,13 @@ internal constructor( * @param video The video data to send. Video MIME type could be either video or image. 
*/ public suspend fun sendVideoRealtime(video: InlineDataPart) { - val msg = bidiGenerateContentRealtimeInput { this.video = video } + val msg = BidiGenerateContentRealtimeInputSetup(video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal()) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) + Log.d(TAG, "sendVideoRealtime sent video data size: ${jsonString.length}") } + Log.d(TAG, "finish sending video data") } /** @@ -297,11 +297,13 @@ internal constructor( * @param text The text data to send. */ public suspend fun sendTextRealtime(text: String) { - val msg = bidiGenerateContentRealtimeInput { this.text = text } + val msg = BidiGenerateContentRealtimeInputSetup(text = text) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) + Log.d(TAG, "sendTextRealtime sent text data size: ${jsonString.length}") } + Log.d(TAG, "finish sending text data") } /** From d94bde7a775b81475f3f6a2b813ae896c9d2155f Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Wed, 15 Oct 2025 12:51:39 -0400 Subject: [PATCH 04/18] Remove unnecessary file --- .../type/BidiGenerateContentRealtimeInput.kt | 135 ------------------ 1 file changed, 135 deletions(-) delete mode 100644 firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt deleted file mode 100644 index dbe4824fbcc..00000000000 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/BidiGenerateContentRealtimeInput.kt +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -@file:OptIn(PublicPreviewAPI::class) - -package com.google.firebase.ai.type - -import kotlinx.serialization.Serializable -import kotlinx.serialization.json.JsonObject - -/** - * Represents a single message in a live, bidirectional generate content stream. - * - * See the [API reference](https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput) for more - * details. - * - * @property audio These form the realtime audio input stream. - * @property audioStreamEnd Indicates that the audio stream has ended, e.g. because the microphone - * was turned off. This should only be sent when automatic activity detection is enabled (which is - * the default). The client can reopen the stream by sending an audio message. - * @property video These form the realtime video input stream. - * @property text These form the realtime text input stream. - * @property activityStart Marks the start of user activity. This can only be sent if automatic - * (i.e. server-side) activity detection is disabled. - * @property activityEnd Marks the end of user activity. This can only be sent if automatic (i.e. - * server-side) activity detection is disabled. - */ -@PublicPreviewAPI -public class BidiGenerateContentRealtimeInput -private constructor( - public val audio: InlineDataPart?, - public val audioStreamEnd: Boolean?, - public val video: InlineDataPart?, - public val text: String?, - public val activityStart: ActivityStart?, - public val activityEnd: ActivityEnd?, -) { - - /** Builder for creating a [BidiGenerateContentRealtimeInput]. 
*/ - public class Builder { - @JvmField public var audio: InlineDataPart? = null - @JvmField public var audioStreamEnd: Boolean? = null - @JvmField public var video: InlineDataPart? = null - @JvmField public var text: String? = null - @JvmField public var activityStart: ActivityStart? = null - @JvmField public var activityEnd: ActivityEnd? = null - - public fun setAudio(audio: InlineDataPart?): Builder = apply { this.audio = audio } - public fun setAudioStreamEnd(audioStreamEnd: Boolean?): Builder = apply { - this.audioStreamEnd = audioStreamEnd - } - public fun setVideo(video: InlineDataPart?): Builder = apply { this.video = video } - public fun setText(text: String?): Builder = apply { this.text = text } - public fun setActivityStart(activityStart: ActivityStart?): Builder = apply { - this.activityStart = activityStart - } - public fun setActivityEnd(activityEnd: ActivityEnd?): Builder = apply { - this.activityEnd = activityEnd - } - - public fun build(): BidiGenerateContentRealtimeInput = - BidiGenerateContentRealtimeInput( - audio, - audioStreamEnd, - video, - text, - activityStart, - activityEnd, - ) - } - - /** Marks the start of user activity. */ - @PublicPreviewAPI public object ActivityStart - - /** Marks the end of user activity. */ - @PublicPreviewAPI public object ActivityEnd - - @Serializable - internal data class Internal( - val audio: InlineDataPart.Internal.InlineData? = null, - val audioStreamEnd: Boolean? = null, - val video: InlineDataPart.Internal.InlineData? = null, - val text: String? = null, - val activityStart: JsonObject? = null, - val activityEnd: JsonObject? 
= null, - ) - - internal fun toInternal(): Internal { - return Internal( - audio = audio?.let { (it.toInternal() as InlineDataPart.Internal).inlineData }, - audioStreamEnd = audioStreamEnd, - video = video?.let { (it.toInternal() as InlineDataPart.Internal).inlineData }, - text = text, - activityStart = if (activityStart != null) JsonObject(emptyMap()) else null, - activityEnd = if (activityEnd != null) JsonObject(emptyMap()) else null, - ) - } - - public companion object { - /** Returns a new [Builder] for constructing a [BidiGenerateContentRealtimeInput]. */ - @JvmStatic public fun builder(): Builder = Builder() - } -} - -/** - * DSL for building a [BidiGenerateContentRealtimeInput]. - * - * Example: - * ``` - * bidiGenerateContentRealtimeInput { - * text = "Hello" - * } - * ``` - */ -@PublicPreviewAPI -public fun bidiGenerateContentRealtimeInput( - init: BidiGenerateContentRealtimeInput.Builder.() -> Unit -): BidiGenerateContentRealtimeInput { - val builder = BidiGenerateContentRealtimeInput.builder() - builder.init() - return builder.build() -} From 4055023bb16ce7d3ba85712b92b471322bee66ff Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Wed, 15 Oct 2025 12:52:08 -0400 Subject: [PATCH 05/18] Fix format in LiveSession --- .../kotlin/com/google/firebase/ai/type/LiveSession.kt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index f661928c4c5..e5192207e28 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -265,7 +265,10 @@ internal constructor( * @param audio The audio data to send. 
*/ public suspend fun sendAudioRealtime(audio: InlineDataPart) { - val msg = BidiGenerateContentRealtimeInputSetup(audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal()) + val msg = + BidiGenerateContentRealtimeInputSetup( + audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal() + ) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) @@ -281,7 +284,10 @@ internal constructor( * @param video The video data to send. Video MIME type could be either video or image. */ public suspend fun sendVideoRealtime(video: InlineDataPart) { - val msg = BidiGenerateContentRealtimeInputSetup(video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal()) + val msg = + BidiGenerateContentRealtimeInputSetup( + video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal() + ) FirebaseAIException.catchAsync { val jsonString = Json.encodeToString(msg.toInternal()) session.send(Frame.Text(jsonString)) From acb35cec90ef011f11358d931e3bb0c7da845643 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Wed, 15 Oct 2025 12:54:45 -0400 Subject: [PATCH 06/18] Remove log entries and temp vars --- .../google/firebase/ai/type/LiveSession.kt | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index e5192207e28..0b93bc8737b 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -265,16 +265,16 @@ internal constructor( * @param audio The audio data to send. 
*/ public suspend fun sendAudioRealtime(audio: InlineDataPart) { - val msg = - BidiGenerateContentRealtimeInputSetup( - audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal() - ) FirebaseAIException.catchAsync { - val jsonString = Json.encodeToString(msg.toInternal()) + val jsonString = + Json.encodeToString( + BidiGenerateContentRealtimeInputSetup( + audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal() + ) + .toInternal() + ) session.send(Frame.Text(jsonString)) - Log.d(TAG, "sendAudioRealtime sent audio data size: ${jsonString.length}") } - Log.d(TAG, "finish sending audio data") } /** @@ -284,16 +284,16 @@ internal constructor( * @param video The video data to send. Video MIME type could be either video or image. */ public suspend fun sendVideoRealtime(video: InlineDataPart) { - val msg = - BidiGenerateContentRealtimeInputSetup( - video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal() - ) FirebaseAIException.catchAsync { - val jsonString = Json.encodeToString(msg.toInternal()) + val jsonString = + Json.encodeToString( + BidiGenerateContentRealtimeInputSetup( + video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal() + ) + .toInternal() + ) session.send(Frame.Text(jsonString)) - Log.d(TAG, "sendVideoRealtime sent video data size: ${jsonString.length}") } - Log.d(TAG, "finish sending video data") } /** @@ -303,13 +303,11 @@ internal constructor( * @param text The text data to send. 
*/ public suspend fun sendTextRealtime(text: String) { - val msg = BidiGenerateContentRealtimeInputSetup(text = text) FirebaseAIException.catchAsync { - val jsonString = Json.encodeToString(msg.toInternal()) + val jsonString = + Json.encodeToString(BidiGenerateContentRealtimeInputSetup(text = text).toInternal()) session.send(Frame.Text(jsonString)) - Log.d(TAG, "sendTextRealtime sent text data size: ${jsonString.length}") } - Log.d(TAG, "finish sending text data") } /** From d3f543de12fd984ebed68077b9856c569b98f739 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Wed, 15 Oct 2025 18:44:47 -0400 Subject: [PATCH 07/18] Expose inline data --- .../kotlin/com/google/firebase/ai/type/Part.kt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt index 4312fd5bdbd..8aa0388e9a9 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt @@ -19,6 +19,7 @@ package com.google.firebase.ai.type import android.graphics.Bitmap import android.graphics.BitmapFactory import android.util.Log +import com.google.firebase.ai.type.ImagenImageFormat.Internal import java.io.ByteArrayOutputStream import kotlinx.serialization.DeserializationStrategy import kotlinx.serialization.SerialName @@ -161,14 +162,17 @@ internal constructor( @Serializable internal data class Internal( - @SerialName("inlineData") val inlineData: InlineData, + @SerialName("inlineData") val inlineData: InlineData.Internal, val thought: Boolean? = null, val thoughtSignature: String? 
= null - ) : InternalPart { + ) : InternalPart +} - @Serializable - internal data class InlineData(@SerialName("mimeType") val mimeType: String, val data: Base64) - } +public class InlineData(public val data: ByteArray, public val mimeType: String) { + @Serializable internal data class Internal(val mimeType: String, val data: Base64) + + internal fun toInternal() = + Internal(mimeType, android.util.Base64.encodeToString(data, BASE_64_FLAGS)) } /** Represents function call name and params received from requests. */ From e5bd5813b45ea139632d14a2c2cf35f738aebc1d Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:00:00 -0400 Subject: [PATCH 08/18] Use the new public InlineData when creating InlineDataPart --- .../src/main/kotlin/com/google/firebase/ai/type/Part.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt index 8aa0388e9a9..c0f8242118a 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt @@ -338,13 +338,13 @@ internal fun Part.toInternal(): InternalPart { is TextPart -> TextPart.Internal(text, isThought, thoughtSignature) is ImagePart -> InlineDataPart.Internal( - InlineDataPart.Internal.InlineData("image/jpeg", encodeBitmapToBase64Jpeg(image)), + InlineData.Internal("image/jpeg", encodeBitmapToBase64Jpeg(image)), isThought, thoughtSignature ) is InlineDataPart -> InlineDataPart.Internal( - InlineDataPart.Internal.InlineData( + InlineData.Internal( mimeType, android.util.Base64.encodeToString(inlineData, BASE_64_FLAGS) ), From 5c7a31fac32568b5d59a9dcc5f7bd3aebd641360 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:09:30 -0400 Subject: [PATCH 09/18] Finish migrating to InlineData --- .../google/firebase/ai/type/LiveSession.kt | 46 +++++++++---------- 
.../com/google/firebase/ai/type/MediaData.kt | 1 + 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 0b93bc8737b..62a61acee63 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -264,15 +264,10 @@ internal constructor( * input usage. * @param audio The audio data to send. */ - public suspend fun sendAudioRealtime(audio: InlineDataPart) { + public suspend fun sendAudioRealtime(audio: InlineData) { FirebaseAIException.catchAsync { val jsonString = - Json.encodeToString( - BidiGenerateContentRealtimeInputSetup( - audio = MediaData(audio.inlineData, mimeType = audio.mimeType).toInternal() - ) - .toInternal() - ) + Json.encodeToString(BidiGenerateContentRealtimeInputSetup(audio = audio).toInternal()) session.send(Frame.Text(jsonString)) } } @@ -283,15 +278,10 @@ internal constructor( * input usage. * @param video The video data to send. Video MIME type could be either video or image. 
*/ - public suspend fun sendVideoRealtime(video: InlineDataPart) { + public suspend fun sendVideoRealtime(video: InlineData) { FirebaseAIException.catchAsync { val jsonString = - Json.encodeToString( - BidiGenerateContentRealtimeInputSetup( - video = MediaData(video.inlineData, mimeType = video.mimeType).toInternal() - ) - .toInternal() - ) + Json.encodeToString(BidiGenerateContentRealtimeInputSetup(video = video).toInternal()) session.send(Frame.Text(jsonString)) } } @@ -324,7 +314,10 @@ internal constructor( FirebaseAIException.catchAsync { val jsonString = Json.encodeToString( - BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() + BidiGenerateContentRealtimeInputSetup( + mediaChunks.map { InlineData(it.data, it.mimeType) } + ) + .toInternal() ) Log.d(TAG, jsonString) session.send(Frame.Text(jsonString)) @@ -382,7 +375,7 @@ internal constructor( ?.listenToRecording() ?.buffer(UNLIMITED) ?.accumulateUntil(MIN_BUFFER_SIZE) - ?.onEach { sendAudioRealtime(InlineDataPart(it, "audio/pcm")) } + ?.onEach { sendAudioRealtime(InlineData(it, "audio/pcm")) } ?.catch { throw FirebaseAIException.from(it) } ?.launchIn(scope) } @@ -523,23 +516,30 @@ internal constructor( * End of turn is derived from user activity (eg; end of speech). */ internal class BidiGenerateContentRealtimeInputSetup( - val mediaChunks: List? = null, - val audio: MediaData.Internal? = null, - val video: MediaData.Internal? = null, + val mediaChunks: List? = null, + val audio: InlineData? = null, + val video: InlineData? = null, val text: String? = null ) { @Serializable internal class Internal(val realtimeInput: BidiGenerateContentRealtimeInput) { @Serializable internal data class BidiGenerateContentRealtimeInput( - val mediaChunks: List?, - val audio: MediaData.Internal?, - val video: MediaData.Internal?, + val mediaChunks: List?, + val audio: InlineData.Internal?, + val video: InlineData.Internal?, val text: String? 
) } fun toInternal() = - Internal(Internal.BidiGenerateContentRealtimeInput(mediaChunks, audio, video, text)) + Internal( + Internal.BidiGenerateContentRealtimeInput( + mediaChunks?.map { it.toInternal() }, + audio?.toInternal(), + video?.toInternal(), + text + ) + ) } private companion object { diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/MediaData.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/MediaData.kt index 1262027989d..7647c687934 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/MediaData.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/MediaData.kt @@ -27,6 +27,7 @@ import kotlinx.serialization.Serializable * [Firebase documentation](https://firebase.google.com/docs/vertex-ai/input-file-requirements). */ @PublicPreviewAPI +@Deprecated("Use InlineData instead", ReplaceWith("InlineData")) public class MediaData(public val data: ByteArray, public val mimeType: String) { @Serializable internal class Internal( From d14639496fd36226e66576eaf6627dc5a84e5360 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:15:35 -0400 Subject: [PATCH 10/18] Added java implementation --- .../firebase/ai/java/LiveSessionFutures.kt | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index a9615ac2afb..36bb685124a 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -23,6 +23,7 @@ import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.ai.type.Content import com.google.firebase.ai.type.FunctionCallPart import com.google.firebase.ai.type.FunctionResponsePart +import com.google.firebase.ai.type.InlineData import com.google.firebase.ai.type.LiveServerMessage import 
com.google.firebase.ai.type.LiveSession import com.google.firebase.ai.type.MediaData @@ -126,6 +127,30 @@ public abstract class LiveSessionFutures internal constructor() { functionList: List ): ListenableFuture + /** + * Sends audio data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. + * @param audio The audio data to send. + */ + public abstract fun sendAudioRealtime(audio: InlineData): ListenableFuture + + /** + * Sends video data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. + * @param video The video data to send. Video MIME type could be either video or image. + */ + public abstract fun sendVideoRealtime(video: InlineData) : ListenableFuture + + /** + * Sends text data to the server in realtime. Check + * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime + * input usage. + * @param text The text data to send. + */ + public abstract fun sendTextRealtime(text: String): ListenableFuture + /** * Streams client data to the model. * @@ -133,6 +158,7 @@ public abstract class LiveSessionFutures internal constructor() { * * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. 
*/ + @Deprecated("Use sendAudioRealtime, sendVideoRealtime, or sendTextRealtime instead") public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture /** @@ -190,6 +216,16 @@ public abstract class LiveSessionFutures internal constructor() { override fun sendFunctionResponse(functionList: List) = SuspendToFutureAdapter.launchFuture { session.sendFunctionResponse(functionList) } + override fun sendAudioRealtime(audio: InlineData): ListenableFuture = + SuspendToFutureAdapter.launchFuture { session.sendAudioRealtime(audio) } + + + override fun sendVideoRealtime(video: InlineData): ListenableFuture = + SuspendToFutureAdapter.launchFuture { session.sendVideoRealtime(video) } + + override fun sendTextRealtime(text: String): ListenableFuture = + SuspendToFutureAdapter.launchFuture { session.sendTextRealtime(text) } + override fun sendMediaStream(mediaChunks: List) = SuspendToFutureAdapter.launchFuture { session.sendMediaStream(mediaChunks) } From d4f182213912953e336382703d1d43a4198c6c92 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:18:24 -0400 Subject: [PATCH 11/18] Java test data --- .../java/com/google/firebase/ai/JavaCompileTests.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java b/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java index fd2fb81687b..ef18dd94ae7 100644 --- a/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java +++ b/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java @@ -51,6 +51,7 @@ import com.google.firebase.ai.type.ImagenEditingConfig; import com.google.firebase.ai.type.ImagenInlineImage; import com.google.firebase.ai.type.ImagenMaskReference; +import com.google.firebase.ai.type.InlineData; import com.google.firebase.ai.type.InlineDataPart; import com.google.firebase.ai.type.LiveGenerationConfig; import com.google.firebase.ai.type.LiveServerContent; @@ -365,6 
+366,9 @@ public void onComplete() { byte[] bytes = new byte[] {(byte) 0xCA, (byte) 0xFE, (byte) 0xBA, (byte) 0xBE}; session.sendMediaStream(List.of(new MediaData(bytes, "image/jxl"))); + session.sendAudioRealtime(new InlineData(bytes, "audio/jxl")); + session.sendVideoRealtime(new InlineData(bytes, "image/jxl")); + session.sendTextRealtime("text"); FunctionResponsePart functionResponse = new FunctionResponsePart("myFunction", new JsonObject(Map.of())); From 6b6ff209d70c40b80acf966f79bab02797719a1e Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:31:21 -0400 Subject: [PATCH 12/18] Address code review feedback --- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 62a61acee63..a1d0ae5d2d5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -196,8 +196,6 @@ internal constructor( response .getOrNull() ?.let { - var x = it.readBytes().toString(Charsets.UTF_8) - Log.d(TAG, x) JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) @@ -319,7 +317,6 @@ internal constructor( ) .toInternal() ) - Log.d(TAG, jsonString) session.send(Frame.Text(jsonString)) } } @@ -369,7 +366,6 @@ internal constructor( /** Listen to the user's microphone and send the data to the model. 
*/ private fun recordUserAudio() { - // ?.onEach { sendAudioRealtime(InlineDataPart (it, "audio/pcm")) } // Buffer the recording so we can keep recording while data is sent to the server audioHelper ?.listenToRecording() From 1c2a669ce3c93c5d19e5730ff30f2eae0e758ffa Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:36:39 -0400 Subject: [PATCH 13/18] Better kdoc --- .../google/firebase/ai/type/LiveSession.kt | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index a1d0ae5d2d5..0e7c136a5b4 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -257,10 +257,14 @@ internal constructor( } /** - * Sends audio data to the server in realtime. Check - * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime - * input usage. - * @param audio The audio data to send. + * Sends an audio input stream to the model, using the realtime API. + * + * To learn more about audio formats, and the required state they should be provided in, see the + * docs on + * [Supported audio formats](https://cloud.google.com/vertex-ai/generative-ai/docs/live-api#supported-audio-formats). + * + * @param audio Raw audio data used to update the model on the client's conversation. For best + * results, send 16-bit PCM audio at 24kHz. */ public suspend fun sendAudioRealtime(audio: InlineData) { FirebaseAIException.catchAsync { @@ -271,10 +275,11 @@ internal constructor( } /** - * Sends video data to the server in realtime. Check - * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime - * input usage. - * @param video The video data to send. Video MIME type could be either video or image. 
+ * Sends a video input stream to the model, using the realtime API. + * + * @param video Encoded video data, used to update the model on the client's conversation. The + * MIME type can be a video format (e.g., `video/webm`) or an image format (e.g., + * `image/jpeg`). */ public suspend fun sendVideoRealtime(video: InlineData) { FirebaseAIException.catchAsync { @@ -285,10 +290,9 @@ internal constructor( } /** - * Sends text data to the server in realtime. Check - * https://ai.google.dev/api/live#bidigeneratecontentrealtimeinput for details about the realtime - * input usage. - * @param text The text data to send. + * Sends a text input stream to the model, using the realtime API. + * + * @param text Text content to append to the current client's conversation. */ public suspend fun sendTextRealtime(text: String) { FirebaseAIException.catchAsync { From 90320ea23449d890298eee987ab409487e6c90f1 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:40:24 -0400 Subject: [PATCH 14/18] Add changelog entry --- firebase-ai/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index bc356414fb7..053febea25a 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,5 +1,6 @@ # Unreleased +- [feature] Added support for sending realtime audio and video in a `LiveSession`. - [changed] Removed redundant internal exception types. 
(#7475) # 17.4.0 From 27a97580eb24377968b145783d2686c55d06d4eb Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 01:52:20 -0400 Subject: [PATCH 15/18] format fixes --- .../kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt | 3 +-- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 36bb685124a..2fb74689643 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -141,7 +141,7 @@ public abstract class LiveSessionFutures internal constructor() { * input usage. * @param video The video data to send. Video MIME type could be either video or image. */ - public abstract fun sendVideoRealtime(video: InlineData) : ListenableFuture + public abstract fun sendVideoRealtime(video: InlineData): ListenableFuture /** * Sends text data to the server in realtime. 
Check @@ -219,7 +219,6 @@ public abstract class LiveSessionFutures internal constructor() { override fun sendAudioRealtime(audio: InlineData): ListenableFuture = SuspendToFutureAdapter.launchFuture { session.sendAudioRealtime(audio) } - override fun sendVideoRealtime(video: InlineData): ListenableFuture = SuspendToFutureAdapter.launchFuture { session.sendVideoRealtime(video) } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 0e7c136a5b4..0e6796ab01b 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -261,7 +261,7 @@ internal constructor( * * To learn more about audio formats, and the required state they should be provided in, see the * docs on - * [Supported audio formats](https://cloud.google.com/vertex-ai/generative-ai/docs/live-api#supported-audio-formats). + * [Supported audio formats](https://cloud.google.com/vertex-ai/generative-ai/docs/live-api#supported-audio-formats) * * @param audio Raw audio data used to update the model on the client's conversation. For best * results, send 16-bit PCM audio at 24kHz. @@ -278,8 +278,7 @@ internal constructor( * Sends a video input stream to the model, using the realtime API. * * @param video Encoded video data, used to update the model on the client's conversation. The - * MIME type can be a video format (e.g., `video/webm`) or an image format (e.g., - * `image/jpeg`). + * MIME type can be a video format (e.g., `video/webm`) or an image format (e.g., `image/jpeg`). 
*/ public suspend fun sendVideoRealtime(video: InlineData) { FirebaseAIException.catchAsync { From d90009851033c34c1723b6880f540884e2c6fb59 Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 12:00:43 -0400 Subject: [PATCH 16/18] Update api.txt --- firebase-ai/api.txt | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index f8df1f045bc..ba27e5682d8 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -145,8 +145,11 @@ package com.google.firebase.ai.java { method public abstract org.reactivestreams.Publisher receive(); method public abstract com.google.common.util.concurrent.ListenableFuture send(com.google.firebase.ai.type.Content content); method public abstract com.google.common.util.concurrent.ListenableFuture send(String text); + method public abstract com.google.common.util.concurrent.ListenableFuture sendAudioRealtime(com.google.firebase.ai.type.InlineData audio); method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); - method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); + method @Deprecated public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); + method public abstract com.google.common.util.concurrent.ListenableFuture sendTextRealtime(String text); + method public abstract com.google.common.util.concurrent.ListenableFuture sendVideoRealtime(com.google.firebase.ai.type.InlineData video); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); method 
@RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); @@ -801,6 +804,14 @@ package com.google.firebase.ai.type { public static final class ImagenSubjectReferenceType.Companion { } + public final class InlineData { + ctor public InlineData(byte[] data, String mimeType); + method public byte[] getData(); + method public String getMimeType(); + property public final byte[] data; + property public final String mimeType; + } + public final class InlineDataPart implements com.google.firebase.ai.type.Part { ctor public InlineDataPart(byte[] inlineData, String mimeType); method public byte[] getInlineData(); @@ -891,20 +902,23 @@ package com.google.firebase.ai.type { method public kotlinx.coroutines.flow.Flow receive(); method public suspend Object? send(com.google.firebase.ai.type.Content content, kotlin.coroutines.Continuation); method public suspend Object? send(String text, kotlin.coroutines.Continuation); + method public suspend Object? sendAudioRealtime(com.google.firebase.ai.type.InlineData audio, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); - method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); + method @Deprecated public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); + method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation); + method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? 
functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } - @com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData { - ctor public MediaData(byte[] data, String mimeType); - method public byte[] getData(); - method public String getMimeType(); - property public final byte[] data; - property public final String mimeType; + @Deprecated @com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData { + ctor @Deprecated public MediaData(byte[] data, String mimeType); + method @Deprecated public byte[] getData(); + method @Deprecated public String getMimeType(); + property @Deprecated public final byte[] data; + property @Deprecated public final String mimeType; } public final class ModalityTokenCount { From 14a40120c9d0957e5e41d463bf1c20c6777901bb Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 12:21:28 -0400 Subject: [PATCH 17/18] Add documentation for `InlineData` class --- .../src/main/kotlin/com/google/firebase/ai/type/Part.kt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt index c0f8242118a..a231680daea 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt @@ -168,6 +168,12 @@ internal constructor( ) : InternalPart } + +/** + * Represents binary data with an associated MIME type. + * @property data the binary data as a [ByteArray] + * @property mimeType an IANA standard MIME type. 
+ */ public class InlineData(public val data: ByteArray, public val mimeType: String) { @Serializable internal data class Internal(val mimeType: String, val data: Base64) From 05e193903d326ef73753b8dfcb4cdb67d7a0a22a Mon Sep 17 00:00:00 2001 From: Rodrigo Lazo Paz Date: Thu, 16 Oct 2025 12:47:17 -0400 Subject: [PATCH 18/18] Fix format --- firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt index a231680daea..d232c222b10 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt @@ -168,7 +168,6 @@ internal constructor( ) : InternalPart } - /** * Represents binary data with an associated MIME type. * @property data the binary data as a [ByteArray]