Skip to content

Commit 7e53168

Browse files
authored
Merge b7fa201 into cb9e9c5
2 parents cb9e9c5 + b7fa201 commit 7e53168

File tree

2 files changed

+291
-0
lines changed

2 files changed

+291
-0
lines changed

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package com.google.firebase.ai.type
1818

19+
import android.Manifest
1920
import android.Manifest.permission.RECORD_AUDIO
2021
import android.content.pm.PackageManager
2122
import android.media.AudioFormat
@@ -62,6 +63,7 @@ internal constructor(
6263
private val session: DefaultClientWebSocketSession,
6364
@Blocking private val blockingDispatcher: CoroutineContext,
6465
private var audioHelper: AudioHelper? = null,
66+
private var videoHelper: VideoHelper? = null,
6567
private val firebaseApp: FirebaseApp,
6668
) {
6769
/**
@@ -162,6 +164,66 @@ internal constructor(
162164

163165
audioHelper?.release()
164166
audioHelper = null
167+
videoHelper?.release()
168+
videoHelper = null
169+
}
170+
}
171+
172+
/**
 * Stops the video conversation with the model.
 *
 * This only needs to be called after a previous call to [startVideoConversation].
 *
 * If there is no video conversation currently active (no video helper is held by this session),
 * this function does nothing.
 */
public fun stopVideoConversation() {
  FirebaseAIException.catch {
    // Gate on the video helper itself rather than on `startedReceiving`: nothing in
    // startVideoConversation flips that flag, so gating on it could leave the camera open
    // forever for a video-only conversation.
    val activeHelper = videoHelper ?: return@catch
    videoHelper = null

    // Keep the receiving flag reset, as before, so a later conversation starts from a clean state.
    startedReceiving.getAndSet(false)

    // Cancel the frame-forwarding coroutine first so nothing is sent while the camera shuts down.
    scope.cancel()
    activeHelper.release()
  }
}
189+
190+
/**
 * Starts a video conversation with the model, which can only be stopped using
 * [stopVideoConversation] or [close].
 *
 * Camera frames are forwarded to the model as `image/jpeg` media chunks. If a conversation is
 * already running on this session, a warning is logged and the call returns without doing
 * anything.
 *
 * If the camera cannot be started, the partially initialised state (camera helper and coroutine
 * scope) is torn down before the error is propagated, so the call can be retried.
 *
 * @param cameraId The ID of the camera to use for the video stream.
 * @throws PermissionMissingException if the `CAMERA` permission has not been granted.
 */
@RequiresPermission(Manifest.permission.CAMERA)
public suspend fun startVideoConversation(cameraId: String) {
  val context = firebaseApp.applicationContext
  val cameraPermission = ContextCompat.checkSelfPermission(context, Manifest.permission.CAMERA)
  if (cameraPermission != PackageManager.PERMISSION_GRANTED) {
    throw PermissionMissingException("Camera access not provided by the user")
  }

  FirebaseAIException.catchAsync {
    if (scope.isActive) {
      Log.w(
        TAG,
        "startVideoConversation called after a conversation has already started. " +
          "Call stopVideoConversation to close the previous connection."
      )
      return@catchAsync
    }

    scope = CoroutineScope(blockingDispatcher + childJob())
    try {
      val cameraManager =
        context.getSystemService(android.content.Context.CAMERA_SERVICE)
          as android.hardware.camera2.CameraManager
      val helper = VideoHelper.build(cameraManager)
      videoHelper = helper
      helper
        .start(cameraId)
        .buffer(UNLIMITED)
        .onEach { sendMediaStream(listOf(MediaData(it, "image/jpeg"))) }
        .catch { throw FirebaseAIException.from(it) }
        .launchIn(scope)
    } catch (e: Throwable) {
      // Without this cleanup a failed start would leave an active scope and a non-null helper
      // behind: every retry would be rejected as "already started" and the camera could stay
      // open. Cancellation is cleaned up the same way and then rethrown untouched.
      videoHelper?.release()
      videoHelper = null
      scope.cancel()
      throw e
    }
  }
}
167229

@@ -171,6 +233,9 @@ internal constructor(
171233
/** Indicates whether an audio conversation is being used for this session object. */
172234
public fun isAudioConversationActive(): Boolean = (audioHelper != null)
173235

236+
/** Returns `true` while this session object holds a video helper, i.e. video is in use. */
public fun isVideoConversationActive(): Boolean {
  return videoHelper != null
}
238+
174239
/**
175240
* Receives responses from the model for both streaming and standard requests.
176241
*
@@ -314,6 +379,7 @@ internal constructor(
314379
FirebaseAIException.catchAsync {
315380
session.close()
316381
stopAudioConversation()
382+
stopVideoConversation()
317383
}
318384
}
319385

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.firebase.ai.type
18+
19+
import android.Manifest
20+
import android.graphics.Bitmap
21+
import android.graphics.BitmapFactory
22+
import android.graphics.ImageFormat
23+
import android.hardware.camera2.CameraCaptureSession
24+
import android.hardware.camera2.CameraDevice
25+
import android.hardware.camera2.CameraManager
26+
import android.media.ImageReader
27+
import android.os.Handler
28+
import android.os.Looper
29+
import androidx.annotation.RequiresPermission
30+
import java.io.ByteArrayOutputStream
31+
import kotlin.coroutines.resume
32+
import kotlin.coroutines.resumeWithException
33+
import kotlin.math.max
34+
import kotlinx.coroutines.CoroutineScope
35+
import kotlinx.coroutines.Dispatchers
36+
import kotlinx.coroutines.cancel
37+
import kotlinx.coroutines.channels.awaitClose
38+
import kotlinx.coroutines.flow.Flow
39+
import kotlinx.coroutines.flow.callbackFlow
40+
import kotlinx.coroutines.flow.emptyFlow
41+
import kotlinx.coroutines.launch
42+
import kotlinx.coroutines.suspendCancellableCoroutine
43+
44+
/**
 * Helper class for streaming video from the camera.
 *
 * Frames are captured as JPEG, downscaled so that their largest dimension does not exceed
 * [MAX_DIMENSION] pixels, re-compressed, and emitted as byte arrays.
 *
 * @see VideoHelper.build
 * @see LiveSession.startVideoConversation
 */
@PublicPreviewAPI
internal class VideoHelper(
  private val cameraManager: CameraManager,
) {
  private var cameraDevice: CameraDevice? = null
  private var imageReader: ImageReader? = null
  private var session: CameraCaptureSession? = null
  private val scope = CoroutineScope(Dispatchers.Default)

  /**
   * Handler used for every camera callback.
   *
   * The camera2 and [ImageReader] APIs reject a `null` handler when the calling thread has no
   * [Looper], which is the case for coroutine worker threads, so an explicit handler is always
   * supplied. Created lazily so that constructing a [VideoHelper] touches no Android state.
   */
  private val callbackHandler: Handler by lazy { Handler(Looper.getMainLooper()) }

  // Written by release() and read from camera callbacks and coroutines on other threads.
  @Volatile private var released: Boolean = false

  /**
   * Release the system resources on the camera.
   *
   * Once a [VideoHelper] has been "released", it can _not_ be used again.
   *
   * This method can safely be called multiple times, as it won't do anything if this instance has
   * already been released.
   */
  fun release() {
    if (released) return
    released = true

    closeCameraResources()
    scope.cancel()
  }

  /**
   * Opens the camera and returns a flow of JPEG-encoded frames.
   *
   * The camera device and capture session are set up before this function returns. The repeating
   * capture request itself is started when the returned flow is collected, after the frame
   * listener has been attached, so that no early frame can fill the reader unobserved.
   *
   * Returns an empty flow if this [VideoHelper] has been [released][release], or if the camera
   * reports no JPEG output sizes. Whenever setup does not complete (empty flow after the device
   * was opened, or an exception), any camera resources opened so far are closed again.
   *
   * @param cameraId The ID of the camera to stream from.
   */
  @RequiresPermission(Manifest.permission.CAMERA)
  suspend fun start(cameraId: String): Flow<ByteArray> {
    if (released) return emptyFlow()

    try {
      val device = openCamera(cameraId)
      cameraDevice = device
      // release() may have run while we were suspended; it could not see the device yet.
      if (released) return abandonStart()

      val jpegSizes =
        cameraManager
          .getCameraCharacteristics(cameraId)
          .get(android.hardware.camera2.CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP)
          ?.getOutputSizes(ImageFormat.JPEG)
      val size = jpegSizes?.maxByOrNull { it.width * it.height } ?: return abandonStart()

      val reader = ImageReader.newInstance(size.width, size.height, ImageFormat.JPEG, MAX_IMAGES)
      imageReader = reader

      val captureSession = createCaptureSession(device, reader)
      session = captureSession
      if (released) return abandonStart()

      val request =
        captureSession.device
          .createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW)
          .apply { addTarget(reader.surface) }
          .build()

      return callbackFlow {
        val listener =
          ImageReader.OnImageAvailableListener { source ->
            // Copy and close the image right here so the reader never runs out of images;
            // only the expensive decode/scale/compress step is moved off the callback thread.
            val jpegBytes = readLatestJpeg(source)
            if (jpegBytes != null) {
              scope.launch {
                val scaledBytes = scaleAndCompressImage(jpegBytes)
                if (scaledBytes != null) trySend(scaledBytes)
              }
            }
          }
        reader.setOnImageAvailableListener(listener, callbackHandler)
        captureSession.setRepeatingRequest(request, null, null)

        awaitClose { reader.setOnImageAvailableListener(null, null) }
      }
    } catch (e: Throwable) {
      // Do not keep a half-initialised camera open; cancellation is rethrown untouched.
      closeCameraResources()
      throw e
    }
  }

  /** Closes whatever [start] has opened so far and yields the empty flow it should return. */
  private fun abandonStart(): Flow<ByteArray> {
    closeCameraResources()
    return emptyFlow()
  }

  /** Closes the capture session, image reader and camera device (in that order), if present. */
  private fun closeCameraResources() {
    session?.close()
    session = null
    imageReader?.close()
    imageReader = null
    cameraDevice?.close()
    cameraDevice = null
  }

  /**
   * Copies the newest JPEG frame out of [reader] and closes the underlying image immediately.
   *
   * Returns `null` when no frame is available, when this helper has been released, or when the
   * reader was closed concurrently (reported by the platform as [IllegalStateException]).
   */
  private fun readLatestJpeg(reader: ImageReader): ByteArray? {
    if (released) return null
    return try {
      val image = reader.acquireLatestImage() ?: return null
      try {
        val buffer = image.planes[0].buffer
        ByteArray(buffer.remaining()).also { buffer.get(it) }
      } finally {
        image.close()
      }
    } catch (e: IllegalStateException) {
      // Best effort: dropping a single frame is harmless, crashing the callback thread is not.
      null
    }
  }

  /**
   * Downscales a JPEG so its largest dimension is at most [MAX_DIMENSION] pixels and re-encodes
   * it as JPEG with quality [JPEG_QUALITY].
   *
   * Returns `null` if [bytes] cannot be decoded as an image.
   */
  private fun scaleAndCompressImage(bytes: ByteArray): ByteArray? {
    // First pass: read only the dimensions to pick a power-of-two sample size.
    val options = BitmapFactory.Options().apply { inJustDecodeBounds = true }
    BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options)

    val largestDimension = max(options.outWidth, options.outHeight)
    if (largestDimension <= 0) return null // decoder reports -1 for undecodable data

    var inSampleSize = 1
    if (largestDimension > MAX_DIMENSION) {
      val halfLargestDimension = largestDimension / 2
      while ((halfLargestDimension / inSampleSize) >= MAX_DIMENSION) {
        inSampleSize *= 2
      }
    }

    // Second pass: decode the pixels at the reduced sample size.
    options.inSampleSize = inSampleSize
    options.inJustDecodeBounds = false
    val decoded: Bitmap =
      BitmapFactory.decodeByteArray(bytes, 0, bytes.size, options) ?: return null

    // Sampling only divides by powers of two, so an exact resize may still be required.
    val decodedLargest = max(decoded.width, decoded.height)
    val output =
      if (decodedLargest > MAX_DIMENSION) {
        val scaleFactor = MAX_DIMENSION.toFloat() / decodedLargest
        val newWidth = (decoded.width * scaleFactor).toInt().coerceAtLeast(1)
        val newHeight = (decoded.height * scaleFactor).toInt().coerceAtLeast(1)
        Bitmap.createScaledBitmap(decoded, newWidth, newHeight, true)
      } else {
        decoded
      }

    return try {
      val outputStream = ByteArrayOutputStream()
      output.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY, outputStream)
      outputStream.toByteArray()
    } finally {
      // Frames arrive continuously; free the pixel memory eagerly instead of waiting for GC.
      if (output !== decoded) decoded.recycle()
      output.recycle()
    }
  }

  /**
   * Opens the camera identified by [cameraId] and suspends until it is ready.
   *
   * Fails with a [RuntimeException] if the camera reports an error or disconnects before it has
   * been opened. A device that finishes opening after the caller was cancelled is closed again.
   */
  @RequiresPermission(Manifest.permission.CAMERA)
  private suspend fun openCamera(cameraId: String): CameraDevice =
    suspendCancellableCoroutine { cont ->
      cameraManager.openCamera(
        cameraId,
        object : CameraDevice.StateCallback() {
          override fun onOpened(camera: CameraDevice) {
            if (cont.isActive) cont.resume(camera) else camera.close()
          }

          override fun onDisconnected(camera: CameraDevice) {
            camera.close()
            // May also fire after a successful open; only resume while someone is still waiting.
            if (cont.isActive) {
              cont.resumeWithException(
                RuntimeException("Camera disconnected before it could be opened.")
              )
            }
          }

          override fun onError(camera: CameraDevice, error: Int) {
            camera.close()
            // Errors can be reported after onOpened; resuming twice would throw.
            if (cont.isActive) {
              cont.resumeWithException(RuntimeException("Failed to open camera. Error: $error"))
            }
          }
        },
        callbackHandler
      )
    }

  /**
   * Creates a capture session on [cameraDevice] that outputs to [imageReader]'s surface, and
   * suspends until the session is configured.
   *
   * Fails with a [RuntimeException] if the configuration is rejected.
   */
  private suspend fun createCaptureSession(
    cameraDevice: CameraDevice,
    imageReader: ImageReader
  ): CameraCaptureSession = suspendCancellableCoroutine { cont ->
    cameraDevice.createCaptureSession(
      listOf(imageReader.surface),
      object : CameraCaptureSession.StateCallback() {
        override fun onConfigured(session: CameraCaptureSession) {
          if (cont.isActive) cont.resume(session) else session.close()
        }

        override fun onConfigureFailed(session: CameraCaptureSession) {
          if (cont.isActive) {
            cont.resumeWithException(RuntimeException("Failed to create capture session."))
          }
        }
      },
      callbackHandler
    )
  }

  companion object {
    private val TAG = VideoHelper::class.java.simpleName

    /** Largest width or height, in pixels, of an emitted frame. */
    private const val MAX_DIMENSION = 2048

    /** JPEG quality (0-100) used when re-compressing frames. */
    private const val JPEG_QUALITY = 80

    /**
     * Number of images the reader may hand out at once. `acquireLatestImage` needs at least two
     * to be able to discard stale frames.
     */
    private const val MAX_IMAGES = 2

    /**
     * Creates an instance of [VideoHelper] with the camera manager initialized.
     *
     * A separate build method is necessary so that we can properly propagate the required manifest
     * permission, and throw exceptions when needed.
     */
    @RequiresPermission(Manifest.permission.CAMERA)
    fun build(cameraManager: CameraManager): VideoHelper {
      return VideoHelper(cameraManager)
    }
  }
}

0 commit comments

Comments
 (0)