9 changes: 9 additions & 0 deletions FirebaseVertexAI/CHANGELOG.md
@@ -1,3 +1,12 @@
# Unreleased
- [added] **Public Preview**: Added support for specifying response modalities
in `GenerationConfig`. This includes **public experimental** support for image
generation using Gemini 2.0 Flash (`gemini-2.0-flash-exp`). (#14658)
<br /><br />
Note: This feature is in Public Preview and relies on experimental models,
which means that it is not subject to any SLA or deprecation policy and could
change in backwards-incompatible ways.

# 11.11.0
- [added] Emits a warning when attempting to use an incompatible model with
`GenerativeModel` or `ImagenModel`. (#14610)
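Not part of the diff — a minimal usage sketch of the configuration change described in the CHANGELOG entry above. It assumes `FirebaseApp.configure()` has already been called and uses the SDK's `VertexAI.vertexAI()` entry point; the model name matches the experimental `gemini-2.0-flash-exp` model named in the entry.

```swift
import FirebaseVertexAI

// Opt in to text and image output. Image output is Public Experimental and only
// supported by certain models (e.g. gemini-2.0-flash-exp).
let generationConfig = GenerationConfig(responseModalities: [.text, .image])

// Assumes FirebaseApp.configure() has already been called.
let model = VertexAI.vertexAI().generativeModel(
  modelName: "gemini-2.0-flash-exp",
  generationConfig: generationConfig
)
```

The `generateImage` integration test later in this diff exercises the same configuration through the test app's `InstanceConfig` backends.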
16 changes: 15 additions & 1 deletion FirebaseVertexAI/Sources/GenerationConfig.swift
@@ -48,6 +48,9 @@ public struct GenerationConfig: Sendable {
/// Output schema of the generated candidate text.
let responseSchema: Schema?

/// Supported modalities of the response.
let responseModalities: [ResponseModality]?

/// Creates a new `GenerationConfig` value.
///
/// See the
@@ -140,11 +143,20 @@ public struct GenerationConfig: Sendable {
/// [Generate structured
/// output](https://firebase.google.com/docs/vertex-ai/structured-output?platform=ios) guide
/// for more details.
/// - responseModalities: The data types (modalities) that may be returned in model responses.
///
/// See the [multimodal
/// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
/// documentation for more details.
///
/// > Warning: Specifying response modalities is a **Public Preview** feature, which means
/// > that it is not subject to any SLA or deprecation policy and could change in
/// > backwards-incompatible ways.
public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil,
candidateCount: Int? = nil, maxOutputTokens: Int? = nil,
presencePenalty: Float? = nil, frequencyPenalty: Float? = nil,
stopSequences: [String]? = nil, responseMIMEType: String? = nil,
responseSchema: Schema? = nil) {
responseSchema: Schema? = nil, responseModalities: [ResponseModality]? = nil) {
// Explicit init because otherwise if we re-arrange the above variables it changes the API
// surface.
self.temperature = temperature
@@ -157,6 +169,7 @@ public struct GenerationConfig: Sendable {
self.stopSequences = stopSequences
self.responseMIMEType = responseMIMEType
self.responseSchema = responseSchema
self.responseModalities = responseModalities
}
}

@@ -175,5 +188,6 @@ extension GenerationConfig: Encodable {
case stopSequences
case responseMIMEType = "responseMimeType"
case responseSchema
case responseModalities
}
}
52 changes: 52 additions & 0 deletions FirebaseVertexAI/Sources/Types/Public/ResponseModality.swift
@@ -0,0 +1,52 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation

/// Represents the different types, or modalities, of data that a model can produce as output.
///
/// To configure the desired output modalities for model requests, set the `responseModalities`
/// parameter when initializing a ``GenerationConfig``. See the [multimodal
/// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation)
/// documentation for more details.
///
/// > Important: Support for each response modality, or combination of modalities, depends on the
/// > model.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ResponseModality: EncodableProtoEnum, Sendable {
enum Kind: String {
case text = "TEXT"
case image = "IMAGE"
}

/// Specifies that the model should generate textual content.
///
/// Use this modality when you need the model to produce written language, such as answers to
/// questions, summaries, creative writing, code snippets, or structured data formats like JSON.
public static let text = ResponseModality(kind: .text)

/// **Public Experimental**: Specifies that the model should generate image data.
///
/// Use this modality when you want the model to create visual content based on the provided input
/// or prompts. The response might contain one or more generated images. See the [image
/// generation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation#image-generation)
/// documentation for more details.
///
/// > Warning: Image generation using Gemini 2.0 Flash is a **Public Experimental** feature, which
/// > means that it is not subject to any SLA or deprecation policy and could change in
/// > backwards-incompatible ways.
public static let image = ResponseModality(kind: .image)

let rawValue: String
}
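A hedged sketch, not part of the diff, showing how a caller might branch on the part types that come back when both modalities are requested. The `handleParts(of:)` helper name is hypothetical; the `TextPart` and `InlineDataPart` casts mirror the integration test later in this diff, and the UIKit image decoding is optional.

```swift
import FirebaseVertexAI
#if canImport(UIKit)
import UIKit
#endif

// Sketch: inspect a response that may mix text and image parts when
// responseModalities includes both .text and .image.
func handleParts(of response: GenerateContentResponse) {
  guard let candidate = response.candidates.first else { return }
  for part in candidate.content.parts {
    switch part {
    case let textPart as TextPart:
      print("Model text: \(textPart.text)")
    case let imagePart as InlineDataPart where imagePart.mimeType.hasPrefix("image/"):
      #if canImport(UIKit)
      // Decode the returned bytes (PNG in the integration test below) into a UIImage.
      if let image = UIImage(data: imagePart.data) {
        print("Received image of size \(image.size)")
      }
      #endif
    default:
      break
    }
  }
}
```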
1 change: 1 addition & 0 deletions FirebaseVertexAI/Tests/TestApp/Sources/Constants.swift
@@ -23,4 +23,5 @@ public enum FirebaseAppNames {
public enum ModelNames {
public static let gemini2Flash = "gemini-2.0-flash-001"
public static let gemini2FlashLite = "gemini-2.0-flash-lite-001"
public static let gemini2FlashExperimental = "gemini-2.0-flash-exp"
}
@@ -19,6 +19,12 @@ import FirebaseVertexAI
import Testing
import VertexAITestApp

#if canImport(UIKit)
import UIKit
#endif // canImport(UIKit)

@testable import struct FirebaseVertexAI.BackendError

@Suite(.serialized)
struct GenerateContentIntegrationTests {
// Set temperature, topP and topK to lowest allowed values to make responses more deterministic.
@@ -119,6 +125,51 @@ struct GenerateContentIntegrationTests {
#expect(candidatesTokensDetails.tokenCount == usageMetadata.candidatesTokenCount)
}

@Test(arguments: [
InstanceConfig.vertexV1Beta,
InstanceConfig.developerV1Beta,
])
func generateImage(_ config: InstanceConfig) async throws {
let generationConfig = GenerationConfig(
temperature: 0.0,
topP: 0.0,
topK: 1,
responseModalities: [.text, .image]
)
let model = VertexAI.componentInstance(config).generativeModel(
modelName: ModelNames.gemini2FlashExperimental,
generationConfig: generationConfig,
safetySettings: safetySettings
)
let prompt = "Generate an image of a cute cartoon kitten playing with a ball of yarn."

var response: GenerateContentResponse?
try await withKnownIssue(
"Backend may fail with a 503 - Service Unavailable error when overloaded",
isIntermittent: true
) {
response = try await model.generateContent(prompt)
} matching: { issue in
(issue.error as? BackendError).map { $0.httpResponseCode == 503 } ?? false
}

guard let response else { return }
let candidate = try #require(response.candidates.first)
let inlineDataPart = try #require(candidate.content.parts
.first { $0 is InlineDataPart } as? InlineDataPart)
#expect(inlineDataPart.mimeType == "image/png")
#expect(inlineDataPart.data.count > 0)
#if canImport(UIKit)
let uiImage = try #require(UIImage(data: inlineDataPart.data))
// Gemini 2.0 Flash Experimental returns images sized to fit within a 1024x1024 pixel box but
// dimensions may vary depending on the aspect ratio.
#expect(uiImage.size.width <= 1024)
#expect(uiImage.size.width >= 500)
#expect(uiImage.size.height <= 1024)
#expect(uiImage.size.height >= 500)
#endif // canImport(UIKit)
}

// MARK: Streaming Tests

@Test(arguments: InstanceConfig.allConfigs)
7 changes: 6 additions & 1 deletion FirebaseVertexAI/Tests/Unit/GenerationConfigTests.swift
@@ -61,7 +61,8 @@ final class GenerationConfigTests: XCTestCase {
frequencyPenalty: frequencyPenalty,
stopSequences: stopSequences,
responseMIMEType: responseMIMEType,
responseSchema: .array(items: .string())
responseSchema: .array(items: .string()),
responseModalities: [.text, .image]
)

let jsonData = try encoder.encode(generationConfig)
@@ -74,6 +75,10 @@ final class GenerationConfigTests: XCTestCase {
"maxOutputTokens" : \(maxOutputTokens),
"presencePenalty" : \(presencePenalty),
"responseMimeType" : "\(responseMIMEType)",
"responseModalities" : [
"TEXT",
"IMAGE"
],
"responseSchema" : {
"items" : {
"nullable" : false,