Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions lib/ruby_llm/aliases.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@
},
"gemini-2.5-flash-image": {
"gemini": "gemini-2.5-flash-image",
"openrouter": "google/gemini-2.5-flash-image"
"openrouter": "google/gemini-2.5-flash-image",
"vertexai": "gemini-2.5-flash-image"
},
"gemini-2.5-flash-image-preview": {
"gemini": "gemini-2.5-flash-image-preview",
Expand Down Expand Up @@ -305,4 +306,4 @@
"gemini": "text-embedding-004",
"vertexai": "text-embedding-004"
}
}
}
30 changes: 29 additions & 1 deletion lib/ruby_llm/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -32735,6 +32735,34 @@
"source": "known_models"
}
},
{
"id": "gemini-2.5-flash-image",
"name": "gemini-2.5-flash-image",
"provider": "vertexai",
"family": "gemini-2",
"created_at": null,
"context_window": null,
"max_output_tokens": null,
"knowledge_cutoff": null,
"modalities": {
"input": [],
"output": []
},
"capabilities": [
"streaming",
"function_calling"
],
"pricing": {},
"metadata": {
"version": "2.0",
"description": "Gemini 2.5 Flash Preview Image",
"supported_generation_methods": [
"generateContent",
"countTokens"
],
"source": "known_models"
}
},
{
"id": "gemini-2.5-flash-lite",
"name": "Gemini 2.5 Flash-Lite",
Expand Down Expand Up @@ -33374,4 +33402,4 @@
"publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/textembedding-gecko@003"
}
}
]
]
108 changes: 94 additions & 14 deletions lib/ruby_llm/providers/gemini/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,111 @@ class Gemini
# Image generation methods for the Gemini API implementation
module Images
# Relative endpoint path for image generation with the current @model.
#
# Models for which uses_generate_content? is true are addressed through
# `:generateContent`; every other model goes through `:predict`.
#
# @return [String] path of the form "models/<model>:<action>"
def images_url
  action = uses_generate_content?(@model) ? 'generateContent' : 'predict'
  "models/#{@model}:#{action}"
end

def render_image_payload(prompt, model:, size:)
RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
@model = model
{
instances: [
{
prompt: prompt
if uses_generate_content?(model)
aspect_ratio = calculate_aspect_ratio(size)
RubyLLM.logger.debug "Using aspect ratio #{aspect_ratio} for size #{size}"
{
contents: [
{
role: 'user',
parts: [
{
text: prompt
}
]
}
],
generationConfig: {
responseModalities: [
'IMAGE'
],
imageConfig: {
aspectRatio: aspect_ratio
}
}
}
else
RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
{
instances: [
{
prompt: prompt
}
],
parameters: {
sampleCount: 1
}
],
parameters: {
sampleCount: 1
}
}
end
end

# Aspect-ratio labels that calculate_aspect_ratio can choose from, mapped to
# their width / height quotient as a Float. Each value is derived from its
# "W:H" label, so the two cannot drift apart. Insertion order follows the
# grouping below.
SUPPORTED_ASPECT_RATIOS = [
  # Landscape
  '21:9', '16:9', '4:3', '3:2',
  # Square
  '1:1',
  # Portrait
  '9:16', '3:4', '2:3',
  # Flexible
  '5:4', '4:5'
].to_h do |label|
  width, height = label.split(':').map(&:to_f)
  [label, width / height]
end.freeze

private

# Picks the SUPPORTED_ASPECT_RATIOS label whose value is numerically closest
# to the width / height ratio found in a size string such as "1024x768".
#
# Falls back to '1:1' when size is nil or empty, contains no
# "<digits>x<digits>" pair (either "x" or "×", case-insensitive), or has a
# zero dimension.
#
# @param size [String, nil] requested size, e.g. "1024x768"
# @return [String] an aspect-ratio label such as "4:3"
def calculate_aspect_ratio(size)
  fallback = '1:1'
  return fallback if size.nil? || size.empty?

  dimensions = size.match(/(\d+)[x×](\d+)/i)
  return fallback unless dimensions

  width, height = dimensions.captures.map(&:to_f)
  return fallback unless width.positive? && height.positive?

  requested = width / height

  # Nearest neighbour by absolute difference between ratio values.
  label, _value = SUPPORTED_ASPECT_RATIOS.min_by { |_label, value| (value - requested).abs }
  label
end

# Reports whether a model lists 'generateContent' among its supported
# generation methods, as opposed to being driven through `predict`.
#
# The model is looked up via RubyLLM::Models.find under the :vertexai
# provider and its metadata[:supported_generation_methods] is inspected.
# NOTE(review): the provider is hard-coded to :vertexai although this module
# lives under Gemini — confirm that is intended for plain Gemini callers.
#
# @param model [String] model id to look up
# @return [Boolean] true when 'generateContent' is listed; false when the
#   model is not found or declares no generation methods
def uses_generate_content?(model)
  info = RubyLLM::Models.find(model, :vertexai)

  # Metadata may lack :supported_generation_methods entirely (e.g. entries that
  # only record :source). Array() maps that nil to [] so the check answers
  # false instead of raising NoMethodError, which the rescue below would not
  # catch.
  Array(info.metadata[:supported_generation_methods]).include?('generateContent')
rescue ModelNotFoundError
  false
end

def parse_image_response(response, model:)
data = response.body
image_data = data['predictions']&.first

unless image_data&.key?('bytesBase64Encoded')
raise Error, 'Unexpected response format from Gemini image generation API'
end
image_data = if uses_generate_content?(model)
raw_data = data.dig('candidates', 0, 'content', 'parts', 0, 'inlineData')
{ 'bytesBase64Encoded' => raw_data['data'], 'mimeType' => raw_data['mimeType'] }
else
data['predictions']&.first
end

mime_type = image_data['mimeType'] || 'image/png'
base64_data = image_data['bytesBase64Encoded']
Expand Down
20 changes: 17 additions & 3 deletions lib/ruby_llm/providers/vertexai/models.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class VertexAI
module Models
# Gemini and other Google models that aren't returned by the API
KNOWN_GOOGLE_MODELS = %w[
gemini-2.5-flash-image
gemini-2.5-flash-lite
gemini-2.5-pro
gemini-2.5-flash
Expand Down Expand Up @@ -75,13 +76,26 @@ def build_known_models
modalities: nil,
capabilities: %w[streaming function_calling],
pricing: nil,
metadata: {
source: 'known_models'
}
metadata: build_known_metadata(model_id)
)
end
end

# Builds the metadata hash for a model that is registered by hand.
#
# Every such model is tagged with source 'known_models'. Ids containing
# 'flash-image' additionally get version, description and
# supported_generation_methods entries.
#
# @param model_id [String] id of the known model
# @return [Hash{Symbol => Object}] metadata for the model
def build_known_metadata(model_id)
  base = { source: 'known_models' }
  return base unless model_id.include?('flash-image')

  {
    version: '2.0',
    description: 'Gemini 2.5 Flash Preview Image',
    supported_generation_methods: %w[generateContent countTokens],
    **base
  }
end

def build_model_from_api_data(model_data, model_id)
Model::Info.new(
id: model_id,
Expand Down
2 changes: 2 additions & 0 deletions lib/ruby_llm/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def deep_merge(original, overrides)
original.merge(overrides) do |_key, original_value, overrides_value|
if original_value.is_a?(Hash) && overrides_value.is_a?(Hash)
deep_merge(original_value, overrides_value)
elsif original_value.is_a?(Array) && overrides_value.is_a?(Array)
original_value + overrides_value
else
overrides_value
end
Expand Down
Loading