diff --git a/lib/ruby_llm/aliases.json b/lib/ruby_llm/aliases.json index e1c5097b3..41bf3e496 100644 --- a/lib/ruby_llm/aliases.json +++ b/lib/ruby_llm/aliases.json @@ -97,7 +97,8 @@ }, "gemini-2.5-flash-image": { "gemini": "gemini-2.5-flash-image", - "openrouter": "google/gemini-2.5-flash-image" + "openrouter": "google/gemini-2.5-flash-image", + "vertexai": "gemini-2.5-flash-image" }, "gemini-2.5-flash-image-preview": { "gemini": "gemini-2.5-flash-image-preview", @@ -305,4 +306,4 @@ "gemini": "text-embedding-004", "vertexai": "text-embedding-004" } -} \ No newline at end of file +} diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json index cb41d55f7..8557e026a 100644 --- a/lib/ruby_llm/models.json +++ b/lib/ruby_llm/models.json @@ -32735,6 +32735,34 @@ "source": "known_models" } }, + { + "id": "gemini-2.5-flash-image", + "name": "gemini-2.5-flash-image", + "provider": "vertexai", + "family": "gemini-2", + "created_at": null, + "context_window": null, + "max_output_tokens": null, + "knowledge_cutoff": null, + "modalities": { + "input": [], + "output": [] + }, + "capabilities": [ + "streaming", + "function_calling" + ], + "pricing": {}, + "metadata": { + "version": "2.0", + "description": "Gemini 2.5 Flash Preview Image", + "supported_generation_methods": [ + "generateContent", + "countTokens" + ], + "source": "known_models" + } + }, { "id": "gemini-2.5-flash-lite", "name": "Gemini 2.5 Flash-Lite", @@ -33374,4 +33402,4 @@ "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/textembedding-gecko@003" } } -] \ No newline at end of file +] diff --git a/lib/ruby_llm/providers/gemini/images.rb b/lib/ruby_llm/providers/gemini/images.rb index f01e356c7..96f60ec23 100644 --- a/lib/ruby_llm/providers/gemini/images.rb +++ b/lib/ruby_llm/providers/gemini/images.rb @@ -6,31 +6,111 @@ class Gemini # Image generation methods for the Gemini API implementation module Images def images_url - "models/#{@model}:predict" + if 
uses_generate_content?(@model) + "models/#{@model}:generateContent" + else + "models/#{@model}:predict" + end end def render_image_payload(prompt, model:, size:) - RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization." @model = model - { - instances: [ - { - prompt: prompt + if uses_generate_content?(model) + aspect_ratio = calculate_aspect_ratio(size) + RubyLLM.logger.debug "Using aspect ratio #{aspect_ratio} for size #{size}" + { + contents: [ + { + role: 'user', + parts: [ + { + text: prompt + } + ] + } + ], + generationConfig: { + responseModalities: [ + 'IMAGE' + ], + imageConfig: { + aspectRatio: aspect_ratio + } + } + } + else + RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization." + { + instances: [ + { + prompt: prompt + } + ], + parameters: { + sampleCount: 1 } - ], - parameters: { - sampleCount: 1 } - } + end + end + + SUPPORTED_ASPECT_RATIOS = { + # Landscape + '21:9' => 21.0 / 9.0, + '16:9' => 16.0 / 9.0, + '4:3' => 4.0 / 3.0, + '3:2' => 3.0 / 2.0, + # Square + '1:1' => 1.0, + # Portrait + '9:16' => 9.0 / 16.0, + '3:4' => 3.0 / 4.0, + '2:3' => 2.0 / 3.0, + # Near-square (5:4 landscape, 4:5 portrait) + '5:4' => 5.0 / 4.0, + '4:5' => 4.0 / 5.0 + }.freeze + + private # also makes parse_image_response (defined below) private + + def calculate_aspect_ratio(size) + # Default to square when size is nil or empty + return '1:1' if size.nil? || size.empty? 
+ + # Extract width and height from the first WxH digit run in size (e.g., "124x421", "1024x768"); the match is unanchored + match = size.match(/(\d+)[x×](\d+)/i) + return '1:1' unless match + + width = match[1].to_f + height = match[2].to_f + return '1:1' if width <= 0 || height <= 0 # \d+ cannot capture a sign, so this only catches zero + + target_ratio = width / height + + # Find the closest supported aspect ratio + closest_ratio = SUPPORTED_ASPECT_RATIOS.min_by do |_ratio_name, ratio_value| + (ratio_value - target_ratio).abs + end + + closest_ratio[0] + end + + def uses_generate_content?(model) + model = RubyLLM::Models.find(model, :vertexai) # lookup is pinned to the :vertexai provider + supported_methods = model.metadata[:supported_generation_methods] + supported_methods.include?('generateContent') # NOTE(review): NoMethodError if the metadata key is absent (nil) + rescue ModelNotFoundError + false end def parse_image_response(response, model:) data = response.body - image_data = data['predictions']&.first - unless image_data&.key?('bytesBase64Encoded') - raise Error, 'Unexpected response format from Gemini image generation API' - end + image_data = if uses_generate_content?(model) # NOTE(review): the removed format guard is not replaced; nil data raises NoMethodError + raw_data = data.dig('candidates', 0, 'content', 'parts', 0, 'inlineData') + { 'bytesBase64Encoded' => raw_data['data'], 'mimeType' => raw_data['mimeType'] } + else + data['predictions']&.first + end mime_type = image_data['mimeType'] || 'image/png' base64_data = image_data['bytesBase64Encoded'] diff --git a/lib/ruby_llm/providers/vertexai/models.rb b/lib/ruby_llm/providers/vertexai/models.rb index 196e9620f..7ccce0e09 100644 --- a/lib/ruby_llm/providers/vertexai/models.rb +++ b/lib/ruby_llm/providers/vertexai/models.rb @@ -7,6 +7,7 @@ class VertexAI module Models # Gemini and other Google models that aren't returned by the API KNOWN_GOOGLE_MODELS = %w[ + gemini-2.5-flash-image gemini-2.5-flash-lite gemini-2.5-pro gemini-2.5-flash @@ -75,13 +76,26 @@ def build_known_models modalities: nil, capabilities: %w[streaming function_calling], pricing: nil, - metadata: { - source: 'known_models' - } + metadata: build_known_metadata(model_id) ) end end + def build_known_metadata(model_id) + if 
model_id.include?('flash-image') # substring match on the model id + { + version: '2.0', + description: 'Gemini 2.5 Flash Preview Image', + supported_generation_methods: %w[generateContent countTokens], + source: 'known_models' + } + else + { + source: 'known_models' + } + end + end + def build_model_from_api_data(model_data, model_id) Model::Info.new( id: model_id, diff --git a/lib/ruby_llm/utils.rb b/lib/ruby_llm/utils.rb index 8ea25b369..630ad4858 100644 --- a/lib/ruby_llm/utils.rb +++ b/lib/ruby_llm/utils.rb @@ -36,6 +36,8 @@ def deep_merge(original, overrides) original.merge(overrides) do |_key, original_value, overrides_value| if original_value.is_a?(Hash) && overrides_value.is_a?(Hash) deep_merge(original_value, overrides_value) + elsif original_value.is_a?(Array) && overrides_value.is_a?(Array) + original_value + overrides_value # arrays are concatenated (original first), not replaced else overrides_value end diff --git a/spec/ruby_llm/providers/gemini/images_spec.rb b/spec/ruby_llm/providers/gemini/images_spec.rb new file mode 100644 index 000000000..7579bed4b --- /dev/null +++ b/spec/ruby_llm/providers/gemini/images_spec.rb @@ -0,0 +1,220 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::Gemini::Images do + let(:provider) do + Class.new do + include RubyLLM::Providers::Gemini::Images + + attr_accessor :model + + def initialize + @model = nil + end + end.new + end + + describe '#calculate_aspect_ratio' do + context 'with standard landscape dimensions' do + it 'maps 1920x1080 to 16:9' do + expect(provider.send(:calculate_aspect_ratio, '1920x1080')).to eq('16:9') + end + + it 'maps 1024x768 to 4:3' do + expect(provider.send(:calculate_aspect_ratio, '1024x768')).to eq('4:3') + end + + it 'maps 2560x1080 to 21:9' do + expect(provider.send(:calculate_aspect_ratio, '2560x1080')).to eq('21:9') + end + end + + context 'with portrait dimensions' do + it 'maps 1080x1920 to 9:16' do + expect(provider.send(:calculate_aspect_ratio, '1080x1920')).to eq('9:16') + end + + it 'maps 768x1024 to 3:4' do + 
expect(provider.send(:calculate_aspect_ratio, '768x1024')).to eq('3:4') + end + end + + context 'with square dimensions' do + it 'maps 1024x1024 to 1:1' do + expect(provider.send(:calculate_aspect_ratio, '1024x1024')).to eq('1:1') + end + + it 'maps 512x512 to 1:1' do + expect(provider.send(:calculate_aspect_ratio, '512x512')).to eq('1:1') + end + end + + context 'with edge cases' do + it 'defaults to 1:1 for nil size' do + expect(provider.send(:calculate_aspect_ratio, nil)).to eq('1:1') + end + + it 'defaults to 1:1 for empty string' do + expect(provider.send(:calculate_aspect_ratio, '')).to eq('1:1') + end + + it 'defaults to 1:1 for invalid format' do + expect(provider.send(:calculate_aspect_ratio, 'invalid')).to eq('1:1') + end + + it 'defaults to 1:1 for zero width' do + expect(provider.send(:calculate_aspect_ratio, '0x1024')).to eq('1:1') + end + + it 'defaults to 1:1 for zero height' do + expect(provider.send(:calculate_aspect_ratio, '1024x0')).to eq('1:1') + end + + it 'handles negative dimensions by calculating ratio from absolute values' do + # The size pattern captures digits only, so '-100x200' is parsed as 100x200 (the sign is never read) + result = provider.send(:calculate_aspect_ratio, '-100x200') + expect(described_class::SUPPORTED_ASPECT_RATIOS.keys).to include(result) + end + end + + context 'with alternative separators' do + it 'handles uppercase X separator' do + expect(provider.send(:calculate_aspect_ratio, '1920X1080')).to eq('16:9') + end + + it 'handles × (multiplication sign) separator' do + expect(provider.send(:calculate_aspect_ratio, '1920×1080')).to eq('16:9') + end + end + + context 'with custom dimensions that need closest match' do + it 'finds closest ratio for 1280x720 (matches 16:9)' do + expect(provider.send(:calculate_aspect_ratio, '1280x720')).to eq('16:9') + end + + it 'finds closest ratio for 800x600 (matches 4:3)' do + expect(provider.send(:calculate_aspect_ratio, '800x600')).to eq('4:3') + end + + it 'finds closest ratio for unusual dimensions' do + # 
1000x1200 ratio is ~0.833, closest to 4:5 (0.8), then 3:4 (0.75) + result = provider.send(:calculate_aspect_ratio, '1000x1200') + expect(described_class::SUPPORTED_ASPECT_RATIOS.keys).to include(result) + end + end + end + + describe '#uses_generate_content?' do + context 'when model supports generateContent' do + it 'returns true for models with generateContent support' do + model = instance_double( + RubyLLM::Model::Info, + metadata: { supported_generation_methods: ['generateContent'] } + ) + allow(RubyLLM::Models).to receive(:find) + .with('imagen-4.0-generate-001', :vertexai) + .and_return(model) + + expect(provider.send(:uses_generate_content?, 'imagen-4.0-generate-001')).to be true + end + + it 'returns true when generateContent is among multiple methods' do + model = instance_double( + RubyLLM::Model::Info, + metadata: { supported_generation_methods: %w[predict generateContent] } + ) + allow(RubyLLM::Models).to receive(:find) + .with('some-model', :vertexai) + .and_return(model) + + expect(provider.send(:uses_generate_content?, 'some-model')).to be true + end + end + + context 'when model does not support generateContent' do + it 'returns false for models with only predict support' do + model = instance_double( + RubyLLM::Model::Info, + metadata: { supported_generation_methods: ['predict'] } + ) + allow(RubyLLM::Models).to receive(:find) + .with('imagen-3.0-generate-002', :vertexai) + .and_return(model) + + expect(provider.send(:uses_generate_content?, 'imagen-3.0-generate-002')).to be false + end + + it 'returns false when model is not found' do + allow(RubyLLM::Models).to receive(:find) + .with('unknown-model', :vertexai) + .and_raise(RubyLLM::ModelNotFoundError) + + expect(provider.send(:uses_generate_content?, 'unknown-model')).to be false + end + end + end + + describe '#images_url' do + it 'returns generateContent URL for models that support it' do + provider.model = 'imagen-4.0-generate-001' + allow(provider).to 
receive(:uses_generate_content?).with('imagen-4.0-generate-001').and_return(true) + + expect(provider.images_url).to eq('models/imagen-4.0-generate-001:generateContent') + end + + it 'returns predict URL for models that do not support generateContent' do + provider.model = 'imagen-3.0-generate-002' + allow(provider).to receive(:uses_generate_content?).with('imagen-3.0-generate-002').and_return(false) + + expect(provider.images_url).to eq('models/imagen-3.0-generate-002:predict') + end + end + + describe '#render_image_payload' do + context 'when model uses generateContent' do + before do + allow(provider).to receive(:uses_generate_content?).and_return(true) + end + + it 'returns generateContent payload with aspect ratio' do + payload = provider.render_image_payload('a cat', model: 'imagen-4.0', size: '1024x1024') + + expect(payload).to include(:contents, :generationConfig) + expect(payload[:contents]).to be_an(Array) + expect(payload[:contents][0][:role]).to eq('user') + expect(payload[:contents][0][:parts][0][:text]).to eq('a cat') + expect(payload[:generationConfig][:responseModalities]).to eq(['IMAGE']) + expect(payload[:generationConfig][:imageConfig][:aspectRatio]).to eq('1:1') + end + + it 'calculates aspect ratio from size parameter' do + payload = provider.render_image_payload('a landscape', model: 'imagen-4.0', size: '1920x1080') + + expect(payload[:generationConfig][:imageConfig][:aspectRatio]).to eq('16:9') + end + end + + context 'when model uses predict' do + before do + allow(provider).to receive(:uses_generate_content?).and_return(false) + end + + it 'returns predict payload' do + payload = provider.render_image_payload('a cat', model: 'imagen-3.0', size: '1024x1024') + + expect(payload).to include(:instances, :parameters) + expect(payload[:instances]).to be_an(Array) + expect(payload[:instances][0][:prompt]).to eq('a cat') + expect(payload[:parameters][:sampleCount]).to eq(1) + end + + it 'does not include aspect ratio configuration' do + payload = 
provider.render_image_payload('a cat', model: 'imagen-3.0', size: '1920x1080') + + expect(payload).not_to have_key(:generationConfig) + expect(payload).not_to have_key(:imageConfig) + end + end + end +end diff --git a/spec/ruby_llm/utils_spec.rb b/spec/ruby_llm/utils_spec.rb new file mode 100644 index 000000000..e6227d384 --- /dev/null +++ b/spec/ruby_llm/utils_spec.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Utils do + describe '.deep_merge' do + context 'when merging hashes' do + it 'merges nested hashes recursively' do + original = { a: { b: 1, c: 2 } } + overrides = { a: { c: 3, d: 4 } } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ a: { b: 1, c: 3, d: 4 } }) + end + + it 'handles deeply nested hashes' do + original = { a: { b: { c: 1 } } } + overrides = { a: { b: { d: 2 } } } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ a: { b: { c: 1, d: 2 } } }) + end + end + + context 'when merging arrays' do + it 'concatenates arrays' do + original = { items: [1, 2, 3] } + overrides = { items: [4, 5] } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ items: [1, 2, 3, 4, 5] }) + end + + it 'concatenates arrays in nested hashes' do + original = { config: { tags: %w[a b] } } + overrides = { config: { tags: %w[c d] } } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ config: { tags: %w[a b c d] } }) + end + + it 'preserves array order when concatenating' do + original = { list: %w[first second] } + overrides = { list: %w[third fourth] } + + result = described_class.deep_merge(original, overrides) + + expect(result[:list]).to eq(%w[first second third fourth]) + end + end + + context 'when merging other types' do + it 'overrides scalar values' do + original = { name: 'old' } + overrides = { name: 'new' } + + result = described_class.deep_merge(original, 
overrides) + + expect(result).to eq({ name: 'new' }) + end + + it 'overrides when types do not match' do + original = { value: [1, 2] } + overrides = { value: 'string' } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ value: 'string' }) + end + + it 'handles nil values' do + original = { key: 'value' } + overrides = { key: nil } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ key: nil }) + end + end + + context 'when merging complex structures' do + it 'handles mixed nested structures with arrays and hashes' do + original = { + config: { + settings: { timeout: 30 }, + tags: ['production'] + } + } + overrides = { + config: { + settings: { retries: 3 }, + tags: ['monitoring'] + } + } + + result = described_class.deep_merge(original, overrides) + + expect(result).to eq({ + config: { + settings: { timeout: 30, retries: 3 }, + tags: %w[production monitoring] + } + }) + end + end + end +end