crmne · myxoh · Oct 24, 2025
diff --git a/lib/ruby_llm/aliases.json b/lib/ruby_llm/aliases.json
@@ -97,7 +97,8 @@
   },
   "gemini-2.5-flash-image": {
     "gemini": "gemini-2.5-flash-image",
-    "openrouter": "google/gemini-2.5-flash-image"
+    "openrouter": "google/gemini-2.5-flash-image",
+    "vertexai": "gemini-2.5-flash-image"
   },
   "gemini-2.5-flash-image-preview": {
     "gemini": "gemini-2.5-flash-image-preview",
@@ -305,4 +306,4 @@
     "gemini": "text-embedding-004",
     "vertexai": "text-embedding-004"
   }
-}
+}
diff --git a/lib/ruby_llm/models.json b/lib/ruby_llm/models.json
@@ -32735,6 +32735,34 @@
       "source": "known_models"
     }
   },
+  {
+    "id": "gemini-2.5-flash-image",
+    "name": "gemini-2.5-flash-image",
+    "provider": "vertexai",
+    "family": "gemini-2",
+    "created_at": null,
+    "context_window": null,
+    "max_output_tokens": null,
+    "knowledge_cutoff": null,
+    "modalities": {
+      "input": [],
+      "output": []
+    },
+    "capabilities": [
+      "streaming",
+      "function_calling"
+    ],
+    "pricing": {},
+    "metadata": {
+      "version": "2.0",
+      "description": "Gemini 2.5 Flash Preview Image",
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ],
+      "source": "known_models"
+    }
+  },
   {
     "id": "gemini-2.5-flash-lite",
     "name": "Gemini 2.5 Flash-Lite",
@@ -33374,4 +33402,4 @@
       "publisher_model_template": "projects/{project}/locations/{location}/publishers/google/models/textembedding-gecko@003"
     }
   }
-]
+]
diff --git a/lib/ruby_llm/providers/gemini/images.rb b/lib/ruby_llm/providers/gemini/images.rb
@@ -6,31 +6,111 @@ class Gemini
       # Image generation methods for the Gemini API implementation
       module Images
         def images_url
-          "models/#{@model}:predict"
+          # Models that list generateContent use that endpoint; all others keep
+          # :predict. @model is assigned by render_image_payload.
+          return "models/#{@model}:generateContent" if uses_generate_content?(@model)
+
+          "models/#{@model}:predict"
         end
 
         def render_image_payload(prompt, model:, size:)
-          RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
           @model = model
-          {
-            instances: [
-              {
-                prompt: prompt
+          if uses_generate_content?(model)
+            aspect_ratio = calculate_aspect_ratio(size)
+            RubyLLM.logger.debug "Using aspect ratio #{aspect_ratio} for size #{size}"
+            {
+              contents: [
+                {
+                  role: 'user',
+                  parts: [
+                    {
+                      text: prompt
+                    }
+                  ]
+                }
+              ],
+              generationConfig: {
+                responseModalities: [
+                  'IMAGE'
+                ],
+                imageConfig: {
+                  aspectRatio: aspect_ratio
+                }
+              }
+            }
+          else
+            RubyLLM.logger.debug "Ignoring size #{size}. Gemini does not support image size customization."
+            {
+              instances: [
+                {
+                  prompt: prompt
+                }
+              ],
+              parameters: {
+                sampleCount: 1
               }
-            ],
-            parameters: {
-              sampleCount: 1
             }
-          }
+          end
+        end
+
+        SUPPORTED_ASPECT_RATIOS = { # label => width / height, searched by calculate_aspect_ratio
+          # Landscape (ratio > 1)
+          '21:9' => 21.0 / 9.0,
+          '16:9' => 16.0 / 9.0,
+          '4:3' => 4.0 / 3.0,
+          '3:2' => 3.0 / 2.0,
+          # Square
+          '1:1' => 1.0,
+          # Portrait (ratio < 1)
+          '9:16' => 9.0 / 16.0,
+          '3:4' => 3.0 / 4.0,
+          '2:3' => 2.0 / 3.0,
+          # Near-square: one landscape (5:4) and one portrait (4:5)
+          '5:4' => 5.0 / 4.0,
+          '4:5' => 4.0 / 5.0
+        }.freeze
+
+        private
+
+        # Maps a "WIDTHxHEIGHT" size to the closest key of SUPPORTED_ASPECT_RATIOS.
+        #
+        # Closeness is measured on a log scale so that a size and its transpose
+        # resolve to mirrored labels (1120x1000 => "5:4", 1000x1120 => "4:5"). A
+        # plain difference is biased toward the smaller ratio and breaks that.
+        #
+        # @param size [#to_s, nil] e.g. "1024x768"; x, X and × are accepted
+        # @return [String] ratio label; "1:1" when size is blank, unparseable
+        #   or has a zero dimension
+        def calculate_aspect_ratio(size)
+          match = size.to_s.match(/(\d+)[x×](\d+)/i)
+          return '1:1' unless match
+
+          width, height = match.captures.map(&:to_f)
+          return '1:1' if width.zero? || height.zero?
+
+          # Compare in log space: log(a / b) == -log(b / a).
+          target = Math.log(width / height)
+          closest = SUPPORTED_ASPECT_RATIOS.min_by { |_name, ratio| (Math.log(ratio) - target).abs }
+          closest.first
+        end
+
+        # True when the registry lists generateContent for +model+; false if unknown.
+        def uses_generate_content?(model)
+          info = RubyLLM::Models.find(model, :vertexai) # NOTE(review): also used by Gemini — confirm provider
+          Array(info.metadata[:supported_generation_methods]).include?('generateContent') # key may be absent
+        rescue ModelNotFoundError
+          false
         end
 
         def parse_image_response(response, model:)
           data = response.body
-          image_data = data['predictions']&.first
 
-          unless image_data&.key?('bytesBase64Encoded')
-            raise Error, 'Unexpected response format from Gemini image generation API'
-          end
+          image_data = if uses_generate_content?(model)
+                         raw_data = data.dig('candidates', 0, 'content', 'parts', 0, 'inlineData')
+                         { 'bytesBase64Encoded' => raw_data['data'], 'mimeType' => raw_data['mimeType'] }
+                       else
+                         data['predictions']&.first
+                       end
 
           mime_type = image_data['mimeType'] || 'image/png'
           base64_data = image_data['bytesBase64Encoded']

diff --git a/lib/ruby_llm/providers/vertexai/models.rb b/lib/ruby_llm/providers/vertexai/models.rb
@@ -7,6 +7,7 @@ class VertexAI
       module Models
         # Gemini and other Google models that aren't returned by the API
         KNOWN_GOOGLE_MODELS = %w[
+          gemini-2.5-flash-image
           gemini-2.5-flash-lite
           gemini-2.5-pro
           gemini-2.5-flash
@@ -75,13 +76,26 @@ def build_known_models
               modalities: nil,
               capabilities: %w[streaming function_calling],
               pricing: nil,
-              metadata: {
-                source: 'known_models'
-              }
+              metadata: build_known_metadata(model_id)
             )
           end
         end
 
+        # Metadata attached to each model created by build_known_models. Ids
+        # containing "flash-image" also list generateContent, which the image
+        # code reads to pick its request format.
+        # NOTE(review): version '2.0' and "Preview" sit oddly with a 2.5 id — confirm.
+        def build_known_metadata(model_id)
+          provenance = { source: 'known_models' }
+          return provenance unless model_id.include?('flash-image')
+
+          {
+            version: '2.0',
+            description: 'Gemini 2.5 Flash Preview Image',
+            supported_generation_methods: %w[generateContent countTokens]
+          }.merge(provenance)
+        end
+
         def build_model_from_api_data(model_data, model_id)
           Model::Info.new(
             id: model_id,

diff --git a/lib/ruby_llm/utils.rb b/lib/ruby_llm/utils.rb
@@ -36,6 +36,8 @@ def deep_merge(original, overrides)
       original.merge(overrides) do |_key, original_value, overrides_value|
         if original_value.is_a?(Hash) && overrides_value.is_a?(Hash)
           deep_merge(original_value, overrides_value)
+        elsif original_value.is_a?(Array) && overrides_value.is_a?(Array)
+          original_value + overrides_value
         else
           overrides_value
         end