From d800e060480a151f4c248f58c87386caab52acf0 Mon Sep 17 00:00:00 2001 From: Kieran Klaassen Date: Mon, 24 Mar 2025 14:03:48 -0700 Subject: [PATCH 1/2] Add structured output support for extracting JSON data --- README.md | 24 +++++++++++++ docs/_data/navigation.yml | 2 ++ docs/guides/structured-output.md | 61 ++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 docs/guides/structured-output.md diff --git a/README.md b/README.md index f7da4bbd9..23df73a24 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ RubyLLM fixes all that. One beautiful API for everything. One consistent format. - 🖞ïļ **Image generation** with DALL-E and other providers - 📊 **Embeddings** for vector search and semantic analysis - 🔧 **Tools** that let AI use your Ruby code +- 🔄 **Structured Output** for extracting JSON data in a type-safe way - 🚂 **Rails integration** to persist chats and messages with ActiveRecord - 🌊 **Streaming** responses with proper Ruby patterns @@ -63,6 +64,29 @@ RubyLLM.paint "a sunset over mountains in watercolor style" # Create vector embeddings RubyLLM.embed "Ruby is elegant and expressive" +# Extract structured data +class Delivery + attr_accessor :timestamp, :dimensions, :address + + def self.json_schema + { + type: "object", + properties: { + timestamp: { type: "string", format: "date-time" }, + dimensions: { type: "array", items: { type: "number" } }, + address: { type: "string" } + } + } + end +end + +response = chat.with_response_format(Delivery) + .ask("Extract: Delivery to 123 Main St on 2025-03-20. Size: 12x8x4.") + +puts response.timestamp # => 2025-03-20T00:00:00Z +puts response.dimensions # => [12, 8, 4] +puts response.address # => 123 Main St + # Let AI use your code class Weather < RubyLLM::Tool description "Gets current weather for a location" diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml index 076be6524..3870a94f5 100644 --- a/docs/_data/navigation.yml +++ b/docs/_data/navigation.yml @@ -21,6 +21,8 @@ url: /guides/embeddings - title: Error Handling url: /guides/error-handling + - title: Structured Output + url: /guides/structured-output - title: Models url: /guides/models - title: GitHub diff --git a/docs/guides/structured-output.md b/docs/guides/structured-output.md new file mode 100644 index 000000000..7ae3978c5 --- /dev/null +++ b/docs/guides/structured-output.md @@ -0,0 +1,61 @@ +# Structured Output + +RubyLLM makes it easy to extract structured data from LLM responses using the `with_response_format` method. 
+ +## Usage + +Define your structure as a Plain Old Ruby Object (PORO) that responds to `.json_schema` or directly pass a JSON schema hash or string: + +```ruby +class Delivery + attr_accessor :timestamp, :dimensions, :address + + def self.json_schema + { + type: "object", + properties: { + timestamp: { type: "string", format: "date-time" }, + dimensions: { + type: "array", + items: { type: "number" }, + description: "Dimensions in inches [length, width, height]" + }, + address: { type: "string" } + }, + required: ["timestamp", "address"] + } + end +end + +# Use the class directly +response = chat.with_response_format(Delivery) + .ask("Extract delivery info from: Next day delivery to 123 Main St...") + +puts response.timestamp # => 2025-03-20 14:30:00 +puts response.dimensions # => [12, 8, 4] +puts response.address # => "123 Main St, Springfield" + +# Or use a JSON schema hash +schema = { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "integer" } + } +} + +response = chat.with_response_format(schema) + .ask("Extract info: John is 30 years old") + +puts response.name # => "John" +puts response.age # => 30 +``` + +## Compatibility + +Structured output is supported by: +- OpenAI models with JSON mode +- Anthropic models with JSON output +- Gemini models with structured output + +The implementation adapts to each provider's specific capabilities while providing a consistent interface. \ No newline at end of file From d37e610f0f4a9f5893143fefb529c6a8cedcc166 Mon Sep 17 00:00:00 2001 From: Kieran Klaassen Date: Mon, 24 Mar 2025 16:27:05 -0700 Subject: [PATCH 2/2] feat: implement --- README.md | 37 ++- docs/_data/navigation.yml | 2 + docs/guides/custom-parsers.md | 362 +++++++++++++++++++++ docs/guides/structured-output.md | 17 +- lib/ruby_llm/chat.rb | 67 +++- lib/ruby_llm/provider.rb | 3 +- lib/ruby_llm/providers/anthropic.rb | 9 +- lib/ruby_llm/providers/anthropic/chat.rb | 35 +- lib/ruby_llm/providers/anthropic/schema.rb | 51 +++ lib/ruby_llm/providers/gemini.rb | 8 +- lib/ruby_llm/providers/gemini/chat.rb | 11 +- lib/ruby_llm/providers/gemini/schema.rb | 78 +++++ lib/ruby_llm/providers/openai.rb | 2 + lib/ruby_llm/providers/openai/chat.rb | 8 +- lib/ruby_llm/providers/openai/schema.rb | 28 ++ lib/ruby_llm/response_parser.rb | 196 +++++++++++ lib/ruby_llm/schema_converter.rb | 32 ++ 17 files changed, 916 insertions(+), 30 deletions(-) create mode 100644 docs/guides/custom-parsers.md create mode 100644 lib/ruby_llm/providers/anthropic/schema.rb create mode 100644 lib/ruby_llm/providers/gemini/schema.rb create mode 100644 lib/ruby_llm/providers/openai/schema.rb create mode 100644 lib/ruby_llm/response_parser.rb create mode 100644 lib/ruby_llm/schema_converter.rb diff --git a/README.md b/README.md index 23df73a24..a7b47fc93 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,9 @@ A delightful Ruby way to work with AI. No configuration madness, no complex call DeepSeek -Gem Version -Ruby Style Guide -Gem Downloads -codecov +Gem Version Ruby Style Guide Gem Downloads codecov -ðŸĪš Battle tested at [💎 Chat with Work](https://chatwithwork.com) +ðŸĪš Battle tested at [💎 Chat with Work](https://chatwithwork.com) ## The problem with AI libraries @@ -34,6 +31,7 @@ RubyLLM fixes all that. One beautiful API for everything. One consistent format. 
- 📊 **Embeddings** for vector search and semantic analysis - 🔧 **Tools** that let AI use your Ruby code - 🔄 **Structured Output** for extracting JSON data in a type-safe way +- ðŸ§Đ **Custom Parsers** for XML, regex, or any format you need - 🚂 **Rails integration** to persist chats and messages with ActiveRecord - 🌊 **Streaming** responses with proper Ruby patterns @@ -67,7 +65,7 @@ RubyLLM.embed "Ruby is elegant and expressive" # Extract structured data class Delivery attr_accessor :timestamp, :dimensions, :address - + def self.json_schema { type: "object", @@ -87,6 +85,32 @@ puts response.timestamp # => 2025-03-20T00:00:00Z puts response.dimensions # => [12, 8, 4] puts response.address # => 123 Main St +# Extract specific XML tags +chat.with_parser(:xml, tag: 'answer') + .ask("Respond with 42") + .content # => "42" + +# Create your own parsers for any format +module CsvParser + def self.parse(response, options) + rows = response.content.strip.split("\n") + headers = rows.first.split(',') + + rows[1..-1].map do |row| + values = row.split(',') + headers.zip(values).to_h + end + end +end + +# Register your custom parser +RubyLLM::ResponseParser.register(:csv, CsvParser) + +# Use your custom parser +result = chat.with_parser(:csv) + .ask("Give me a CSV with name,age,city for 3 people") + .content + # Let AI use your code class Weather < RubyLLM::Tool description "Gets current weather for a location" @@ -222,6 +246,7 @@ Check out the guides at https://rubyllm.com for deeper dives into conversations We welcome contributions to RubyLLM! See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed instructions on how to: + - Run the test suite - Add new features - Update documentation diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml index 3870a94f5..8403c260e 100644 --- a/docs/_data/navigation.yml +++ b/docs/_data/navigation.yml @@ -23,6 +23,8 @@ url: /guides/error-handling - title: Structured Output url: /guides/structured-output + - title: Custom Parsers + url: /guides/custom-parsers - title: Models url: /guides/models - title: GitHub diff --git a/docs/guides/custom-parsers.md b/docs/guides/custom-parsers.md new file mode 100644 index 000000000..dfceb0eb1 --- /dev/null +++ b/docs/guides/custom-parsers.md @@ -0,0 +1,362 @@ +# Custom Parsers + +RubyLLM's custom parser system allows you to extract and transform model responses into any format you need. While structured JSON output is handled by `with_response_format`, custom parsers give you flexibility for all other formats including XML, regex patterns, markdown, JSON, and more. + +## Built-in Parsers + +RubyLLM comes with several built-in parsers: + +- `:text` - Default parser that returns the raw content (no transformation) +- `:json` - Parses JSON responses into Ruby objects +- `:xml` - Extracts content from specific XML tags + +## Using Parsers + +You can specify a parser for any chat by calling `with_parser`: + +```ruby +chat = RubyLLM.chat + +# Use the built-in XML parser +response = chat.with_parser(:xml, tag: 'data') + .ask("Can you provide the answer in XML? 
42") + +puts response.content # => "42" +``` + +### XML Parser + +The XML parser can extract content from specified tags: + +```ruby +# Extract content from the tag +response = chat.with_parser(:xml, tag: 'answer') + .ask("Respond with: This is the extracted content") + +puts response.content # => "This is the extracted content" + +# You can also extract from different tags in different requests +response = chat.with_parser(:xml, tag: 'code') + .ask("Give me a Ruby function in XML: def hello; puts 'world'; end") + +puts response.content # => "def hello; puts 'world'; end" +``` + +### JSON Parser + +The JSON parser converts JSON responses to Ruby objects: + +```ruby +response = chat.with_parser(:json) + .ask("Respond with JSON: {\"name\":\"Ruby\",\"age\":30}") + +# Access the parsed JSON as an OpenStruct +puts response.content.name # => "Ruby" +puts response.content.age # => 30 +``` + +## Creating Custom Parsers + +Creating custom parsers is straightforward. You need to: + +1. Define a module with a `parse` method +2. Register it with `ResponseParser.register` +3. Use it in your chats with `with_parser` + +### Parser Interface + +Your parser module must implement a `parse` method with this signature: + +```ruby +def self.parse(response, options) + # Process response.content and return the parsed result + # 'options' can be any format specified when calling with_parser +end +``` + +Where: + +- `response` is a `RubyLLM::Message` object with the model's response +- `options` is any value passed as the second argument to `with_parser` +- The return value can be any Ruby object + +### CSV Parser Example + +Here's how to create a parser for CSV content: + +```ruby +module CSVParser + def self.parse(response, options) + return response unless response.content.is_a?(String) + + # Skip empty responses + return response if response.content.strip.empty? + + # Parse CSV content + rows = response.content.strip.split("\n") + headers = rows.first.split(',') + + rows[1..-1].map do |row| + values = row.split(',') + headers.zip(values).to_h + end + end +end + +# Register your parser +RubyLLM::ResponseParser.register(:csv, CSVParser) + +# Use your parser in a chat +results = chat.with_parser(:csv) + .ask("Give me a CSV with name,age,city with 3 rows of data") + .content + +# Process the results +results.each do |person| + puts "#{person['name']} is #{person['age']} years old from #{person['city']}" +end +``` + +### Markdown Parser Example + +A parser for extracting code blocks from markdown: + +````ruby +module MarkdownParser + def self.parse(response, options) + return response unless response.content.is_a?(String) + + content = response.content + language = options[:language] if options.is_a?(Hash) + + # Extract all code blocks + if language + # Get only blocks of specified language + blocks = content.scan(/```#{language}\n(.*?)```/m).flatten + else + # Get all code blocks regardless of language + blocks = content.scan(/```(?:\w+)?\n(.*?)```/m).flatten + end + + # Return a single string if only one block, otherwise an array + blocks.size == 1 ? 
blocks.first : blocks + end +end + +RubyLLM::ResponseParser.register(:markdown, MarkdownParser) + +# Extract Ruby code from markdown +ruby_code = chat.with_parser(:markdown, language: "ruby") + .ask("Write a function to calculate Fibonacci numbers") + .content + +# Extract all code blocks +all_code = chat.with_parser(:markdown) + .ask("Write a function in both Ruby and Python") + .content +```` + +### Regular Expression Parser + +Extract specific patterns using regex: + +```ruby +module RegexParser + def self.parse(response, options) + return response unless response.content.is_a?(String) + + if options.is_a?(Hash) && options[:pattern] + pattern = options[:pattern] + + # Support for named capture groups + if options[:named_captures] && options[:named_captures] == true + regex = Regexp.new(pattern) + match = regex.match(response.content) + return match&.named_captures || response.content + end + + # Support for multiple matches + if options[:all_matches] && options[:all_matches] == true + return response.content.scan(Regexp.new(pattern)) + end + + # Default: return first capture group of first match + match = response.content.match(Regexp.new(pattern)) + return match[1] if match && match[1] + end + + # Return original content if no pattern or no match + response.content + end +end + +RubyLLM::ResponseParser.register(:regex, RegexParser) + +# Extract an email address +email = chat.with_parser(:regex, pattern: 'Email: ([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})') + .ask("My email is: Email: user@example.com") + .content # => "user@example.com" + +# Extract named captures +contact = chat.with_parser(:regex, + pattern: 'Name: (?[A-Za-z ]+), Phone: (?[0-9-]+)', + named_captures: true) + .ask("Contact info - Name: John Doe, Phone: 555-1234") + .content + +puts contact["name"] # => "John Doe" +puts contact["phone"] # => "555-1234" + +# Extract all matches +numbers = chat.with_parser(:regex, + pattern: '\d+', + all_matches: true) + .ask("Here are some numbers: 42, 17, 99, 3.14") + .content # => ["42", "17", "99", "3", "14"] +``` + +## Advanced Usage + +### Chaining Parsers + +You can create parser chains by registering a parser that calls another parser: + +```ruby +module JsonToYamlParser + def self.parse(response, options) + # First use the JSON parser + json_data = RubyLLM::ResponseParser::JsonParser.parse(response, options) + + # Convert to YAML format + require 'yaml' + json_data.to_h.to_yaml + end +end + +RubyLLM::ResponseParser.register(:json_to_yaml, JsonToYamlParser) +``` + +### Dynamic Parser Selection + +You can dynamically select parsers based on response content: + +```ruby +module AutoParser + def self.parse(response, options) + content = response.content.to_s + + if content.start_with?('{') && content.end_with?('}') + RubyLLM::ResponseParser::JsonParser.parse(response, options) + elsif content.include?('') + RubyLLM::ResponseParser::XmlParser.parse(response, {tag: 'result'}) + else + content + end + end +end + +RubyLLM::ResponseParser.register(:auto, AutoParser) + +# Will automatically use the right parser based on content +result = chat.with_parser(:auto) + .ask("Give me either JSON or XML, your choice") + .content +``` + +### Stateful Parsers + +You can create stateful parsers by using class variables: + +```ruby +module AccumulatingParser + @@accumulated = [] + + def self.parse(response, options) + @@accumulated << response.content + @@accumulated + end + + def self.reset + @@accumulated = [] + end +end + +RubyLLM::ResponseParser.register(:accumulate, AccumulatingParser) + +# 
Will collect responses across multiple requests +chat.with_parser(:accumulate) + .ask("Give me part 1") + .content # => ["part 1 content"] + +chat.with_parser(:accumulate) + .ask("Give me part 2") + .content # => ["part 1 content", "part 2 content"] + +# Reset when done +AccumulatingParser.reset +``` + +## Error Handling + +Custom parsers should handle errors gracefully. When a parsing error occurs, your parser can: + +1. Return the original response +2. Return a default value +3. Raise a custom error + +```ruby +module SafeJsonParser + def self.parse(response, options) + return response unless response.content.is_a?(String) + + begin + JSON.parse(response.content, object_class: OpenStruct) + rescue JSON::ParserError => e + if options[:fallback] == :original + # Option 1: Return original content + response.content + elsif options[:fallback] + # Option 2: Return default value + options[:fallback] + else + # Option 3: Raise error with context + raise RubyLLM::ResponseParser::ParsingError, + "Failed to parse JSON: #{e.message} in: #{response.content[0..100]}" + end + end + end +end + +RubyLLM::ResponseParser.register(:safe_json, SafeJsonParser) +``` + +## Compatibility with Response Format + +Custom parsers work alongside the structured output functionality. You can use both together: + +```ruby +class Person + attr_accessor :name, :age + + def self.json_schema + { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "integer" } + } + } + end +end + +# First use response_format to get structured JSON +result = chat.with_response_format(Person) + .ask("Create a profile for John, age 30") + +# Then use a custom parser on subsequent messages +details = chat.with_parser(:regex, pattern: 'Details: (.+)') + .ask("Give me more details about this person") + .content +``` + +The custom parser system gives you ultimate flexibility in handling model outputs, letting you extract and transform data exactly how you need it. 
diff --git a/docs/guides/structured-output.md b/docs/guides/structured-output.md index 7ae3978c5..da9c16e28 100644 --- a/docs/guides/structured-output.md +++ b/docs/guides/structured-output.md @@ -4,19 +4,19 @@ RubyLLM makes it easy to extract structured data from LLM responses using the `w ## Usage -Define your structure as a Plain Old Ruby Object (PORO) that responds to `.json_schema` or directly pass a JSON schema hash or string: +Define your structure as a Plain Old Ruby Object (PORO) that responds to `.json_schema` or directly pass a JSON schema hash or string or use tools like https://github.com/sergiobayona/easy_talk/ or https://github.com/kieranklaassen/structify ```ruby class Delivery attr_accessor :timestamp, :dimensions, :address - + def self.json_schema { type: "object", properties: { timestamp: { type: "string", format: "date-time" }, - dimensions: { - type: "array", + dimensions: { + type: "array", items: { type: "number" }, description: "Dimensions in inches [length, width, height]" }, @@ -32,7 +32,7 @@ response = chat.with_response_format(Delivery) .ask("Extract delivery info from: Next day delivery to 123 Main St...") puts response.timestamp # => 2025-03-20 14:30:00 -puts response.dimensions # => [12, 8, 4] +puts response.dimensions # => [12, 8, 4] puts response.address # => "123 Main St, Springfield" # Or use a JSON schema hash @@ -54,8 +54,13 @@ puts response.age # => 30 ## Compatibility Structured output is supported by: + - OpenAI models with JSON mode - Anthropic models with JSON output - Gemini models with structured output -The implementation adapts to each provider's specific capabilities while providing a consistent interface. \ No newline at end of file +The implementation adapts to each provider's specific capabilities while providing a consistent interface. + +--- + +For more advanced parsing techniques and additional parser examples, check out the [Custom Parsers Guide](/guides/custom-parsers). diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index 9c6d29419..9eaf23990 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -11,7 +11,7 @@ module RubyLLM class Chat include Enumerable - attr_reader :model, :messages, :tools + attr_reader :model, :messages, :tools, :response_format, :parser def initialize(model: nil, provider: nil) model_id = model || RubyLLM.config.default_model @@ -23,6 +23,8 @@ def initialize(model: nil, provider: nil) new_message: nil, end_message: nil } + @response_format = nil + @parser = :text # Default to text parser end def ask(message = nil, with: {}, &block) @@ -58,6 +60,43 @@ def with_temperature(temperature) self end + # Sets a response format for the model to use when generating responses. + # This enforces structured output according to the provided schema. 
+ # + # @param format [Class, Hash, String] Format can be: + # - A class that responds to .json_schema (Plain Old Ruby Object) + # - A hash representing a JSON schema + # - A string containing a valid JSON schema + # @return [Chat] Returns self for method chaining + # @example Using a class + # chat.with_response_format(Delivery) + # @example Using a hash + # chat.with_response_format({type: "object", properties: {name: {type: "string"}}}) + def with_response_format(format) + @response_format = format + @parser = :json + self + end + + # Sets a custom parser to use for processing model responses + # + # @param parser_type [Symbol] The registered parser type to use + # @param options [Hash] Additional options to pass to the parser + # @return [Chat] Returns self for method chaining + # @example Using XML parser to extract specific tag + # chat.with_parser(:xml, tag: 'result') + # @example Using default JSON parser + # chat.with_parser(:json) + def with_parser(parser_type, options = nil) + if !ResponseParser.parsers.key?(parser_type.to_sym) + raise Error, "Unknown parser type: #{parser_type}. Available parsers: #{ResponseParser.parsers.keys.join(', ')}" + end + + @parser = parser_type.to_sym + @parser_options = options + self + end + def on_new_message(&block) @on[:new_message] = block self @@ -74,14 +113,30 @@ def each(&) def complete(&) @on[:new_message]&.call - response = @provider.complete(messages, tools: @tools, temperature: @temperature, model: @model.id, &) + + # Get raw response from provider + response = @provider.complete( + messages, + tools: @tools, + temperature: @temperature, + model: @model.id, + response_format: @response_format, + & + ) + @on[:end_message]&.call(response) - add_message response - if response.tool_call? - handle_tool_calls(response, &) + # Apply appropriate parser - use response_format if present, or specified parser + format_or_parser = @response_format || (@parser_options || @parser) + parsed_response = ResponseParser.parse(response, format_or_parser) + + # Add the parsed response to messages + add_message parsed_response + + if parsed_response.tool_call? + handle_tool_calls(parsed_response, &) else - response + parsed_response end end diff --git a/lib/ruby_llm/provider.rb b/lib/ruby_llm/provider.rb index c4ce495c4..760c1eb0e 100644 --- a/lib/ruby_llm/provider.rb +++ b/lib/ruby_llm/provider.rb @@ -8,7 +8,7 @@ module Provider # Common functionality for all LLM providers. Implements the core provider # interface so specific providers only need to implement a few key methods. module Methods # rubocop:disable Metrics/ModuleLength - def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable Metrics/MethodLength + def complete(messages, tools:, temperature:, model:, response_format: nil, &block) # rubocop:disable Metrics/MethodLength normalized_temperature = if capabilities.respond_to?(:normalize_temperature) capabilities.normalize_temperature(temperature, model) else @@ -19,6 +19,7 @@ def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable M tools: tools, temperature: normalized_temperature, model: model, + response_format: response_format, stream: block_given?) if block_given? diff --git a/lib/ruby_llm/providers/anthropic.rb b/lib/ruby_llm/providers/anthropic.rb index 04dcf0a6f..2cd2f5db3 100644 --- a/lib/ruby_llm/providers/anthropic.rb +++ b/lib/ruby_llm/providers/anthropic.rb @@ -2,16 +2,17 @@ module RubyLLM module Providers - # Anthropic Claude API integration. 
Handles the complexities of - # Claude's unique message format and tool calling conventions. + # Anthropic API integration. Handles chat completion with Claude models, + # including Claude 3 Opus, Sonnet, and Haiku. module Anthropic extend Provider extend Anthropic::Chat - extend Anthropic::Embeddings - extend Anthropic::Media extend Anthropic::Models extend Anthropic::Streaming extend Anthropic::Tools + extend Anthropic::Media + extend Anthropic::Embeddings + extend Anthropic::Schema module_function diff --git a/lib/ruby_llm/providers/anthropic/chat.rb b/lib/ruby_llm/providers/anthropic/chat.rb index e80c81a49..1caafd14c 100644 --- a/lib/ruby_llm/providers/anthropic/chat.rb +++ b/lib/ruby_llm/providers/anthropic/chat.rb @@ -11,15 +11,45 @@ def completion_url '/v1/messages' end - def render_payload(messages, tools:, temperature:, model:, stream: false) + def render_payload(messages, tools:, temperature:, model:, response_format: nil, stream: false) + formatted_messages = messages.map { |msg| format_message(msg) } + + # Handle schema instructions for structured output + if response_format + # Get format and schema instructions + format_config = Schema.convert(response_format) + + # Add system instructions to guide output format + if format_config[:system_instruction] + # Find existing system message or create new one + system_msg = formatted_messages.find { |msg| msg[:role] == 'system' } + + if system_msg + # Append schema instructions to existing system message + system_msg[:content] = "#{system_msg[:content]}\n\n#{format_config[:system_instruction]}" + else + # Add new system message with schema instructions + formatted_messages.unshift({ + role: 'system', + content: format_config[:system_instruction] + }) + end + end + end + { model: model, - messages: messages.map { |msg| format_message(msg) }, + messages: formatted_messages, temperature: temperature, stream: stream, max_tokens: RubyLLM.models.find(model).max_tokens }.tap do |payload| payload[:tools] = tools.values.map { |t| function_for(t) } if tools.any? 
+ + # Add format parameter for structured output if response_format is specified + if response_format + payload[:format] = format_config[:format] + end end end @@ -69,6 +99,7 @@ def format_basic_message(msg) def convert_role(role) case role when :tool, :user then 'user' + when :system then 'system' else 'assistant' end end diff --git a/lib/ruby_llm/providers/anthropic/schema.rb b/lib/ruby_llm/providers/anthropic/schema.rb new file mode 100644 index 000000000..f6d7ce103 --- /dev/null +++ b/lib/ruby_llm/providers/anthropic/schema.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Anthropic + # Handles conversion of schema formats to Anthropic-specific formats + module Schema + module_function + + # Convert a schema to Anthropic's format parameter + # + # @param format [Class, Hash, String] The format specification + # @return [Hash] An Anthropic-compatible schema configuration with instructions + def convert(format) + # Set the basic format parameter + result = { + format: "json_object" + } + + # Extract the schema to add as instructions + schema = SchemaConverter.extract_schema(format) + + # Create instructions with the schema + result[:system_instruction] = generate_schema_instructions(schema) + + result + end + + # Generate instructions for Claude to follow the schema + # + # @param schema [Hash] The JSON schema + # @return [String] Instructions for Claude to follow + def generate_schema_instructions(schema) + schema_json = JSON.pretty_generate(schema) + + <<~INSTRUCTIONS + You must respond with a valid JSON object that strictly adheres to the following JSON schema: + + ```json + #{schema_json} + ``` + + Do not include any explanations, preambles, or additional text in your response. + Your entire response must be a single valid JSON object that follows the schema exactly. + Make sure all required fields are included, and don't add any fields not specified in the schema. + INSTRUCTIONS + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/gemini.rb b/lib/ruby_llm/providers/gemini.rb index 8ba65571d..d2fda2052 100644 --- a/lib/ruby_llm/providers/gemini.rb +++ b/lib/ruby_llm/providers/gemini.rb @@ -2,16 +2,18 @@ module RubyLLM module Providers - # Native Gemini API implementation + # Google Gemini API integration. Handles chat completion with + # the Gemini family of models. module Gemini extend Provider extend Gemini::Chat - extend Gemini::Embeddings - extend Gemini::Images extend Gemini::Models extend Gemini::Streaming extend Gemini::Tools + extend Gemini::Embeddings + extend Gemini::Images extend Gemini::Media + extend Gemini::Schema module_function diff --git a/lib/ruby_llm/providers/gemini/chat.rb b/lib/ruby_llm/providers/gemini/chat.rb index bc8ec365e..43cd7aa60 100644 --- a/lib/ruby_llm/providers/gemini/chat.rb +++ b/lib/ruby_llm/providers/gemini/chat.rb @@ -6,7 +6,7 @@ module Gemini # Chat methods for the Gemini API implementation module Chat # rubocop:disable Metrics/ModuleLength # Must be public for Provider to use - def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable Metrics/MethodLength + def complete(messages, tools:, temperature:, model:, response_format: nil, &block) # rubocop:disable Metrics/MethodLength payload = { contents: format_messages(messages), generationConfig: { @@ -16,6 +16,15 @@ def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable M payload[:tools] = format_tools(tools) if tools.any? 
+ # Add structured output configuration if response_format is specified + if response_format + payload[:generationConfig][:response_mime_type] = "application/json" + + # Add schema if provided + schema = Schema.convert(response_format) + payload[:generationConfig][:response_schema] = schema if schema + end + # Store tools for use in generate_completion @tools = tools diff --git a/lib/ruby_llm/providers/gemini/schema.rb b/lib/ruby_llm/providers/gemini/schema.rb new file mode 100644 index 000000000..1054c5f4e --- /dev/null +++ b/lib/ruby_llm/providers/gemini/schema.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module Gemini + # Handles conversion of schema formats to Gemini-specific formats + module Schema + module_function + + # Convert a schema to Gemini's response_schema format + # + # @param format [Class, Hash, String] The format specification + # @return [Hash] A Gemini-compatible schema + def convert(format) + schema = SchemaConverter.extract_schema(format) + + # Convert the schema to Gemini's format which uses different case conventions + # and type naming + # https://ai.google.dev/gemini-api/docs/structured-output?lang=rest + convert_schema_format(schema) + end + + # Convert a standard JSON schema to Gemini's specific format + # + # @param schema [Hash] A standard JSON schema + # @return [Hash] A Gemini-compatible schema + def convert_schema_format(schema) + result = {} + + if schema[:type] + result[:type] = convert_type(schema[:type]) + end + + if schema[:properties] + result[:properties] = {} + schema[:properties].each do |key, prop| + result[:properties][key] = convert_schema_format(prop) + end + end + + if schema[:items] + result[:items] = convert_schema_format(schema[:items]) + end + + if schema[:required] + result[:required] = schema[:required] + end + + if schema[:enum] + result[:enum] = schema[:enum] + end + + if schema[:format] + result[:format] = schema[:format] + end + + result + end + + # Convert JSON Schema type names to Gemini type names + # + # @param type [String] JSON Schema type name + # @return [String] Gemini type name + def convert_type(type) + case type.to_s.downcase + when 'object' then 'OBJECT' + when 'array' then 'ARRAY' + when 'string' then 'STRING' + when 'number' then 'NUMBER' + when 'integer' then 'INTEGER' + when 'boolean' then 'BOOLEAN' + else type.to_s.upcase + end + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/providers/openai.rb b/lib/ruby_llm/providers/openai.rb index f78062ea1..d1bcff658 100644 --- a/lib/ruby_llm/providers/openai.rb +++ b/lib/ruby_llm/providers/openai.rb @@ -14,6 +14,7 @@ module OpenAI extend OpenAI::Tools extend OpenAI::Images extend OpenAI::Media + extend OpenAI::Schema def self.extended(base) base.extend(Provider) @@ -24,6 +25,7 @@ def self.extended(base) base.extend(OpenAI::Tools) base.extend(OpenAI::Images) base.extend(OpenAI::Media) + base.extend(OpenAI::Schema) end module_function diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 87462980b..e7a37a281 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -11,7 +11,7 @@ def completion_url 'chat/completions' end - def render_payload(messages, tools:, temperature:, model:, stream: false) # rubocop:disable Metrics/MethodLength + def render_payload(messages, tools:, temperature:, model:, response_format: nil, stream: false) # rubocop:disable Metrics/MethodLength { model: model, messages: 
format_messages(messages), @@ -22,6 +22,12 @@ def render_payload(messages, tools:, temperature:, model:, stream: false) # rubo payload[:tools] = tools.map { |_, tool| tool_for(tool) } payload[:tool_choice] = 'auto' end + + # Add response_format parameter if specified + if response_format + payload[:response_format] = Schema.convert(response_format) + end + payload[:stream_options] = { include_usage: true } if stream end end diff --git a/lib/ruby_llm/providers/openai/schema.rb b/lib/ruby_llm/providers/openai/schema.rb new file mode 100644 index 000000000..66607e99e --- /dev/null +++ b/lib/ruby_llm/providers/openai/schema.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + module OpenAI + # Handles conversion of schema formats to OpenAI-specific formats + module Schema + module_function + + # Convert a schema to OpenAI's response_format format + # + # @param format [Class, Hash, String] The format specification + # @return [Hash] An OpenAI-compatible schema + def convert(format) + schema = SchemaConverter.extract_schema(format) + + # Return a structure compatible with OpenAI's response_format parameter + # https://platform.openai.com/docs/guides/structured-outputs?api-mode=responses + { + type: "json_schema", + schema: schema, + strict: true + } + end + end + end + end +end \ No newline at end of file diff --git a/lib/ruby_llm/response_parser.rb b/lib/ruby_llm/response_parser.rb new file mode 100644 index 000000000..aa26d9cba --- /dev/null +++ b/lib/ruby_llm/response_parser.rb @@ -0,0 +1,196 @@ +# frozen_string_literal: true + +require 'ostruct' +require 'json' + +module RubyLLM + # Responsible for parsing LLM responses into structured objects. + # Supports various formats (JSON, XML, custom) and allows users to + # register their own custom parsers. 
+ module ResponseParser + class Error < RubyLLM::Error; end + class InvalidSchemaError < Error; end + class ParsingError < Error; end + + # Registry for custom format parsers + @parsers = {} + + class << self + # Parse an LLM response according to the specified format + # + # @param response [Message] The LLM response message to parse + # @param format [Class, Hash, String, Symbol, nil] The format specification + # @return [Object] The parsed response as an appropriate Ruby object + def parse(response, format = nil) + # Use text parser by default if no format specified + format ||= :text + + # Determine parser based on format type + parser = parser_for(format) + + # Return original response if no parser is found (failsafe) + return response unless parser + + begin + parsed_result = parser.parse(response, format) + + # If the parser returned a non-Message object, wrap it in a Message + if !parsed_result.is_a?(Message) && parsed_result.is_a?(Object) + # Preserve original message attributes but replace content with parsed result + Message.new( + role: response.role, + tool_calls: response.tool_calls, + tool_call_id: response.tool_call_id, + input_tokens: response.input_tokens, + output_tokens: response.output_tokens, + model_id: response.model_id, + content: parsed_result + ) + else + parsed_result + end + rescue => e + # In case of parsing error, log the error and return the original response + RubyLLM.logger.error("Error parsing response: #{e.message}") + response + end + end + + # Register a custom parser for a specific format type + # + # @param format_type [Symbol] The format type identifier + # @param parser [Object] The parser to use for this format type + def register(format_type, parser) + @parsers[format_type.to_sym] = parser + end + + # Get all registered parsers + # + # @return [Hash] The registered parsers + def parsers + @parsers + end + + # Retrieve the appropriate parser for the given format + # + # @param format [Object] The format specification + # @return [Object] The parser to use + def parser_for(format) + if format.is_a?(Symbol) && @parsers.key?(format) + @parsers[format] + elsif format.is_a?(Class) && format.respond_to?(:json_schema) + @parsers[:json] + elsif format.is_a?(Hash) && (format[:type] == 'object' || format[:properties]) + @parsers[:json] + elsif format.is_a?(String) && format.strip.start_with?('{') + @parsers[:json] + else + # Default to text parser if no match + @parsers[:text] + end + end + end + + # Text parser that does no processing (default) + module TextParser + # Returns the response unchanged + # + # @param response [Message] The LLM response message + # @param _format [Object] Ignored for text parser + # @return [Message] The original message unchanged + def self.parse(response, _format = nil) + response + end + end + + # Parser for JSON-formatted responses + module JsonParser + # Parse a response into a structured Ruby object based on JSON schema + # + # @param response [Message] The LLM response message + # @param format [Class, Hash, String] The format specification + # @return [Object] A Ruby object matching the schema + def self.parse(response, format) + return response unless response.content.is_a?(String) + + # Extract JSON data from response + json_data = JSON.parse(response.content) + + if format.is_a?(Class) && format.respond_to?(:json_schema) + # Create an instance of the class and populate its properties + instantiate_from_schema(format, json_data) + else + # Return an OpenStruct for generic JSON + deep_to_ostruct(json_data) + end + 
rescue JSON::ParserError => e + raise ParsingError, "Failed to parse JSON response: #{e.message}" + end + + # Create an instance of a class based on JSON data + # + # @param klass [Class] The class to instantiate + # @param data [Hash] The data to populate the instance with + # @return [Object] An instance of klass populated with data + def self.instantiate_from_schema(klass, data) + instance = klass.new + + data.each do |key, value| + setter = "#{key}=" + if instance.respond_to?(setter) + instance.send(setter, value) + end + end + + instance + end + + # Convert a Hash to an OpenStruct, recursively handling nested structures + # + # @param obj [Hash, Array, Object] The object to convert + # @return [OpenStruct, Array, Object] The converted object + def self.deep_to_ostruct(obj) + case obj + when Hash + OpenStruct.new( + obj.transform_values { |v| deep_to_ostruct(v) } + ) + when Array + obj.map { |item| deep_to_ostruct(item) } + else + obj + end + end + end + + # Example XML Parser (simple implementation) + module XmlParser + # Extract content from XML responses + # + # @param response [Message] The LLM response message + # @param format [Hash, Symbol] Format options for XML parsing + # @return [Object] Extracted content based on format options + def self.parse(response, format) + return response unless response.content.is_a?(String) + + content = response.content + + # If format includes a tag to extract + if format.is_a?(Hash) && format[:tag] + tag = format[:tag] + # Simple regex-based extraction - for production use a proper XML parser + match = content.match(/<#{tag}>(.*?)<\/#{tag}>/m) + return match[1] if match + end + + # Return original content if no extraction happened + content + end + end + + # Register default parsers + register(:json, JsonParser) + register(:text, TextParser) + register(:xml, XmlParser) + end +end \ No newline at end of file diff --git a/lib/ruby_llm/schema_converter.rb b/lib/ruby_llm/schema_converter.rb new file mode 100644 index 000000000..11de5ca53 --- /dev/null +++ b/lib/ruby_llm/schema_converter.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module RubyLLM + # Provides common schema extraction and conversion utilities. + # Provider-specific schema conversions are handled by the respective provider modules. + class SchemaConverter + class Error < RubyLLM::Error; end + class InvalidSchemaError < Error; end + + class << self + # Extract a JSON schema from various formats + # + # @param format [Class, Hash, String] The format specification + # @return [Hash] The extracted JSON schema + def extract_schema(format) + if format.is_a?(Class) && format.respond_to?(:json_schema) + format.json_schema + elsif format.is_a?(Hash) + format + elsif format.is_a?(String) + begin + JSON.parse(format, symbolize_names: true) + rescue JSON::ParserError + raise InvalidSchemaError, "Invalid JSON schema: #{format}" + end + else + raise InvalidSchemaError, "Unsupported schema format: #{format.class}" + end + end + end + end +end \ No newline at end of file
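
A minimal end-to-end sketch of the two entry points this patch adds, `with_response_format` and `with_parser`, following the examples in the updated README and guides. The configuration block is an assumption (any supported provider key works), the prompt text is illustrative, and the return shape for structured output follows the README examples above; per the `ResponseParser` implementation it may instead be exposed on the message's `content`.

```ruby
require 'ruby_llm'

# Assumed provider configuration - not part of this patch.
RubyLLM.configure do |config|
  config.openai_api_key = ENV['OPENAI_API_KEY']
end

# A PORO that responds to .json_schema, as in the structured output guide.
class Delivery
  attr_accessor :timestamp, :dimensions, :address

  def self.json_schema
    {
      type: "object",
      properties: {
        timestamp: { type: "string", format: "date-time" },
        dimensions: { type: "array", items: { type: "number" } },
        address: { type: "string" }
      },
      required: ["timestamp", "address"]
    }
  end
end

chat = RubyLLM.chat

# Structured output: the JSON reply is parsed into a Delivery instance.
delivery = chat.with_response_format(Delivery)
               .ask("Extract: Delivery to 123 Main St on 2025-03-20. Size: 12x8x4.")
puts delivery.address # per the README example; the implementation may expose this as delivery.content.address

# Custom parser: pull a single XML tag out of a later reply.
answer = chat.with_parser(:xml, tag: 'answer')
             .ask("Answer inside <answer></answer> tags: what is 6 * 7?")
             .content
puts answer # => "42"
```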