Merged
34 commits
dff1e85
init implemnation
codelion May 12, 2025
a39a44f
Update spl_plugin.py
codelion May 12, 2025
bfb702a
Update spl_plugin.py
codelion May 12, 2025
02de092
fix stragery selection
codelion May 13, 2025
63f80fb
fix bug
codelion May 13, 2025
e7db603
Update spl_plugin.py
codelion May 13, 2025
07fd890
updates
codelion May 13, 2025
93091ac
refactoring
codelion May 13, 2025
a572f39
Update README.md
codelion May 13, 2025
8e6fee0
remove data folder
codelion May 13, 2025
84e1dd1
Update .gitignore
codelion May 13, 2025
b05577b
Update .gitignore
codelion May 13, 2025
02bdae5
Update eval_optillmbench.py
codelion May 14, 2025
2672735
handle mmlu_pro answers better
codelion May 14, 2025
77285d3
Update eval_optillmbench.py
codelion May 15, 2025
e372bac
Update .gitignore
codelion May 15, 2025
bf77f59
make inference the default mode
codelion May 15, 2025
d444e96
Update eval_optillmbench.py
codelion May 15, 2025
910814e
add strategy selection prompt
codelion May 15, 2025
7840643
Update prompts.py
codelion May 15, 2025
24152b6
fix prompt
codelion May 15, 2025
9431c16
Update prompts.py
codelion May 15, 2025
8d84788
add data
codelion May 15, 2025
acb4cb1
Update main.py
codelion May 16, 2025
7f4a174
Update eval_aime_benchmark.py
codelion May 16, 2025
de03c2c
Update main.py
codelion May 16, 2025
7e2ebf9
fix edge cases
codelion May 16, 2025
6d6f850
fix edge cases
codelion May 16, 2025
daddc4f
Merge branch 'main' into feat-spl-plugin
codelion May 17, 2025
a21170d
Update README.md
codelion May 17, 2025
988f9ff
Update README.md
codelion May 17, 2025
a7eb99e
Update README.md
codelion May 17, 2025
c309f51
add images
codelion May 17, 2025
4b69ae9
Update README.md
codelion May 17, 2025
1 change: 1 addition & 0 deletions README.md
@@ -365,6 +365,7 @@ Check this log file for connection issues, tool execution errors, and other diag

| Plugin | Slug | Description |
| ----------------------- | ------------------ | ---------------------------------------------------------------------------------------------- |
| System Prompt Learning | `spl` | Implements what [Andrej Karpathy called the third paradigm](https://x.com/karpathy/status/1921368644069765486) for LLM learning, enabling the model to acquire problem-solving knowledge and strategies |
| Long-Context Cerebras Planning and Optimization | `longcepo` | Combines planning and divide-and-conquer processing of long documents to enable infinite context |
| MCP Client | `mcp` | Implements the model context protocol (MCP) client, enabling you to use any LLM with any MCP Server |
| Router | `router` | Uses the [optillm-modernbert-large](https://huggingface.co/codelion/optillm-modernbert-large) model to route requests to different approaches based on the user prompt |
14 changes: 10 additions & 4 deletions optillm.py
@@ -594,12 +594,18 @@ def proxy():
# Extract response_format if present
response_format = data.get("response_format", None)

-    # Create request config with all parameters
-    request_config = {
+    # Explicit keys that we are already handling
+    explicit_keys = {'stream', 'messages', 'model', 'n', 'response_format'}
+
+    # Copy the rest into request_config
+    request_config = {k: v for k, v in data.items() if k not in explicit_keys}
+
+    # Add the explicitly handled ones
+    request_config.update({
         "stream": stream,
         "n": n,
-        "response_format": response_format # Add response_format to config
-    }
+        "response_format": response_format, # Add response_format to config
+    })

optillm_approach = data.get('optillm_approach', server_config['approach'])
logger.debug(data)
37 changes: 37 additions & 0 deletions optillm/plugins/spl.py
@@ -0,0 +1,37 @@
"""
System Prompt Learning (SPL) Plugin for OptiLLM

This plugin implements Andrej Karpathy's proposed system prompt learning paradigm,
allowing LLMs to improve their problem-solving capabilities by:
1. Identifying problem types
2. Generating and refining strategies for solving different problems
3. Building a knowledge base of problem-solving techniques
4. Applying these techniques to new instances of similar problems
5. Tracking the success of different strategies to prioritize effective ones

The plugin maintains a database of strategies that evolves over time, making the
LLM incrementally better at solving problems by learning from its experiences.
"""

from typing import Tuple
from optillm.plugins.spl.main import run_spl

# Plugin identifier
SLUG = "spl"

def run(system_prompt: str, initial_query: str, client, model: str, request_config: dict = None) -> Tuple[str, int]:
"""
Plugin entry point for System Prompt Learning.

Args:
system_prompt: The system prompt
initial_query: The user's query
client: The LLM client
model: The model identifier
request_config: Optional request configuration
Can include {'spl_learning': True} to enable learning mode

Returns:
Tuple[str, int]: The LLM response and token count
"""
return run_spl(system_prompt, initial_query, client, model, request_config)
180 changes: 180 additions & 0 deletions optillm/plugins/spl/README.md
@@ -0,0 +1,180 @@
# System Prompt Learning (SPL) Plugin for OptiLLM

This plugin implements Andrej Karpathy's [proposed](https://x.com/karpathy/status/1921368644069765486) "third paradigm" for LLM learning, enabling large language models to learn and improve their problem-solving strategies over time through experience and reflection.

## Introduction: The Evolution of LLM Learning

Large Language Models (LLMs) have traditionally learned in two primary ways:
1. **Pretraining**: Learning facts, patterns, and language from massive text corpora
2. **Finetuning**: Learning behaviors through supervised or reinforcement learning

System Prompt Learning introduces a third paradigm:
3. **Strategy Learning**: The model learns explicit problem-solving strategies through experience, maintains them in a growing knowledge base, and applies them selectively based on problem types

This approach addresses a fundamental limitation of current LLMs—their inability to learn cumulatively from experience. While LLMs can solve individual problems impressively, they typically approach each new problem from scratch rather than building on past successes.

## The SPL Paradigm

System Prompt Learning represents a significant shift in how LLMs approach problem-solving:

- **Experience-Driven Learning**: Rather than relying solely on pretraining or supervised finetuning, SPL enables models to learn from their own problem-solving experiences
- **Strategy Formalization**: The system explicitly generates, evaluates, and refines problem-solving strategies
- **Performance Tracking**: SPL tracks which strategies work well for different problem types, creating a dynamic feedback loop
- **Selective Application**: When faced with a new problem, the system selects the most relevant strategies based on similarity and past performance

This approach mirrors how human experts develop expertise—by accumulating strategies through experience and applying them selectively to new situations.

## Experimental Results

We conducted extensive experiments using the SPL plugin with gemini-2.0-flash-lite on various benchmarks. The learning phase used the OptILLMBench training split (400 instances), while evaluation was performed on the test split (100 instances) and additional popular mathematical benchmarks.

The results demonstrate consistent improvements across all benchmarks:

| Benchmark | Baseline | With SPL | Improvement |
|-----------|----------|----------|-------------|
| OptILLMBench | 61% | 65% | +4% |
| MATH-500 | 85% | 85.6% | +0.6% |
| Arena Auto Hard | 29% | 37.6% | +8.6% |
| AIME24 | 23.33% | 30% | +6.67% |

These results are particularly notable for the challenging Arena Auto Hard and AIME24 benchmarks, where traditional approaches often struggle. The improvements suggest that SPL is especially effective for complex problem-solving tasks that benefit from strategic approaches.

![Performance Improvements with System Prompt Learning](performance-comparison.svg)

*Figure 1: Performance comparison between baseline gemini-2.0-flash-lite and the same model with SPL across multiple mathematical benchmarks.*

## Usage

### Basic Usage

Use the plugin by prefixing your model name with `spl-`:

```
spl-gpt-4o
```
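
As an end-to-end illustration, the sketch below sends a request through a locally running OptiLLM proxy using the standard OpenAI client; the base URL, port, and API key handling are assumptions to adapt to your deployment:

```python
from openai import OpenAI

# Point the standard OpenAI client at a running OptiLLM proxy.
# The base URL and API key handling are assumptions; adjust to your setup.
client = OpenAI(
    api_key="your-provider-api-key",
    base_url="http://localhost:8000/v1",
)

# The "spl-" prefix routes the request through the SPL plugin
# (inference-only mode by default).
response = client.chat.completions.create(
    model="spl-gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "A train travels 120 km in 1.5 hours. What is its average speed?"},
    ],
)
print(response.choices[0].message.content)
```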

### Combining with Other Plugins

SPL can be combined with other plugins using the `&` operator:

```
spl&memory-gpt-4o
```

### Learning Mode

By default, the plugin runs in inference-only mode, which uses existing strategies without creating or modifying them. To enable learning mode, which allows the plugin to create and refine strategies based on usage, add the `spl_learning` parameter to the request config:

```python
client.chat.completions.create(
    model="spl-gpt-4o",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query}
    ],
    extra_body={"spl_learning": True},
)
```

## How It Works

1. **Problem Classification**: The plugin analyzes each query to determine its problem type
2. **Strategy Selection**: It selects relevant strategies from its database based on the problem type and content
3. **System Prompt Augmentation**: Selected strategies (up to MAX_STRATEGIES_FOR_INFERENCE) are added to the system prompt

When learning mode is enabled, the plugin also performs:

4. **Effectiveness Evaluation**: After generating a response, the system evaluates how well each strategy worked
5. **Strategy Creation & Refinement**: The system creates new strategies for unseen problem types and periodically refines existing strategies based on usage

The plugin maintains two separate limits:
- **Storage Limit** (MAX_STRATEGIES_PER_TYPE): Controls how many strategies can be stored in the database per problem type
- **Inference Limit** (MAX_STRATEGIES_FOR_INFERENCE): Controls how many strategies are used during inference for system prompt augmentation

![SPL Learning Workflow](learning-workflow.svg)

*Figure 2: The SPL learning and inference workflow showing how strategies are learned, refined, and applied.*
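
To make the inference-time flow concrete, here is a minimal sketch of strategy selection and system prompt augmentation. The helper functions and strategy fields are illustrative (the fields mirror the example strategy shown later in this README), and only the two constants mirror values from `config.py`; this is not the plugin's actual implementation.

```python
# Illustrative sketch only -- helpers and strategy fields are assumptions,
# modeled on the example strategy shown later in this README.
MAX_STRATEGIES_FOR_INFERENCE = 3       # mirrors config.py
MIN_SUCCESS_RATE_FOR_INFERENCE = 0.4   # mirrors config.py

def success_rate(strategy: dict) -> float:
    """Success rate of a strategy; untried strategies default to 0."""
    attempts = strategy["total_attempts"]
    return strategy["success_count"] / attempts if attempts else 0.0

def augment_system_prompt(system_prompt: str, problem_type: str, strategies: list) -> str:
    """Append the best-performing strategies for this problem type to the system prompt."""
    # Keep strategies for the classified problem type that meet the minimum success rate.
    candidates = [
        s for s in strategies
        if s["problem_type"] == problem_type
        and success_rate(s) >= MIN_SUCCESS_RATE_FOR_INFERENCE
    ]
    # Use at most MAX_STRATEGIES_FOR_INFERENCE strategies, best-performing first.
    selected = sorted(candidates, key=success_rate, reverse=True)[:MAX_STRATEGIES_FOR_INFERENCE]
    if not selected:
        return system_prompt
    strategy_block = "\n\n".join(s["strategy_text"] for s in selected)
    return f"{system_prompt}\n\nProblem-solving strategies that may help:\n\n{strategy_block}"
```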

## Learning Metrics

After training on the OptILLMBench dataset, the system developed a rich knowledge base of strategies:

- **Total queries processed**: 500
- **Strategies created**: 129
- **Strategies refined**: 97
- **Successful resolutions**: 346
- **Strategies merged**: 28

These metrics indicate a healthy learning process with a balance between creation, refinement, and merging of similar strategies.

## Data Storage

Strategies are stored in JSON format in the plugin's `data` directory (`optillm/plugins/spl/data/`):
- `strategies.json`: Contains all learned strategies
- `metrics.json`: Contains performance metrics and usage statistics

## Configuration

The SPL plugin maintains these core files:
- **Strategy Database**: `/optillm/plugins/spl/data/strategies.json`
- **Metrics**: `/optillm/plugins/spl/data/metrics.json`

You can:
1. Backup these files to preserve learned strategies
2. Edit the strategies.json file to manually add or modify strategies
3. Reset the learning by deleting these files (they will be recreated)
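
For example, a small script along these lines can inspect the learned strategies and report per-strategy success rates. It only reads the JSON file described above; the path and the file's top-level layout are assumptions to adjust to your checkout:

```python
import json
from pathlib import Path

# Path relative to the repository root; adjust for your checkout.
strategies_path = Path("optillm/plugins/spl/data/strategies.json")

data = json.loads(strategies_path.read_text())
# The top-level layout of strategies.json is an assumption; adapt the key if your file differs.
strategies = data.get("strategies", data) if isinstance(data, dict) else data

for s in strategies:
    attempts = s.get("total_attempts", 0)
    rate = s.get("success_count", 0) / attempts if attempts else 0.0
    print(f'{s["strategy_id"]}  {s["problem_type"]}: {rate:.1%} over {attempts} attempts')
```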

## Example Strategy

Below is an example of a strategy learned by the system for word problems:

```json
{
"strategy_id": "strategy_3",
"problem_type": "word_problem",
"strategy_text": "**Refined Strategy for Solving Word Problems:**\n\n1. **Understand:**\n * Read the problem carefully (multiple times).\n * Identify the question (what are you trying to find?).\n * List all given information (facts, numbers, units).\n * Clarify ambiguous terms/units.\n\n2. **Organize Information & Identify Unknowns:**\n * Choose an organization method: (e.g., table, diagram, list, drawing).\n * Clearly identify the unknowns (what you need to solve for).\n\n3. **Plan and Translate:**\n * Define *all* variables with units (e.g., `p = number of pennies`, `c = number of compartments`).\n * Identify relationships between knowns and unknowns.\n * Convert units if necessary.\n * Write equations or expressions, including units, that relate the knowns and unknowns.\n * Ensure units are consistent throughout the equations.\n * Outline the solution steps.\n\n4. **Solve:**\n * Show work step-by-step.\n * Track units throughout calculations.\n * Calculate accurately.\n * Solve for the unknowns.\n\n5. **Evaluate and Verify:**\n * Check if the answer is reasonable.\n * Verify the answer.\n\n6. **Summarize:**\n * State the answer with units.",
"success_count": 85,
"total_attempts": 192,
"confidence": 0.425
}
```

This strategy was developed through multiple refinement cycles and has a success rate of 44.3% (85/192). The system continuously updates these metrics as the strategy is applied to new problems.

## Motivations and Broader Impact

### The System Prompt Gap

Most LLM providers like Anthropic (Claude) and OpenAI (GPT) employ elaborate system prompts that encode sophisticated problem-solving strategies. However, the majority of users interact with these models using very basic or empty system prompts, missing out on the benefits of strategic guidance.

SPL bridges this gap by automatically learning and applying effective strategies, democratizing access to the benefits of well-crafted system prompts without requiring expertise in prompt engineering.

### Learning from Experience

Current LLMs are often described as "one-shot learners"—they can solve individual problems but don't accumulate knowledge from these experiences. SPL represents a step toward models that improve through use, similar to how humans develop expertise through practice and reflection.

### Human-Readable Learning

Unlike black-box learning approaches, SPL produces human-readable strategies that can be inspected, understood, and even manually edited. This transparency allows for:
- Understanding how the model approaches different problems
- Identifying potential biases or flaws in reasoning
- Transferring strategies between models or domains

## Benefits

1. **Cumulative Learning**: The LLM improves on specific problem types over time
2. **Explicit Knowledge**: Strategies are human-readable and provide insight into the LLM's reasoning
3. **Efficiency**: Reuses successful approaches rather than solving each problem from scratch
4. **Adaptability**: Different strategies for different problem types
5. **Transparency**: Learning process and outcomes can be inspected and understood

## Conclusion and Future Work

System Prompt Learning represents a promising new direction for enabling LLMs to learn from experience in a transparent and interpretable way. Our experiments demonstrate significant performance improvements across multiple benchmarks, particularly for complex problem-solving tasks.

Future work will focus on:
1. Expanding the range of problem types the system can recognize
2. Improving the strategy refinement process
3. Enabling cross-domain strategy transfer
4. Developing mechanisms for human feedback on strategies
5. Exploring hybrid approaches that combine SPL with other learning paradigms
3 changes: 3 additions & 0 deletions optillm/plugins/spl/__init__.py
@@ -0,0 +1,3 @@
"""
System Prompt Learning (SPL) plugin module initialization.
"""
53 changes: 53 additions & 0 deletions optillm/plugins/spl/config.py
@@ -0,0 +1,53 @@
"""
Configuration settings for the System Prompt Learning (SPL) plugin.
"""

import os
from typing import List

# Plugin identifier
SLUG = "spl"

# Base directory for storing strategy data
PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PLUGIN_DIR, 'data')
STRATEGY_DB_PATH = os.path.join(DATA_DIR, 'strategies.json')
STRATEGY_METRICS_PATH = os.path.join(DATA_DIR, 'metrics.json')

# Default max tokens for reasoning LLMs
DEFAULT_MAX_TOKENS = 4096

# How often to perform maintenance operations (merge, prune)
MAINTENANCE_INTERVAL = 40

# Strategy selection thresholds
STRATEGY_CREATION_THRESHOLD = 0.7 # Higher threshold to avoid creating similar strategies
STRATEGY_MERGING_THRESHOLD = 0.6 # Lower threshold to merge more similar strategies
MIN_SUCCESS_RATE_FOR_INFERENCE = 0.4 # Minimum success rate for a strategy to be used during inference

# Limits for strategy management
MAX_STRATEGIES_PER_TYPE = 10 # Maximum strategies to store in DB per problem type
MAX_STRATEGIES_FOR_INFERENCE = 3 # Maximum strategies to use during inference

# Define valid problem types (used for strict classification)
VALID_PROBLEM_TYPES: List[str] = [
"arithmetic_calculation",
"algebraic_equation",
"statistical_analysis",
"logical_reasoning",
"word_problem",
"coding_problem",
"algorithm_design",
"creative_writing",
"text_summarization",
"information_retrieval",
"planning_task",
"decision_making",
"knowledge_question",
"language_translation",
"sequence_completion",
"general_problem" # Fallback type
]

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
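
As a hedged illustration of how these constants might be consumed, the fragment below sketches a classification fallback that constrains a model-produced label to `VALID_PROBLEM_TYPES`; the helper itself is hypothetical and not part of this PR:

```python
# Hypothetical consumer of the constants above (illustration only, not part of this PR).
from optillm.plugins.spl.config import VALID_PROBLEM_TYPES

def normalize_problem_type(raw_label: str) -> str:
    """Map a model-produced label onto the closed set of valid problem types."""
    label = raw_label.strip().lower().replace(" ", "_").replace("-", "_")
    # Unrecognized labels fall back to the catch-all type defined in config.py.
    return label if label in VALID_PROBLEM_TYPES else "general_problem"
```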
10 changes: 10 additions & 0 deletions optillm/plugins/spl/data/metrics.json
@@ -0,0 +1,10 @@
{
"total_queries": 500,
"strategy_applications": 1297,
"strategies_created": 129,
"strategies_refined": 97,
"successful_resolutions": 346,
"last_strategy_id": 129,
"reasoning_examples_collected": 0,
"strategies_merged": 28
}