Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,15 @@ python run.py --engines redis-default-simple --datasets random-100
python run.py --engines redis-default-simple --datasets glove-25-angular
python run.py --engines "*-m-16-*" --datasets "glove-*"

# Using custom engine configurations from a JSON file
python run.py --engines-file custom_engines.json --datasets glove-25-angular

# Get information about available engines (with pattern matching)
python run.py --engines "*redis*" --describe engines --verbose

# Get information about engines from a custom file
python run.py --engines-file custom_engines.json --describe engines --verbose

# Docker usage (recommended)
docker run --rm -v $(pwd)/results:/app/results --network=host \
redis/vector-db-benchmark:latest \
Expand All @@ -237,6 +246,62 @@ python run.py --help
The command allows you to specify wildcards for engines and datasets.
Results of the benchmarks are stored in the `./results/` directory.

## Using Custom Engine Configurations

The benchmark tool supports two ways to specify which engine configurations to use:

### 1. Pattern Matching (Default)
Use the `--engines` flag with wildcard patterns to select configurations from the `experiments/configurations/` directory:

```bash
python run.py --engines "*redis*" --datasets glove-25-angular
python run.py --engines "qdrant-m-*" --datasets random-100
```

### 2. Custom Configuration File
Use the `--engines-file` flag to specify a JSON file containing custom engine configurations:

```bash
python run.py --engines-file my_engines.json --datasets glove-25-angular
```

The JSON file should contain an array of engine configuration objects. Each configuration must have a `name` field and follow the same structure as configurations in `experiments/configurations/`:

```json
[
{
"name": "my-custom-redis-config",
"engine": "redis",
"connection_params": {},
"collection_params": {
"algorithm": "hnsw",
"data_type": "FLOAT32",
"hnsw_config": {
"M": 16,
"DISTANCE_METRIC": "L2",
"EF_CONSTRUCTION": 200
}
},
"search_params": [
{
"parallel": 1,
"top": 10,
"search_params": {
"ef": 100,
"data_type": "FLOAT32"
}
}
],
"upload_params": {
"parallel": 16,
"data_type": "FLOAT32"
}
}
]
```

**Note:** You cannot use both `--engines` and `--engines-file` at the same time.

## How to update benchmark parameters?

Each engine has a configuration file, which is used to define the parameters for the benchmark.
Expand Down
126 changes: 101 additions & 25 deletions run.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import fnmatch
import json
import os
import traceback
import warnings
from typing import List
Expand All @@ -17,9 +19,54 @@
app = typer.Typer()


def load_engines(engines: List[str], engines_file: str = None) -> dict:
    """Load engine configurations from a JSON file or by name-pattern matching.

    Args:
        engines: fnmatch-style patterns matched against configuration names.
            The value ``["*"]`` is treated as "no explicit selection".
        engines_file: Optional path to a JSON file containing a list of
            engine configuration objects, each with a ``name`` field.

    Returns:
        A dict mapping configuration name to configuration.

    Raises:
        typer.Exit: With exit code 1 when both selection mechanisms are used,
            or when the engines file is missing, unreadable, not valid JSON,
            or not a list of objects that each carry a ``name``.
    """
    # The two selection mechanisms are mutually exclusive.
    if engines != ["*"] and engines_file is not None:
        typer.echo("Error: Cannot use both --engines and --engines-file at the same time.", err=True)
        raise typer.Exit(1)

    if engines_file is None:
        # Pattern matching against the configurations known to the project.
        all_engines = read_engine_configs()
        return {
            name: config
            for name, config in all_engines.items()
            if any(fnmatch.fnmatch(name, engine) for engine in engines)
        }

    if not os.path.exists(engines_file):
        typer.echo(f"Error: Engines file '{engines_file}' not found.", err=True)
        raise typer.Exit(1)

    # Keep the try body limited to reading/parsing. typer.Exit is an ordinary
    # Exception subclass, so raising it inside a block guarded by
    # `except Exception` would be caught and reported a second time.
    try:
        with open(engines_file, "r", encoding="utf-8") as f:
            engines_from_file = json.load(f)
    except json.JSONDecodeError as e:
        typer.echo(f"Error: Invalid JSON in engines file '{engines_file}': {e}", err=True)
        raise typer.Exit(1)
    except (OSError, UnicodeDecodeError) as e:
        typer.echo(f"Error reading engines file '{engines_file}': {e}", err=True)
        raise typer.Exit(1)

    # Iterating a JSON object would yield its keys (strings), turning the
    # 'name' check below into a substring test, so reject it explicitly.
    if not isinstance(engines_from_file, list):
        typer.echo(
            f"Error: Engines file '{engines_file}' must contain a JSON array of engine configurations.",
            err=True,
        )
        raise typer.Exit(1)

    # Convert the list of engine configs to a dictionary keyed by name.
    # A later entry with a duplicate name replaces the earlier one.
    selected_engines = {}
    for config in engines_from_file:
        if not isinstance(config, dict):
            typer.echo(f"Error: Engine configuration in {engines_file} must be a JSON object", err=True)
            raise typer.Exit(1)
        if 'name' not in config:
            typer.echo(f"Error: Engine configuration missing 'name' field in {engines_file}", err=True)
            raise typer.Exit(1)
        selected_engines[config['name']] = config

    return selected_engines


@app.command()
def run(
engines: List[str] = typer.Option(["*"]),
engines_file: str = typer.Option(None, help="Path to JSON file containing engine configurations to use instead of searching by pattern"),
datasets: List[str] = typer.Option(["*"]),
parallels: List[int] = typer.Option([]),
host: str = "localhost",
Expand All @@ -36,8 +83,14 @@ def run(
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
):
"""
Example:
Examples:
# Use pattern matching to select engines (original behavior)
python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*

# Use engines from a specific JSON file
python3 run.py --engines-file my_engines.json --datasets glove-*

# Describe available options
python3 run.py --describe datasets
python3 run.py --describe engines --verbose
"""
Expand All @@ -47,20 +100,22 @@ def run(
describe_datasets(datasets[0] if datasets != ["*"] else "*", verbose)
return
elif describe.lower() == "engines":
describe_engines(engines[0] if engines != ["*"] else "*", verbose)
# Load engines using same logic as main function
selected_engines = load_engines(engines, engines_file)
# For describe engines, we'll pass all loaded engines or filter by pattern
if engines_file is not None:
# When using engines_file, show all engines from the file
describe_engines_with_configs(selected_engines, "*", verbose)
else:
# When using pattern matching, use the pattern
describe_engines_with_configs(selected_engines, engines[0] if engines != ["*"] else "*", verbose)
return
else:
typer.echo(f"Error: Unknown describe target '{describe}'. Use 'datasets' or 'engines'.", err=True)
raise typer.Exit(1)

all_engines = read_engine_configs()
all_datasets = read_dataset_config()

selected_engines = {
name: config
for name, config in all_engines.items()
if any(fnmatch.fnmatch(name, engine) for engine in engines)
}
selected_engines = load_engines(engines, engines_file)

selected_datasets = {
name: config
Expand Down Expand Up @@ -263,18 +318,12 @@ def get_sort_key(item):
typer.echo("\nUse --verbose for detailed information")


def describe_engines(filter_pattern: str = "*", verbose: bool = False):
"""Display information about available engines."""
try:
all_engines = read_engine_configs()
except Exception as e:
typer.echo(f"Error reading engine configuration: {e}", err=True)
raise typer.Exit(1)

def describe_engines_with_configs(engines_dict: dict, filter_pattern: str = "*", verbose: bool = False):
"""Display information about engines from provided configurations."""
# Filter engines
filtered_engines = {
name: config
for name, config in all_engines.items()
for name, config in engines_dict.items()
if fnmatch.fnmatch(name, filter_pattern)
}

Expand All @@ -296,11 +345,23 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
if 'search_params' in config:
search_params = config['search_params']
typer.echo(f" Search Params:")
for param, values in search_params.items():
if isinstance(values, list):
typer.echo(f" {param}: {values}")
else:
typer.echo(f" {param}: {values}")
if isinstance(search_params, list):
for i, param_config in enumerate(search_params):
typer.echo(f" Config {i+1}:")
for param, value in param_config.items():
if isinstance(value, dict):
typer.echo(f" {param}:")
for subparam, subvalue in value.items():
typer.echo(f" {subparam}: {subvalue}")
else:
typer.echo(f" {param}: {value}")
else:
# Legacy format - dict
for param, values in search_params.items():
if isinstance(values, list):
typer.echo(f" {param}: {values}")
else:
typer.echo(f" {param}: {values}")
if 'upload_params' in config:
upload_params = config['upload_params']
typer.echo(f" Upload Params:")
Expand All @@ -313,12 +374,27 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
for name, config in sorted(filtered_engines.items()):
engine_type = config.get('engine', 'N/A')
module = config.get('module', 'N/A')
typer.echo(f"{name:<40} {engine_type:<15} {module:<25}")
display_name = name[:37] + "..." if len(name) > 40 else name
display_engine = engine_type[:12] + "..." if len(engine_type) > 15 else engine_type
display_module = module[:22] + "..." if len(module) > 25 else module
typer.echo(f"{display_name:<40} {display_engine:<15} {display_module:<25}")

typer.echo(f"\nTotal: {len(filtered_engines)} engines")
if filter_pattern != "*":
typer.echo(f"Filter: '{filter_pattern}'")
typer.echo("\nUse --verbose for detailed information")
if not verbose:
typer.echo("\nUse --verbose for detailed information")


def describe_engines(filter_pattern: str = "*", verbose: bool = False):
    """Describe engines obtained through the default configuration loader.

    Reads the engine configurations with ``read_engine_configs`` and hands
    them to ``describe_engines_with_configs`` together with the filter
    pattern and verbosity flag. If reading fails, the error is echoed to
    stderr and the command exits with code 1.
    """
    try:
        loaded_configs = read_engine_configs()
    except Exception as e:
        typer.echo(f"Error reading engine configuration: {e}", err=True)
        raise typer.Exit(1)
    else:
        describe_engines_with_configs(
            engines_dict=loaded_configs,
            filter_pattern=filter_pattern,
            verbose=verbose,
        )


if __name__ == "__main__":
Expand Down