From 70c0545b87098eb5c42ce43d7565d11579da6d93 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Wed, 23 Apr 2025 06:59:49 +0800 Subject: [PATCH] bump version for new release --- README.md | 50 ++++++++++++++++++++++----------------------- optillm/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 78017928..3e86040c 100644 --- a/README.md +++ b/README.md @@ -467,31 +467,6 @@ Authorization: Bearer your_secret_api_key ## SOTA results on benchmarks with optillm -### CePO on math and code benchmarks (Mar 2025) - -| Method | Math-L5 | MMLU-Pro (Math) | CRUX | LiveCodeBench (pass@1) | Simple QA | -| -----------------------------: | :-----: | :-------------: | :----: | :--------------------: | :-------: | -| Llama 3.3 70B | 51.0 | 78.6 | 72.6 | 27.1 | 20.9 | -| Llama 3.1 405B | 49.8 | 79.2 | 73.0 | 31.8 | 13.5 | -| CePO (using Llama 3.3 70B) | 69.6 | 84.8 | 80.1 | 31.9 | **22.6** | -| QwQ 32B | 61.4 | 90.8 | 82.5 | 44.3 | 7.8 | -| CePO (using QwQ 32B) | 88.1 | **92.0** | 86.3 | **51.5** | 8.2 | -| DeepSeek R1 Llama | 83.1 | 82.0 | 84.0 | 47.3 | 14.6 | -| CePO (using DeepSeek R1 Llama) |**90.2** | 84.0 |**89.4**| 47.2 | 15.5 | - -### coc-claude-3-5-sonnet-20241022 on AIME 2024 pass@1 (Nov 2024) - -| Model | Score | -|-------|-----:| -| o1-mini | 56.67 | -| coc-claude-3-5-sonnet-20241022 | 46.67 | -| coc-gemini/gemini-exp-1121 | 46.67 | -| o1-preview | 40.00 | -| gemini-exp-1114 | 36.67 | -| claude-3-5-sonnet-20241022 | 20.00 | -| gemini-1.5-pro-002 | 20.00 | -| gemini-1.5-flash-002 | 16.67 | - ### LongCePO on LongBench v2 (Apr 2025) | Model¹ | Context window | Short samples (up to 32K words) | Medium samples (32–128K words) | @@ -518,6 +493,31 @@ Authorization: Bearer your_secret_api_key ¹ Numbers in parentheses for LongCePO indicate accuracy of majority voting from 5 runs. +### CePO on math and code benchmarks (Mar 2025) + +| Method | Math-L5 | MMLU-Pro (Math) | CRUX | LiveCodeBench (pass@1) | Simple QA | +| -----------------------------: | :-----: | :-------------: | :----: | :--------------------: | :-------: | +| Llama 3.3 70B | 51.0 | 78.6 | 72.6 | 27.1 | 20.9 | +| Llama 3.1 405B | 49.8 | 79.2 | 73.0 | 31.8 | 13.5 | +| CePO (using Llama 3.3 70B) | 69.6 | 84.8 | 80.1 | 31.9 | **22.6** | +| QwQ 32B | 61.4 | 90.8 | 82.5 | 44.3 | 7.8 | +| CePO (using QwQ 32B) | 88.1 | **92.0** | 86.3 | **51.5** | 8.2 | +| DeepSeek R1 Llama | 83.1 | 82.0 | 84.0 | 47.3 | 14.6 | +| CePO (using DeepSeek R1 Llama) |**90.2** | 84.0 |**89.4**| 47.2 | 15.5 | + +### coc-claude-3-5-sonnet-20241022 on AIME 2024 pass@1 (Nov 2024) + +| Model | Score | +|-------|-----:| +| o1-mini | 56.67 | +| coc-claude-3-5-sonnet-20241022 | 46.67 | +| coc-gemini/gemini-exp-1121 | 46.67 | +| o1-preview | 40.00 | +| gemini-exp-1114 | 36.67 | +| claude-3-5-sonnet-20241022 | 20.00 | +| gemini-1.5-pro-002 | 20.00 | +| gemini-1.5-flash-002 | 16.67 | + ### readurls&memory-gpt-4o-mini on Google FRAMES Benchmark (Oct 2024) | Model | Accuracy | | ----- | -------- | diff --git a/optillm/__init__.py b/optillm/__init__.py index 302ab204..8fdde599 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -2,7 +2,7 @@ import os # Version information -__version__ = "0.1.10" +__version__ = "0.1.11" # Get the path to the root optillm.py spec = util.spec_from_file_location( diff --git a/setup.py b/setup.py index ab029608..9c521910 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="optillm", - version="0.1.10", + version="0.1.11", packages=find_packages(include=['optillm', 'optillm.*']), # This ensures all subpackages are included py_modules=['optillm'], package_data={