26 changes: 26 additions & 0 deletions .github/workflows/publish.yml
@@ -50,6 +50,32 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata for proxy_only image
+      - name: Extract metadata for proxy_only Docker
+        id: meta-proxy
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          flavor: |
+            suffix=-slim
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            latest
+
+      # Build and push proxy image
+      - name: Build and push proxy_only Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.proxy_only
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.meta-proxy.outputs.tags }}
+          labels: ${{ steps.meta-proxy.outputs.labels }}
+          cache-from: type=gha,scope=proxy
+          cache-to: type=gha,scope=proxy,mode=max
+
       - name: Extract metadata for Docker
         id: meta
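
Note on the resulting tags: for a v1.2.3 release, the semver patterns above should yield 1.2.3-slim and 1.2-slim for the proxy image. Whether the bare latest entry also receives the -slim suffix depends on docker/metadata-action's onlatest flavor option, which defaults to false; if it does not, the slim build could publish a plain latest tag that collides with the main image's. If that is a concern, the explicit form type=raw,value=latest together with onlatest=true in the flavor block is worth verifying.
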
55 changes: 55 additions & 0 deletions Dockerfile.proxy_only
@@ -0,0 +1,55 @@
# Build stage
FROM python:3.12-slim AS builder

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
python3-dev \
gcc \
g++ \
&& rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first to leverage Docker cache
COPY requirements_proxy_only.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements_proxy_only.txt

# Final stage
FROM python:3.12-slim

# Define the port build argument with a default value; it must be declared
# in this stage (ARG values do not carry across build stages) and is
# surfaced to the application as a runtime environment variable
ARG PORT=8000
ENV OPTILLM_PORT=$PORT

# Install curl for the healthcheck
RUN apt-get update && apt-get install -y --no-install-recommends \
curl && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy installed dependencies from builder stage
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY . .

# Create a non-root user and switch to it
RUN useradd -m appuser
USER appuser

# Set environment variables
ENV PYTHONUNBUFFERED=1

# Use the ARG in EXPOSE
EXPOSE ${PORT}

# Run the application
ENTRYPOINT ["python", "optillm.py"]
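
A minimal local smoke test for the new image (a sketch; optillm-proxy:local is a hypothetical tag, and any credentials the proxy needs at runtime would be passed with -e):

    docker build -f Dockerfile.proxy_only --build-arg PORT=8000 -t optillm-proxy:local .
    docker run --rm -p 8000:8000 optillm-proxy:local

Because both EXPOSE and OPTILLM_PORT derive from the PORT build argument, overriding it (e.g. --build-arg PORT=9000) keeps the exposed port and the application's port setting in sync.
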
9 changes: 5 additions & 4 deletions optillm.py
@@ -158,7 +158,7 @@ def load_plugins():
     package_plugin_dir = os.path.join(os.path.dirname(optillm.__file__), 'plugins')
 
     # Get local project plugins directory
-    current_dir = os.getcwd()
+    current_dir = os.getcwd() if server_config.get("plugins_dir", "") == "" else server_config["plugins_dir"]
     local_plugin_dir = os.path.join(current_dir, 'optillm', 'plugins')
 
     plugin_dirs = []
@@ -664,7 +664,8 @@ def parse_args():
         ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
         ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
         ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
-        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface")
+        ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
+        ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"),
     ]
 
     for arg, env, type_, default, help_text, *extra in args_env:
@@ -704,11 +705,11 @@ def main():
     global server_config
     # Call this function at the start of main()
     args = parse_args()
-    load_plugins()
-
     # Update server_config with all argument values
     server_config.update(vars(args))
 
+    load_plugins()
+
     port = server_config['port']
 
     # Set logging level from user request
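
Taken together, these changes make the plugins directory configurable via --plugins-dir or OPTILLM_PLUGINS_DIR, and load_plugins() now runs after server_config.update(vars(args)) because the new lookup reads server_config["plugins_dir"]; calling it earlier would always fall back to the working directory. A usage sketch with the flag and variable defined above:

    python optillm.py --plugins-dir /opt/optillm-plugins
    OPTILLM_PLUGINS_DIR=/opt/optillm-plugins python optillm.py

One caveat: load_plugins() still joins 'optillm/plugins' onto the chosen directory, so with the examples above the plugins appear to be expected under /opt/optillm-plugins/optillm/plugins/.
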
3 changes: 2 additions & 1 deletion optillm/plugins/coc_plugin.py
@@ -104,14 +104,15 @@ def sanitize_code(code: str) -> str:
             safe_lines.append(line)
 
     safe_code = '\n'.join(safe_lines)
+    safe_code = safe_code.replace('\n', '\n    ')
 
     # Add safety wrapper
     wrapper = f"""
 {imports}
 def safe_execute():
     import numpy as np  # Always allow numpy
-    {safe_code.replace('\n', '\n    ')}
+    {safe_code}
     return answer if 'answer' in locals() else None
 result = safe_execute()
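
The likely motivation here is compatibility: before Python 3.12 (PEP 701), an f-string expression may not contain a backslash, so the inline replace was a SyntaxError on older interpreters. A minimal illustration of the pattern, assuming nothing beyond what the diff shows:

    code = "x = 1\nanswer = x + 1"
    indented = code.replace('\n', '\n    ')  # hoisted out of the f-string
    wrapper = f"def safe_execute():\n    {indented}\n"
    # writing f"{code.replace('\n', '\n    ')}" inline fails to parse before 3.12
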
19 changes: 19 additions & 0 deletions requirements_proxy_only.txt
@@ -0,0 +1,19 @@
numpy
networkx
openai
z3-solver
aiohttp
flask
azure.identity
scikit-learn
litellm
requests
beautifulsoup4
lxml
presidio_analyzer
presidio_anonymizer
nbformat
nbconvert
ipython
ipykernel
gradio
18 changes: 11 additions & 7 deletions scripts/eval_aime_benchmark.py
@@ -15,7 +15,7 @@
 logger = logging.getLogger(__name__)
 
 # Initialize OpenAI client
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="http://localhost:8000/v1")
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://ot7nh9nqf4l7b43s.us-east-1.aws.endpoints.huggingface.cloud/v1/")
 
 SYSTEM_PROMPT = '''You are solving AIME (American Invitational Mathematics Examination) problems.
 
@@ -241,18 +241,21 @@ def analyze_results(results: List[Dict], n: int):
     print("---")
 
 def main(model: str, n_attempts: int):
-    """Main evaluation function."""
+    """Main evaluation function that handles gaps in processed indexes."""
     os.makedirs("results", exist_ok=True)
 
     # Include n_attempts in filename to keep separate results for different n values
     results_file = f"evaluation_results_{model.replace('/', '_')}_pass_at_{n_attempts}.json"
 
     dataset = load_2024_dataset()
     existing_results = load_existing_results(results_file)
-    last_processed_index = get_last_processed_index(existing_results)
-
-    for idx, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
-        if idx <= last_processed_index:
+    # Create a set of already processed indexes for efficient lookup
+    processed_indexes = {result['index'] for result in existing_results}
+
+    for _, item in enumerate(tqdm(dataset, desc="Evaluating problems")):
+        id = int(item['id'])
+        # Skip if this index has already been processed
+        if id in processed_indexes:
             continue
 
         problem_text = item['problem']
@@ -263,7 +266,7 @@ def main(model: str, n_attempts: int):
         is_correct, first_correct = evaluate_pass_at_n(attempts, correct_answer)
 
         result = {
-            "index": idx,
+            "index": id,
             "problem": problem_text,
             "attempts": attempts,
             "correct_answer": correct_answer,
@@ -275,6 +278,7 @@ def main(model: str, n_attempts: int):
     final_results = load_existing_results(results_file)
     analyze_results(final_results, n_attempts)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Evaluate LLM performance on AIME 2024 problems")
     parser.add_argument("--model", type=str, required=True, help="OpenAI model to use (e.g., gpt-4, gpt-3.5-turbo)")
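
The reworked loop keys results by the dataset's own id instead of the enumeration position, so a rerun skips every already-recorded problem even when the results file has gaps. The kernel of the change, condensed:

    processed_indexes = {result['index'] for result in existing_results}
    for item in dataset:
        if int(item['id']) in processed_indexes:
            continue  # already evaluated in an earlier run
        # ... evaluate and save the result with "index": int(item['id'])

Two small nits a follow-up could address: the enumerate() in for _, item in enumerate(...) is now redundant, and id shadows the Python builtin (problem_id would be safer).
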
6 changes: 3 additions & 3 deletions scripts/eval_arena_hard_auto_rtc.py
@@ -17,7 +17,8 @@
 logger = logging.getLogger(__name__)
 
 # Initialize OpenAI client (only used for chat completions now)
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+client = OpenAI(base_url="http://localhost:8000/v1", api_key=os.environ.get("OPENAI_API_KEY"))
+# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 
 @dataclass
 class RTCConfig:
@@ -58,8 +59,7 @@ def get_llm_response(messages: List[Dict], model: str) -> Optional[str]:
         response = client.chat.completions.create(
             model=model,
             messages=messages,
-            temperature=0.7,
-            max_tokens=1000
+            max_tokens=4096
         )
         return response.choices[0].message.content.strip()
     except Exception as e:
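
Pointing the client at http://localhost:8000/v1 routes this benchmark through the local optillm proxy, with the commented-out line preserving the direct-API variant. Dropping temperature=0.7 means the API default (typically 1.0) now applies, and raising max_tokens to 4096 leaves room for longer completions.
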
1 change: 0 additions & 1 deletion scripts/requirements.txt
@@ -1,4 +1,3 @@
datasets
accelerate
huggingface_hub
git+https://github.com/huggingface/transformers.git
4 changes: 2 additions & 2 deletions setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="optillm",
-    version="0.0.24",
+    version="0.0.25",
     packages=find_packages(),
     py_modules=['optillm'],
     package_data={
@@ -33,7 +33,7 @@
         "ipykernel",
         "peft",
         "bitsandbytes",
-        "gradio",
+        "gradio"
     ],
     entry_points={
         'console_scripts': [