Skip to content
105 changes: 105 additions & 0 deletions .github/workflows/claude-code-test-harness.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Runs the claude-code-test-harness to gather metrics on performance with Claude Code.
# The job builds the mcp-optimizer image locally and starts a ToolHive server
# before running the harness; harness and server logs are uploaded as artifacts.
name: Claude Code Test Harness

# Triggers: reusable-workflow calls (workflow_call) and manual runs (workflow_dispatch).
on:
workflow_call:
workflow_dispatch:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to call this workflow from code-checks.yml.

pull_request:
# TODO: remove this trigger once the workflow is well tested
# Until then, only `synchronize` events (new commits pushed to an open pull
# request) start this workflow.
types: [synchronize]

# Least privilege: the workflow token is limited to reading repository contents.
permissions:
  contents: read

jobs:
  # Single job: build mcp-optimizer, start ToolHive, run the harness, upload logs.
  claude-code-test-harness:
    runs-on: ubuntu-latest
    name: Claude Code Test Harness
    # Cancel the job if it runs longer than 20 minutes.
    timeout-minutes: 20

    steps:
# Pull the mcp-optimizer sources; they are built into an image for the test harness
- name: Checkout mcp-optimizer code
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
  with:
    # No step in this workflow pushes or fetches with git afterwards, so do not
    # leave the token in .git/config: the checkout directory is used as the
    # Docker build context (context: .).
    persist-credentials: false

# Provision Docker Buildx ahead of the image build
- uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435  # v3.11.1
  name: Set up Docker Buildx

# Build the image from the repository root and tag it mcp-optimizer:latest
- name: Build mcp-optimizer Docker image
  uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83  # v6.18.0
  with:
    context: .
    tags: mcp-optimizer:latest
    platforms: linux/amd64
    # Keep the image on the runner: load it into the local Docker daemon, never push
    load: true
    push: false
    # Reads the GitHub Actions cache; this step sets no cache-to
    cache-from: type=gha

# Tooling required by the later steps: uv (with Python 3.13), ToolHive, Claude CLI
- name: Install uv
  uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41  # v7.1.2
  with:
    # Quoted so the version stays a string rather than a float
    python-version: '3.13'
    enable-cache: true

# Installs ToolHive; the `thv` commands in later steps depend on it
- uses: StacklokLabs/toolhive-actions/install@6a095f99aa2fd6cd92cf0bb94bdf509b99820c06  # v0.0.3
  name: Install ToolHive

# Global npm install of the Claude Code CLI
# NOTE(review): the package version is not pinned, unlike the SHA-pinned actions
# in this workflow -- consider pinning for reproducible runs.
- name: Install Claude CLI
  run: npm install -g @anthropic-ai/claude-code

# Start the ToolHive server in the background and wait until its API answers
- name: Run ToolHive server
  run: |
    # Bind to all interfaces so the API is reachable on the Docker bridge IP too
    thv serve --host 0.0.0.0 --port 9090 &
    echo "Waiting for ToolHive server to start..."
    # Poll the version endpoint instead of sleeping a fixed 5 seconds: returns
    # as soon as the server is up and tolerates slow runners (up to ~30s).
    for attempt in $(seq 1 30); do
      if curl --silent --fail --output /dev/null http://localhost:9090/api/v1beta/version; then
        echo "ToolHive API responded after ${attempt} attempt(s)"
        break
      fi
      sleep 1
    done
    # Diagnostics stay best-effort: a failed probe is reported, not fatal
    echo "Checking ToolHive API is accessible..."
    curl -v http://localhost:9090/api/v1beta/version || echo "Failed to connect to ToolHive API"
    echo "Checking from Docker bridge IP..."
    curl -v http://172.17.0.1:9090/api/v1beta/version || echo "Failed to connect via Docker bridge IP"

# Pull the claude-code-harness sources into ./claude-code-harness
- name: Checkout claude-code-harness code
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
  with:
    repository: StacklokLabs/claude-code-harness
    # NOTE(review): this ref looks like a branch name, so it can move -- confirm,
    # and consider pinning a commit SHA
    ref: wait-for-running_2025-10-30
    # PAT with read-only access to the claude-code-harness repo
    token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
    # The PAT is only needed for this fetch; do not leave it in the checkout's
    # .git/config, since the harness later runs from that directory
    persist-credentials: false
    path: claude-code-harness

# Run the test harness, then capture mcp-optimizer server logs and the thv server list
- name: Run Claude Code Test Harness
  # A harness failure must not fail the job; the upload steps still run afterwards
  continue-on-error: true
  working-directory: claude-code-harness
  env:
    # Passed through the environment rather than interpolated into the script text
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  run: |
    # The default shell is `bash -e`, so an unguarded harness failure would abort
    # the script before the diagnostics below are collected. Record the exit
    # status, gather the logs, then report the status at the end.
    harness_status=0
    uv run python -m src ./configs/test/gha.json \
      --setup ./configs/test/gha_server_setup.json \
      --persist-servers || harness_status=$?
    thv logs mcp-optimizer > ./mcp-optimizer-server.log || echo "Failed to get mcp-optimizer logs"
    thv list --format json > ./thv-list.json || echo "Failed to list thv servers"
    exit "${harness_status}"

# Publish the harness run logs (JSONL files) as a workflow artifact
- name: Upload Test Harness Run Logs
  uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4  # v5.0.0
  with:
    path: claude-code-harness/logs/*.jsonl
    name: claude-code-harness-logs
    # Missing files produce a warning, not a failure
    if-no-files-found: warn

# Publish the mcp-optimizer server log captured by the harness step
- name: Upload mcp-optimizer Server Logs
  uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4  # v5.0.0
  with:
    path: ./claude-code-harness/mcp-optimizer-server.log
    name: mcp-optimizer-server-logs
    # Missing file produces a warning, not a failure
    if-no-files-found: warn

# Publish the JSON output of `thv list` captured by the harness step
- name: Upload thv list output
  uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4  # v5.0.0
  with:
    path: ./claude-code-harness/thv-list.json
    name: thv-list
    # Missing file produces a warning, not a failure
    if-no-files-found: warn