diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
new file mode 100644
index 0000000..bc59a8b
--- /dev/null
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -0,0 +1,141 @@
+# Runs the claude-code-test-harness to gather metrics on performance with Claude Code.
+#
+# Flow: build the mcp-optimizer image from the checked-out repository, start a
+# local ToolHive API server, check out claude-code-harness, run the harness, then
+# upload the harness logs, the mcp-optimizer server log and the `thv list` output.
+#
+# Secrets used: GHA_CLAUDE_CODE_HARNESS_READ_PAT and ANTHROPIC_API_KEY. Workflows
+# that invoke this one via workflow_call must pass them (e.g. secrets: inherit).
+name: Claude Code Test Harness
+
+on:
+  workflow_call:
+  workflow_dispatch:
+  pull_request:
+    # TODO: remove this trigger once the workflow is well tested
+    types: [synchronize]
+
+permissions:
+  contents: read
+
+jobs:
+  claude-code-test-harness:
+    name: Claude Code Test Harness
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      # pull and build mcp-optimizer for deployment in the test harness
+      - name: Checkout mcp-optimizer code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          # Keep the token out of .git/config: this checkout is the Docker build
+          # context and no later step in this job authenticates against the repo.
+          persist-credentials: false
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
+
+      - name: Build mcp-optimizer Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
+        with:
+          context: .
+          platforms: linux/amd64
+          # Build only: nothing is pushed; the image is loaded into the local Docker daemon.
+          push: false
+          load: true
+          # NOTE(review): there is no matching `cache-to`, so this job only reads
+          # the GHA cache - presumably another workflow populates it; confirm.
+          cache-from: type=gha
+          tags: mcp-optimizer:latest
+
+      # install dependencies
+      - name: Install uv
+        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        with:
+          enable-cache: true
+          python-version: '3.13'
+
+      - name: Install ToolHive
+        uses: StacklokLabs/toolhive-actions/install@6a095f99aa2fd6cd92cf0bb94bdf509b99820c06 # v0.0.3
+
+      - name: Install Claude CLI
+        run: |
+          npm install -g @anthropic-ai/claude-code
+
+      # Start toolhive server
+      - name: Run ToolHive server
+        run: |
+          # Bind to all interfaces; the Docker bridge IP check below depends on it.
+          thv serve --host 0.0.0.0 --port 9090 &
+          echo "Waiting for ToolHive server to start..."
+          # Poll for readiness (at most 30 attempts) instead of a fixed sleep.
+          for attempt in $(seq 1 30); do
+            if curl -fs --max-time 2 -o /dev/null http://localhost:9090/api/v1beta/version; then
+              echo "ToolHive API responded on attempt ${attempt}"
+              break
+            fi
+            sleep 1
+          done
+          # Diagnostics only: a failed check is reported but does not fail the step.
+          echo "Checking ToolHive API is accessible..."
+          curl -v http://localhost:9090/api/v1beta/version || echo "Failed to connect to ToolHive API"
+          echo "Checking from Docker bridge IP..."
+          curl -v http://172.17.0.1:9090/api/v1beta/version || echo "Failed to connect via Docker bridge IP"
+
+      # pull the claude-code-harness code
+      - name: Checkout claude-code-harness code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: StacklokLabs/claude-code-harness
+          ref: wait-for-running_2025-10-30
+          # PAT with read-only access to the claude-code-harness repo
+          token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
+          path: claude-code-harness
+          # Do not leave the PAT in claude-code-harness/.git/config, where the
+          # processes started by the harness step could read it.
+          persist-credentials: false
+
+      # Run the test harness, capture mcp-optimizer server logs
+      - name: Run Claude Code Test Harness
+        working-directory: claude-code-harness
+        env:
+          # Supplied through env rather than interpolated into the script, so the
+          # secret value is never parsed as shell source.
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: |
+          # The default shell runs with `-e`. Record the harness exit code instead
+          # of aborting, so the diagnostics below are still collected when the
+          # harness fails, then report the recorded code as the step result.
+          harness_status=0
+          uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json --persist-servers || harness_status=$?
+          thv logs mcp-optimizer > ./mcp-optimizer-server.log || echo "Failed to get mcp-optimizer logs"
+          thv list --format json > ./thv-list.json || echo "Failed to list thv servers"
+          exit "${harness_status}"
+        # A harness failure is shown on this step but does not fail the job, so
+        # the artifact uploads below still run.
+        continue-on-error: true
+
+      # Upload the results as an artifact
+      - name: Upload Test Harness Run Logs
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: claude-code-harness-logs
+          path: claude-code-harness/logs/*.jsonl
+          if-no-files-found: warn
+
+      # upload mcp-optimizer server logs
+      - name: Upload mcp-optimizer Server Logs
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: mcp-optimizer-server-logs
+          path: ./claude-code-harness/mcp-optimizer-server.log
+          if-no-files-found: warn
+
+      # upload thv list output
+      - name: Upload thv list output
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: thv-list
+          path: ./claude-code-harness/thv-list.json
+          if-no-files-found: warn