From 4dfce2ff31256fa9e194742eb4851dff0f4bc406 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 09:38:37 -0700
Subject: [PATCH 01/11] Create GHA to run the claude-code-harness

Creates a github action to run the [claude-code-harness](https://github.com/StacklokLabs/claude-code-harness) against the mcp-optimizer.
---
 .../workflows/claude-code-test-harness.yml    | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 .github/workflows/claude-code-test-harness.yml

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
new file mode 100644
index 0000000..e386d21
--- /dev/null
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -0,0 +1,63 @@
+# Runs the claude-code-test-harness to gather metrics on performance with Claude Code.
+name: Claude Code Test Harness
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  claude-code-test-harness:
+    name: Claude Code Test Harness
+    runs-on: ubuntu-latest
+
+    steps:
+      # pull and build mcp-optimizer for deployment in the test harness
+      - name: Checkout mcp-optimizer code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
+
+      - name: Build mcp-optimizer Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
+        with:
+          context: .
+          platforms: linux/amd64
+          push: false
+          load: true
+          cache-from: type=gha
+          tags: mcp-optimizer:latest
+
+      # install uv and thv
+      - name: Install uv
+        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        with:
+          enable-cache: true
+          python-version: '3.13'
+
+      - name: Install ToolHive
+        uses: StacklokLabs/toolhive-actions/install@6a095f99aa2fd6cd92cf0bb94bdf509b99820c06 # v0.0.3
+
+      # pull the claude-code-harness code
+      - name: Checkout claude-code-harness code
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: StacklokLabs/claude-code-harness
+          token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
+          path: claude-code-harness
+
+      # Run the test harness
+      - name: Run Claude Code Test Harness
+        run: |
+          cd claude-code-harness
+          uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json
+
+      # Upload the results as an artifact
+      - name: Upload Test Harness Run Logs
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: claude-code-harness-logs
+          path: claude-code-harness/logs/*.jsonl

From 22dabae7b8c6eff0705f6086ffdee2105c2e6452 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 09:53:45 -0700
Subject: [PATCH 02/11] Address claude PR feedback

---
 .github/workflows/claude-code-test-harness.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index e386d21..92c9fd2 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -12,6 +12,7 @@ jobs:
   claude-code-test-harness:
     name: Claude Code Test Harness
     runs-on: ubuntu-latest
+    timeout-minutes: 20
 
     steps:
       # pull and build mcp-optimizer for deployment in the test harness
@@ -31,7 +32,7 @@ jobs:
           cache-from: type=gha
           tags: mcp-optimizer:latest
 
-      # install uv and thv
+      # install dependencies
       - name: Install uv
         uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
         with:
@@ -46,6 +47,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
           repository: StacklokLabs/claude-code-harness
+          # PAT with read-only access to the claude-code-harness repo
           token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
           path: claude-code-harness
 
@@ -54,6 +56,7 @@ jobs:
         run: |
           cd claude-code-harness
           uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json
+        continue-on-error: true
 
       # Upload the results as an artifact
       - name: Upload Test Harness Run Logs
@@ -61,3 +64,4 @@ jobs:
         with:
           name: claude-code-harness-logs
           path: claude-code-harness/logs/*.jsonl
+          if-no-files-found: warn

From 159f7e57d5a305eee853fab38218d6293341f032 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 09:56:19 -0700
Subject: [PATCH 03/11] Add trigger to run the action for testing before merge

---
 .github/workflows/claude-code-test-harness.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index 92c9fd2..b9a54e0 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -4,6 +4,9 @@ name: Claude Code Test Harness
 on:
   workflow_call:
   workflow_dispatch:
+  pull_request:
+    # TODO: remove this trigger once the workflow is well tested
+    types: [synchronize]
 
 permissions:
   contents: read

From 953feb0434bbb923fd831f5ecae6aa92d92cc41f Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 10:00:04 -0700
Subject: [PATCH 04/11] Specify claude-code-harness branch

---
 .github/workflows/claude-code-test-harness.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index b9a54e0..261df81 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -50,6 +50,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
           repository: StacklokLabs/claude-code-harness
+          ref: main
           # PAT with read-only access to the claude-code-harness repo
           token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
           path: claude-code-harness

From 3a408b24561d10dcaf7c508407e2c37feb51526f Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 11:41:50 -0700
Subject: [PATCH 05/11] Export ANTHROPIC_API_KEY to run the harness

---
 .github/workflows/claude-code-test-harness.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index 261df81..db93357 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -59,6 +59,7 @@ jobs:
       - name: Run Claude Code Test Harness
         run: |
           cd claude-code-harness
+          export ANTHROPIC_API_KEY="${{ secrets.ANTHROPIC_API_KEY }}"
           uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json
         continue-on-error: true
 

From 98d1485e7bf8397c4c7b6a617070aa4687668a21 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 11:45:53 -0700
Subject: [PATCH 06/11] Add 'Install Claude CLI' step

---
 .github/workflows/claude-code-test-harness.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index db93357..e7a4670 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -45,6 +45,10 @@ jobs:
       - name: Install ToolHive
         uses: StacklokLabs/toolhive-actions/install@6a095f99aa2fd6cd92cf0bb94bdf509b99820c06 # v0.0.3
 
+      - name: Install Claude CLI
+        run: |
+          npm install -g @anthropic-ai/claude-code
+          
       # pull the claude-code-harness code
       - name: Checkout claude-code-harness code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

From 48adf71be9498f0335e29d426af32061a9f8f4f2 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 13:58:40 -0700
Subject: [PATCH 07/11] Test on claude-code-harness feature branch

---
 .github/workflows/claude-code-test-harness.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index e7a4670..8c0ff31 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -48,13 +48,13 @@ jobs:
       - name: Install Claude CLI
         run: |
           npm install -g @anthropic-ai/claude-code
-          
+
       # pull the claude-code-harness code
       - name: Checkout claude-code-harness code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
         with:
           repository: StacklokLabs/claude-code-harness
-          ref: main
+          ref: wait-for-running_2025-10-30
           # PAT with read-only access to the claude-code-harness repo
           token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
           path: claude-code-harness

From c84bc07a15a2492b0822e5a05847f5091299e5ae Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 16:44:46 -0700
Subject: [PATCH 08/11] Upload mcp-optimizer logs to action artifacts

---
 .github/workflows/claude-code-test-harness.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index 8c0ff31..e293c34 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -65,6 +65,7 @@ jobs:
           cd claude-code-harness
           export ANTHROPIC_API_KEY="${{ secrets.ANTHROPIC_API_KEY }}"
           uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json
+          thv logs mcp-optimizer > ./mcp-optimizer-server.log || echo "Failed to get mcp-optimizer logs"
         continue-on-error: true
 
       # Upload the results as an artifact
@@ -74,3 +75,11 @@ jobs:
           name: claude-code-harness-logs
           path: claude-code-harness/logs/*.jsonl
           if-no-files-found: warn
+
+      # upload mcp-optimizer server logs
+      - name: Upload mcp-optimizer Server Logs
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: mcp-optimizer-server-logs
+          path: ./claude-code-harness/mcp-optimizer-server.log
+          if-no-files-found: warn

From 6f80522eb288e16ab0d5ce60e9b4d2521f647251 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 16:49:37 -0700
Subject: [PATCH 09/11] Persist servers to get logs

---
 .github/workflows/claude-code-test-harness.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index e293c34..76c7543 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -64,7 +64,7 @@ jobs:
         run: |
           cd claude-code-harness
           export ANTHROPIC_API_KEY="${{ secrets.ANTHROPIC_API_KEY }}"
-          uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json
+          uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json --persist-servers
           thv logs mcp-optimizer > ./mcp-optimizer-server.log || echo "Failed to get mcp-optimizer logs"
         continue-on-error: true
 

From b4ead84ee16fc38dac811cb630ec5dbccf23b800 Mon Sep 17 00:00:00 2001
From: Trey
Date: Thu, 30 Oct 2025 16:59:56 -0700
Subject: [PATCH 10/11] Start thv server as part of action

---
 .github/workflows/claude-code-test-harness.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index 76c7543..4b0a8e8 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -49,6 +49,17 @@ jobs:
         run: |
           npm install -g @anthropic-ai/claude-code
 
+      # Start toolhive server
+      - name: Run ToolHive server
+        run: |
+          thv serve --host 0.0.0.0 --port 9090 &
+          echo "Waiting for ToolHive server to start..."
+          sleep 5
+          echo "Checking ToolHive API is accessible..."
+          curl -v http://localhost:9090/api/v1beta/version || echo "Failed to connect to ToolHive API"
+          echo "Checking from Docker bridge IP..."
+          curl -v http://172.17.0.1:9090/api/v1beta/version || echo "Failed to connect via Docker bridge IP"
+
       # pull the claude-code-harness code
       - name: Checkout claude-code-harness code
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

From b18dfc4e6a8178f50a3f2f70c234d90d011063f0 Mon Sep 17 00:00:00 2001
From: Trey
Date: Fri, 31 Oct 2025 08:58:27 -0700
Subject: [PATCH 11/11] Add artifact for `thv list` query

---
 .github/workflows/claude-code-test-harness.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/claude-code-test-harness.yml b/.github/workflows/claude-code-test-harness.yml
index 4b0a8e8..bc59a8b 100644
--- a/.github/workflows/claude-code-test-harness.yml
+++ b/.github/workflows/claude-code-test-harness.yml
@@ -70,13 +70,14 @@ jobs:
           token: ${{ secrets.GHA_CLAUDE_CODE_HARNESS_READ_PAT }}
           path: claude-code-harness
 
-      # Run the test harness
+      # Run the test harness, capture mcp-optimizer server logs
       - name: Run Claude Code Test Harness
         run: |
           cd claude-code-harness
           export ANTHROPIC_API_KEY="${{ secrets.ANTHROPIC_API_KEY }}"
           uv run python -m src ./configs/test/gha.json --setup ./configs/test/gha_server_setup.json --persist-servers
           thv logs mcp-optimizer > ./mcp-optimizer-server.log || echo "Failed to get mcp-optimizer logs"
+          thv list --format json > ./thv-list.json || echo "Failed to list thv servers"
         continue-on-error: true
 
       # Upload the results as an artifact
@@ -94,3 +95,11 @@ jobs:
           name: mcp-optimizer-server-logs
           path: ./claude-code-harness/mcp-optimizer-server.log
           if-no-files-found: warn
+
+      # upload thv list output
+      - name: Upload thv list output
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+        with:
+          name: thv-list
+          path: ./claude-code-harness/thv-list.json
+          if-no-files-found: warn