From d6895e4cd3d8e1fc684d3c57d2d503ea5acd6fab Mon Sep 17 00:00:00 2001 From: enyst Date: Thu, 6 Nov 2025 21:31:05 +0000 Subject: [PATCH 01/13] docs: sync OpenHands LLMs list with Agent SDK VERIFIED_OPENHANDS_MODELS\n\nSource of truth: openhands-sdk/openhands/sdk/llm/utils/verified_models.py\n- Add: claude-haiku-4-5-20251001, gpt-5-codex, claude-opus-4-1-20250805, kimi-k2-0711-preview\n- Remove: devstral-small-2505\n- Align order with VERIFIED_OPENHANDS_MODELS\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 895f7192..139dc161 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -34,17 +34,20 @@ Pricing follows official API provider rates. Below are the current pricing detai | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens | |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------| -| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | -| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | -| devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | -| devstral-small-2505 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | -| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | -| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 | +| claude-haiku-4-5-20251001 | — | — | — | — | — | +| gpt-5-codex | — | — | — | — | — | | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 400,000 | 128,000 | | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 400,000 | 128,000 | +| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 
64,000 | +| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | +| claude-opus-4-1-20250805 | — | — | — | — | — | +| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | +| devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | | o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 | +| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 | +| kimi-k2-0711-preview | — | — | — | — | — | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | -**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. +**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values. From d219bb537fb5474b1fcf3f1b28fc8e9d58e09960 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:03:17 +0000 Subject: [PATCH 02/13] docs: populate OpenHands LLM prices from LiteLLM DB; keep qwen3-coder N/A; add source note\n\nSource: litellm model_prices_and_context_window_backup.json; Verified list remains source-of-truth for models.\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 139dc161..66c4d41b 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -35,19 +35,19 @@ Pricing follows official API provider rates. 
Below are the current pricing detai | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens | |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------| | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | -| claude-haiku-4-5-20251001 | — | — | — | — | — | -| gpt-5-codex | — | — | — | — | — | -| gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 400,000 | 128,000 | -| gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 400,000 | 128,000 | +| claude-haiku-4-5-20251001 | $1.00 | $0.10 | $5.00 | 200,000 | 64,000 | +| gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | +| gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | +| gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 | | claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | | claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | -| claude-opus-4-1-20250805 | — | — | — | — | — | +| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | | o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 | | gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 | -| kimi-k2-0711-preview | — | — | — | — | — | +| kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | -**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values. 
+**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values. From 049706915b5b5845fa2a8621ba555c9ef1775b57 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:05:13 +0000 Subject: [PATCH 03/13] docs: align pricing/limits with LiteLLM DB (incl. claude-sonnet-4-20250514 1M input tokens)\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 66c4d41b..42f578b5 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -39,7 +39,7 @@ Pricing follows official API provider rates. 
Below are the current pricing detai | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 | -| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | +| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 | | claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | | claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | From 66ca58a37b556d584996ad9e33f399ec91b128a5 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:05:54 +0000 Subject: [PATCH 04/13] docs: fix o4-mini cached read to $0.275 per 1M (LiteLLM)\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 42f578b5..3b525df8 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -45,7 +45,7 @@ Pricing follows official API provider rates. 
Below are the current pricing detai | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | -| o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 | +| o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 | | gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 | | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | From b319d322c5a98a15fffc8ebdf550ba59a6104199 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:06:38 +0000 Subject: [PATCH 05/13] docs: correct gemini-2.5-pro cached read to $0.125 per 1M (LiteLLM)\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 3b525df8..56f1eb33 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -46,7 +46,7 @@ Pricing follows official API provider rates. 
Below are the current pricing detai | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | | o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 | -| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 | +| gemini-2.5-pro | $1.25 | $0.125 | $10.00 | 1,048,576 | 65,535 | | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | From 60576f5299cbcf1fc156cc181cf837e151c41b3f Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:17:42 +0000 Subject: [PATCH 06/13] docs: reorder Anthropic models to top; remove note about em dash; keep LiteLLM source note\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 56f1eb33..0fdfdf2b 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -35,13 +35,13 @@ Pricing follows official API provider rates. 
Below are the current pricing detai | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens | |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------| | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 | +| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 | +| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | +| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | | claude-haiku-4-5-20251001 | $1.00 | $0.10 | $5.00 | 200,000 | 64,000 | | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 | -| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 | -| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | -| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 | | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | @@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | -**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values. 
+**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. From f5ee940ed158d0cc46ae1b136f99df5bc856f333 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:23:51 +0000 Subject: [PATCH 07/13] docs: move devstral-small-2507 below devstral-medium-2507\n\nCo-authored-by: openhands --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 0fdfdf2b..a5692106 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -42,8 +42,8 @@ Pricing follows official API provider rates. Below are the current pricing detai | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 | | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 | -| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 | +| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 | | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 | | o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 | | gemini-2.5-pro | $1.25 | $0.125 | $10.00 | 1,048,576 | 65,535 | From daed252a0b9dd4f0f285d44fcb4ae8cf6bee3427 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 22:29:34 +0000 Subject: [PATCH 08/13] =?UTF-8?q?docs:=20clarify=20pricing=20note=E2=80=94?= =?UTF-8?q?provider=20rates=20with=20no=20OpenHands=20markup\n\nCo-authore?= =?UTF-8?q?d-by:=20openhands=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index a5692106..5413da92 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | -**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. +**Note:** Prices listed reflect provider rates with no OpenHands markup, sourced via LiteLLM’s model price database and provider pricing pages. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. From 406edba4e528ceb6691292947bc3414eb7dc7b64 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Thu, 6 Nov 2025 23:30:33 +0100 Subject: [PATCH 09/13] Update openhands/usage/llms/openhands-llms.mdx --- openhands/usage/llms/openhands-llms.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx index 5413da92..a93c0cfd 100644 --- a/openhands/usage/llms/openhands-llms.mdx +++ b/openhands/usage/llms/openhands-llms.mdx @@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 | | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A | -**Note:** Prices listed reflect provider rates with no OpenHands markup, sourced via LiteLLM’s model price database and provider pricing pages. 
Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. +**Note:** Prices listed reflect provider rates with no markup, sourced via LiteLLM’s model price database and provider pricing pages. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. From db3aecd95b9a076f0f6cd8f43de40a8b4eee2936 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 23:44:39 +0000 Subject: [PATCH 10/13] test: add validator to compare MDX pricing vs LiteLLM price DB (remote JSON)\n\n- Skips models not present or intentionally N/A\n- Compares input/cached/output costs per 1M and token limits when available\n\nCo-authored-by: openhands --- .github/scripts/validate_llm_pricing.py | 220 ++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 .github/scripts/validate_llm_pricing.py diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py new file mode 100644 index 00000000..ac8d6772 --- /dev/null +++ b/.github/scripts/validate_llm_pricing.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +import json +import re +import sys +import urllib.request +from typing import Dict, List, Optional, Tuple + +LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json" +MDX_PATH = "openhands/usage/llms/openhands-llms.mdx" + +# Models to skip from strict validation (absent in LiteLLM DB or intentionally N/A values) +SKIP_MODELS = { + "qwen3-coder-480b", + "devstral-medium-2507", + "devstral-small-2507", +} + +# Optional manual key mapping if MDX model name differs from LiteLLM JSON key +MODEL_KEY_MAP: Dict[str, str] = { + # Add mappings here only if necessary +} + + +def fetch_litellm_db(url: str) -> Dict: + with urllib.request.urlopen(url, timeout=30) as 
resp: + data = resp.read() + return json.loads(data) + + +def parse_money(s: str) -> Optional[float]: + s = s.strip() + if s.upper() in {"N/A", "NA", "-", "—", "--", ""}: + return None + if s.startswith("$"): + s = s[1:] + try: + return float(s) + except ValueError: + return None + + +def parse_int(s: str) -> Optional[int]: + s = s.strip() + if s.upper() in {"N/A", "NA", "-", "—", "--", ""}: + return None + s = s.replace(",", "") + try: + return int(s) + except ValueError: + return None + + +def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]: + rows: List[Dict[str, Optional[str]]] = [] + with open(path, "r", encoding="utf-8") as f: + lines = f.read().splitlines() + + # Find table header + start = None + for i, line in enumerate(lines): + if "| Model |" in line: + start = i + break + if start is None: + raise SystemExit("ERROR: Could not find LLM pricing table header in MDX file.") + + i = start + 1 + # Skip the separator line (---) + while i < len(lines) and lines[i].strip().startswith("|"): + # Stop when we hit a blank line after table + if not lines[i].strip(): + break + # Skip header separator row like |-----| + if re.match(r"^\|\s*-+\s*\|", lines[i]): + i += 1 + continue + # Stop when the row clearly ends (non-table line) + if not lines[i].strip().startswith("|"): + break + + parts = [p.strip() for p in lines[i].strip().strip("|").split("|")] + if len(parts) == 6 and parts[0] != "Model": + rows.append({ + "model": parts[0], + "input_cost": parts[1], + "cached_input_cost": parts[2], + "output_cost": parts[3], + "max_input_tokens": parts[4], + "max_output_tokens": parts[5], + }) + i += 1 + + if not rows: + raise SystemExit("ERROR: Found table header but no data rows parsed.") + return rows + + +def to_per_million(val_per_token: Optional[float]) -> Optional[float]: + if val_per_token is None: + return None + return val_per_token * 1_000_000.0 + + +def near(a: Optional[float], b: Optional[float], tol: float = 1e-3) -> bool: + if a is None and b 
is None: + return True + if a is None or b is None: + return False + return abs(a - b) <= tol + + +def main() -> int: + db = fetch_litellm_db(LITELLM_URL) + rows = extract_table_from_mdx(MDX_PATH) + + failures: List[str] = [] + validations = 0 + + for row in rows: + model = row["model"] + if model in SKIP_MODELS: + continue + + key = MODEL_KEY_MAP.get(model, model) + entry = db.get(key) + if entry is None: + # Try a few fallbacks (provider-prefixed keys) + # e.g., openai/gpt-5-codex, google/gemini-2.5-pro + candidates = [ + f"openai/{model}", + f"azure/{model}", + f"anthropic/{model}", + f"google/{model}", + f"gemini/{model}", + f"vertex_ai/{model}", + f"mistral/{model}", + f"cloudflare/{model}", + f"groq/{model}", + f"bedrock/{model}", + ] + for c in candidates: + if c in db: + entry = db[c] + key = c + break + + if entry is None: + # Not in LiteLLM DB; skip but report + print(f"[skip] {model}: not found in LiteLLM DB") + continue + + # Parse MDX values + mdx_input_cost = parse_money(row["input_cost"]) # $ per 1M + mdx_cached_cost = parse_money(row["cached_input_cost"]) # $ per 1M or None + mdx_output_cost = parse_money(row["output_cost"]) # $ per 1M + mdx_max_in = parse_int(row["max_input_tokens"]) # tokens + mdx_max_out = parse_int(row["max_output_tokens"]) # tokens + + # Compute expected from LiteLLM DB + llm_in_per_token = entry.get("input_cost_per_token") + llm_cached_per_token = entry.get("cache_read_input_token_cost") + llm_out_per_token = entry.get("output_cost_per_token") + + exp_input_cost = to_per_million(llm_in_per_token) + exp_cached_cost = to_per_million(llm_cached_per_token) + exp_output_cost = to_per_million(llm_out_per_token) + + # Compare costs (only if LLM DB provides them) + def add_fail(msg: str): + failures.append(f"{model}: {msg}") + + # Input cost + if exp_input_cost is not None and mdx_input_cost is not None: + validations += 1 + if not near(mdx_input_cost, exp_input_cost): + add_fail(f"input_cost mismatch: mdx={mdx_input_cost} vs 
litellm={exp_input_cost}") + + # Cached input cost + if exp_cached_cost is not None or mdx_cached_cost is not None: + # If JSON missing but MDX has numeric, that's a mismatch; if MDX N/A and JSON missing, accept + validations += 1 + if exp_cached_cost is None and mdx_cached_cost is None: + pass + elif exp_cached_cost is None and mdx_cached_cost is not None: + add_fail(f"cached_input_cost present in MDX but missing in LiteLLM: mdx={mdx_cached_cost}") + elif exp_cached_cost is not None and mdx_cached_cost is None: + add_fail(f"cached_input_cost missing in MDX but present in LiteLLM: litellm={exp_cached_cost}") + elif not near(mdx_cached_cost, exp_cached_cost): + add_fail(f"cached_input_cost mismatch: mdx={mdx_cached_cost} vs litellm={exp_cached_cost}") + + # Output cost + if exp_output_cost is not None and mdx_output_cost is not None: + validations += 1 + if not near(mdx_output_cost, exp_output_cost): + add_fail(f"output_cost mismatch: mdx={mdx_output_cost} vs litellm={exp_output_cost}") + + # Token limits (compare only if LiteLLM provides the field) + llm_max_in = entry.get("max_input_tokens") + llm_max_out = entry.get("max_output_tokens") + + if llm_max_in is not None and mdx_max_in is not None: + validations += 1 + if mdx_max_in != int(llm_max_in): + add_fail(f"max_input_tokens mismatch: mdx={mdx_max_in} vs litellm={llm_max_in}") + + if llm_max_out is not None and mdx_max_out is not None: + validations += 1 + if mdx_max_out != int(llm_max_out): + add_fail(f"max_output_tokens mismatch: mdx={mdx_max_out} vs litellm={llm_max_out}") + + if failures: + print("\nValidation FAILED:\n" + "\n".join(failures)) + return 1 + + print(f"Validation passed. 
Checks performed: {validations}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 4ac60791a3aed7305d7d5c7d9ef122425a938bc7 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 23:56:52 +0000 Subject: [PATCH 11/13] ci: add GH Action to validate MDX LLM pricing against LiteLLM DB on PRs\n\nCo-authored-by: openhands --- .github/workflows/validate-llm-pricing.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/validate-llm-pricing.yml diff --git a/.github/workflows/validate-llm-pricing.yml b/.github/workflows/validate-llm-pricing.yml new file mode 100644 index 00000000..609aa527 --- /dev/null +++ b/.github/workflows/validate-llm-pricing.yml @@ -0,0 +1,22 @@ +name: Validate LLM pricing table + +on: + pull_request: + paths: + - 'openhands/usage/llms/openhands-llms.mdx' + - '.github/scripts/validate_llm_pricing.py' + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Run pricing validator + run: python .github/scripts/validate_llm_pricing.py From e520183af62ba182a5ce6f1e780f902838672052 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 6 Nov 2025 23:59:43 +0000 Subject: [PATCH 12/13] chore(test): drop typing imports; use Python 3.12 builtins for type hints in validator\n\nCo-authored-by: openhands --- .github/scripts/validate_llm_pricing.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py index ac8d6772..2f503a41 100644 --- a/.github/scripts/validate_llm_pricing.py +++ b/.github/scripts/validate_llm_pricing.py @@ -3,7 +3,6 @@ import re import sys import urllib.request -from typing import Dict, List, Optional, Tuple LITELLM_URL = 
"https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json" MDX_PATH = "openhands/usage/llms/openhands-llms.mdx" @@ -16,18 +15,18 @@ } # Optional manual key mapping if MDX model name differs from LiteLLM JSON key -MODEL_KEY_MAP: Dict[str, str] = { +MODEL_KEY_MAP: dict[str, str] = { # Add mappings here only if necessary } -def fetch_litellm_db(url: str) -> Dict: +def fetch_litellm_db(url: str) -> dict: with urllib.request.urlopen(url, timeout=30) as resp: data = resp.read() return json.loads(data) -def parse_money(s: str) -> Optional[float]: +def parse_money(s: str) -> float | None: s = s.strip() if s.upper() in {"N/A", "NA", "-", "—", "--", ""}: return None @@ -39,7 +38,7 @@ def parse_money(s: str) -> Optional[float]: return None -def parse_int(s: str) -> Optional[int]: +def parse_int(s: str) -> int | None: s = s.strip() if s.upper() in {"N/A", "NA", "-", "—", "--", ""}: return None @@ -50,8 +49,8 @@ def parse_int(s: str) -> Optional[int]: return None -def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]: - rows: List[Dict[str, Optional[str]]] = [] +def extract_table_from_mdx(path: str) -> list[dict[str, str | None]]: + rows: list[dict[str, str | None]] = [] with open(path, "r", encoding="utf-8") as f: lines = f.read().splitlines() @@ -95,13 +94,13 @@ def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]: return rows -def to_per_million(val_per_token: Optional[float]) -> Optional[float]: +def to_per_million(val_per_token: float | None) -> float | None: if val_per_token is None: return None return val_per_token * 1_000_000.0 -def near(a: Optional[float], b: Optional[float], tol: float = 1e-3) -> bool: +def near(a: float | None, b: float | None, tol: float = 1e-3) -> bool: if a is None and b is None: return True if a is None or b is None: From 79c7e8f656e6c9d1cd998b78c56694304dcc751b Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 7 Nov 2025 00:51:18 +0000 Subject: [PATCH 13/13] 
chore(test): restrict provider fallbacks (drop vertex_ai, bedrock, groq, cloudflare)\n\nCo-authored-by: openhands --- .github/scripts/validate_llm_pricing.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py index 2f503a41..6c7aa45d 100644 --- a/.github/scripts/validate_llm_pricing.py +++ b/.github/scripts/validate_llm_pricing.py @@ -131,11 +131,7 @@ def main() -> int: f"anthropic/{model}", f"google/{model}", f"gemini/{model}", - f"vertex_ai/{model}", f"mistral/{model}", - f"cloudflare/{model}", - f"groq/{model}", - f"bedrock/{model}", ] for c in candidates: if c in db: