From d6895e4cd3d8e1fc684d3c57d2d503ea5acd6fab Mon Sep 17 00:00:00 2001
From: enyst <engel.nyst@gmail.com>
Date: Thu, 6 Nov 2025 21:31:05 +0000
Subject: [PATCH 01/13] docs: sync OpenHands LLMs list with Agent SDK
 VERIFIED_OPENHANDS_MODELS\n\nSource of truth:
 openhands-sdk/openhands/sdk/llm/utils/verified_models.py\n- Add:
 claude-haiku-4-5-20251001, gpt-5-codex, claude-opus-4-1-20250805,
 kimi-k2-0711-preview\n- Remove: devstral-small-2505\n- Align order with
 VERIFIED_OPENHANDS_MODELS\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 895f7192..139dc161 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -34,17 +34,20 @@ Pricing follows official API provider rates. Below are the current pricing detai
 
 | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens |
 |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------|
-| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
-| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
 | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
-| devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
-| devstral-small-2505 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
-| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
-| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
+| claude-haiku-4-5-20251001 | — | — | — | — | — |
+| gpt-5-codex | — | — | — | — | — |
 | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 400,000 | 128,000 |
 | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 400,000 | 128,000 |
+| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
+| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
+| claude-opus-4-1-20250805 | — | — | — | — | — |
+| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
+| devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
 | o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 |
+| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
+| kimi-k2-0711-preview | — | — | — | — | — |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 
-**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.
+**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values.

From d219bb537fb5474b1fcf3f1b28fc8e9d58e09960 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:03:17 +0000
Subject: [PATCH 02/13] docs: populate OpenHands LLM prices from LiteLLM DB;
 keep qwen3-coder N/A; add source note\n\nSource: litellm
 model_prices_and_context_window_backup.json; Verified list remains
 source-of-truth for models.\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 139dc161..66c4d41b 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -35,19 +35,19 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens |
 |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------|
 | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
-| claude-haiku-4-5-20251001 | — | — | — | — | — |
-| gpt-5-codex | — | — | — | — | — |
-| gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 400,000 | 128,000 |
-| gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 400,000 | 128,000 |
+| claude-haiku-4-5-20251001 | $1.00 | $0.10 | $5.00 | 200,000 | 64,000 |
+| gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
+| gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
+| gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 |
 | claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
 | claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
-| claude-opus-4-1-20250805 | — | — | — | — | — |
+| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
 | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
 | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
 | o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 |
 | gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
-| kimi-k2-0711-preview | — | — | — | — | — |
+| kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 
-**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values.
+**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values.

From 049706915b5b5845fa2a8621ba555c9ef1775b57 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:05:13 +0000
Subject: [PATCH 03/13] docs: align pricing/limits with LiteLLM DB (incl.
 claude-sonnet-4-20250514 1M input tokens)\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 66c4d41b..42f578b5 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -39,7 +39,7 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 |
-| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
+| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 |
 | claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
 | claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
 | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |

From 66ca58a37b556d584996ad9e33f399ec91b128a5 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:05:54 +0000
Subject: [PATCH 04/13] docs: fix o4-mini cached read to $0.275 per 1M
 (LiteLLM)\n\nCo-authored-by: openhands <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 42f578b5..3b525df8 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -45,7 +45,7 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
 | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
-| o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 |
+| o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 |
 | gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
 | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |

From b319d322c5a98a15fffc8ebdf550ba59a6104199 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:06:38 +0000
Subject: [PATCH 05/13] docs: correct gemini-2.5-pro cached read to $0.125
 per 1M (LiteLLM)\n\nCo-authored-by: openhands <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 3b525df8..56f1eb33 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -46,7 +46,7 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
 | o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 |
-| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
+| gemini-2.5-pro | $1.25 | $0.125 | $10.00 | 1,048,576 | 65,535 |
 | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 

From 60576f5299cbcf1fc156cc181cf837e151c41b3f Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:17:42 +0000
Subject: [PATCH 06/13] docs: reorder Anthropic models to top; remove note
 about em dash; keep LiteLLM source note\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 56f1eb33..0fdfdf2b 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -35,13 +35,13 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens |
 |-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------|
 | claude-sonnet-4-5-20250929 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
+| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 |
+| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
+| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
 | claude-haiku-4-5-20251001 | $1.00 | $0.10 | $5.00 | 200,000 | 64,000 |
 | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 |
-| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 1,000,000 | 64,000 |
-| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
-| claude-opus-4-1-20250805 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
 | devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
 | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
@@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 
-**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost. A "—" indicates the provider has not publicly documented a stable price or token limit; refer to the provider's pricing page for the latest values.
+**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.

From f5ee940ed158d0cc46ae1b136f99df5bc856f333 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:23:51 +0000
Subject: [PATCH 07/13] docs: move devstral-small-2507 below
 devstral-medium-2507\n\nCo-authored-by: openhands <openhands@all-hands.dev>

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 0fdfdf2b..a5692106 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -42,8 +42,8 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | gpt-5-codex | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 272,000 | 128,000 |
 | gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 272,000 | 128,000 |
-| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
 | devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
+| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
 | o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
 | o4-mini | $1.10 | $0.275 | $4.40 | 200,000 | 100,000 |
 | gemini-2.5-pro | $1.25 | $0.125 | $10.00 | 1,048,576 | 65,535 |

From daed252a0b9dd4f0f285d44fcb4ae8cf6bee3427 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 22:29:34 +0000
Subject: [PATCH 08/13] =?UTF-8?q?docs:=20clarify=20pricing=20note=E2=80=94?=
 =?UTF-8?q?provider=20rates=20with=20no=20OpenHands=20markup\n\nCo-authore?=
 =?UTF-8?q?d-by:=20openhands=20<openhands@all-hands.dev>?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index a5692106..5413da92 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 
-**Note:** Pricing and limits are sourced from LiteLLM’s model price database and provider pages; they may change. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.
+**Note:** Prices listed reflect provider rates with no OpenHands markup, sourced via LiteLLM’s model price database and provider pricing pages. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.

From 406edba4e528ceb6691292947bc3414eb7dc7b64 Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Thu, 6 Nov 2025 23:30:33 +0100
Subject: [PATCH 09/13] Update openhands/usage/llms/openhands-llms.mdx

---
 openhands/usage/llms/openhands-llms.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/usage/llms/openhands-llms.mdx b/openhands/usage/llms/openhands-llms.mdx
index 5413da92..a93c0cfd 100644
--- a/openhands/usage/llms/openhands-llms.mdx
+++ b/openhands/usage/llms/openhands-llms.mdx
@@ -50,4 +50,4 @@ Pricing follows official API provider rates. Below are the current pricing detai
 | kimi-k2-0711-preview | $0.60 | $0.15 | $2.50 | 131,072 | 131,072 |
 | qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |
 
-**Note:** Prices listed reflect provider rates with no OpenHands markup, sourced via LiteLLM’s model price database and provider pricing pages. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.
+**Note:** Prices listed reflect provider rates with no markup, sourced via LiteLLM’s model price database and provider pricing pages. Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.

From db3aecd95b9a076f0f6cd8f43de40a8b4eee2936 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 23:44:39 +0000
Subject: [PATCH 10/13] test: add validator to compare MDX pricing vs LiteLLM
 price DB (remote JSON)\n\n- Skips models not present or intentionally N/A\n-
 Compares input/cached/output costs per 1M and token limits when
 available\n\nCo-authored-by: openhands <openhands@all-hands.dev>

---
 .github/scripts/validate_llm_pricing.py | 220 ++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 .github/scripts/validate_llm_pricing.py

diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py
new file mode 100644
index 00000000..ac8d6772
--- /dev/null
+++ b/.github/scripts/validate_llm_pricing.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+import json
+import re
+import sys
+import urllib.request
+from typing import Dict, List, Optional, Tuple
+
+LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"
+MDX_PATH = "openhands/usage/llms/openhands-llms.mdx"
+
+# Models to skip from strict validation (absent in LiteLLM DB or intentionally N/A values)
+SKIP_MODELS = {
+    "qwen3-coder-480b",
+    "devstral-medium-2507",
+    "devstral-small-2507",
+}
+
+# Optional manual key mapping if MDX model name differs from LiteLLM JSON key
+MODEL_KEY_MAP: Dict[str, str] = {
+    # Add mappings here only if necessary
+}
+
+
+def fetch_litellm_db(url: str) -> Dict:
+    with urllib.request.urlopen(url, timeout=30) as resp:
+        data = resp.read()
+    return json.loads(data)
+
+
+def parse_money(s: str) -> Optional[float]:
+    s = s.strip()
+    if s.upper() in {"N/A", "NA", "-", "—", "--", ""}:
+        return None
+    if s.startswith("$"):
+        s = s[1:]
+    try:
+        return float(s)
+    except ValueError:
+        return None
+
+
+def parse_int(s: str) -> Optional[int]:
+    s = s.strip()
+    if s.upper() in {"N/A", "NA", "-", "—", "--", ""}:
+        return None
+    s = s.replace(",", "")
+    try:
+        return int(s)
+    except ValueError:
+        return None
+
+
+def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]:
+    rows: List[Dict[str, Optional[str]]] = []
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.read().splitlines()
+
+    # Find table header
+    start = None
+    for i, line in enumerate(lines):
+        if "| Model |" in line:
+            start = i
+            break
+    if start is None:
+        raise SystemExit("ERROR: Could not find LLM pricing table header in MDX file.")
+
+    i = start + 1
+    # Skip the separator line (---)
+    while i < len(lines) and lines[i].strip().startswith("|"):
+        # Stop when we hit a blank line after table
+        if not lines[i].strip():
+            break
+        # Skip header separator row like |-----|
+        if re.match(r"^\|\s*-+\s*\|", lines[i]):
+            i += 1
+            continue
+        # Stop when the row clearly ends (non-table line)
+        if not lines[i].strip().startswith("|"):
+            break
+
+        parts = [p.strip() for p in lines[i].strip().strip("|").split("|")]
+        if len(parts) == 6 and parts[0] != "Model":
+            rows.append({
+                "model": parts[0],
+                "input_cost": parts[1],
+                "cached_input_cost": parts[2],
+                "output_cost": parts[3],
+                "max_input_tokens": parts[4],
+                "max_output_tokens": parts[5],
+            })
+        i += 1
+
+    if not rows:
+        raise SystemExit("ERROR: Found table header but no data rows parsed.")
+    return rows
+
+
+def to_per_million(val_per_token: Optional[float]) -> Optional[float]:
+    if val_per_token is None:
+        return None
+    return val_per_token * 1_000_000.0
+
+
+def near(a: Optional[float], b: Optional[float], tol: float = 1e-3) -> bool:
+    if a is None and b is None:
+        return True
+    if a is None or b is None:
+        return False
+    return abs(a - b) <= tol
+
+
+def main() -> int:
+    db = fetch_litellm_db(LITELLM_URL)
+    rows = extract_table_from_mdx(MDX_PATH)
+
+    failures: List[str] = []
+    validations = 0
+
+    for row in rows:
+        model = row["model"]
+        if model in SKIP_MODELS:
+            continue
+
+        key = MODEL_KEY_MAP.get(model, model)
+        entry = db.get(key)
+        if entry is None:
+            # Try a few fallbacks (provider-prefixed keys)
+            # e.g., openai/gpt-5-codex, google/gemini-2.5-pro
+            candidates = [
+                f"openai/{model}",
+                f"azure/{model}",
+                f"anthropic/{model}",
+                f"google/{model}",
+                f"gemini/{model}",
+                f"vertex_ai/{model}",
+                f"mistral/{model}",
+                f"cloudflare/{model}",
+                f"groq/{model}",
+                f"bedrock/{model}",
+            ]
+            for c in candidates:
+                if c in db:
+                    entry = db[c]
+                    key = c
+                    break
+
+        if entry is None:
+            # Not in LiteLLM DB; skip but report
+            print(f"[skip] {model}: not found in LiteLLM DB")
+            continue
+
+        # Parse MDX values
+        mdx_input_cost = parse_money(row["input_cost"])  # $ per 1M
+        mdx_cached_cost = parse_money(row["cached_input_cost"])  # $ per 1M or None
+        mdx_output_cost = parse_money(row["output_cost"])  # $ per 1M
+        mdx_max_in = parse_int(row["max_input_tokens"])  # tokens
+        mdx_max_out = parse_int(row["max_output_tokens"])  # tokens
+
+        # Compute expected from LiteLLM DB
+        llm_in_per_token = entry.get("input_cost_per_token")
+        llm_cached_per_token = entry.get("cache_read_input_token_cost")
+        llm_out_per_token = entry.get("output_cost_per_token")
+
+        exp_input_cost = to_per_million(llm_in_per_token)
+        exp_cached_cost = to_per_million(llm_cached_per_token)
+        exp_output_cost = to_per_million(llm_out_per_token)
+
+        # Compare costs (only if LLM DB provides them)
+        def add_fail(msg: str):
+            failures.append(f"{model}: {msg}")
+
+        # Input cost
+        if exp_input_cost is not None and mdx_input_cost is not None:
+            validations += 1
+            if not near(mdx_input_cost, exp_input_cost):
+                add_fail(f"input_cost mismatch: mdx={mdx_input_cost} vs litellm={exp_input_cost}")
+
+        # Cached input cost
+        if exp_cached_cost is not None or mdx_cached_cost is not None:
+            # If JSON missing but MDX has numeric, that's a mismatch; if MDX N/A and JSON missing, accept
+            validations += 1
+            if exp_cached_cost is None and mdx_cached_cost is None:
+                pass
+            elif exp_cached_cost is None and mdx_cached_cost is not None:
+                add_fail(f"cached_input_cost present in MDX but missing in LiteLLM: mdx={mdx_cached_cost}")
+            elif exp_cached_cost is not None and mdx_cached_cost is None:
+                add_fail(f"cached_input_cost missing in MDX but present in LiteLLM: litellm={exp_cached_cost}")
+            elif not near(mdx_cached_cost, exp_cached_cost):
+                add_fail(f"cached_input_cost mismatch: mdx={mdx_cached_cost} vs litellm={exp_cached_cost}")
+
+        # Output cost
+        if exp_output_cost is not None and mdx_output_cost is not None:
+            validations += 1
+            if not near(mdx_output_cost, exp_output_cost):
+                add_fail(f"output_cost mismatch: mdx={mdx_output_cost} vs litellm={exp_output_cost}")
+
+        # Token limits (compare only if LiteLLM provides the field)
+        llm_max_in = entry.get("max_input_tokens")
+        llm_max_out = entry.get("max_output_tokens")
+
+        if llm_max_in is not None and mdx_max_in is not None:
+            validations += 1
+            if mdx_max_in != int(llm_max_in):
+                add_fail(f"max_input_tokens mismatch: mdx={mdx_max_in} vs litellm={llm_max_in}")
+
+        if llm_max_out is not None and mdx_max_out is not None:
+            validations += 1
+            if mdx_max_out != int(llm_max_out):
+                add_fail(f"max_output_tokens mismatch: mdx={mdx_max_out} vs litellm={llm_max_out}")
+
+    if failures:
+        print("\nValidation FAILED:\n" + "\n".join(failures))
+        return 1
+
+    print(f"Validation passed. Checks performed: {validations}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From 4ac60791a3aed7305d7d5c7d9ef122425a938bc7 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 23:56:52 +0000
Subject: [PATCH 11/13] ci: add GH Action to validate MDX LLM pricing against
 LiteLLM DB on PRs\n\nCo-authored-by: openhands <openhands@all-hands.dev>

---
 .github/workflows/validate-llm-pricing.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 .github/workflows/validate-llm-pricing.yml

diff --git a/.github/workflows/validate-llm-pricing.yml b/.github/workflows/validate-llm-pricing.yml
new file mode 100644
index 00000000..609aa527
--- /dev/null
+++ b/.github/workflows/validate-llm-pricing.yml
@@ -0,0 +1,22 @@
+name: Validate LLM pricing table
+
+on:
+  pull_request:
+    paths:
+      - 'openhands/usage/llms/openhands-llms.mdx'
+      - '.github/scripts/validate_llm_pricing.py'
+
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Run pricing validator
+        run: python .github/scripts/validate_llm_pricing.py

From e520183af62ba182a5ce6f1e780f902838672052 Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Thu, 6 Nov 2025 23:59:43 +0000
Subject: [PATCH 12/13] chore(test): drop typing imports; use Python 3.12
 builtins for type hints in validator\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 .github/scripts/validate_llm_pricing.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py
index ac8d6772..2f503a41 100644
--- a/.github/scripts/validate_llm_pricing.py
+++ b/.github/scripts/validate_llm_pricing.py
@@ -3,7 +3,6 @@
 import re
 import sys
 import urllib.request
-from typing import Dict, List, Optional, Tuple
 
 LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"
 MDX_PATH = "openhands/usage/llms/openhands-llms.mdx"
@@ -16,18 +15,18 @@
 }
 
 # Optional manual key mapping if MDX model name differs from LiteLLM JSON key
-MODEL_KEY_MAP: Dict[str, str] = {
+MODEL_KEY_MAP: dict[str, str] = {
     # Add mappings here only if necessary
 }
 
 
-def fetch_litellm_db(url: str) -> Dict:
+def fetch_litellm_db(url: str) -> dict:
     with urllib.request.urlopen(url, timeout=30) as resp:
         data = resp.read()
     return json.loads(data)
 
 
-def parse_money(s: str) -> Optional[float]:
+def parse_money(s: str) -> float | None:
     s = s.strip()
     if s.upper() in {"N/A", "NA", "-", "—", "--", ""}:
         return None
@@ -39,7 +38,7 @@ def parse_money(s: str) -> Optional[float]:
         return None
 
 
-def parse_int(s: str) -> Optional[int]:
+def parse_int(s: str) -> int | None:
     s = s.strip()
     if s.upper() in {"N/A", "NA", "-", "—", "--", ""}:
         return None
@@ -50,8 +49,8 @@ def parse_int(s: str) -> Optional[int]:
         return None
 
 
-def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]:
-    rows: List[Dict[str, Optional[str]]] = []
+def extract_table_from_mdx(path: str) -> list[dict[str, str | None]]:
+    rows: list[dict[str, str | None]] = []
     with open(path, "r", encoding="utf-8") as f:
         lines = f.read().splitlines()
 
@@ -95,13 +94,13 @@ def extract_table_from_mdx(path: str) -> List[Dict[str, Optional[str]]]:
     return rows
 
 
-def to_per_million(val_per_token: Optional[float]) -> Optional[float]:
+def to_per_million(val_per_token: float | None) -> float | None:
     if val_per_token is None:
         return None
     return val_per_token * 1_000_000.0
 
 
-def near(a: Optional[float], b: Optional[float], tol: float = 1e-3) -> bool:
+def near(a: float | None, b: float | None, tol: float = 1e-3) -> bool:
     if a is None and b is None:
         return True
     if a is None or b is None:

From 79c7e8f656e6c9d1cd998b78c56694304dcc751b Mon Sep 17 00:00:00 2001
From: openhands <openhands@all-hands.dev>
Date: Fri, 7 Nov 2025 00:51:18 +0000
Subject: [PATCH 13/13] chore(test): restrict provider fallbacks (drop
 vertex_ai, bedrock, groq, cloudflare)\n\nCo-authored-by: openhands
 <openhands@all-hands.dev>

---
 .github/scripts/validate_llm_pricing.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/scripts/validate_llm_pricing.py b/.github/scripts/validate_llm_pricing.py
index 2f503a41..6c7aa45d 100644
--- a/.github/scripts/validate_llm_pricing.py
+++ b/.github/scripts/validate_llm_pricing.py
@@ -131,11 +131,7 @@ def main() -> int:
                 f"anthropic/{model}",
                 f"google/{model}",
                 f"gemini/{model}",
-                f"vertex_ai/{model}",
                 f"mistral/{model}",
-                f"cloudflare/{model}",
-                f"groq/{model}",
-                f"bedrock/{model}",
             ]
             for c in candidates:
                 if c in db: