From f73dacc2d962c17a82c53a17be19fe6c61e73733 Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 12:50:27 -0700 Subject: [PATCH 1/6] Add You.com as tool for browser --- README.md | 6 +- gpt-oss-mcp-server/browser_server.py | 4 +- gpt-oss-mcp-server/reference-system-prompt.py | 4 +- gpt_oss/chat.py | 4 +- gpt_oss/responses_api/api_server.py | 4 +- gpt_oss/tools/simple_browser/__init__.py | 3 +- gpt_oss/tools/simple_browser/backend.py | 112 ++++++++++++++++-- .../tools/simple_browser/test_backend.py | 64 ++++++++++ 8 files changed, 181 insertions(+), 20 deletions(-) create mode 100644 tests/gpt_oss/tools/simple_browser/test_backend.py diff --git a/README.md b/README.md index c4612bca..5d2bf140 100644 --- a/README.md +++ b/README.md @@ -426,7 +426,7 @@ codex -p oss ### Browser > [!WARNING] -> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`ExaBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. +> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`YouComBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. 
Both gpt-oss models were trained with the capability to browse using the `browser` tool that exposes the following three methods: @@ -441,13 +441,13 @@ To enable the browser tool, you'll have to place the definition into the `system ```python import datetime from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import ExaBackend +from gpt_oss.tools.simple_browser.backend import YouComBackend from openai_harmony import SystemContent, Message, Conversation, Role, load_harmony_encoding, HarmonyEncodingName encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) # Exa backend requires you to have set the EXA_API_KEY environment variable -backend = ExaBackend( +backend = YouComBackend( source="web", ) browser_tool = SimpleBrowserTool(backend=backend) diff --git a/gpt-oss-mcp-server/browser_server.py b/gpt-oss-mcp-server/browser_server.py index 5d5ad4ad..423a8454 100644 --- a/gpt-oss-mcp-server/browser_server.py +++ b/gpt-oss-mcp-server/browser_server.py @@ -5,7 +5,7 @@ from mcp.server.fastmcp import Context, FastMCP from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import ExaBackend +from gpt_oss.tools.simple_browser.backend import YouComBackend @dataclass @@ -14,7 +14,7 @@ class AppContext: def create_or_get_browser(self, session_id: str) -> SimpleBrowserTool: if session_id not in self.browsers: - backend = ExaBackend(source="web") + backend = YouComBackend(source="web") self.browsers[session_id] = SimpleBrowserTool(backend=backend) return self.browsers[session_id] diff --git a/gpt-oss-mcp-server/reference-system-prompt.py b/gpt-oss-mcp-server/reference-system-prompt.py index 98f171dd..6ddbf7c9 100644 --- a/gpt-oss-mcp-server/reference-system-prompt.py +++ b/gpt-oss-mcp-server/reference-system-prompt.py @@ -1,7 +1,7 @@ import datetime from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import ExaBackend 
+from gpt_oss.tools.simple_browser.backend import YouComBackend from gpt_oss.tools.python_docker.docker_tool import PythonTool from gpt_oss.tokenizer import tokenizer @@ -22,7 +22,7 @@ ReasoningEffort.LOW).with_conversation_start_date( datetime.datetime.now().strftime("%Y-%m-%d"))) -backend = ExaBackend(source="web", ) +backend = YouComBackend(source="web") browser_tool = SimpleBrowserTool(backend=backend) system_message_content = system_message_content.with_tools( browser_tool.tool_config) diff --git a/gpt_oss/chat.py b/gpt_oss/chat.py index 5e40079d..4856a397 100644 --- a/gpt_oss/chat.py +++ b/gpt_oss/chat.py @@ -19,7 +19,7 @@ from gpt_oss.tools import apply_patch from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import ExaBackend +from gpt_oss.tools.simple_browser.backend import YouComBackend from gpt_oss.tools.python_docker.docker_tool import PythonTool from openai_harmony import ( @@ -85,7 +85,7 @@ def main(args): ) if args.browser: - backend = ExaBackend( + backend = YouComBackend( source="web", ) browser_tool = SimpleBrowserTool(backend=backend) diff --git a/gpt_oss/responses_api/api_server.py b/gpt_oss/responses_api/api_server.py index 2934b011..37fddd7a 100644 --- a/gpt_oss/responses_api/api_server.py +++ b/gpt_oss/responses_api/api_server.py @@ -20,7 +20,7 @@ from gpt_oss.tools.python_docker.docker_tool import PythonTool from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import ExaBackend +from gpt_oss.tools.simple_browser.backend import YouComBackend from .events import ( ResponseCodeInterpreterCallCompleted, @@ -904,7 +904,7 @@ async def generate(body: ResponsesRequest, request: Request): ) if use_browser_tool: - backend = ExaBackend( + backend = YouComBackend( source="web", ) browser_tool = SimpleBrowserTool(backend=backend) diff --git a/gpt_oss/tools/simple_browser/__init__.py b/gpt_oss/tools/simple_browser/__init__.py index 9043cb18..da3ff280 100644 
--- a/gpt_oss/tools/simple_browser/__init__.py +++ b/gpt_oss/tools/simple_browser/__init__.py @@ -1,7 +1,8 @@ from .simple_browser_tool import SimpleBrowserTool -from .backend import ExaBackend +from .backend import ExaBackend, YouComBackend __all__ = [ "SimpleBrowserTool", "ExaBackend", + "YouComBackend", ] diff --git a/gpt_oss/tools/simple_browser/backend.py b/gpt_oss/tools/simple_browser/backend.py index 03bdf566..cab2eff3 100644 --- a/gpt_oss/tools/simple_browser/backend.py +++ b/gpt_oss/tools/simple_browser/backend.py @@ -3,6 +3,7 @@ """ import functools +import asyncio import logging import os from abc import abstractmethod @@ -87,6 +88,24 @@ async def search( async def fetch(self, url: str, session: ClientSession) -> PageContents: pass + async def _post(self, session: ClientSession, endpoint: str, payload: dict) -> dict: + headers = {"x-api-key": self._get_api_key()} + async with session.post(f"{self.BASE_URL}{endpoint}", json=payload, headers=headers) as resp: + if resp.status != 200: + raise BackendError( + f"{self.__class__.__name__} error {resp.status}: {await resp.text()}" + ) + return await resp.json() + + async def _get(self, session: ClientSession, endpoint: str, params: dict) -> dict: + headers = {"x-api-key": self._get_api_key()} + async with session.get(f"{self.BASE_URL}{endpoint}", params=params, headers=headers) as resp: + if resp.status != 200: + raise BackendError( + f"{self.__class__.__name__} error {resp.status}: {await resp.text()}" + ) + return await resp.json() + @chz.chz(typecheck=True) class ExaBackend(Backend): @@ -106,14 +125,6 @@ def _get_api_key(self) -> str: raise BackendError("Exa API key not provided") return key - async def _post(self, session: ClientSession, endpoint: str, payload: dict) -> dict: - headers = {"x-api-key": self._get_api_key()} - async with session.post(f"{self.BASE_URL}{endpoint}", json=payload, headers=headers) as resp: - if resp.status != 200: - raise BackendError( - f"Exa API error {resp.status}: {await 
resp.text()}" - ) - return await resp.json() async def search( self, query: str, topn: int, session: ClientSession @@ -164,3 +175,88 @@ async def fetch(self, url: str, session: ClientSession) -> PageContents: display_urls=True, session=session, ) + +@chz.chz(typecheck=True) +class YouComBackend(Backend): + """Backend that uses the You.com Search API.""" + + source: str = chz.field(doc="Description of the backend source") + + BASE_URL: str = "https://api.ydc-index.io" + + def _get_api_key(self) -> str: + key = os.environ.get("YDC_API_KEY") + if not key: + raise BackendError("You.com API key not provided") + return key + + + async def search( + self, query: str, topn: int, session: ClientSession + ) -> PageContents: + data = await self._get( + session, + "/v1/search", + {"query": query, "count": topn}, + ) + # make a simple HTML page to work with browser format + web_titles_and_urls, news_titles_and_urls = [], [] + if "web" in data["results"]: + web_titles_and_urls = [ + (result["title"], result["url"], result["snippets"]) + for result in data["results"]["web"] + ] + if "news" in data["results"]: + news_titles_and_urls = [ + (result["title"], result["url"], result["description"]) + for result in data["results"]["news"] + ] + titles_and_urls = web_titles_and_urls + news_titles_and_urls + html_page = f""" + +

Search Results

+ + +""" + + return process_html( + html=html_page, + url="", + title=query, + display_urls=True, + session=session, + ) + + async def fetch(self, url: str, session: ClientSession) -> PageContents: + is_view_source = url.startswith(VIEW_SOURCE_PREFIX) + if is_view_source: + url = url[len(VIEW_SOURCE_PREFIX) :] + data = await self._post( + session, + "/v1/contents", + {"urls": [url], "livecrawl_formats": "html"}, + ) + if not data: + raise BackendError(f"No contents returned for {url}") + if "html" not in data[0]: + raise BackendError(f"No HTML returned for {url}") + return process_html( + html=data[0].get("html", ""), + url=url, + title=data[0].get("title", ""), + display_urls=True, + session=session, + ) + + +async def main(): + backend = YouComBackend(source="web") + async with ClientSession() as session: + response = await backend.search(query="taylor swift", topn=10, session=session) + fetch_response = await backend.fetch(url="https://www.imdb.com/name/nm2357847/", session=session) + return response, fetch_response + +if __name__ == "__main__": + response, fetch_response = asyncio.run(main()) \ No newline at end of file diff --git a/tests/gpt_oss/tools/simple_browser/test_backend.py b/tests/gpt_oss/tools/simple_browser/test_backend.py new file mode 100644 index 00000000..dc7306af --- /dev/null +++ b/tests/gpt_oss/tools/simple_browser/test_backend.py @@ -0,0 +1,64 @@ +import pytest +from typing import Generator, Any +from unittest import mock +from aiohttp import ClientSession + +from gpt_oss.tools.simple_browser.backend import YouComBackend + +class MockAiohttpResponse: + """Mocks responses for get/post requests from async libraries.""" + + def __init__(self, json: dict, status: int): + self._json = json + self.status = status + + async def json(self): + return self._json + + async def __aexit__(self, exc_type, exc, tb): + pass + + async def __aenter__(self): + return self + +def test_youcom_backend(): + backend = YouComBackend(source="web") + assert 
backend.source == "web" + +@pytest.mark.asyncio +@mock.patch("aiohttp.ClientSession.get") +async def test_youcom_backend_search(mock_session_get): + backend = YouComBackend(source="web") + api_response = { + "results": { + "web": [ + {"title": "Web Result 1", "url": "https://www.example.com/web1", "snippets": "Web Result 1 snippets"}, + {"title": "Web Result 2", "url": "https://www.example.com/web2", "snippets": "Web Result 2 snippets"}, + ], + "news": [ + {"title": "News Result 1", "url": "https://www.example.com/news1", "description": "News Result 1 description"}, + {"title": "News Result 2", "url": "https://www.example.com/news2", "description": "News Result 2 description"}, + ], + } + } + mock_session_get.return_value = MockAiohttpResponse(api_response, 200) + async with ClientSession() as session: + result = await backend.search(query="test", topn=10, session=session) + assert result.title == "test" + assert result.urls == {"0": "https://www.example.com/web1", "1": "https://www.example.com/web2", "2": "https://www.example.com/news1", "3": "https://www.example.com/news2"} + +@pytest.mark.asyncio +@mock.patch("aiohttp.ClientSession.post") +async def test_youcom_backend_fetch(mock_session_get): + backend = YouComBackend(source="web") + api_response = [ + {"title": "Fetch Result 1", "url": "https://www.example.com/fetch1", "html": "
Fetch Result 1 text
"}, + ] + mock_session_get.return_value = MockAiohttpResponse(api_response, 200) + async with ClientSession() as session: + result = await backend.fetch(url="https://www.example.com/fetch1", session=session) + assert result.title == "Fetch Result 1" + assert result.text == "\nURL: https://www.example.com/fetch1\nFetch Result 1 text" + + + \ No newline at end of file From ebfe2b414eea07fda1eefeabb1040e301c158e2c Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 12:58:17 -0700 Subject: [PATCH 2/6] change key name --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d2bf140..26fef4d9 100644 --- a/README.md +++ b/README.md @@ -446,7 +446,7 @@ from openai_harmony import SystemContent, Message, Conversation, Role, load_harm encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) -# Exa backend requires you to have set the EXA_API_KEY environment variable +# Exa backend requires you to have set the YDC_API_KEY environment variable backend = YouComBackend( source="web", ) From 9930a0a3e15f9544b5f49cad72710ccb3d1f731c Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 13:07:22 -0700 Subject: [PATCH 3/6] update tests in order to mock API key --- .../tools/simple_browser/test_backend.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/gpt_oss/tools/simple_browser/test_backend.py b/tests/gpt_oss/tools/simple_browser/test_backend.py index dc7306af..ab0dc780 100644 --- a/tests/gpt_oss/tools/simple_browser/test_backend.py +++ b/tests/gpt_oss/tools/simple_browser/test_backend.py @@ -21,6 +21,10 @@ async def __aexit__(self, exc_type, exc, tb): async def __aenter__(self): return self +def mock_os_environ_get(name: str, default: Any = "test_api_key"): + assert name in ["YDC_API_KEY"] + return default + def test_youcom_backend(): backend = YouComBackend(source="web") assert backend.source == "web" @@ -41,11 +45,12 @@ async def 
test_youcom_backend_search(mock_session_get): ], } } - mock_session_get.return_value = MockAiohttpResponse(api_response, 200) - async with ClientSession() as session: - result = await backend.search(query="test", topn=10, session=session) - assert result.title == "test" - assert result.urls == {"0": "https://www.example.com/web1", "1": "https://www.example.com/web2", "2": "https://www.example.com/news1", "3": "https://www.example.com/news2"} + with mock.patch("os.environ.get", wraps=mock_os_environ_get): + mock_session_get.return_value = MockAiohttpResponse(api_response, 200) + async with ClientSession() as session: + result = await backend.search(query="test", topn=10, session=session) + assert result.title == "test" + assert result.urls == {"0": "https://www.example.com/web1", "1": "https://www.example.com/web2", "2": "https://www.example.com/news1", "3": "https://www.example.com/news2"} @pytest.mark.asyncio @mock.patch("aiohttp.ClientSession.post") @@ -54,11 +59,12 @@ async def test_youcom_backend_fetch(mock_session_get): api_response = [ {"title": "Fetch Result 1", "url": "https://www.example.com/fetch1", "html": "
Fetch Result 1 text
"}, ] - mock_session_get.return_value = MockAiohttpResponse(api_response, 200) - async with ClientSession() as session: - result = await backend.fetch(url="https://www.example.com/fetch1", session=session) - assert result.title == "Fetch Result 1" - assert result.text == "\nURL: https://www.example.com/fetch1\nFetch Result 1 text" + with mock.patch("os.environ.get", wraps=mock_os_environ_get): + mock_session_get.return_value = MockAiohttpResponse(api_response, 200) + async with ClientSession() as session: + result = await backend.fetch(url="https://www.example.com/fetch1", session=session) + assert result.title == "Fetch Result 1" + assert result.text == "\nURL: https://www.example.com/fetch1\nFetch Result 1 text" \ No newline at end of file From 4a329bbe3b920f5e8173e418141fadacccbdded8 Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 14:56:07 -0700 Subject: [PATCH 4/6] address changes --- README.md | 4 ++-- gpt-oss-mcp-server/browser_server.py | 12 +++++++++--- gpt_oss/responses_api/api_server.py | 13 +++++++++---- gpt_oss/tools/simple_browser/backend.py | 10 ---------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 26fef4d9..ac788b28 100644 --- a/README.md +++ b/README.md @@ -426,7 +426,7 @@ codex -p oss ### Browser > [!WARNING] -> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`YouComBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. +> This implementation is purely for educational purposes and should not be used in production. You should implement your own equivalent of the [`YouComBackend`](gpt_oss/tools/simple_browser/backend.py) class with your own browsing environment. Currently we have available `YouComBackend` and `ExaBackend`. 
Both gpt-oss models were trained with the capability to browse using the `browser` tool that exposes the following three methods: @@ -446,7 +446,7 @@ from openai_harmony import SystemContent, Message, Conversation, Role, load_harm encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) -# Exa backend requires you to have set the YDC_API_KEY environment variable +# You.com backend requires you to have set the YDC_API_KEY environment variable backend = YouComBackend( source="web", ) diff --git a/gpt-oss-mcp-server/browser_server.py b/gpt-oss-mcp-server/browser_server.py index 423a8454..b37a63a6 100644 --- a/gpt-oss-mcp-server/browser_server.py +++ b/gpt-oss-mcp-server/browser_server.py @@ -1,3 +1,4 @@ +import os from collections.abc import AsyncIterator from contextlib import asynccontextmanager from dataclasses import dataclass, field @@ -5,8 +6,7 @@ from mcp.server.fastmcp import Context, FastMCP from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import YouComBackend - +from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend @dataclass class AppContext: @@ -14,7 +14,13 @@ class AppContext: def create_or_get_browser(self, session_id: str) -> SimpleBrowserTool: if session_id not in self.browsers: - backend = YouComBackend(source="web") + tool_backend = os.getenv("BROWSER_BACKEND", "exa") + if tool_backend == "youcom": + backend = YouComBackend(source="web") + elif tool_backend == "exa": + backend = ExaBackend(source="web") + else: + raise ValueError(f"Invalid tool backend: {tool_backend}") self.browsers[session_id] = SimpleBrowserTool(backend=backend) return self.browsers[session_id] diff --git a/gpt_oss/responses_api/api_server.py b/gpt_oss/responses_api/api_server.py index 37fddd7a..8eb053f1 100644 --- a/gpt_oss/responses_api/api_server.py +++ b/gpt_oss/responses_api/api_server.py @@ -1,3 +1,4 @@ +import os import datetime import uuid from typing import Callable, Literal, Optional 
@@ -20,7 +21,7 @@ from gpt_oss.tools.python_docker.docker_tool import PythonTool from gpt_oss.tools.simple_browser import SimpleBrowserTool -from gpt_oss.tools.simple_browser.backend import YouComBackend +from gpt_oss.tools.simple_browser.backend import YouComBackend, ExaBackend from .events import ( ResponseCodeInterpreterCallCompleted, @@ -904,9 +905,13 @@ async def generate(body: ResponsesRequest, request: Request): ) if use_browser_tool: - backend = YouComBackend( - source="web", - ) + tool_backend = os.getenv("BROWSER_BACKEND", "exa") + if tool_backend == "youcom": + backend = YouComBackend(source="web") + elif tool_backend == "exa": + backend = ExaBackend(source="web") + else: + raise ValueError(f"Invalid tool backend: {tool_backend}") browser_tool = SimpleBrowserTool(backend=backend) else: browser_tool = None diff --git a/gpt_oss/tools/simple_browser/backend.py b/gpt_oss/tools/simple_browser/backend.py index cab2eff3..33daf8d6 100644 --- a/gpt_oss/tools/simple_browser/backend.py +++ b/gpt_oss/tools/simple_browser/backend.py @@ -250,13 +250,3 @@ async def fetch(self, url: str, session: ClientSession) -> PageContents: session=session, ) - -async def main(): - backend = YouComBackend(source="web") - async with ClientSession() as session: - response = await backend.search(query="taylor swift", topn=10, session=session) - fetch_response = await backend.fetch(url="https://www.imdb.com/name/nm2357847/", session=session) - return response, fetch_response - -if __name__ == "__main__": - response, fetch_response = asyncio.run(main()) \ No newline at end of file From e789548f658715abbf1c83d6f8d25be9d2013101 Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 14:59:32 -0700 Subject: [PATCH 5/6] address changes --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ac788b28..84fcf644 100644 --- a/README.md +++ b/README.md @@ -446,7 +446,9 @@ from openai_harmony import SystemContent, Message, 
Conversation, Role, load_harm encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) -# You.com backend requires you to have set the YDC_API_KEY environment variable +# The browser backend you choose determines which environment variable must be set: +# the You.com backend requires the YDC_API_KEY environment variable, +# while the Exa backend requires the EXA_API_KEY environment variable backend = YouComBackend( source="web", ) From ed5b48d9b986e26655e9ec8198570dbfa070b50f Mon Sep 17 00:00:00 2001 From: Bojan Babic Date: Wed, 3 Sep 2025 15:02:39 -0700 Subject: [PATCH 6/6] update README --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 84fcf644..0104cec4 100644 --- a/README.md +++ b/README.md @@ -452,6 +452,9 @@ encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) backend = YouComBackend( source="web", ) +# backend = ExaBackend( +# source="web", +# ) browser_tool = SimpleBrowserTool(backend=backend) # create a basic system prompt