diff --git a/README.md b/README.md index 798f73b..96669cb 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ A [Python SDK](https://docs.linkup.so/pages/sdk/python/python) for the ## 🌟 Features - ✅ **Simple and intuitive API client.** -- 🔍 **Supports both standard and deep search queries.** +- 🔍 **Supports all Linkup entrypoints and parameters.** - ⚡ **Supports synchronous and asynchronous requests.** - 🔒 **Handles authentication and request management.** ## 📦 Installation -Simply install the Linkup Python SDK using `pip`: +Simply install the Linkup Python SDK like any Python package, for instance using `pip`: ```bash pip install linkup-sdk @@ -66,7 +66,10 @@ pip install linkup-sdk ### 📋 Examples -All search queries can be used with two very different modes: +#### 📝 Search + +The `search` function can be used to perform web searches. It supports two very different +complexity modes: - with `depth="standard"`, the search will be straightforward and fast, suited for relatively simple queries (e.g. "What's the weather in Paris today?") @@ -74,22 +77,63 @@ All search queries can be used with two very different modes: but it will be able to solve more complex queries (e.g. 
"What is the company profile of LangChain accross the last few years, and how does it compare to its concurrents?") -#### 📝 Standard Search Query +The `search` function also supports three output types: -```python -from linkup import LinkupClient +- with `output_type="searchResults"`, the search will return a list of relevant documents +- with `output_type="sourcedAnswer"`, the search will return a concise answer with sources +- with `output_type="structured"`, the search will return a structured output according to a + user-defined schema -# Initialize the client (API key can be read from the environment variable or passed as an argument) -client = LinkupClient() +```python +from linkup import LinkupClient, LinkupSourcedAnswer +from typing import Any -# Perform a search query -search_response = client.search( +client = LinkupClient() # API key can be read from the environment variable or passed as an argument +search_response: Any = client.search( query="What are the 3 major events in the life of Abraham Lincoln?", depth="deep", # "standard" or "deep" output_type="sourcedAnswer", # "searchResults" or "sourcedAnswer" or "structured" structured_output_schema=None, # must be filled if output_type is "structured" ) -print(search_response) +assert isinstance(search_response, LinkupSourcedAnswer) +print(search_response.model_dump()) +# Response: +# { +# "answer": "The three major events in the life of Abraham Lincoln are: 1. ...", +# "sources": [ +# { +# "name": "HISTORY", +# "url": "https://www.history.com/topics/us-presidents/abraham-lincoln", +# "snippet": "Abraham Lincoln - Facts & Summary - HISTORY ..." +# }, +# ... +# ] +# } +``` + +#### 🪝 Fetch + +The `fetch` function can be used to retrieve the content of a given web page in a cleaned up +markdown format. + +You can use the `render_js` flag to execute the JavaScript code of the page before returning the +content, and set `include_raw_html` to also include the raw HTML of the page in the response. 
+ +```python +from linkup import LinkupClient, LinkupFetchResponse + +client = LinkupClient() # API key can be read from the environment variable or passed as an argument +fetch_response: LinkupFetchResponse = client.fetch( + url="https://docs.linkup.so", + render_js=False, + include_raw_html=True, +) +print(fetch_response.model_dump()) +# Response: +# { +# "markdown": "Get started for free, no credit card required...", +# "raw_html": "......" +# } ``` #### 📚 More Examples diff --git a/examples/5_fetch.py b/examples/5_fetch.py new file mode 100644 index 0000000..9c1b4d6 --- /dev/null +++ b/examples/5_fetch.py @@ -0,0 +1,17 @@ +""" +The Linkup fetch can output the raw content of a web page. +""" + +from dotenv import load_dotenv +from rich import print + +from linkup import LinkupClient + +load_dotenv() +client = LinkupClient() + +response = client.fetch( + url="https://docs.linkup.so", + render_js=False, +) +print(response) diff --git a/src/linkup/__init__.py b/src/linkup/__init__.py index 22dd0b3..82b5c7f 100644 --- a/src/linkup/__init__.py +++ b/src/linkup/__init__.py @@ -1,15 +1,16 @@ from ._version import __version__ -from .client import ( - LinkupClient, -) +from .client import LinkupClient from .errors import ( LinkupAuthenticationError, + LinkupFailedFetchError, LinkupInsufficientCreditError, LinkupInvalidRequestError, LinkupNoResultError, + LinkupTooManyRequestsError, LinkupUnknownError, ) from .types import ( + LinkupFetchResponse, LinkupSearchImageResult, LinkupSearchResults, LinkupSearchTextResult, @@ -21,13 +22,16 @@ "__version__", "LinkupClient", "LinkupAuthenticationError", + "LinkupFailedFetchError", + "LinkupInsufficientCreditError", "LinkupInvalidRequestError", - "LinkupUnknownError", "LinkupNoResultError", - "LinkupInsufficientCreditError", - "LinkupSearchTextResult", + "LinkupTooManyRequestsError", + "LinkupUnknownError", + "LinkupFetchResponse", "LinkupSearchImageResult", "LinkupSearchResults", + "LinkupSearchTextResult", "LinkupSource", 
"LinkupSourcedAnswer", ] diff --git a/src/linkup/client.py b/src/linkup/client.py index a5e6fe5..90c6823 100644 --- a/src/linkup/client.py +++ b/src/linkup/client.py @@ -9,13 +9,14 @@ from linkup._version import __version__ from linkup.errors import ( LinkupAuthenticationError, + LinkupFailedFetchError, LinkupInsufficientCreditError, LinkupInvalidRequestError, LinkupNoResultError, LinkupTooManyRequestsError, LinkupUnknownError, ) -from linkup.types import LinkupSearchResults, LinkupSourcedAnswer +from linkup.types import LinkupFetchResponse, LinkupSearchResults, LinkupSourcedAnswer class LinkupClient: @@ -115,9 +116,9 @@ def search( timeout=None, ) if response.status_code != 200: - self._raise_linkup_error(response) + self._raise_linkup_error(response=response) - return self._validate_search_response( + return self._parse_search_response( response=response, output_type=output_type, structured_output_schema=structured_output_schema, @@ -191,14 +192,80 @@ async def async_search( timeout=None, ) if response.status_code != 200: - self._raise_linkup_error(response) + self._raise_linkup_error(response=response) - return self._validate_search_response( + return self._parse_search_response( response=response, output_type=output_type, structured_output_schema=structured_output_schema, ) + def fetch( + self, + url: str, + render_js: bool = False, + include_raw_html: bool = False, + ) -> LinkupFetchResponse: + """Fetch the content of a web page. + + Args: + url: The URL of the web page to fetch. + render_js: Whether the API should render the JavaScript of the webpage. + include_raw_html: Whether to include the raw HTML of the webpage in the response. + + Returns: + The response of the web page fetch, containing the web page content. 
+ """ + params: dict[str, Union[str, bool]] = self._get_fetch_params( + url=url, + render_js=render_js, + include_raw_html=include_raw_html, + ) + + response: httpx.Response = self._request( + method="POST", + url="/fetch", + json=params, + timeout=None, + ) + if response.status_code != 200: + self._raise_linkup_error(response=response) + + return self._parse_fetch_response(response=response) + + async def async_fetch( + self, + url: str, + render_js: bool = False, + include_raw_html: bool = False, + ) -> LinkupFetchResponse: + """Asynchronously fetch the content of a web page. + + Args: + url: The URL of the web page to fetch. + render_js: Whether the API should render the JavaScript of the webpage. + include_raw_html: Whether to include the raw HTML of the webpage in the response. + + Returns: + The response of the web page fetch, containing the web page content. + """ + params: dict[str, Union[str, bool]] = self._get_fetch_params( + url=url, + render_js=render_js, + include_raw_html=include_raw_html, + ) + + response: httpx.Response = await self._async_request( + method="POST", + url="/fetch", + json=params, + timeout=None, + ) + if response.status_code != 200: + self._raise_linkup_error(response=response) + + return self._parse_fetch_response(response=response) + def _user_agent(self) -> str: # pragma: no cover return f"Linkup-Python/{self.__version__}" @@ -240,10 +307,9 @@ def _raise_linkup_error(self, response: httpx.Response) -> None: if "error" in error_data: error = error_data["error"] code = error.get("code", "") - message = error.get("message", "") + error_msg = error.get("message", "") details = error.get("details", []) - error_msg = f"{message}" if details and isinstance(details, list): for detail in details: if isinstance(detail, dict): @@ -258,6 +324,12 @@ def _raise_linkup_error(self, response: httpx.Response) -> None: "Try rephrasing you query.\n" f"Original error message: {error_msg}." 
) + if code == "FETCH_ERROR": + raise LinkupFailedFetchError( + "The Linkup API returned a fetch error (400). " + "The provided URL might not be found or can't be fetched.\n" + f"Original error message: {error_msg}." + ) else: raise LinkupInvalidRequestError( "The Linkup API returned an invalid request error (400). Make sure the " @@ -341,7 +413,19 @@ def _get_search_params( toDate=to_date.isoformat() if to_date is not None else date.today().isoformat(), ) - def _validate_search_response( + def _get_fetch_params( + self, + url: str, + render_js: bool, + include_raw_html: bool = False, + ) -> dict[str, Union[str, bool]]: + return dict( + url=url, + renderJs=render_js, + includeRawHtml=include_raw_html, + ) + + def _parse_search_response( self, response: httpx.Response, output_type: Literal["searchResults", "sourcedAnswer", "structured"], @@ -363,3 +447,6 @@ def _validate_search_response( if output_base_model is None: return response_data return output_base_model.model_validate(response_data) + + def _parse_fetch_response(self, response: httpx.Response) -> LinkupFetchResponse: + return LinkupFetchResponse.model_validate(response.json()) diff --git a/src/linkup/errors.py b/src/linkup/errors.py index eb5f71f..5d6c108 100644 --- a/src/linkup/errors.py +++ b/src/linkup/errors.py @@ -44,6 +44,16 @@ class LinkupTooManyRequestsError(Exception): pass +class LinkupFailedFetchError(Exception): + """Failed fetch error, raised when the Linkup API fetch returns a 400 status code. + + It is returned when the Linkup API fails to fetch the content of a URL due to technical + reasons. 
+ """ + + pass + + class LinkupUnknownError(Exception): """Unknown error, raised when the Linkup API returns an unknown status code.""" diff --git a/src/linkup/types.py b/src/linkup/types.py index c975bea..f5de4f5 100644 --- a/src/linkup/types.py +++ b/src/linkup/types.py @@ -1,6 +1,6 @@ -from typing import Literal, Union +from typing import Literal, Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, Field class LinkupSearchTextResult(BaseModel): @@ -72,3 +72,18 @@ class LinkupSourcedAnswer(BaseModel): answer: str sources: list[LinkupSource] + + +class LinkupFetchResponse(BaseModel): + """ + The response from a Linkup web page fetch. + + Attributes: + markdown: The cleaned up markdown content. + raw_html: The optional raw HTML content. + """ + + model_config = ConfigDict(populate_by_name=True) + + markdown: str + raw_html: Optional[str] = Field(default=None, validation_alias="rawHtml") diff --git a/tests/unit/client_test.py b/tests/unit/client_test.py index 886781f..2b32f7f 100644 --- a/tests/unit/client_test.py +++ b/tests/unit/client_test.py @@ -17,11 +17,12 @@ LinkupUnknownError, ) from linkup.errors import ( + LinkupFailedFetchError, LinkupInsufficientCreditError, LinkupNoResultError, LinkupTooManyRequestsError, ) -from linkup.types import LinkupSearchImageResult, LinkupSearchTextResult +from linkup.types import LinkupFetchResponse, LinkupSearchImageResult, LinkupSearchTextResult class Company(BaseModel): @@ -437,3 +438,170 @@ async def test_async_search_error( with pytest.raises(expected_exception): await client.async_search(query="query", depth="standard", output_type="searchResults") request_mock.assert_called_once() + + +test_fetch_parameters = [ + ( + {"url": "https://example.com"}, + {"url": "https://example.com", "renderJs": False, "includeRawHtml": False}, + b'{"markdown": "Some web page content"}', + LinkupFetchResponse(markdown="Some web page content", raw_html=None), + ), + ( + {"url": "https://example.com", 
"render_js": True}, + {"url": "https://example.com", "renderJs": True, "includeRawHtml": False}, + b'{"markdown": "#Some web page content"}', + LinkupFetchResponse(markdown="#Some web page content", raw_html=None), + ), + ( + {"url": "https://example.com", "include_raw_html": True}, + {"url": "https://example.com", "renderJs": False, "includeRawHtml": True}, + b'{"markdown": "#Some web page content", "rawHtml": "..."}', + LinkupFetchResponse(markdown="#Some web page content", raw_html="..."), + ), +] + + +@pytest.mark.parametrize( + "fetch_kwargs, expected_request_params, mock_request_response_content, expected_fetch_response", + test_fetch_parameters, +) +def test_fetch( + mocker: MockerFixture, + client: LinkupClient, + fetch_kwargs: dict[str, Any], + expected_request_params: dict[str, Any], + mock_request_response_content: bytes, + expected_fetch_response: Any, +) -> None: + request_mock = mocker.patch( + "linkup.client.LinkupClient._request", + return_value=Response( + status_code=200, + content=mock_request_response_content, + ), + ) + + fetch_response: Any = client.fetch(**fetch_kwargs) + request_mock.assert_called_once_with( + method="POST", + url="/fetch", + json=expected_request_params, + timeout=None, + ) + assert fetch_response == expected_fetch_response + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "fetch_kwargs, expected_request_params, mock_request_response_content, expected_fetch_response", + test_fetch_parameters, +) +async def test_async_fetch( + mocker: MockerFixture, + client: LinkupClient, + fetch_kwargs: dict[str, Any], + expected_request_params: dict[str, Any], + mock_request_response_content: bytes, + expected_fetch_response: Any, +) -> None: + request_mock = mocker.patch( + "linkup.client.LinkupClient._async_request", + return_value=Response( + status_code=200, + content=mock_request_response_content, + ), + ) + + fetch_response: Any = await client.async_fetch(**fetch_kwargs) + request_mock.assert_called_once_with( + method="POST", 
+ url="/fetch", + json=expected_request_params, + timeout=None, + ) + assert fetch_response == expected_fetch_response + + +test_fetch_error_parameters = [ + ( + 400, + b""" + { + "error": { + "code": "FETCH_ERROR", + "message": "Could not fetch the URL", + "details": [] + } + } + """, + LinkupFailedFetchError, + ), + ( + 400, + b""" + { + "error": { + "code": "VALIDATION_ERROR", + "message": "Validation failed", + "details": [ + { + "field": "url", + "message": "url must be a valid URL" + } + ] + } + } + """, + LinkupInvalidRequestError, + ), +] + + +@pytest.mark.parametrize( + "mock_request_response_status_code, mock_request_response_content, expected_exception", + test_fetch_error_parameters, +) +def test_fetch_error( + mocker: MockerFixture, + client: LinkupClient, + mock_request_response_status_code: int, + mock_request_response_content: bytes, + expected_exception: Any, +) -> None: + request_mock = mocker.patch( + "linkup.client.LinkupClient._request", + return_value=Response( + status_code=mock_request_response_status_code, + content=mock_request_response_content, + ), + ) + + with pytest.raises(expected_exception): + client.fetch(url="https://example.com") + request_mock.assert_called_once() + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "mock_request_response_status_code, mock_request_response_content, expected_exception", + test_fetch_error_parameters, +) +async def test_async_fetch_error( + mocker: MockerFixture, + client: LinkupClient, + mock_request_response_status_code: int, + mock_request_response_content: bytes, + expected_exception: Any, +) -> None: + request_mock = mocker.patch( + "linkup.client.LinkupClient._async_request", + return_value=Response( + status_code=mock_request_response_status_code, + content=mock_request_response_content, + ), + ) + + with pytest.raises(expected_exception): + await client.async_fetch(url="https://example.com") + request_mock.assert_called_once()