diff --git a/examples/2_sourced_answer_search.py b/examples/2_sourced_answer_search.py index 084893c..e1ee3b8 100644 --- a/examples/2_sourced_answer_search.py +++ b/examples/2_sourced_answer_search.py @@ -20,5 +20,6 @@ query="What are the 3 major events in the life of Abraham Lincoln ?", depth="standard", # or "deep" output_type="sourcedAnswer", + include_inline_citations=False, ) print(response) diff --git a/examples/3_structured_search.py b/examples/3_structured_search.py index a7606a3..3ca9f23 100644 --- a/examples/3_structured_search.py +++ b/examples/3_structured_search.py @@ -31,6 +31,7 @@ class Events(BaseModel): query="What are the 3 major events in the life of Abraham Lincoln?", depth="standard", # or "deep" output_type="structured", - structured_output_schema=Events, + structured_output_schema=Events, # or json.dumps(Events.model_json_schema()) + include_sources=False, ) print(response) diff --git a/src/linkup/__init__.py b/src/linkup/__init__.py index 82b5c7f..81fbe0a 100644 --- a/src/linkup/__init__.py +++ b/src/linkup/__init__.py @@ -13,6 +13,7 @@ LinkupFetchResponse, LinkupSearchImageResult, LinkupSearchResults, + LinkupSearchStructuredResponse, LinkupSearchTextResult, LinkupSource, LinkupSourcedAnswer, @@ -31,6 +32,7 @@ "LinkupFetchResponse", "LinkupSearchImageResult", "LinkupSearchResults", + "LinkupSearchStructuredResponse", "LinkupSearchTextResult", "LinkupSource", "LinkupSourcedAnswer", diff --git a/src/linkup/client.py b/src/linkup/client.py index 51d565f..8d387ac 100644 --- a/src/linkup/client.py +++ b/src/linkup/client.py @@ -16,7 +16,12 @@ LinkupTooManyRequestsError, LinkupUnknownError, ) -from linkup.types import LinkupFetchResponse, LinkupSearchResults, LinkupSourcedAnswer +from linkup.types import ( + LinkupFetchResponse, + LinkupSearchResults, + LinkupSearchStructuredResponse, + LinkupSourcedAnswer, +) class LinkupClient: @@ -58,6 +63,7 @@ def search( exclude_domains: Optional[list[str]] = None, include_domains: Optional[list[str]] = None, include_inline_citations: Optional[bool] = None, + include_sources: Optional[bool] = None, ) -> Any: """Perform a web search using the Linkup API `search` endpoint. @@ -85,13 +91,18 @@ def search( include_domains: If you want the search to only return results from certain domains. include_inline_citations: If output_type is "sourcedAnswer", indicate whether the answer should include inline citations. + include_sources: If output_type is "structured", indicate whether the answer should + include sources. This will modify the schema of the structured response. Returns: - The Linkup API search result. If output_type is "searchResults", the result will be a - linkup.LinkupSearchResults. If output_type is "sourcedAnswer", the result will be a - linkup.LinkupSourcedAnswer. If output_type is "structured", the result will be - either an instance of the provided pydantic.BaseModel, or an arbitrary data - structure, following structured_output_schema. + The Linkup API search result, which can have different types based on the parameters: + - LinkupSearchResults if output_type is "searchResults" + - LinkupSourcedAnswer if output_type is "sourcedAnswer" + - the provided pydantic.BaseModel or an arbitrary data structure if output_type is + "structured" and include_sources is False + - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary + data structure as data field, if output_type is "structured" and include_sources is + True Raises: TypeError: If structured_output_schema is not provided or is not a string or a @@ -113,6 +124,7 @@ def search( exclude_domains=exclude_domains, include_domains=include_domains, include_inline_citations=include_inline_citations, + include_sources=include_sources, ) response: httpx.Response = self._request( @@ -128,6 +140,7 @@ def search( response=response, output_type=output_type, structured_output_schema=structured_output_schema, + include_sources=include_sources, ) async def async_search( @@ -142,6 +155,7 @@ async def async_search( exclude_domains: Optional[list[str]] = None, include_domains: Optional[list[str]] = None, include_inline_citations: Optional[bool] = None, + include_sources: Optional[bool] = None, ) -> Any: """Asynchronously perform a web search using the Linkup API `search` endpoint. @@ -169,13 +183,18 @@ async def async_search( include_domains: If you want the search to only return results from certain domains. include_inline_citations: If output_type is "sourcedAnswer", indicate whether the answer should include inline citations. + include_sources: If output_type is "structured", indicate whether the answer should + include sources. This will modify the schema of the structured response. Returns: - The Linkup API search result. If output_type is "searchResults", the result will be a - linkup.LinkupSearchResults. If output_type is "sourcedAnswer", the result will be a - linkup.LinkupSourcedAnswer. If output_type is "structured", the result will be - either an instance of the provided pydantic.BaseModel, or an arbitrary data - structure, following structured_output_schema. + The Linkup API search result, which can have different types based on the parameters: + - LinkupSearchResults if output_type is "searchResults" + - LinkupSourcedAnswer if output_type is "sourcedAnswer" + - the provided pydantic.BaseModel or an arbitrary data structure if output_type is + "structured" and include_sources is False + - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary + data structure as data field, if output_type is "structured" and include_sources is + True Raises: TypeError: If structured_output_schema is not provided or is not a string or a @@ -197,6 +216,7 @@ async def async_search( exclude_domains=exclude_domains, include_domains=include_domains, include_inline_citations=include_inline_citations, + include_sources=include_sources, ) response: httpx.Response = await self._async_request( @@ -212,6 +232,7 @@ async def async_search( response=response, output_type=output_type, structured_output_schema=structured_output_schema, + include_sources=include_sources, ) def fetch( @@ -419,6 +440,7 @@ def _get_search_params( exclude_domains: Optional[list[str]], include_domains: Optional[list[str]], include_inline_citations: Optional[bool], + include_sources: Optional[bool], ) -> dict[str, Union[str, bool, list[str]]]: params: dict[str, Union[str, bool, list[str]]] = dict( q=query, @@ -448,6 +470,8 @@ def _get_search_params( params["includeDomains"] = include_domains if include_inline_citations is not None: params["includeInlineCitations"] = include_inline_citations + if include_sources is not None: + params["includeSources"] = include_sources return params @@ -471,23 +495,35 @@ def _parse_search_response( response: httpx.Response, output_type: Literal["searchResults", "sourcedAnswer", "structured"], structured_output_schema: Union[type[BaseModel], str, None], + include_sources: Optional[bool], ) -> Any: response_data: Any = response.json() - output_base_model: Optional[type[BaseModel]] = None if output_type == "searchResults": - output_base_model = LinkupSearchResults + return LinkupSearchResults.model_validate(response_data) elif output_type == "sourcedAnswer": - output_base_model = LinkupSourcedAnswer - elif ( - output_type == "structured" - and not isinstance(structured_output_schema, (str, type(None))) - and issubclass(structured_output_schema, BaseModel) - ): - output_base_model = structured_output_schema - - if output_base_model is None: + return LinkupSourcedAnswer.model_validate(response_data) + elif output_type == "structured": + if structured_output_schema is None: + raise ValueError( + "structured_output_schema must be provided when output_type is 'structured'" + ) + # HACK: we assume that `include_sources` will default to False, since the API output can + # be arbitrary so we can't guess if it includes sources or not + if include_sources: + if not isinstance(structured_output_schema, str) and issubclass( + structured_output_schema, BaseModel + ): + response_data["data"] = structured_output_schema.model_validate( + response_data["data"] + ) + return LinkupSearchStructuredResponse.model_validate(response_data) + if not isinstance(structured_output_schema, str) and issubclass( + structured_output_schema, BaseModel + ): + return structured_output_schema.model_validate(response_data) return response_data - return output_base_model.model_validate(response_data) + else: + raise ValueError(f"Unexpected output_type value: '{output_type}'") def _parse_fetch_response(self, response: httpx.Response) -> LinkupFetchResponse: return LinkupFetchResponse.model_validate(response.json()) diff --git a/src/linkup/types.py b/src/linkup/types.py index af509aa..810a539 100644 --- a/src/linkup/types.py +++ b/src/linkup/types.py @@ -1,4 +1,4 @@ -from typing import Literal, Optional, Union +from typing import Any, Literal, Optional, Union from pydantic import BaseModel, ConfigDict, Field @@ -69,6 +69,18 @@ class LinkupSourcedAnswer(BaseModel): sources: list[LinkupSource] +class LinkupSearchStructuredResponse(BaseModel): + """A Linkup `search` structured response, with the sources supporting it. + + Attributes: + data: The answer data, either as a Pydantic model or an arbitrary JSON structure. + sources: The sources supporting the answer. + """ + + data: Any + sources: list[Union[LinkupSearchTextResult, LinkupSearchImageResult]] + + class LinkupFetchResponse(BaseModel): """The response from a Linkup web page fetch. diff --git a/tests/unit/client_test.py b/tests/unit/client_test.py index 7acfa47..b6186b5 100644 --- a/tests/unit/client_test.py +++ b/tests/unit/client_test.py @@ -22,7 +22,12 @@ LinkupNoResultError, LinkupTooManyRequestsError, ) -from linkup.types import LinkupFetchResponse, LinkupSearchImageResult, LinkupSearchTextResult +from linkup.types import ( + LinkupFetchResponse, + LinkupSearchImageResult, + LinkupSearchStructuredResponse, + LinkupSearchTextResult, +) class Company(BaseModel): @@ -181,6 +186,110 @@ class Company(BaseModel): website_url="https://www.linkup.so/", ), ), + ( + { + "query": "query", + "depth": "standard", + "output_type": "structured", + "structured_output_schema": Company, + "include_sources": True, + }, + { + "q": "query", + "depth": "standard", + "outputType": "structured", + "structuredOutputSchema": json.dumps(Company.model_json_schema()), + "includeSources": True, + }, + b""" + { + "data": { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/" + }, + "sources": [ + { + "type": "text", + "name": "foo", + "url": "https://foo.com", + "content": "lorem ipsum dolor sit amet" + }, + {"type": "image", "name": "bar", "url": "https://bar.com"} + ] + } + """, + LinkupSearchStructuredResponse( + data=Company( + name="Linkup", + founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + creation_date="2024", + website_url="https://www.linkup.so/", + ), + sources=[ + LinkupSearchTextResult( + type="text", + name="foo", + url="https://foo.com", + content="lorem ipsum dolor sit amet", + ), + LinkupSearchImageResult(type="image", name="bar", url="https://bar.com"), + ], + ), + ), + ( + { + "query": "query", + "depth": "standard", + "output_type": "structured", + "structured_output_schema": json.dumps(Company.model_json_schema()), + "include_sources": True, + }, + { + "q": "query", + "depth": "standard", + "outputType": "structured", + "structuredOutputSchema": json.dumps(Company.model_json_schema()), + "includeSources": True, + }, + b""" + { + "data": { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/" + }, + "sources": [ + { + "type": "text", + "name": "foo", + "url": "https://foo.com", + "content": "lorem ipsum dolor sit amet" + }, + {"type": "image", "name": "bar", "url": "https://bar.com"} + ] + } + """, + LinkupSearchStructuredResponse( + data=dict( + name="Linkup", + founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + creation_date="2024", + website_url="https://www.linkup.so/", + ), + sources=[ + LinkupSearchTextResult( + type="text", + name="foo", + url="https://foo.com", + content="lorem ipsum dolor sit amet", + ), + LinkupSearchImageResult(type="image", name="bar", url="https://bar.com"), + ], + ), + ), ]