diff --git a/src/linkup/client.py b/src/linkup/client.py index 8d387ac..6f3a9b0 100644 --- a/src/linkup/client.py +++ b/src/linkup/client.py @@ -240,6 +240,7 @@ def fetch( url: str, include_raw_html: Optional[bool] = None, render_js: Optional[bool] = None, + extract_images: Optional[bool] = None, ) -> LinkupFetchResponse: """Fetch the content of a web page using the Linkup API `fetch` endpoint. @@ -251,6 +252,8 @@ def fetch( url: The URL of the web page to fetch. include_raw_html: Whether to include the raw HTML of the webpage in the response. render_js: Whether the API should render the JavaScript of the webpage. + extract_images: Whether the API should extract images from the webpage and return them + in the response. Returns: The response of the web page fetch, containing the web page content. @@ -263,6 +266,7 @@ def fetch( url=url, include_raw_html=include_raw_html, render_js=render_js, + extract_images=extract_images, ) response: httpx.Response = self._request( @@ -281,6 +285,7 @@ async def async_fetch( url: str, include_raw_html: Optional[bool] = None, render_js: Optional[bool] = None, + extract_images: Optional[bool] = None, ) -> LinkupFetchResponse: """Asynchronously fetch the content of a web page using the Linkup API `fetch` endpoint. @@ -292,6 +297,8 @@ async def async_fetch( url: The URL of the web page to fetch. include_raw_html: Whether to include the raw HTML of the webpage in the response. render_js: Whether the API should render the JavaScript of the webpage. + extract_images: Whether the API should extract images from the webpage and return them + in the response. Returns: The response of the web page fetch, containing the web page content. 
@@ -304,6 +311,7 @@ async def async_fetch( url=url, include_raw_html=include_raw_html, render_js=render_js, + extract_images=extract_images, ) response: httpx.Response = await self._async_request( @@ -480,6 +488,7 @@ def _get_fetch_params( url: str, include_raw_html: Optional[bool], render_js: Optional[bool], + extract_images: Optional[bool], ) -> dict[str, Union[str, bool]]: params: dict[str, Union[str, bool]] = { "url": url, @@ -488,6 +497,8 @@ def _get_fetch_params( params["includeRawHtml"] = include_raw_html if render_js is not None: params["renderJs"] = render_js + if extract_images is not None: + params["extractImages"] = extract_images return params def _parse_search_response( diff --git a/src/linkup/types.py b/src/linkup/types.py index 810a539..1038eef 100644 --- a/src/linkup/types.py +++ b/src/linkup/types.py @@ -81,15 +81,29 @@ class LinkupSearchStructuredResponse(BaseModel): sources: list[Union[LinkupSearchTextResult, LinkupSearchImageResult]] +class LinkupFetchImageExtraction(BaseModel): + """An image extraction from a Linkup web page fetch. + + Attributes: + alt: The alt text of the image. + url: The URL of the image. + """ + + alt: str + url: str + + class LinkupFetchResponse(BaseModel): """The response from a Linkup web page fetch. Attributes: markdown: The cleaned up markdown content. raw_html: The optional raw HTML content. + images: The optional list of extracted images, each with its alt text and URL. """ model_config = ConfigDict(populate_by_name=True) markdown: str raw_html: Optional[str] = Field(default=None, validation_alias="rawHtml") + images: Optional[list[LinkupFetchImageExtraction]] = Field(default=None)