Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/linkup/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def fetch(
url: str,
include_raw_html: Optional[bool] = None,
render_js: Optional[bool] = None,
extract_images: Optional[bool] = None,
) -> LinkupFetchResponse:
"""Fetch the content of a web page using the Linkup API `fetch` endpoint.

Expand All @@ -251,6 +252,8 @@ def fetch(
url: The URL of the web page to fetch.
include_raw_html: Whether to include the raw HTML of the webpage in the response.
render_js: Whether the API should render the JavaScript of the webpage.
extract_images: Whether the API should extract images from the webpage and return them
in the response.

Returns:
The response of the web page fetch, containing the web page content.
Expand All @@ -263,6 +266,7 @@ def fetch(
url=url,
include_raw_html=include_raw_html,
render_js=render_js,
extract_images=extract_images,
)

response: httpx.Response = self._request(
Expand All @@ -281,6 +285,7 @@ async def async_fetch(
url: str,
include_raw_html: Optional[bool] = None,
render_js: Optional[bool] = None,
extract_images: Optional[bool] = None,
) -> LinkupFetchResponse:
"""Asynchronously fetch the content of a web page using the Linkup API `fetch` endpoint.

Expand All @@ -292,6 +297,8 @@ async def async_fetch(
url: The URL of the web page to fetch.
include_raw_html: Whether to include the raw HTML of the webpage in the response.
render_js: Whether the API should render the JavaScript of the webpage.
extract_images: Whether the API should extract images from the webpage and return them
in the response.

Returns:
The response of the web page fetch, containing the web page content.
Expand All @@ -304,6 +311,7 @@ async def async_fetch(
url=url,
include_raw_html=include_raw_html,
render_js=render_js,
extract_images=extract_images,
)

response: httpx.Response = await self._async_request(
Expand Down Expand Up @@ -480,6 +488,7 @@ def _get_fetch_params(
url: str,
include_raw_html: Optional[bool],
render_js: Optional[bool],
extract_images: Optional[bool],
) -> dict[str, Union[str, bool]]:
params: dict[str, Union[str, bool]] = {
"url": url,
Expand All @@ -488,6 +497,8 @@ def _get_fetch_params(
params["includeRawHtml"] = include_raw_html
if render_js is not None:
params["renderJs"] = render_js
if extract_images is not None:
params["extractImages"] = extract_images
return params

def _parse_search_response(
Expand Down
14 changes: 14 additions & 0 deletions src/linkup/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,29 @@ class LinkupSearchStructuredResponse(BaseModel):
sources: list[Union[LinkupSearchTextResult, LinkupSearchImageResult]]


class LinkupFetchImageExtraction(BaseModel):
    """An image extraction from a Linkup web page fetch.

    Attributes:
        alt: The alt text of the image.
        url: The URL of the image.
    """

    # Both fields are declared without a default value, so each one is required.
    # NOTE(review): confirm the API always sends `alt`, even for images without alt text.
    alt: str
    url: str


class LinkupFetchResponse(BaseModel):
    """The response from a Linkup web page fetch.

    Attributes:
        markdown: The cleaned up markdown content.
        raw_html: The optional raw HTML content, validated from the `rawHtml` key of the payload.
        images: The optional list of images extracted from the web page, each one carrying its
            alt text and its URL.
    """

    # Lets `raw_html` be populated by its field name as well as by its `rawHtml` alias.
    model_config = ConfigDict(populate_by_name=True)

    markdown: str
    raw_html: Optional[str] = Field(default=None, validation_alias="rawHtml")
    images: Optional[list[LinkupFetchImageExtraction]] = None