From a3d5011008ee8109a7e3d0c4953bfa7e0ed0aa50 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 12:35:07 -0400 Subject: [PATCH 1/3] fix example by adding proxies --- examples/example.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/example.py b/examples/example.py index 7821996..f680b09 100644 --- a/examples/example.py +++ b/examples/example.py @@ -4,11 +4,11 @@ from rich.console import Console from rich.panel import Panel from rich.theme import Theme -import json from dotenv import load_dotenv -from stagehand import Stagehand, StagehandConfig -from stagehand.utils import configure_logging +from stagehand import Stagehand, StagehandConfig, configure_logging + +from browserbase.types import SessionCreateParams as BrowserbaseSessionCreateParams # Configure logging with cleaner format configure_logging( @@ -34,6 +34,11 @@ load_dotenv() +browserbase_session_create_params = BrowserbaseSessionCreateParams( + project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + proxies=True, +) + console.print( Panel.fit( "[yellow]Logging Levels:[/]\n" @@ -52,6 +57,7 @@ async def main(): env="BROWSERBASE", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + browserbase_session_create_params=browserbase_session_create_params, headless=False, dom_settle_timeout_ms=3000, model_name="google/gemini-2.0-flash", @@ -98,7 +104,7 @@ async def main(): await asyncio.sleep(2) console.print("\nā–¶ļø [highlight] Observing page[/] for news button") - observed = await page.observe("find all articles") + observed = await page.observe("find the news button") if len(observed) > 0: element = observed[0] From de775333f2cd33477b66987afc3e8418ff49b62c Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Sun, 15 Jun 2025 13:07:44 -0400 Subject: [PATCH 2/3] update agent with proxy --- examples/agent_example.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/agent_example.py b/examples/agent_example.py index 8b21449..be4e50c 100644 --- a/examples/agent_example.py +++ b/examples/agent_example.py @@ -10,6 +10,8 @@ from stagehand import Stagehand, StagehandConfig, AgentConfig, configure_logging from stagehand.schemas import AgentExecuteOptions, AgentProvider +from browserbase.types import SessionCreateParams as BrowserbaseSessionCreateParams + # Create a custom theme for consistent styling custom_theme = Theme( { @@ -27,6 +29,11 @@ load_dotenv() +browserbase_session_create_params = BrowserbaseSessionCreateParams( + project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + proxies=True, +) + # Configure logging with the utility function configure_logging( level=logging.INFO, # Set to INFO for regular logs, DEBUG for detailed @@ -40,6 +47,7 @@ async def main(): # env="LOCAL", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), + browserbase_session_create_params=browserbase_session_create_params, model_name="gpt-4o", self_heal=True, system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", @@ -70,7 +78,7 @@ async def main(): options={"apiKey": os.getenv("MODEL_API_KEY")} ) agent_result = await agent.execute( - instruction="Play a game of 2048", + instruction="Search for the game 2048 and play one game.", max_steps=20, auto_screenshot=True, ) From 6ff745f54043fccb34444d7fa3029ac0505ecdac Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 16 Jun 2025 07:55:23 -0400 Subject: [PATCH 3/3] update example to not use proxy --- examples/example.py | 299 +++++++++++++++++++++++++++++--------------- 1 file changed, 195 insertions(+), 104 deletions(-) diff --git a/examples/example.py b/examples/example.py index f680b09..f6de0d4 100644 --- a/examples/example.py +++ b/examples/example.py @@ -4,130 +4,221 @@ from rich.console import Console from rich.panel import Panel from rich.theme import Theme +from pydantic import BaseModel, Field, HttpUrl from dotenv import load_dotenv -from stagehand import Stagehand, StagehandConfig, configure_logging +from stagehand import StagehandConfig, Stagehand, configure_logging +from stagehand.schemas import ExtractOptions +from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id -from browserbase.types import SessionCreateParams as BrowserbaseSessionCreateParams +# Load environment variables +load_dotenv() -# Configure logging with cleaner format configure_logging( level=logging.INFO, remove_logger_name=True, # Remove the redundant stagehand.client prefix quiet_dependencies=True, # Suppress httpx and other noisy logs ) -# Create a custom theme for consistent styling -custom_theme = Theme( - { - "info": "cyan", - "success": "green", - "warning": "yellow", - "error": "red bold", - "highlight": "magenta", - "url": "blue underline", - } -) - -# Create a Rich console instance with our theme -console = Console(theme=custom_theme) - -load_dotenv() - -browserbase_session_create_params = BrowserbaseSessionCreateParams( - project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - proxies=True, -) +# Configure Rich console +console = Console(theme=Theme({ + "info": "cyan", + "success": "green", + "warning": "yellow", + "error": "red bold", + "highlight": "magenta", + "url": "blue underline", +})) + +# Define Pydantic models for testing +class Company(BaseModel): + name: str = Field(..., description="The name of the company") + url: HttpUrl = Field(..., description="The URL of the company website or relevant page") + +class Companies(BaseModel): + companies: list[Company] = Field(..., description="List of companies extracted from the page, maximum of 5 companies") -console.print( - Panel.fit( - "[yellow]Logging Levels:[/]\n" - "[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n" - "[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n" - "[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n" - "[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]", - title="Verbosity Options", - border_style="blue", - ) -) +class ElementAction(BaseModel): + action: str + id: int + arguments: list[str] async def main(): - # Build a unified configuration object for Stagehand + + # Create configuration config = StagehandConfig( - env="BROWSERBASE", api_key=os.getenv("BROWSERBASE_API_KEY"), project_id=os.getenv("BROWSERBASE_PROJECT_ID"), - browserbase_session_create_params=browserbase_session_create_params, - headless=False, - dom_settle_timeout_ms=3000, - model_name="google/gemini-2.0-flash", - self_heal=True, - wait_for_captcha_solves=True, - system_prompt="You are a browser automation assistant that helps users navigate websites effectively.", + model_name="google/gemini-2.5-flash-preview-04-17", model_client_options={"apiKey": os.getenv("MODEL_API_KEY")}, - # Use verbose=2 for medium-detail logs (1=minimal, 3=debug) - verbose=2, - ) - - stagehand = Stagehand(config) - - # Initialize - this creates a new session automatically. - console.print("\nšŸš€ [info]Initializing Stagehand...[/]") - await stagehand.init() - page = stagehand.page - console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}") - console.print( - f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]" + verbose=1, ) - - await asyncio.sleep(2) - - console.print("\nā–¶ļø [highlight] Navigating[/] to Google") - await page.goto("https://google.com/") - console.print("āœ… [success]Navigated to Google[/]") - - console.print("\nā–¶ļø [highlight] Clicking[/] on About link") - # Click on the "About" link using Playwright - await page.get_by_role("link", name="About", exact=True).click() - console.print("āœ… [success]Clicked on About link[/]") - - await asyncio.sleep(2) - console.print("\nā–¶ļø [highlight] Navigating[/] back to Google") - await page.goto("https://google.com/") - console.print("āœ… [success]Navigated back to Google[/]") - - console.print("\nā–¶ļø [highlight] Performing action:[/] search for openai") - await page.act("search for openai") - await page.keyboard.press("Enter") - console.print("āœ… [success]Performing Action:[/] Action completed successfully") - await asyncio.sleep(2) - - console.print("\nā–¶ļø [highlight] Observing page[/] for news button") - observed = await page.observe("find the news button") + # Initialize async client + stagehand = Stagehand( + config=config, + env="BROWSERBASE", # LOCAL for local execution, BROWSERBASE for remote execution + server_url=os.getenv("STAGEHAND_API_URL"), # only needed for remote execution + ) - if len(observed) > 0: - element = observed[0] - console.print("āœ… [success]Found element:[/] News button") - console.print("\nā–¶ļø [highlight] Performing action on observed element:") - console.print(element) - await page.act(element) - console.print("āœ… [success]Performing Action:[/] Action completed successfully") - - else: - console.print("āŒ [error]No element found[/]") - - console.print("\nā–¶ļø [highlight] Extracting[/] first search result") - data = await page.extract("extract the first result from the search") - console.print("šŸ“Š [info]Extracted data:[/]") - console.print_json(f"{data.model_dump_json()}") - - # Close the session - console.print("\nā¹ļø [warning]Closing session...[/]") - await stagehand.close() - console.print("āœ… [success]Session closed successfully![/]") - console.rule("[bold]End of Example[/]") - + try: + # Initialize the client + await stagehand.init() + console.print("[success]āœ“ Successfully initialized Stagehand async client[/]") + console.print(f"[info]Environment: {stagehand.env}[/]") + console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]") + + # Navigate to AIgrant (as in the original test) + await stagehand.page.goto("https://www.aigrant.com") + console.print("[success]āœ“ Navigated to AIgrant[/]") + await asyncio.sleep(2) + + # Get accessibility tree + tree = await get_accessibility_tree(stagehand.page, stagehand.logger) + console.print("[success]āœ“ Extracted accessibility tree[/]") + + print("ID to URL mapping:", tree.get("idToUrl")) + print("IFrames:", tree.get("iframes")) + + # Click the "Get Started" button + await stagehand.page.act("click the button with text 'Get Started'") + console.print("[success]āœ“ Clicked 'Get Started' button[/]") + + # Observe the button + await stagehand.page.observe("the button with text 'Get Started'") + console.print("[success]āœ“ Observed 'Get Started' button[/]") + + # Extract companies using schema + extract_options = ExtractOptions( + instruction="Extract the names and URLs of up to 5 companies mentioned on this page", + schema_definition=Companies + ) + + extract_result = await stagehand.page.extract(extract_options) + console.print("[success]āœ“ Extracted companies data[/]") + + # Display results + print("Extract result:", extract_result) + print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field') + + # Parse the result into the Companies model + companies_data = None + + # Both LOCAL and BROWSERBASE modes now return the Pydantic model directly + try: + companies_data = extract_result if isinstance(extract_result, Companies) else Companies.model_validate(extract_result) + console.print("[success]āœ“ Successfully parsed extract result into Companies model[/]") + + # Handle URL resolution if needed + if hasattr(companies_data, 'companies'): + id_to_url = tree.get("idToUrl", {}) + for company in companies_data.companies: + if hasattr(company, 'url') and isinstance(company.url, str): + # Check if URL is just an ID that needs to be resolved + if company.url.isdigit() and company.url in id_to_url: + company.url = id_to_url[company.url] + console.print(f"[success]āœ“ Resolved URL for {company.name}: {company.url}[/]") + + except Exception as e: + console.print(f"[error]Failed to parse extract result: {e}[/]") + print("Raw extract result:", extract_result) + + print("\nExtracted Companies:") + if companies_data and hasattr(companies_data, "companies"): + for idx, company in enumerate(companies_data.companies, 1): + print(f"{idx}. {company.name}: {company.url}") + else: + print("No companies were found in the extraction result") + + # XPath click + await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click() + await stagehand.page.wait_for_load_state('networkidle') + console.print("[success]āœ“ Clicked element using XPath[/]") + + # Open a new page with Google + console.print("\n[info]Creating a new page...[/]") + new_page = await stagehand.context.new_page() + await new_page.goto("https://www.google.com") + console.print("[success]āœ“ Opened Google in a new page[/]") + + # Get accessibility tree for the new page + tree = await get_accessibility_tree(new_page, stagehand.logger) + console.print("[success]āœ“ Extracted accessibility tree for new page[/]") + + # Try clicking Get Started button on Google + await new_page.act("click the button with text 'Get Started'") + + # Only use LLM directly if in LOCAL mode + if stagehand.llm is not None: + console.print("[info]LLM client available - using direct LLM call[/]") + + # Use LLM to analyze the page + response = stagehand.llm.create_response( + messages=[ + { + "role": "system", + "content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions." + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}" + } + ] + } + ], + model=config.model_name, + response_format=ElementAction, + ) + + action = ElementAction.model_validate_json(response.choices[0].message.content) + console.print(f"[success]āœ“ LLM identified element ID: {action.id}[/]") + + # Test CDP functionality + args = {"backendNodeId": action.id} + result = await new_page.send_cdp("DOM.resolveNode", args) + object_info = result.get("object") + print(object_info) + + xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"]) + console.print(f"[success]āœ“ Retrieved XPath: {xpath}[/]") + + # Interact with the element + if xpath: + await new_page.locator(f"xpath={xpath}").click() + await new_page.locator(f"xpath={xpath}").fill(action.arguments[0]) + console.print("[success]āœ“ Filled search bar with 'Hello'[/]") + else: + print("No xpath found") + else: + console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]") + # Alternative: use page.observe to find the search bar + observe_result = await new_page.observe("the search bar or search input field") + console.print(f"[info]Observed search elements: {observe_result}[/]") + + # Use page.act to fill the search bar + try: + await new_page.act("fill the search bar with 'Hello'") + console.print("[success]āœ“ Filled search bar using act()[/]") + except Exception as e: + console.print(f"[warning]Could not fill search bar: {e}[/]") + + # Final test summary + console.print("\n[success]All tests completed successfully![/]") + + except Exception as e: + console.print(f"[error]Error during testing: {str(e)}[/]") + import traceback + traceback.print_exc() + raise + finally: + # Close the client + # wait for 5 seconds + await asyncio.sleep(5) + await stagehand.close() + console.print("[info]Stagehand async client closed[/]") if __name__ == "__main__": # Add a fancy header