Skip to content

Fm/stg 464 update examples python #96

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion examples/agent_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from stagehand import Stagehand, StagehandConfig, AgentConfig, configure_logging
from stagehand.schemas import AgentExecuteOptions, AgentProvider

from browserbase.types import SessionCreateParams as BrowserbaseSessionCreateParams

# Create a custom theme for consistent styling
custom_theme = Theme(
{
Expand All @@ -27,6 +29,11 @@

load_dotenv()

# Session-creation parameters for Browserbase; main() hands this object to
# StagehandConfig via its browserbase_session_create_params argument.
browserbase_session_create_params = BrowserbaseSessionCreateParams(
    project_id=os.environ.get("BROWSERBASE_PROJECT_ID"),
    proxies=True,
)

# Configure logging with the utility function
configure_logging(
level=logging.INFO, # Set to INFO for regular logs, DEBUG for detailed
Expand All @@ -40,6 +47,7 @@ async def main():
# env="LOCAL",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
browserbase_session_create_params=browserbase_session_create_params,
model_name="gpt-4o",
self_heal=True,
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
Expand Down Expand Up @@ -70,7 +78,7 @@ async def main():
options={"apiKey": os.getenv("MODEL_API_KEY")}
)
agent_result = await agent.execute(
instruction="Play a game of 2048",
instruction="Search for the game 2048 and play one game.",
max_steps=20,
auto_screenshot=True,
)
Expand Down
295 changes: 196 additions & 99 deletions examples/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,124 +4,221 @@
from rich.console import Console
from rich.panel import Panel
from rich.theme import Theme
import json
from pydantic import BaseModel, Field, HttpUrl
from dotenv import load_dotenv

from stagehand import Stagehand, StagehandConfig
from stagehand.utils import configure_logging
from stagehand import StagehandConfig, Stagehand, configure_logging
from stagehand.schemas import ExtractOptions
from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id

# Load environment variables
load_dotenv()

# Configure logging with cleaner format
configure_logging(
level=logging.INFO,
remove_logger_name=True, # Remove the redundant stagehand.client prefix
quiet_dependencies=True, # Suppress httpx and other noisy logs
)

# Create a custom theme for consistent styling
custom_theme = Theme(
{
"info": "cyan",
"success": "green",
"warning": "yellow",
"error": "red bold",
"highlight": "magenta",
"url": "blue underline",
}
)

# Create a Rich console instance with our theme
console = Console(theme=custom_theme)
# Configure Rich console
console = Console(theme=Theme({
"info": "cyan",
"success": "green",
"warning": "yellow",
"error": "red bold",
"highlight": "magenta",
"url": "blue underline",
}))

# Define Pydantic models for testing
# One company entry inside the Companies extraction schema.
# NOTE(review): documented with comments rather than a docstring because
# pydantic is understood to copy a model docstring into the generated JSON
# schema, which would change what is sent to the model - confirm before
# adding one.
class Company(BaseModel):
    # Required: no default is supplied, so the field must be present.
    name: str = Field(description="The name of the company")
    # Required and validated against the HttpUrl type on model construction.
    url: HttpUrl = Field(
        description="The URL of the company website or relevant page",
    )

# Top-level extraction schema: main() passes this class as
# ExtractOptions.schema_definition and validates the extract result against it.
class Companies(BaseModel):
    # Required list of Company entries; the "maximum of 5" limit is stated only
    # in the description text, it is not enforced by a validator here.
    companies: list[Company] = Field(
        description="List of companies extracted from the page, maximum of 5 companies",
    )

load_dotenv()

console.print(
Panel.fit(
"[yellow]Logging Levels:[/]\n"
"[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n"
"[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n"
"[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n"
"[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]",
title="Verbosity Options",
border_style="blue",
)
)
# Structured reply requested from the direct LLM call in main(): the class is
# passed as response_format and the reply text is parsed back with
# ElementAction.model_validate_json.
class ElementAction(BaseModel):
    # Action chosen by the model; the visible example code never reads it.
    action: str
    # Element identifier; main() sends it to CDP "DOM.resolveNode" as
    # backendNodeId.
    id: int
    # Arguments for the action; main() uses arguments[0] as the text to fill.
    arguments: list[str]

async def main():
# Build a unified configuration object for Stagehand

# Create configuration
config = StagehandConfig(
env="BROWSERBASE",
api_key=os.getenv("BROWSERBASE_API_KEY"),
project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
headless=False,
dom_settle_timeout_ms=3000,
model_name="google/gemini-2.0-flash",
self_heal=True,
wait_for_captcha_solves=True,
system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
model_name="google/gemini-2.5-flash-preview-04-17",
model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
# verbose controls log detail (1=minimal, 2=medium, 3=debug)
verbose=2,
verbose=1,
)

stagehand = Stagehand(config)

# Initialize - this creates a new session automatically.
console.print("\n🚀 [info]Initializing Stagehand...[/]")
await stagehand.init()
page = stagehand.page
console.print(f"\n[yellow]Created new session:[/] {stagehand.session_id}")
console.print(
f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
)

await asyncio.sleep(2)

console.print("\n▶️ [highlight] Navigating[/] to Google")
await page.goto("https://google.com/")
console.print("✅ [success]Navigated to Google[/]")

console.print("\n▶️ [highlight] Clicking[/] on About link")
# Click on the "About" link using Playwright
await page.get_by_role("link", name="About", exact=True).click()
console.print("✅ [success]Clicked on About link[/]")

await asyncio.sleep(2)
console.print("\n▶️ [highlight] Navigating[/] back to Google")
await page.goto("https://google.com/")
console.print("✅ [success]Navigated back to Google[/]")

console.print("\n▶️ [highlight] Performing action:[/] search for openai")
await page.act("search for openai")
await page.keyboard.press("Enter")
console.print("✅ [success]Performing Action:[/] Action completed successfully")

await asyncio.sleep(2)

console.print("\n▶️ [highlight] Observing page[/] for news button")
observed = await page.observe("find all articles")
# Initialize async client
stagehand = Stagehand(
config=config,
env="BROWSERBASE", # LOCAL for local execution, BROWSERBASE for remote execution
server_url=os.getenv("STAGEHAND_API_URL"), # only needed for remote execution
)

if len(observed) > 0:
element = observed[0]
console.print("✅ [success]Found element:[/] News button")
console.print("\n▶️ [highlight] Performing action on observed element:")
console.print(element)
await page.act(element)
console.print("✅ [success]Performing Action:[/] Action completed successfully")

else:
console.print("❌ [error]No element found[/]")

console.print("\n▶️ [highlight] Extracting[/] first search result")
data = await page.extract("extract the first result from the search")
console.print("📊 [info]Extracted data:[/]")
console.print_json(f"{data.model_dump_json()}")

# Close the session
console.print("\n⏹️ [warning]Closing session...[/]")
await stagehand.close()
console.print("✅ [success]Session closed successfully![/]")
console.rule("[bold]End of Example[/]")

try:
# Initialize the client
await stagehand.init()
console.print("[success]✓ Successfully initialized Stagehand async client[/]")
console.print(f"[info]Environment: {stagehand.env}[/]")
console.print(f"[info]LLM Client Available: {stagehand.llm is not None}[/]")

# Navigate to AIgrant (as in the original test)
await stagehand.page.goto("https://www.aigrant.com")
console.print("[success]✓ Navigated to AIgrant[/]")
await asyncio.sleep(2)

# Get accessibility tree
tree = await get_accessibility_tree(stagehand.page, stagehand.logger)
console.print("[success]✓ Extracted accessibility tree[/]")

print("ID to URL mapping:", tree.get("idToUrl"))
print("IFrames:", tree.get("iframes"))

# Click the "Get Started" button
await stagehand.page.act("click the button with text 'Get Started'")
console.print("[success]✓ Clicked 'Get Started' button[/]")

# Observe the button
await stagehand.page.observe("the button with text 'Get Started'")
console.print("[success]✓ Observed 'Get Started' button[/]")

# Extract companies using schema
extract_options = ExtractOptions(
instruction="Extract the names and URLs of up to 5 companies mentioned on this page",
schema_definition=Companies
)

extract_result = await stagehand.page.extract(extract_options)
console.print("[success]✓ Extracted companies data[/]")

# Display results
print("Extract result:", extract_result)
print("Extract result data:", extract_result.data if hasattr(extract_result, 'data') else 'No data field')

# Parse the result into the Companies model
companies_data = None

# Both LOCAL and BROWSERBASE modes now return the Pydantic model directly
try:
companies_data = extract_result if isinstance(extract_result, Companies) else Companies.model_validate(extract_result)
console.print("[success]✓ Successfully parsed extract result into Companies model[/]")

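# Handle URL resolution if needed
# NOTE(review): Company.url is declared as HttpUrl, so confirm that the
# isinstance(company.url, str) check below can ever be true - if it cannot,
# this ID-to-URL resolution never runs.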
if hasattr(companies_data, 'companies'):
id_to_url = tree.get("idToUrl", {})
for company in companies_data.companies:
if hasattr(company, 'url') and isinstance(company.url, str):
# Check if URL is just an ID that needs to be resolved
if company.url.isdigit() and company.url in id_to_url:
company.url = id_to_url[company.url]
console.print(f"[success]✓ Resolved URL for {company.name}: {company.url}[/]")

except Exception as e:
console.print(f"[error]Failed to parse extract result: {e}[/]")
print("Raw extract result:", extract_result)

print("\nExtracted Companies:")
if companies_data and hasattr(companies_data, "companies"):
for idx, company in enumerate(companies_data.companies, 1):
print(f"{idx}. {company.name}: {company.url}")
else:
print("No companies were found in the extraction result")

# XPath click
await stagehand.page.locator("xpath=/html/body/div/ul[2]/li[2]/a").click()
await stagehand.page.wait_for_load_state('networkidle')
console.print("[success]✓ Clicked element using XPath[/]")

# Open a new page with Google
console.print("\n[info]Creating a new page...[/]")
new_page = await stagehand.context.new_page()
await new_page.goto("https://www.google.com")
console.print("[success]✓ Opened Google in a new page[/]")

# Get accessibility tree for the new page
tree = await get_accessibility_tree(new_page, stagehand.logger)
console.print("[success]✓ Extracted accessibility tree for new page[/]")

# Try clicking Get Started button on Google
await new_page.act("click the button with text 'Get Started'")

# Only use LLM directly if in LOCAL mode
if stagehand.llm is not None:
console.print("[info]LLM client available - using direct LLM call[/]")

# Use LLM to analyze the page
response = stagehand.llm.create_response(
messages=[
{
"role": "system",
"content": "Based on the provided accessibility tree of the page, find the element and the action the user is expecting to perform. The tree consists of an enhanced a11y tree from a website with unique identifiers prepended to each element's role, and name. The actions you can take are playwright compatible locator actions."
},
{
"role": "user",
"content": [
{
"type": "text",
"text": f"fill the search bar with the text 'Hello'\nPage Tree:\n{tree.get('simplified')}"
}
]
}
],
model=config.model_name,
response_format=ElementAction,
)

action = ElementAction.model_validate_json(response.choices[0].message.content)
console.print(f"[success]✓ LLM identified element ID: {action.id}[/]")

# Test CDP functionality
args = {"backendNodeId": action.id}
result = await new_page.send_cdp("DOM.resolveNode", args)
object_info = result.get("object")
print(object_info)

xpath = await get_xpath_by_resolved_object_id(await new_page.get_cdp_client(), object_info["objectId"])
console.print(f"[success]✓ Retrieved XPath: {xpath}[/]")

# Interact with the element
if xpath:
await new_page.locator(f"xpath={xpath}").click()
await new_page.locator(f"xpath={xpath}").fill(action.arguments[0])
console.print("[success]✓ Filled search bar with 'Hello'[/]")
else:
print("No xpath found")
else:
console.print("[warning]LLM client not available in BROWSERBASE mode - skipping direct LLM test[/]")
# Alternative: use page.observe to find the search bar
observe_result = await new_page.observe("the search bar or search input field")
console.print(f"[info]Observed search elements: {observe_result}[/]")

# Use page.act to fill the search bar
try:
await new_page.act("fill the search bar with 'Hello'")
console.print("[success]✓ Filled search bar using act()[/]")
except Exception as e:
console.print(f"[warning]Could not fill search bar: {e}[/]")

# Final test summary
console.print("\n[success]All tests completed successfully![/]")

except Exception as e:
console.print(f"[error]Error during testing: {str(e)}[/]")
import traceback
traceback.print_exc()
raise
finally:
# Wait 5 seconds, then close the client
await asyncio.sleep(5)
await stagehand.close()
console.print("[info]Stagehand async client closed[/]")

if __name__ == "__main__":
# Add a fancy header
Expand Down
Loading