pip install -e .
# Install Playwright browsers (required)
playwright install chromium

from sentience import SentienceBrowser, snapshot, find, click

# Start browser with extension
with SentienceBrowser(headless=False) as browser:
    browser.goto("https://example.com", wait_until="domcontentloaded")

    # Take snapshot - captures all interactive elements
    snap = snapshot(browser)
    print(f"Found {len(snap.elements)} elements")

    # Find and click a link using semantic selectors
    link = find(snap, "role=link text~'More information'")
    if link:
        result = click(browser, link.id)
        print(f"Click success: {result.success}")

This example demonstrates navigating Amazon, finding products, and adding items to cart:
from sentience import SentienceBrowser, snapshot, find, click
import time

with SentienceBrowser(headless=False) as browser:
    # Navigate to Amazon Best Sellers
    browser.goto("https://www.amazon.com/gp/bestsellers/", wait_until="domcontentloaded")
    time.sleep(2) # Wait for dynamic content

    # Take snapshot and find products
    snap = snapshot(browser)
    print(f"Found {len(snap.elements)} elements")

    # Find first product in viewport using spatial filtering
    products = [
        el for el in snap.elements
        if el.role == "link"
        and el.visual_cues.is_clickable
        and el.in_viewport
        and not el.is_occluded
        and el.bbox.y < 600 # First row
    ]

    if products:
        # Sort by position (top to bottom, then left to right)
        products.sort(key=lambda e: (e.bbox.y, e.bbox.x))
        first_product = products[0]

        print(f"Clicking: {first_product.text}")
        result = click(browser, first_product.id)

        # Wait for product page
        browser.page.wait_for_load_state("networkidle")
        time.sleep(2)

        # Find and click "Add to Cart" button
        product_snap = snapshot(browser)
        add_to_cart = find(product_snap, "role=button text~'add to cart'")

        if add_to_cart:
            cart_result = click(browser, add_to_cart.id)
            print(f"Added to cart: {cart_result.success}")

See the complete tutorial: Amazon Shopping Guide
- SentienceBrowser - Playwright browser with Sentience extension pre-loaded
- browser.goto(url) - Navigate with automatic extension readiness checks
- Automatic bot evasion and stealth mode
- Configurable headless/headed mode

- snapshot(browser, screenshot=True) - Capture page state with AI-ranked elements
- Returns semantic elements with roles, text, importance scores, and bounding boxes
- Optional screenshot capture (PNG/JPEG)
- Pydantic models for type safety
- snapshot.save(filepath) - Export to JSON
Example:
snap = snapshot(browser, screenshot=True)
# Access structured data
print(f"URL: {snap.url}")
print(f"Viewport: {snap.viewport.width}x{snap.viewport.height}")
print(f"Elements: {len(snap.elements)}")
# Iterate over elements
for element in snap.elements:
    print(f"{element.role}: {element.text} (importance: {element.importance})")

- query(snapshot, selector) - Find all matching elements
- find(snapshot, selector) - Find single best match (by importance)
- Powerful query DSL with multiple operators
Query Examples:
# Find by role and text
button = find(snap, "role=button text='Sign in'")
# Substring match (case-insensitive)
link = find(snap, "role=link text~'more info'")
# Spatial filtering
top_left = find(snap, "bbox.x<=100 bbox.y<=200")
# Multiple conditions (AND logic)
primary_btn = find(snap, "role=button clickable=true visible=true importance>800")
# Prefix/suffix matching
starts_with = find(snap, "text^='Add'")
ends_with = find(snap, "text$='Cart'")
# Numeric comparisons
important = query(snap, "importance>=700")
first_row = query(snap, "bbox.y<600")

📖 Complete Query DSL Guide - All operators, fields, and advanced patterns

- click(browser, element_id) - Click element by ID
- click_rect(browser, rect) - Click at center of rectangle (coordinate-based)
- type_text(browser, element_id, text) - Type into input fields
- press(browser, key) - Press keyboard keys (Enter, Escape, Tab, etc.)
All actions return ActionResult with success status, timing, and outcome:
result = click(browser, element.id)
print(f"Success: {result.success}")
print(f"Outcome: {result.outcome}") # "navigated", "dom_updated", "error"
print(f"Duration: {result.duration_ms}ms")
print(f"URL changed: {result.url_changed}")

Coordinate-based clicking:
from sentience import click_rect
# Click at center of rectangle (x, y, width, height)
click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30})
# With visual highlight (default: red border for 2 seconds)
click_rect(browser, {"x": 100, "y": 200, "w": 50, "h": 30}, highlight=True, highlight_duration=2.0)
# Using element's bounding box
snap = snapshot(browser)
element = find(snap, "role=button")
if element:
    click_rect(browser, {
        "x": element.bbox.x,
        "y": element.bbox.y,
        "w": element.bbox.width,
        "h": element.bbox.height
    })

- wait_for(browser, selector, timeout=5.0, interval=None, use_api=None) - Wait for element to appear
- expect(browser, selector) - Assertion helper with fluent API
Examples:
# Wait for element (auto-detects optimal interval based on API usage)
result = wait_for(browser, "role=button text='Submit'", timeout=10.0)
if result.found:
    print(f"Found after {result.duration_ms}ms")
# Use local extension with fast polling (0.25s interval)
result = wait_for(browser, "role=button", timeout=5.0, use_api=False)
# Use remote API with network-friendly polling (1.5s interval)
result = wait_for(browser, "role=button", timeout=5.0, use_api=True)
# Custom interval override
result = wait_for(browser, "role=button", timeout=5.0, interval=0.5, use_api=False)
# Semantic wait conditions
wait_for(browser, "clickable=true", timeout=5.0) # Wait for clickable element
wait_for(browser, "importance>100", timeout=5.0) # Wait for important element
wait_for(browser, "role=link visible=true", timeout=5.0) # Wait for visible link
# Assertions
expect(browser, "role=button text='Submit'").to_exist(timeout=5.0)
expect(browser, "role=heading").to_be_visible()
expect(browser, "role=button").to_have_text("Submit")
expect(browser, "role=link").to_have_count(10)

- read(browser, format="text|markdown|raw") - Extract page content
- format="text" - Plain text extraction
- format="markdown" - High-quality markdown conversion (uses markdownify)
- format="raw" - Cleaned HTML (default)
Example:
from sentience import read
# Get markdown content
result = read(browser, format="markdown")
print(result["content"]) # Markdown text
# Get plain text
result = read(browser, format="text")
print(result["content"]) # Plain text

- screenshot(browser, format="png|jpeg", quality=80) - Standalone screenshot capture
- Returns base64-encoded data URL
- PNG or JPEG format
- Quality control for JPEG (1-100)
Example:
from sentience import screenshot
import base64
# Capture PNG screenshot
data_url = screenshot(browser, format="png")
# Save to file
image_data = base64.b64decode(data_url.split(",")[1])
with open("screenshot.png", "wb") as f:
    f.write(image_data)
# JPEG with quality control (smaller file size)
data_url = screenshot(browser, format="jpeg", quality=85)

Elements returned by snapshot() have the following properties:
element.id # Unique identifier for interactions
element.role # ARIA role (button, link, textbox, heading, etc.)
element.text # Visible text content
element.importance # AI importance score (0-1000)
element.bbox # Bounding box (x, y, width, height)
element.visual_cues # Visual analysis (is_primary, is_clickable, background_color)
element.in_viewport # Is element visible in current viewport?
element.is_occluded # Is element covered by other elements?
element.z_index # CSS stacking order

| Operator | Description | Example |
|---|---|---|
| `=` | Exact match | `role=button` |
| `!=` | Exclusion | `role!=link` |
| `~` | Substring (case-insensitive) | `text~'sign in'` |
| `^=` | Prefix match | `text^='Add'` |
| `$=` | Suffix match | `text$='Cart'` |
| `>`, `>=` | Greater than (or equal) | `importance>500` |
| `<`, `<=` | Less than (or equal) | `bbox.y<600` |
- Role: role=button|link|textbox|heading|...
- Text: text, text~, text^=, text$=
- Visibility: clickable=true|false, visible=true|false
- Importance: importance, importance>=N, importance<N
- Position: bbox.x, bbox.y, bbox.width, bbox.height
- Layering: z_index
See the examples/ directory for complete working examples:
- hello.py - Extension bridge verification
- basic_agent.py - Basic snapshot and element inspection
- query_demo.py - Query engine demonstrations
- wait_and_click.py - Waiting for elements and performing actions
- read_markdown.py - Content extraction and markdown conversion
# Run all tests
pytest tests/
# Run specific test file
pytest tests/test_snapshot.py
# Run with verbose output
pytest -v tests/

Default viewport is 1280x800 pixels. You can customize it using Playwright's API:
with SentienceBrowser(headless=False) as browser:
    # Set custom viewport before navigating
    browser.page.set_viewport_size({"width": 1920, "height": 1080})
    browser.goto("https://example.com")

# Headed mode (default in dev, shows browser window)
browser = SentienceBrowser(headless=False)
# Headless mode (default in CI environments)
browser = SentienceBrowser(headless=True)
# Auto-detect based on environment
browser = SentienceBrowser() # headless=True if CI=true, else False

browser.goto("https://example.com", wait_until="domcontentloaded")
time.sleep(1) # Extra buffer for AJAX/animations

# Try exact match first
btn = find(snap, "role=button text='Add to Cart'")
# Fallback to fuzzy match
if not btn:
    btn = find(snap, "role=button text~'cart'")

if element.in_viewport and not element.is_occluded:
    click(browser, element.id)

result = click(browser, link_id)
if result.url_changed:
    browser.page.wait_for_load_state("networkidle")

# Fast - no screenshot (only element data)
snap = snapshot(browser)
# Slower - with screenshot (for debugging/verification)
snap = snapshot(browser, screenshot=True)

Solution: Build the extension first:
cd sentience-chrome
./build.sh

Solutions:
- Ensure page is loaded: browser.page.wait_for_load_state("networkidle")
- Use wait_for(): wait_for(browser, "role=button", timeout=10)
- Debug elements: print([el.text for el in snap.elements])
Solutions:
- Check visibility: element.in_viewport and not element.is_occluded
- Scroll to element: browser.page.evaluate(f"window.sentience_registry[{element.id}].scrollIntoView()")
- 📖 Amazon Shopping Guide - Complete tutorial with real-world example
- 📖 Query DSL Guide - Advanced query patterns and operators
- 📄 API Contract - Snapshot API specification
- 📄 Type Definitions - TypeScript/Python type definitions
📜 License
This SDK is licensed under the Elastic License 2.0 (ELv2).
The Elastic License 2.0 allows you to use, modify, and distribute this SDK for internal, research, and non-competitive purposes. It does not permit offering this SDK or a derivative as a hosted or managed service, nor using it to build a competing product or service.
- This SDK is a client-side library that communicates with proprietary Sentience services and browser components.
- The Sentience backend services (including semantic geometry grounding, ranking, visual cues, and trace processing) are not open source and are governed by Sentience’s Terms of Service.
- Use of this SDK does not grant rights to operate, replicate, or reimplement Sentience’s hosted services.
For commercial usage, hosted offerings, or enterprise deployments, please contact Sentience to obtain a commercial license.
See the full license text in LICENSE.