predicate-runtime-python/sentience/snapshot.py at 94379491a565d574dc2e6ad1d12dff101bccb74c · PredicateSystems/predicate-runtime-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
"""
Snapshot functionality - calls window.sentience.snapshot() or server-side API
"""

import json
import os
import time
from typing import Any

import requests

from .browser import SentienceBrowser
from .models import Snapshot, SnapshotOptions

# Upper bound, in bytes, on the serialized JSON body sent to the snapshot API.
# The server enforces a 10MB limit, so oversized payloads are rejected locally
# before the request is made.
MAX_PAYLOAD_BYTES = 10 * 1024**2


def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | None = None) -> None:
    """
    Write raw snapshot elements to disk as indented JSON (for benchmarking/training)

    Any missing parent directory of the target path is created first, and the
    final location is echoed to stdout.

    Args:
        raw_elements: Raw elements data from snapshot
        trace_path: Destination file. When None or empty, a file named
                    "trace_{timestamp}.json" (Unix seconds) is used instead.
    """
    # Fall back to a timestamped name when the caller gave no usable path
    if trace_path:
        target = trace_path
    else:
        target = f"trace_{int(time.time())}.json"

    # A bare filename has no parent component, so there is nothing to create
    parent_dir = os.path.dirname(target)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(target, "w") as trace_file:
        json.dump(raw_elements, trace_file, indent=2)

    print(f"[SDK] Trace saved to: {target}")


def snapshot(
    browser: SentienceBrowser,
    screenshot: bool | None = None,
    limit: int | None = None,
    filter: dict[str, Any] | None = None,
    use_api: bool | None = None,
    save_trace: bool = False,
    trace_path: str | None = None,
    show_overlay: bool = False,
) -> Snapshot:
    """
    Capture a snapshot of the page currently loaded in the browser

    The individual keyword arguments are bundled into a SnapshotOptions object
    and dispatched either to the server-side API or to the local extension.

    Args:
        browser: SentienceBrowser instance
        screenshot: Whether to capture a screenshot (treated as False when None)
        limit: Maximum number of elements to return (50 when None)
        filter: Filter options (min_area, allowed_roles, min_z_index)
        use_api: True to request the server-side API, False for the local
                 extension. When None, the API is chosen if browser.api_key is set.
                 The API path is only taken when browser.api_key is truthy;
                 without a key the local extension is used even if this is True.
        save_trace: Whether to save raw_elements to JSON for benchmarking/training
        trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json"
        show_overlay: Show visual overlay highlighting elements in browser

    Returns:
        Snapshot object
    """
    # Resolve the "unset" sentinels to their defaults before building options
    resolved_screenshot = False if screenshot is None else screenshot
    resolved_limit = 50 if limit is None else limit

    options = SnapshotOptions(
        screenshot=resolved_screenshot,
        limit=resolved_limit,
        filter=filter,
        use_api=use_api,
        save_trace=save_trace,
        trace_path=trace_path,
        show_overlay=show_overlay,
    )

    # An explicit use_api wins; otherwise the presence of an API key decides
    if options.use_api is None:
        wants_api = browser.api_key is not None
    else:
        wants_api = options.use_api

    # Local extension (Free tier) unless the API was selected AND a key exists
    if not (wants_api and browser.api_key):
        return _snapshot_via_extension(browser, options)

    # Server-side API (Pro/Enterprise tier)
    return _snapshot_via_api(browser, options)


def _snapshot_via_extension(
    browser: SentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take a snapshot through the locally injected extension (Free tier)

    Waits up to 5 seconds for ``window.sentience`` to appear on the page, calls
    ``window.sentience.snapshot`` with only the non-default options, then
    optionally saves a trace and shows the overlay before building the result.

    Args:
        browser: SentienceBrowser instance; its ``page`` must be set
        options: Snapshot options (save_trace/trace_path are handled here and
                 are not forwarded to the extension)

    Returns:
        Snapshot built from the dict returned by the extension

    Raises:
        RuntimeError: If the browser has no page, or if ``window.sentience``
            is not defined before the wait times out
    """
    page = browser.page
    if not page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    # The injected API script loads asynchronously, so window.sentience may not
    # exist right after page load; block until it does (or give up after 5s).
    try:
        page.wait_for_function(
            "typeof window.sentience !== 'undefined'",
            timeout=5000,
        )
    except Exception as wait_error:
        # Best-effort diagnostics to make the failure message actionable
        try:
            diagnostics = page.evaluate(
                """() => ({
                    sentience_defined: typeof window.sentience !== 'undefined',
                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
                    url: window.location.href
                })"""
            )
        except Exception:
            diagnostics = {"error": "Could not gather diagnostics"}

        raise RuntimeError(
            f"Sentience extension failed to inject window.sentience API. "
            f"Is the extension loaded? Diagnostics: {diagnostics}"
        ) from wait_error

    # Forward only the options that differ from the defaults
    call_options: dict[str, Any] = {}

    screenshot_opt = options.screenshot
    if screenshot_opt is not False:
        call_options["screenshot"] = screenshot_opt

    limit_opt = options.limit
    if limit_opt != 50:
        call_options["limit"] = limit_opt

    filter_opt = options.filter
    if filter_opt is not None:
        # Objects exposing model_dump() are converted; anything else is sent as-is
        if hasattr(filter_opt, "model_dump"):
            call_options["filter"] = filter_opt.model_dump()
        else:
            call_options["filter"] = filter_opt

    # Invoke the extension's snapshot entry point inside the page
    result = page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        call_options,
    )

    # Persist the raw elements when tracing is enabled
    if options.save_trace:
        _save_trace_to_file(result.get("raw_elements", []), options.trace_path)

    # Highlight the collected elements in the browser when asked to
    if options.show_overlay:
        overlay_elements = result.get("raw_elements", [])
        if overlay_elements:
            page.evaluate(
                """
                (elements) => {
                    if (window.sentience && window.sentience.showOverlay) {
                        window.sentience.showOverlay(elements, null);
                    }
                }
                """,
                overlay_elements,
            )

    # Constructing the model validates the extension's response
    return Snapshot(**result)


def _snapshot_via_api(
    browser: SentienceBrowser,
    options: SnapshotOptions,
) -> Snapshot:
    """
    Take snapshot using server-side API (Pro/Enterprise tier)

    Raw element data is always collected locally through the injected
    extension, then POSTed to ``{browser.api_url}/v1/snapshot``. The elements
    returned by the server are merged with the locally captured screenshot.

    Args:
        browser: SentienceBrowser instance; ``page``, ``api_key`` and
                 ``api_url`` must all be set
        options: Snapshot options (limit, filter, goal, trace and overlay flags)

    Returns:
        Snapshot built from the API response plus the local screenshot fields

    Raises:
        RuntimeError: If the browser has no page, the extension does not
            inject ``window.sentience`` within 5 seconds, or the HTTP request
            fails (the underlying requests exception is chained as the cause)
        ValueError: If the API key or API URL is missing, or the serialized
            payload exceeds MAX_PAYLOAD_BYTES
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    if not browser.api_key:
        raise ValueError("API key required for server-side processing")

    if not browser.api_url:
        raise ValueError("API URL required for server-side processing")

    # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
    # Even for API mode, we need the extension to collect raw data locally
    try:
        browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
    except Exception as e:
        raise RuntimeError(
            "Sentience extension failed to inject. Cannot collect raw data for API processing."
        ) from e

    # Step 1: Get raw data from local extension (always happens locally)
    raw_options: dict[str, Any] = {}
    if options.screenshot is not False:
        raw_options["screenshot"] = options.screenshot

    raw_result = browser.page.evaluate(
        """
        (options) => {
            return window.sentience.snapshot(options);
        }
        """,
        raw_options,
    )

    # Save trace if requested (save raw data before API processing)
    if options.save_trace:
        _save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)

    # Serialize the filter the same way the extension path does: objects with
    # model_dump() are converted, anything else (e.g. a plain dict) is passed
    # through unchanged instead of failing with AttributeError.
    filter_payload: Any = None
    if options.filter:
        if hasattr(options.filter, "model_dump"):
            filter_payload = options.filter.model_dump()
        else:
            filter_payload = options.filter

    # Step 2: Send to server for smart ranking/filtering
    # Use raw_elements (raw data) instead of elements (processed data)
    # Server validates API key and applies proprietary ranking logic
    payload = {
        "raw_elements": raw_result.get("raw_elements", []),  # Raw data needed for server processing
        "url": raw_result.get("url", ""),
        "viewport": raw_result.get("viewport"),
        "goal": options.goal,  # Optional goal/task description
        "options": {
            "limit": options.limit,
            "filter": filter_payload,
        },
    }

    # Check payload size before sending (server has 10MB limit)
    payload_json = json.dumps(payload)
    payload_size = len(payload_json.encode("utf-8"))
    if payload_size > MAX_PAYLOAD_BYTES:
        raise ValueError(
            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
            f"Try reducing the number of elements on the page or filtering elements."
        )

    headers = {
        "Authorization": f"Bearer {browser.api_key}",
        "Content-Type": "application/json",
    }

    # Only the HTTP exchange can raise RequestException, so only it is guarded
    try:
        response = requests.post(
            f"{browser.api_url}/v1/snapshot",
            data=payload_json,  # Reuse already-serialized JSON
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()

        api_result = response.json()
    except requests.exceptions.RequestException as e:
        # Chain the cause so the original network/HTTP error stays in the traceback
        raise RuntimeError(f"API request failed: {e}") from e
    else:
        # Merge API result with local data (screenshot, etc.)
        snapshot_data = {
            "status": api_result.get("status", "success"),
            "timestamp": api_result.get("timestamp"),
            "url": api_result.get("url", raw_result.get("url", "")),
            "viewport": api_result.get("viewport", raw_result.get("viewport")),
            "elements": api_result.get("elements", []),
            "screenshot": raw_result.get("screenshot"),  # Keep local screenshot
            "screenshot_format": raw_result.get("screenshot_format"),
            "error": api_result.get("error"),
        }

        # Show visual overlay if requested (use API-ranked elements)
        if options.show_overlay:
            elements = api_result.get("elements", [])
            if elements:
                browser.page.evaluate(
                    """
                    (elements) => {
                        if (window.sentience && window.sentience.showOverlay) {
                            window.sentience.showOverlay(elements, null);
                        }
                    }
                    """,
                    elements,
                )

        return Snapshot(**snapshot_data)