predicate-runtime-python/sentience/tracing.py at df748e9cd225d06beacd0f505c69a750d9bd56fb · PredicateSystems/predicate-runtime-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
"""
Trace event writer for Sentience agents.

Provides abstract interface and JSONL implementation for emitting trace events.
"""

import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from collections.abc import Callable

from .models import TraceStats
from .trace_file_manager import TraceFileManager


@dataclass
class TraceEvent:
    """
    Trace event data structure.

    Represents a single event in the agent execution trace.
    """

    v: int  # Schema version
    type: str  # Event type
    ts: str  # ISO 8601 timestamp
    run_id: str  # UUID for the run
    seq: int  # Sequence number
    data: dict[str, Any]  # Event payload
    step_id: str | None = None  # UUID for the step (if step-scoped)
    ts_ms: int | None = None  # Unix timestamp in milliseconds

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        result = {
            "v": self.v,
            "type": self.type,
            "ts": self.ts,
            "run_id": self.run_id,
            "seq": self.seq,
            "data": self.data,
        }

        if self.step_id is not None:
            result["step_id"] = self.step_id

        if self.ts_ms is not None:
            result["ts_ms"] = self.ts_ms

        return result


class TraceSink(ABC):
    """
    Abstract interface for trace event sink.

    Implementations can write to files, databases, or remote services.
    """

    @abstractmethod
    def emit(self, event: dict[str, Any]) -> None:
        """
        Emit a trace event.

        Args:
            event: Event dictionary (from TraceEvent.to_dict())
        """
        pass

    @abstractmethod
    def close(self) -> None:
        """Close the sink and flush any buffered data."""
        pass


class JsonlTraceSink(TraceSink):
    """
    JSONL file sink for trace events.

    Writes one JSON object per line to a file.
    """

    def __init__(self, path: str | Path):
        """
        Initialize JSONL sink.

        Args:
            path: File path to write traces to
        """
        self.path = Path(path)
        TraceFileManager.ensure_directory(self.path)

        # Open file in append mode with line buffering
        self._file = open(self.path, "a", encoding="utf-8", buffering=1)

    def emit(self, event: dict[str, Any]) -> None:
        """
        Emit event as JSONL line.

        Args:
            event: Event dictionary
        """
        TraceFileManager.write_event(self._file, event)

    def close(self) -> None:
        """Close the file and generate index."""
        if hasattr(self, "_file") and not self._file.closed:
            self._file.close()

        # Generate index after closing file
        self._generate_index()

    def get_stats(self) -> TraceStats:
        """
        Extract execution statistics from trace file (for local traces).

        Returns:
            TraceStats with execution statistics
        """
        try:
            # Read trace file to extract stats
            events = TraceFileManager.read_events(self.path)
            return TraceFileManager.extract_stats(events)
        except Exception:
            return TraceStats(
                total_steps=0,
                total_events=0,
                duration_ms=None,
                final_status="unknown",
                started_at=None,
                ended_at=None,
            )

    def _generate_index(self) -> None:
        """Generate trace index file (automatic on close)."""
        try:
            from .trace_indexing import write_trace_index

            # Use frontend format to ensure 'step' field is present (1-based)
            # Frontend derives sequence from step.step - 1, so step must be valid
            index_path = Path(self.path).with_suffix(".index.json")
            write_trace_index(str(self.path), str(index_path), frontend_format=True)
        except Exception as e:
            # Non-fatal: log but don't crash
            print(f"⚠️  Failed to generate trace index: {e}")

    def __enter__(self):
        """Context manager support."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager cleanup."""
        self.close()
        return False


@dataclass
class Tracer:
    """
    Trace event builder and emitter.

    Manages sequence numbers and provides convenient methods for emitting events.
    Tracks execution statistics and final status for trace completion.

    Args:
        run_id: Unique identifier for this trace run
        sink: TraceSink implementation for writing events
        screenshot_processor: Optional function to process screenshots before emission.
                            Takes base64 string, returns processed base64 string.
                            Useful for PII redaction or custom image processing.

    Example:
        >>> from sentience import Tracer, JsonlTraceSink
        >>>
        >>> # Basic usage
        >>> sink = JsonlTraceSink("trace.jsonl")
        >>> tracer = Tracer(run_id="abc123", sink=sink)
        >>>
        >>> # With screenshot processor for PII redaction
        >>> def redact_pii(screenshot_base64: str) -> str:
        ...     # Your custom redaction logic
        ...     return redacted_screenshot
        >>>
        >>> tracer = Tracer(
        ...     run_id="abc123",
        ...     sink=sink,
        ...     screenshot_processor=redact_pii
        ... )
    """

    run_id: str
    sink: TraceSink
    screenshot_processor: Callable[[str], str] | None = None
    seq: int = field(default=0, init=False)
    # Stats tracking
    total_steps: int = field(default=0, init=False)
    total_events: int = field(default=0, init=False)
    started_at: datetime | None = field(default=None, init=False)
    ended_at: datetime | None = field(default=None, init=False)
    final_status: str = field(default="unknown", init=False)
    # Track step outcomes for automatic status inference
    _step_successes: int = field(default=0, init=False)
    _step_failures: int = field(default=0, init=False)
    _has_errors: bool = field(default=False, init=False)

    def emit(
        self,
        event_type: str,
        data: dict[str, Any],
        step_id: str | None = None,
    ) -> None:
        """
        Emit a trace event.

        Args:
            event_type: Type of event (e.g., 'run_start', 'step_end')
            data: Event-specific payload
            step_id: Step UUID (if step-scoped event)
        """
        self.seq += 1
        self.total_events += 1

        # Apply screenshot processor if configured and screenshot is present
        if self.screenshot_processor and "screenshot_base64" in data:
            data = data.copy()  # Don't modify the original dict
            data["screenshot_base64"] = self.screenshot_processor(data["screenshot_base64"])

        # Generate timestamps
        ts_ms = int(time.time() * 1000)
        ts = time.strftime("%Y-%m-%dT%H:%M:%S.000Z", time.gmtime())

        event = TraceEvent(
            v=1,
            type=event_type,
            ts=ts,
            ts_ms=ts_ms,
            run_id=self.run_id,
            seq=self.seq,
            step_id=step_id,
            data=data,
        )

        self.sink.emit(event.to_dict())

        # Track step outcomes for automatic status inference
        if event_type == "step_end":
            success = data.get("success", False)
            if success:
                self._step_successes += 1
            else:
                self._step_failures += 1
        elif event_type == "error":
            self._has_errors = True

    def emit_run_start(
        self,
        agent: str,
        llm_model: str | None = None,
        config: dict[str, Any] | None = None,
    ) -> None:
        """
        Emit run_start event.

        Args:
            agent: Agent name (e.g., 'SentienceAgent')
            llm_model: LLM model name
            config: Agent configuration
        """
        # Track start time
        self.started_at = datetime.utcnow()

        data: dict[str, Any] = {"agent": agent}
        if llm_model is not None:
            data["llm_model"] = llm_model
        if config is not None:
            data["config"] = config

        self.emit("run_start", data)

    def emit_step_start(
        self,
        step_id: str,
        step_index: int,
        goal: str,
        attempt: int = 0,
        pre_url: str | None = None,
    ) -> None:
        """
        Emit step_start event.

        Args:
            step_id: Step UUID
            step_index: Step number (1-indexed)
            goal: Step goal description
            attempt: Attempt number (0-indexed)
            pre_url: URL before step
        """
        # Track step count (only count first attempt of each step)
        if attempt == 0:
            self.total_steps = max(self.total_steps, step_index)

        data = {
            "step_id": step_id,
            "step_index": step_index,
            "goal": goal,
            "attempt": attempt,
        }
        if pre_url is not None:
            data["pre_url"] = pre_url

        self.emit("step_start", data, step_id=step_id)

    def emit_run_end(self, steps: int, status: str | None = None) -> None:
        """
        Emit run_end event.

        Args:
            steps: Total number of steps executed
            status: Optional final status ("success", "failure", "partial", "unknown")
                    If not provided, infers from tracked outcomes or uses self.final_status
        """
        # Track end time
        self.ended_at = datetime.utcnow()

        # Auto-infer status if not provided and not explicitly set
        if status is None and self.final_status == "unknown":
            self._infer_final_status()

        # Use provided status or fallback to self.final_status
        final_status = status if status is not None else self.final_status

        # Ensure total_steps is at least the provided steps value
        self.total_steps = max(self.total_steps, steps)

        self.emit("run_end", {"steps": steps, "status": final_status})

    def emit_error(
        self,
        step_id: str,
        error: str,
        attempt: int = 0,
    ) -> None:
        """
        Emit error event.

        Args:
            step_id: Step UUID
            error: Error message
            attempt: Attempt number when error occurred
        """
        data = {
            "step_id": step_id,
            "error": error,
            "attempt": attempt,
        }
        self.emit("error", data, step_id=step_id)

    def set_final_status(self, status: str) -> None:
        """
        Set the final status of the trace run.

        Args:
            status: Final status ("success", "failure", "partial", "unknown")
        """
        if status not in ("success", "failure", "partial", "unknown"):
            raise ValueError(
                f"Invalid status: {status}. Must be one of: success, failure, partial, unknown"
            )
        self.final_status = status

    def get_stats(self) -> TraceStats:
        """
        Get execution statistics for trace completion.

        Returns:
            TraceStats with execution statistics
        """
        duration_ms: int | None = None
        if self.started_at and self.ended_at:
            delta = self.ended_at - self.started_at
            duration_ms = int(delta.total_seconds() * 1000)

        return TraceStats(
            total_steps=self.total_steps,
            total_events=self.total_events,
            duration_ms=duration_ms,
            final_status=self.final_status,
            started_at=self.started_at.isoformat() + "Z" if self.started_at else None,
            ended_at=self.ended_at.isoformat() + "Z" if self.ended_at else None,
        )

    def _infer_final_status(self) -> None:
        """
        Automatically infer final_status from tracked step outcomes if not explicitly set.

        This is called automatically in close() if final_status is still "unknown".
        """
        if self.final_status != "unknown":
            # Status already set explicitly, don't override
            return

        # Infer from tracked outcomes
        if self._has_errors:
            # Has errors - check if there were successful steps too
            if self._step_successes > 0:
                self.final_status = "partial"
            else:
                self.final_status = "failure"
        elif self._step_successes > 0:
            # Has successful steps and no errors
            self.final_status = "success"
        # Otherwise stays "unknown" (no steps executed or no clear outcome)

    def close(self, **kwargs) -> None:
        """
        Close the underlying sink.

        Args:
            **kwargs: Passed through to sink.close() (e.g., blocking=True for CloudTraceSink)
        """
        # Auto-infer final_status if not explicitly set and we have step outcomes
        if self.final_status == "unknown" and (
            self._step_successes > 0 or self._step_failures > 0 or self._has_errors
        ):
            self._infer_final_status()

        # Check if sink.close() accepts kwargs (CloudTraceSink does, JsonlTraceSink doesn't)
        import inspect

        sig = inspect.signature(self.sink.close)
        if any(
            p.kind in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.KEYWORD_ONLY)
            for p in sig.parameters.values()
        ):
            self.sink.close(**kwargs)
        else:
            self.sink.close()

    def __enter__(self):
        """Context manager support."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager cleanup."""
        self.close()
        return False