From a9a69778e65b9a7ed4be6001303260ce05d7b59f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Magnus=20M=C3=BCller?= <magnus@browser-use.com>
Date: Sun, 10 May 2026 15:45:54 +0000
Subject: [PATCH] docs: add self-modifying harness edge benchmark

---
 README.md                      |   5 +
 docs/edge-case-benchmark.html  | 328 +++++++++++++++++++++++++++++++++
 docs/self-modifying-harness.md | 148 +++++++++++++++
 3 files changed, 481 insertions(+)
 create mode 100644 docs/edge-case-benchmark.html
 create mode 100644 docs/self-modifying-harness.md
diff --git a/README.md b/README.md
index ab7f6a5d..399bdcd5 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,11 @@ Click Allow when the per-attach popup appears (Chrome 144+):
 
 See [agent-workspace/domain-skills/](agent-workspace/domain-skills/) for example tasks.
 
+For the core self-healing pattern, see
+[docs/self-modifying-harness.md](docs/self-modifying-harness.md). It includes
+four concrete edge cases and a standalone benchmark page for canvas signature,
+file upload, drag-and-drop, and coordinate-click tasks.
+
 ## Free Browser Use Cloud browsers
 
 Stealth, sub-agents, or headless deployment.<br>
diff --git a/docs/edge-case-benchmark.html b/docs/edge-case-benchmark.html
new file mode 100644
index 00000000..3abf7051
--- /dev/null
+++ b/docs/edge-case-benchmark.html
@@ -0,0 +1,328 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Browser Harness Edge-case Benchmark</title>
+  <style>
+    :root {
+      color-scheme: light;
+      --bg: #f7f7fb;
+      --ink: #171923;
+      --muted: #596171;
+      --line: #cfd6e4;
+      --pass: #087f5b;
+      --card: #ffffff;
+      --accent: #5b46f6;
+    }
+    * { box-sizing: border-box; }
+    body {
+      margin: 0;
+      font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+      background: var(--bg);
+      color: var(--ink);
+    }
+    main {
+      max-width: 1120px;
+      margin: 0 auto;
+      padding: 32px 20px 56px;
+    }
+    header {
+      margin-bottom: 24px;
+    }
+    h1 {
+      margin: 0 0 8px;
+      font-size: 34px;
+      line-height: 1.1;
+      letter-spacing: 0;
+    }
+    p {
+      margin: 0;
+      color: var(--muted);
+      line-height: 1.5;
+    }
+    .grid {
+      display: grid;
+      grid-template-columns: repeat(2, minmax(0, 1fr));
+      gap: 16px;
+    }
+    .panel {
+      min-height: 280px;
+      background: var(--card);
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      padding: 18px;
+      box-shadow: 0 1px 2px rgb(15 23 42 / 0.06);
+    }
+    .panel h2 {
+      margin: 0 0 12px;
+      font-size: 18px;
+    }
+    .status {
+      min-height: 28px;
+      margin-top: 12px;
+      font-weight: 700;
+      color: #8a4b00;
+    }
+    .status[data-pass="true"] {
+      color: var(--pass);
+    }
+    canvas {
+      display: block;
+      width: 100%;
+      height: 150px;
+      border: 2px dashed #8c97ac;
+      border-radius: 6px;
+      background: #fff;
+      touch-action: none;
+    }
+    .upload-row {
+      display: flex;
+      align-items: center;
+      gap: 12px;
+      min-height: 116px;
+    }
+    input[type="file"] {
+      max-width: 100%;
+    }
+    .drop-layout {
+      display: grid;
+      grid-template-columns: 160px 1fr;
+      gap: 18px;
+      align-items: center;
+      min-height: 150px;
+    }
+    #drag-card {
+      width: 150px;
+      height: 96px;
+      display: grid;
+      place-items: center;
+      border-radius: 8px;
+      background: #ebe9ff;
+      border: 2px solid var(--accent);
+      color: #2d218f;
+      font-weight: 800;
+      cursor: grab;
+      user-select: none;
+    }
+    #drop-zone {
+      min-height: 132px;
+      display: grid;
+      place-items: center;
+      border: 2px dashed #74809a;
+      border-radius: 8px;
+      background: #fbfcff;
+      color: var(--muted);
+      font-weight: 700;
+    }
+    #drop-zone.hot {
+      border-color: var(--accent);
+      background: #f1efff;
+    }
+    .coordinate-stage {
+      position: relative;
+      height: 168px;
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      overflow: hidden;
+      background:
+        linear-gradient(90deg, rgb(15 23 42 / 0.06) 1px, transparent 1px),
+        linear-gradient(rgb(15 23 42 / 0.06) 1px, transparent 1px),
+        #ffffff;
+      background-size: 32px 32px;
+    }
+    #coordinate-target {
+      position: absolute;
+      left: 72%;
+      top: 58%;
+      width: 34px;
+      height: 34px;
+      border-radius: 50%;
+      border: 0;
+      background: #ff4d6d;
+      box-shadow: 0 0 0 8px rgb(255 77 109 / 0.16);
+      cursor: crosshair;
+    }
+    #coordinate-target span {
+      position: absolute;
+      width: 1px;
+      height: 1px;
+      overflow: hidden;
+      clip: rect(0 0 0 0);
+    }
+    .summary {
+      margin-top: 16px;
+      padding: 16px 18px;
+      border: 1px solid var(--line);
+      border-radius: 8px;
+      background: #fff;
+      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
+      color: #202938;
+      white-space: pre-wrap;
+    }
+    @media (max-width: 760px) {
+      .grid { grid-template-columns: 1fr; }
+      .drop-layout { grid-template-columns: 1fr; }
+    }
+  </style>
+</head>
+<body>
+  <main>
+    <header>
+      <h1>Browser Harness Edge-case Benchmark</h1>
+      <p>Four browser mechanics that usually force an agent to inspect, patch, retry, and verify.</p>
+    </header>
+
+    <section class="grid" aria-label="Benchmark tasks">
+      <article class="panel" data-task="signature">
+        <h2>1. Canvas signature</h2>
+        <canvas id="signature-pad" width="520" height="150" aria-label="Signature pad"></canvas>
+        <div id="signature-status" class="status">Draw a stroke across the signature pad.</div>
+      </article>
+
+      <article class="panel" data-task="upload">
+        <h2>2. File upload</h2>
+        <div class="upload-row">
+          <input id="upload-input" type="file" aria-label="Upload benchmark file">
+        </div>
+        <div id="upload-status" class="status">Upload any local file.</div>
+      </article>
+
+      <article class="panel" data-task="drag">
+        <h2>3. Drag and drop</h2>
+        <div class="drop-layout">
+          <div id="drag-card" draggable="true">DRAG ME</div>
+          <div id="drop-zone">Drop here</div>
+        </div>
+        <div id="drag-status" class="status">Move the card into the drop zone.</div>
+      </article>
+
+      <article class="panel" data-task="coordinate">
+        <h2>4. Coordinate target</h2>
+        <div class="coordinate-stage" id="coordinate-stage">
+          <button id="coordinate-target" type="button"><span>Hit target</span></button>
+        </div>
+        <div id="coordinate-status" class="status">Click the red target by visible coordinates.</div>
+      </article>
+    </section>
+
+    <section class="summary" id="summary" aria-live="polite"></section>
+  </main>
+
+  <script>
+    const results = { // one flag per benchmark task; setPass() flips a flag to true
+      signature: false,
+      upload: false,
+      drag: false,
+      coordinate: false,
+    };
+
+    // Public handle for harness scripts: the live `results` flags plus two
+    // aggregate helpers computed from them on every call.
+    window.edgeBenchmark = {
+      results,
+      // Number of tasks whose flag is truthy.
+      passed: () => Object.values(results).reduce((count, ok) => count + (ok ? 1 : 0), 0),
+      // True only when no task flag is falsy.
+      complete: () => !Object.values(results).some((ok) => !ok),
+    };
+
+    function setPass(task, detail) { // mark `task` passed and surface `detail` in the UI
+      const statusLine = document.getElementById(`${task}-status`);
+      results[task] = true;
+      statusLine.setAttribute("data-pass", "true"); // picked up by .status[data-pass="true"]
+      statusLine.textContent = `PASS: ${detail}`;
+      renderSummary(); // keep the #summary panel in sync
+    }
+
+    // Show "passed=N/total" and the raw results map in the #summary live region.
+    function renderSummary() {
+      const header = `passed=${window.edgeBenchmark.passed()}/${Object.keys(results).length}\n`;
+      document.getElementById("summary").textContent = header + JSON.stringify(results, null, 2);
+    }
+
+    renderSummary();
+
+    const canvas = document.getElementById("signature-pad");
+    const ctx = canvas.getContext("2d");
+    ctx.lineWidth = 4;
+    ctx.lineCap = "round";
+    ctx.strokeStyle = "#171923";
+    let drawing = false; // true between pointerdown and pointerup
+    let last = null; // previous stroke point, as returned by canvasPoint()
+    let points = []; // every point of the current stroke; reset on pointerdown
+
+    function canvasPoint(event) { // pointer event -> {x, y} in canvas pixel coordinates
+      const bounds = canvas.getBoundingClientRect();
+      // Rescale client offsets from the rendered box to the canvas width/height attributes.
+      const scaleX = canvas.width / bounds.width;
+      const scaleY = canvas.height / bounds.height;
+      return { x: (event.clientX - bounds.left) * scaleX, y: (event.clientY - bounds.top) * scaleY };
+    }
+
+    canvas.addEventListener("pointerdown", (event) => {
+      drawing = true;
+      last = canvasPoint(event);
+      points = [last]; // a new press starts a fresh stroke
+      canvas.setPointerCapture(event.pointerId); // later pointer events keep targeting the canvas
+    });
+
+    canvas.addEventListener("pointermove", (event) => {
+      if (!drawing) return; // ignore moves outside a pointerdown..pointerup stroke
+      const next = canvasPoint(event);
+      ctx.beginPath(); // each segment is drawn as its own path from the previous point
+      ctx.moveTo(last.x, last.y);
+      ctx.lineTo(next.x, next.y);
+      ctx.stroke();
+      points.push(next); // recorded so the pointerup handler can measure the stroke
+      last = next;
+    });
+
+    canvas.addEventListener("pointerup", () => {
+      drawing = false;
+      // Sum the segment lengths of the recorded stroke, in canvas pixels.
+      let distance = 0;
+      for (let i = 1; i < points.length; i += 1) {
+        distance += Math.hypot(points[i].x - points[i - 1].x, points[i].y - points[i - 1].y);
+      }
+      if (points.length < 4 || !(distance > 80)) return; // too short to count as a signature
+      setPass("signature", `${points.length} points, ${Math.round(distance)}px stroke`);
+    });
+
+    document.getElementById("upload-input").addEventListener("change", ({ target }) => {
+      const picked = target.files ? target.files[0] : undefined; // first selected file, if any
+      if (picked) setPass("upload", picked.name);
+    });
+
+    const dragCard = document.getElementById("drag-card");
+    const dropZone = document.getElementById("drop-zone");
+
+    dragCard.addEventListener("dragstart", (event) => {
+      event.dataTransfer.setData("text/plain", "browser-harness-edge-card"); // payload the drop handler checks
+      event.dataTransfer.effectAllowed = "move";
+    });
+
+    dropZone.addEventListener("dragover", (event) => {
+      event.preventDefault(); // opt in as a drop target; the default dragover action rejects drops
+      dropZone.classList.add("hot"); // highlight via the #drop-zone.hot rule
+    });
+
+    dropZone.addEventListener("dragleave", () => {
+      dropZone.classList.remove("hot"); // undo the dragover highlight
+    });
+
+    dropZone.addEventListener("drop", (event) => {
+      event.preventDefault();
+      dropZone.classList.remove("hot");
+      const payload = event.dataTransfer.getData("text/plain");
+      if (payload !== "browser-harness-edge-card") return; // not the benchmark card
+      dropZone.textContent = "Dropped";
+      setPass("drag", "HTML5 DataTransfer accepted");
+    });
+
+    document.getElementById("coordinate-target").addEventListener("click", () => {
+      setPass("coordinate", "red target clicked"); // any click on the button passes the task
+    });
+  </script>
+</body>
+</html>
diff --git a/docs/self-modifying-harness.md b/docs/self-modifying-harness.md
new file mode 100644
index 00000000..11ae13b7
--- /dev/null
+++ b/docs/self-modifying-harness.md
@@ -0,0 +1,148 @@
+# Self-modifying browser harness
+
+Browser Harness is intentionally thin: it gives the agent direct Chrome
+DevTools Protocol access plus an editable workspace. When a browser task hits a
+missing mechanic, the agent should add the smallest helper or skill that makes
+the task work, use it, and keep that reusable code in `agent-workspace/`.
+
+That changes how to think about edge cases. A signature pad, a canvas-only UI,
+a custom drag target, or a hidden file input is not a permanent product limit.
+It is a prompt to inspect the page, patch the harness, retry, and save the
+working path.
+
+## The loop
+
+1. Reproduce the blocked interaction in the browser.
+2. Inspect with screenshots first, then DOM and raw CDP only when needed.
+3. Add a focused helper in `agent-workspace/agent_helpers.py` or a durable
+   site note in `agent-workspace/domain-skills/<site>/`.
+4. Run the helper against the real page.
+5. Verify with a screenshot or page-state read.
+6. Keep the helper small enough that the next agent can understand and edit it.
+
+Core helpers stay generic. Site-specific selectors, timing, and private API
+knowledge belong in the agent workspace or domain skills.
+
+## Example: signature or canvas field
+
+Problem: there is no real `<input>` to fill. The site expects pointer events on
+a canvas.
+
+Patch shape:
+
+```python
+def draw_signature_on_canvas(selector, points):
+    box = js(f"""
+    (() => {{
+      const c = document.querySelector({selector!r});
+      const r = c.getBoundingClientRect();
+      return {{x:r.left, y:r.top, w:r.width, h:r.height}};
+    }})()
+    """)
+    for i, (x, y) in enumerate(points):
+        event_type = "mousePressed" if i == 0 else "mouseMoved"
+        cdp("Input.dispatchMouseEvent", type=event_type,
+            x=box["x"] + x, y=box["y"] + y, button="left", clickCount=1)
+    cdp("Input.dispatchMouseEvent", type="mouseReleased",
+        x=box["x"] + points[-1][0], y=box["y"] + points[-1][1],
+        button="left", clickCount=1)
+```
+
+Choose the stroke from a screenshot, but pass `points` as offsets from the
+canvas's top-left corner; verify via page state or another screenshot.
+
+## Example: file upload
+
+Problem: the visible button opens an OS picker, which an agent cannot use
+directly.
+
+Patch shape:
+
+```python
+def upload_visible_or_hidden_file(selector, path):
+    upload_file(selector, path)
+    js(f"""
+    (() => {{
+      const input = document.querySelector({selector!r});
+      input.dispatchEvent(new Event("input", {{bubbles:true}}));
+      input.dispatchEvent(new Event("change", {{bubbles:true}}));
+    }})()
+    """)
+```
+
+Prefer `DOM.setFileInputFiles` through `upload_file()`. If the file input is
+created lazily, first click the visible upload button, wait for the input, then
+set the file.
+
+## Example: drag and drop
+
+Problem: the site uses custom drag events or a drop zone that does not respond
+to a simple click.
+
+Patch shape:
+
+```python
+def drag_center_to_center(source_selector, target_selector):
+    boxes = js(f"""
+    (() => {{
+      const s = document.querySelector({source_selector!r}).getBoundingClientRect();
+      const t = document.querySelector({target_selector!r}).getBoundingClientRect();
+      return {{
+        sx: s.left + s.width / 2, sy: s.top + s.height / 2,
+        tx: t.left + t.width / 2, ty: t.top + t.height / 2
+      }};
+    }})()
+    """)
+    cdp("Input.dispatchMouseEvent", type="mousePressed",
+        x=boxes["sx"], y=boxes["sy"], button="left", clickCount=1)
+    cdp("Input.dispatchMouseEvent", type="mouseMoved",
+        x=boxes["tx"], y=boxes["ty"], button="left")
+    cdp("Input.dispatchMouseEvent", type="mouseReleased",
+        x=boxes["tx"], y=boxes["ty"], button="left", clickCount=1)
+```
+
+If compositor-level movement does not trigger the app, inspect whether the app
+expects HTML5 `DataTransfer` events and add a DOM-specific helper for that
+site.
+
+## Example: coordinate-only target
+
+Problem: a visible control has no stable selector, sits inside a canvas, or is
+inside cross-origin UI where DOM inspection is the wrong tool.
+
+Patch shape:
+
+```python
+def click_visible_point(x, y):
+    click_at_xy(x, y)
+    wait(0.2)
+    capture_screenshot()
+```
+
+Use `capture_screenshot()` to locate the visible target. Keep the coordinate in
+the task script, not in a public domain skill, unless the layout is fixed and
+the skill also records viewport assumptions.
+
+## Local benchmark
+
+`docs/edge-case-benchmark.html` is a standalone page that exercises the four
+mechanics above:
+
+- canvas signature
+- file upload
+- drag and drop
+- coordinate click
+
+Open it with Browser Harness when changing helpers:
+
+```bash
+browser-harness -c '
+new_tab("file:///absolute/path/to/docs/edge-case-benchmark.html")
+wait_for_load()
+print(page_info())
+'
+```
+
+The page exposes `window.edgeBenchmark.results`, plus `passed()` and
+`complete()` helpers, for quick verification from `js(...)`.
+