pinchtab · luigiagent · Apr 12, 2026 · Apr 12, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,8 @@
-# Binary (root only, not cmd/semantic/)
+# Binary
 /semantic
+/semantic-bench
+tests/benchmark/semantic
+tests/e2e/semantic
 *.exe
 
 # Test
@@ -19,4 +22,5 @@ cover.out
 .claude
 tests/e2e/results/*.txt
 tests/benchmark/results/*.json
-tests/benchmark/results/*.md
+tests/benchmark/results/*.md
+tests/benchmark/baselines/*.json
diff --git a/README.md b/README.md
@@ -103,6 +103,7 @@ Implementations are internal — consumers use the `ElementMatcher` interface an
 ## Features
 
 - **Synonym expansion** — 54 UI synonym groups ("sign in" ↔ "log in", "cart" ↔ "basket", "preferences" ↔ "settings", etc.)
+- **Visual position hints** — Layout cues such as `top`, `bottom`, `left`, and `right`, plus `above`/`below` phrases anchored to another element
 - **Confidence calibration** — Scores mapped to high (≥ 0.8) / medium (≥ 0.6) / low labels
 - **Error classification** — Classify browser errors (CDP, chromedp) as recoverable or not
 - **Self-healing recovery** — Re-locate stale elements after DOM changes via callback interfaces
@@ -184,6 +185,11 @@ semantic find "login" --snapshot page.json --format json    # machine-readable
 semantic find "login" --snapshot page.json --format table   # human-readable
 semantic find "login" --snapshot page.json --format refs    # just refs
 
+# Visual position hints
+semantic find "button in top right corner" --snapshot page.json
+semantic find "link below the search box" --snapshot page.json
+semantic find "sidebar on the left" --snapshot page.json
+
 # Score a specific element
 semantic match "login" e4 --snapshot page.json
 
@@ -198,7 +204,7 @@ The library uses only the Go standard library. No external dependencies, no mode
 
 ## Design Trade-offs
 
-See [docs/DESIGN.md](docs/DESIGN.md) for detailed discussion of architectural decisions: hashing vs real embeddings, fixed synonym table vs learned, Jaccard vs TF-IDF, and recovery callbacks vs direct integration.
+See [docs/architecture/design-decisions.md](docs/architecture/design-decisions.md) for detailed discussion of architectural decisions: hashing vs real embeddings, fixed synonym table vs learned, Jaccard vs TF-IDF, and recovery callbacks vs direct integration.
 
 ## Origin
 

diff --git a/cmd/semantic-bench/main.go b/cmd/semantic-bench/main.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/pinchtab/semantic/internal/benchmark"
+)
+
+// usage is the top-level help text. main prints it to stdout for the
+// -h/--help/help spellings and appends it to the stderr message emitted for
+// an unknown command.
+const usage = `semantic-bench - Benchmark runner for semantic matching
+
+Usage:
+  semantic-bench <command> [flags]
+
+Commands:
+  check       Run benchmark and compare against baseline (default)
+  run         Run benchmark suites
+  compare     Compare two reports
+  lint        Validate dataset
+  catalog     Print dataset inventory
+  baseline    Manage quality baselines (create, update)
+  calibrate   Find optimal thresholds via precision/recall analysis
+  tune        Grid-search lexical/embedding weights
+  runtime     Check Go benchmark performance against baseline
+
+Flags:
+  -h, --help    Show help
+
+Run 'semantic-bench <command> --help' for command-specific help.
+`
+
+// main dispatches to the subcommand named by the first command-line argument.
+//
+// "check" is the default command: it runs when no arguments are given and
+// also when the first argument is a flag (starts with "-") other than the
+// help spellings, so that flags supplied without a command name reach check
+// instead of being rejected as an unknown command. "-h", "--help" and "help"
+// print the usage text to stdout. Any other unrecognized word prints an error
+// plus the usage text to stderr and exits with status 2.
+func main() {
+	if len(os.Args) < 2 {
+		// nil instead of os.Args[1:], which would panic on an empty argv.
+		runCheck(nil)
+		return
+	}
+
+	cmd := os.Args[1]
+	args := os.Args[2:]
+
+	switch cmd {
+	case "check":
+		runCheck(args)
+	case "run":
+		runRun(args)
+	case "compare":
+		runCompare(args)
+	case "lint":
+		runLint(args)
+	case "catalog":
+		runCatalog(args)
+	case "baseline":
+		runBaseline(args)
+	case "calibrate":
+		runCalibrate(args)
+	case "tune":
+		runTune(args)
+	case "runtime":
+		runRuntime(args)
+	case "-h", "--help", "help":
+		fmt.Print(usage)
+	default:
+		if cmd != "" && cmd[0] == '-' {
+			// A leading flag means the command name was omitted; hand the
+			// whole argument list to the default command.
+			runCheck(os.Args[1:])
+			return
+		}
+		fmt.Fprintf(os.Stderr, "unknown command: %s\n\n%s", cmd, usage)
+		os.Exit(2)
+	}
+}
+
+// runCheck implements the "check" command: it parses args with
+// benchmark.ParseCheckFlags, executes benchmark.RunCheck and prints the
+// outcome. The process exits with status 2 when the run returns an error and
+// with status 1 when the reported Status is "fail".
+func runCheck(args []string) {
+	opts := benchmark.ParseCheckFlags(args)
+
+	outcome, err := benchmark.RunCheck(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintCheckResult(outcome, opts)
+
+	if failed := outcome.Status == "fail"; failed {
+		os.Exit(1)
+	}
+}
+
+// runRun implements the "run" command: it parses args with
+// benchmark.ParseRunFlags, executes benchmark.RunBenchmark and prints the
+// outcome. The process exits with status 2 when the run returns an error.
+func runRun(args []string) {
+	opts := benchmark.ParseRunFlags(args)
+
+	outcome, err := benchmark.RunBenchmark(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintRunResult(outcome, opts)
+}
+
+// runCompare implements the "compare" command: it parses args with
+// benchmark.ParseCompareFlags, executes benchmark.RunCompare and prints the
+// outcome. The process exits with status 2 when the comparison returns an
+// error and with status 1 when the reported Status is "fail".
+func runCompare(args []string) {
+	opts := benchmark.ParseCompareFlags(args)
+
+	outcome, err := benchmark.RunCompare(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintCompareResult(outcome, opts)
+
+	if failed := outcome.Status == "fail"; failed {
+		os.Exit(1)
+	}
+}
+
+// runLint implements the "lint" command: it parses args with
+// benchmark.ParseLintFlags, executes benchmark.RunLint and prints the
+// outcome. The process exits with status 2 when linting itself returns an
+// error and with status 1 when the result reports one or more Errors.
+func runLint(args []string) {
+	opts := benchmark.ParseLintFlags(args)
+
+	outcome, err := benchmark.RunLint(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintLintResult(outcome, opts)
+
+	if hasErrors := outcome.Errors > 0; hasErrors {
+		os.Exit(1)
+	}
+}
+
+// runCatalog implements the "catalog" command: it parses args with
+// benchmark.ParseCatalogFlags, executes benchmark.RunCatalog and prints the
+// outcome. The process exits with status 2 when the call returns an error.
+func runCatalog(args []string) {
+	opts := benchmark.ParseCatalogFlags(args)
+
+	outcome, err := benchmark.RunCatalog(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintCatalogResult(outcome, opts)
+}
+
+// runBaseline implements the "baseline" command: it parses args with
+// benchmark.ParseBaselineFlags, executes benchmark.RunBaseline and prints the
+// outcome. The process exits with status 2 when the call returns an error.
+func runBaseline(args []string) {
+	opts := benchmark.ParseBaselineFlags(args)
+
+	outcome, err := benchmark.RunBaseline(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintBaselineResult(outcome, opts)
+}
+
+// runCalibrate implements the "calibrate" command: it parses args with
+// benchmark.ParseCalibrateFlags, executes benchmark.RunCalibrate and prints
+// the outcome. The process exits with status 2 when the call returns an
+// error.
+func runCalibrate(args []string) {
+	opts := benchmark.ParseCalibrateFlags(args)
+
+	outcome, err := benchmark.RunCalibrate(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintCalibrateResult(outcome, opts)
+}
+
+// runTune implements the "tune" command: it parses args with
+// benchmark.ParseTuneFlags, executes benchmark.RunTune and prints the
+// outcome. The process exits with status 2 when the call returns an error.
+func runTune(args []string) {
+	opts := benchmark.ParseTuneFlags(args)
+
+	outcome, err := benchmark.RunTune(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintTuneResult(outcome, opts)
+}
+
+// runRuntime implements the "runtime" command: it parses args with
+// benchmark.ParseRuntimeFlags, executes benchmark.RunRuntime and prints the
+// outcome. The process exits with status 2 when the call returns an error.
+// A "fail" Status only turns into exit status 1 when the parsed config has
+// FailOnRegression set; otherwise the failure is reported but not fatal.
+func runRuntime(args []string) {
+	opts := benchmark.ParseRuntimeFlags(args)
+
+	outcome, err := benchmark.RunRuntime(opts)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error: %v\n", err)
+		os.Exit(2)
+	}
+
+	benchmark.PrintRuntimeResult(outcome, opts)
+
+	if opts.FailOnRegression && outcome.Status == "fail" {
+		os.Exit(1)
+	}
+}
diff --git a/cmd/semantic/main.go b/cmd/semantic/main.go
@@ -56,16 +56,24 @@ Flags (find/match):
   --threshold <n>     Minimum score (default: 0.3)
   --top-k <n>         Max results (default: 3)
   --strategy <name>   lexical, embedding, or combined (default: combined)
+  --lexical-weight <n>   Combined strategy lexical weight override
+  --embedding-weight <n> Combined strategy embedding weight override
   --format <fmt>      json, table, or refs (default: table)
 `)
 }
 
 // snapshotElement is the JSON shape from pinchtab's /snapshot endpoint.
 type snapshotPositional struct {
-	Depth        int    `json:"depth"`
-	SiblingIndex int    `json:"sibling_index"`
-	SiblingCount int    `json:"sibling_count"`
-	LabelledBy   string `json:"labelled_by"`
+	Depth        int     `json:"depth"`
+	SiblingIndex int     `json:"sibling_index"`
+	SiblingCount int     `json:"sibling_count"`
+	LabelledBy   string  `json:"labelled_by"`
+	X            float64 `json:"x"`
+	Y            float64 `json:"y"`
+	Top          float64 `json:"top"`
+	Left         float64 `json:"left"`
+	Width        float64 `json:"width"`
+	Height       float64 `json:"height"`
 }
 
 type snapshotElement struct {
@@ -80,6 +88,12 @@ type snapshotElement struct {
 	SiblingIdx  int                 `json:"sibling_index"`
 	SiblingCnt  int                 `json:"sibling_count"`
 	LabelledBy  string              `json:"labelled_by"`
+	X           float64             `json:"x"`
+	Y           float64             `json:"y"`
+	Top         float64             `json:"top"`
+	Left        float64             `json:"left"`
+	Width       float64             `json:"width"`
+	Height      float64             `json:"height"`
 	Positional  *snapshotPositional `json:"positional"`
 }
 
@@ -112,6 +126,16 @@ func loadSnapshot(path string) ([]semantic.ElementDescriptor, error) {
 		depth := e.Depth
 		siblingIdx := e.SiblingIdx
 		siblingCnt := e.SiblingCnt
+		x := e.X
+		y := e.Y
+		if x == 0 && e.Left != 0 {
+			x = e.Left
+		}
+		if y == 0 && e.Top != 0 {
+			y = e.Top
+		}
+		width := e.Width
+		height := e.Height
 		if e.Positional != nil {
 			if e.Positional.Depth != 0 {
 				depth = e.Positional.Depth
@@ -125,6 +149,23 @@ func loadSnapshot(path string) ([]semantic.ElementDescriptor, error) {
 			if e.Positional.LabelledBy != "" {
 				labelledBy = e.Positional.LabelledBy
 			}
+
+			hasHorizontal := e.Positional.X != 0 || e.Positional.Left != 0 || e.Positional.Width > 0
+			hasVertical := e.Positional.Y != 0 || e.Positional.Top != 0 || e.Positional.Height > 0
+			if hasHorizontal {
+				x = e.Positional.X
+				if x == 0 && e.Positional.Left != 0 {
+					x = e.Positional.Left
+				}
+				width = e.Positional.Width
+			}
+			if hasVertical {
+				y = e.Positional.Y
+				if y == 0 && e.Positional.Top != 0 {
+					y = e.Positional.Top
+				}
+				height = e.Positional.Height
+			}
 		}
 
 		descs[i] = semantic.ElementDescriptor{
@@ -140,6 +181,10 @@ func loadSnapshot(path string) ([]semantic.ElementDescriptor, error) {
 				SiblingIndex: siblingIdx,
 				SiblingCount: siblingCnt,
 				LabelledBy:   labelledBy,
+				X:            x,
+				Y:            y,
+				Width:        width,
+				Height:       height,
 			},
 		}
 	}
@@ -166,6 +211,8 @@ func runFind(args []string) {
 	threshold := fs.Float64("threshold", 0.3, "minimum score")
 	topK := fs.Int("top-k", 3, "max results")
 	strategy := fs.String("strategy", "combined", "matching strategy")
+	lexicalWeight := fs.Float64("lexical-weight", 0, "combined strategy lexical weight override")
+	embeddingWeight := fs.Float64("embedding-weight", 0, "combined strategy embedding weight override")
 	format := fs.String("format", "table", "output format: json, table, refs")
 	_ = fs.Parse(args)
 
@@ -183,8 +230,10 @@ func runFind(args []string) {
 
 	matcher := newMatcher(*strategy)
 	result, err := matcher.Find(context.Background(), query, elements, semantic.FindOptions{
-		Threshold: *threshold,
-		TopK:      *topK,
+		Threshold:       *threshold,
+		TopK:            *topK,
+		LexicalWeight:   *lexicalWeight,
+		EmbeddingWeight: *embeddingWeight,
 	})
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %v\n", err)

diff --git a/cmd/semantic/main_test.go b/cmd/semantic/main_test.go
@@ -12,8 +12,8 @@ func TestLoadSnapshot_PropagatesInteractiveFlag(t *testing.T) {
 	}
 
 	json := `[
-		{"ref":"e1","role":"button","name":"Submit","interactive":true,"parent":"Login form","section":"Authentication","depth":3,"sibling_index":1,"sibling_count":2,"labelled_by":"Primary Action"},
-		{"ref":"e2","role":"text","name":"Submit","interactive":false,"parent":"Payment form","section":"Checkout","positional":{"depth":2,"sibling_index":0,"sibling_count":1,"labelled_by":"Secondary Action"}}
+		{"ref":"e1","role":"button","name":"Submit","interactive":true,"parent":"Login form","section":"Authentication","depth":3,"sibling_index":1,"sibling_count":2,"labelled_by":"Primary Action","x":20,"y":40,"width":120,"height":30},
+		{"ref":"e2","role":"text","name":"Submit","interactive":false,"parent":"Payment form","section":"Checkout","positional":{"depth":2,"sibling_index":0,"sibling_count":1,"labelled_by":"Secondary Action","left":300,"top":640,"width":200,"height":44}}
 	]`
 	if _, err := f.WriteString(json); err != nil {
 		t.Fatalf("WriteString failed: %v", err)
@@ -50,6 +50,12 @@ func TestLoadSnapshot_PropagatesInteractiveFlag(t *testing.T) {
 	if descs[0].Positional.LabelledBy != "Primary Action" {
 		t.Fatalf("expected first descriptor labelled_by=Primary Action, got %q", descs[0].Positional.LabelledBy)
 	}
+	if descs[0].Positional.X != 20 || descs[0].Positional.Y != 40 {
+		t.Fatalf("expected first descriptor x/y=20/40, got %f/%f", descs[0].Positional.X, descs[0].Positional.Y)
+	}
+	if descs[0].Positional.Width != 120 || descs[0].Positional.Height != 30 {
+		t.Fatalf("expected first descriptor width/height=120/30, got %f/%f", descs[0].Positional.Width, descs[0].Positional.Height)
+	}
 	if descs[1].Interactive {
 		t.Fatalf("expected second descriptor interactive=false")
 	}
@@ -71,4 +77,10 @@ func TestLoadSnapshot_PropagatesInteractiveFlag(t *testing.T) {
 	if descs[1].Positional.LabelledBy != "Secondary Action" {
 		t.Fatalf("expected second descriptor labelled_by=Secondary Action, got %q", descs[1].Positional.LabelledBy)
 	}
+	if descs[1].Positional.X != 300 || descs[1].Positional.Y != 640 {
+		t.Fatalf("expected second descriptor x/y=300/640, got %f/%f", descs[1].Positional.X, descs[1].Positional.Y)
+	}
+	if descs[1].Positional.Width != 200 || descs[1].Positional.Height != 44 {
+		t.Fatalf("expected second descriptor width/height=200/44, got %f/%f", descs[1].Positional.Width, descs[1].Positional.Height)
+	}
 }