From b4ce8a14c357d96ff84dcf4025015746bdc1038f Mon Sep 17 00:00:00 2001 From: realkim93 Date: Fri, 22 May 2026 20:03:00 +0900 Subject: [PATCH 1/2] feat: auto-zoom on clicks + exclude OpenScreen's own windows from display capture Cursor telemetry already records click events (interactionType), but the suggestion algorithm only looks at cursor dwell. This wires clicks through to the suggestion pipeline so the "Suggest Zooms from Cursor" button can produce zooms where the user actually clicked, not just where the cursor paused. - Pass interactionType through readCursorTelemetryFile (was stripped on load). - Add detectZoomClickCandidates with 700ms clustering for double/triple clicks. - detectZoomCandidates combines click + dwell with clicks ranked stronger. - TimelineEditor switches to the combined detector. Separately, full-screen recordings include the OpenScreen HUD because the helper passes excludingWindows: []. The main-process IPC handler now attaches its own pid (as excludedApps) to display-capture requests, and the SCK helper excludes any windows owned by that process / bundle identifier from the SCContentFilter. Tests: add zoomSuggestionUtils.test.ts (5 cases). --- electron/ipc/handlers.ts | 4 ++ .../main.swift | 25 +++++++- .../video-editor/timeline/TimelineEditor.tsx | 4 +- .../timeline/zoomSuggestionUtils.test.ts | 61 ++++++++++++++++++ .../timeline/zoomSuggestionUtils.ts | 62 ++++++++++++++++++- src/lib/nativeMacRecording.ts | 4 ++ src/native/contracts.ts | 1 + 7 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 src/components/video-editor/timeline/zoomSuggestionUtils.test.ts diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 009ade60a..5d2815fb5 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -516,6 +516,7 @@ async function readCursorTelemetryFile(targetVideoPath: string) { timeMs: sample.timeMs, cx: sample.cx, cy: sample.cy, + ...(sample.interactionType ? 
{ interactionType: sample.interactionType } : {}), })), }; } catch (error) { @@ -1686,6 +1687,8 @@ export function registerIpcHandlers( null) : getSelectedDisplay(); const bounds = request.source.bounds ?? sourceDisplay?.bounds ?? getSelectedSourceBounds(); + const excludedApps = + request.source.type === "display" ? [{ processID: process.pid }] : undefined; const config: NativeMacRecordingRequest = { ...request, schemaVersion: 1, @@ -1712,6 +1715,7 @@ export function registerIpcHandlers( `${RECORDING_FILE_PREFIX}${recordingId}${RECORDING_SESSION_SUFFIX}`, ), }, + excludedApps, }; console.info("[native-sck] starting macOS capture", { diff --git a/electron/native/screencapturekit/Sources/OpenScreenScreenCaptureKitHelper/main.swift b/electron/native/screencapturekit/Sources/OpenScreenScreenCaptureKitHelper/main.swift index 14860b03f..4b625ce28 100644 --- a/electron/native/screencapturekit/Sources/OpenScreenScreenCaptureKitHelper/main.swift +++ b/electron/native/screencapturekit/Sources/OpenScreenScreenCaptureKitHelper/main.swift @@ -62,6 +62,11 @@ struct RecordingRequest: Decodable { let manifestPath: String? } + struct ExcludedApp: Decodable { + let bundleIdentifier: String? + let processID: Int32? + } + let schemaVersion: Int? let recordingId: Int? let source: Source @@ -70,6 +75,7 @@ struct RecordingRequest: Decodable { let webcam: Webcam let cursor: Cursor let outputs: Outputs + let excludedApps: [ExcludedApp]? } enum HelperError: Error, CustomStringConvertible { @@ -348,8 +354,25 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate { } let width = Int(CGDisplayPixelsWide(display.displayID)) let height = Int(CGDisplayPixelsHigh(display.displayID)) + let requestedExclusions = request.excludedApps ?? 
[] + let excludedBundleIdentifiers = Set( + requestedExclusions.compactMap { $0.bundleIdentifier } + ) + let excludedProcessIDs = Set( + requestedExclusions.compactMap { $0.processID } + ) + let excludedWindows = content.windows.filter { window in + guard let owner = window.owningApplication else { return false } + if excludedBundleIdentifiers.contains(owner.bundleIdentifier) { + return true + } + if excludedProcessIDs.contains(owner.processID) { + return true + } + return false + } return CaptureTarget( - filter: SCContentFilter(display: display, excludingWindows: []), + filter: SCContentFilter(display: display, excludingWindows: excludedWindows), width: clampCaptureDimension(width, fallback: request.video.width), height: clampCaptureDimension(height, fallback: request.video.height) ) diff --git a/src/components/video-editor/timeline/TimelineEditor.tsx b/src/components/video-editor/timeline/TimelineEditor.tsx index 759fcbbed..553c34901 100644 --- a/src/components/video-editor/timeline/TimelineEditor.tsx +++ b/src/components/video-editor/timeline/TimelineEditor.tsx @@ -38,7 +38,7 @@ import Item from "./Item"; import KeyframeMarkers from "./KeyframeMarkers"; import Row from "./Row"; import TimelineWrapper from "./TimelineWrapper"; -import { detectZoomDwellCandidates, normalizeCursorTelemetry } from "./zoomSuggestionUtils"; +import { detectZoomCandidates, normalizeCursorTelemetry } from "./zoomSuggestionUtils"; const ZOOM_ROW_ID = "row-zoom"; const TRIM_ROW_ID = "row-trim"; @@ -1157,7 +1157,7 @@ export default function TimelineEditor({ return; } - const dwellCandidates = detectZoomDwellCandidates(normalizedSamples); + const dwellCandidates = detectZoomCandidates(normalizedSamples); if (dwellCandidates.length === 0) { toast.info(t("errors.noDwellMoments"), { diff --git a/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts b/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts new file mode 100644 index 000000000..18fb44b82 --- /dev/null +++ 
b/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from "vitest"; +import type { CursorTelemetryPoint } from "../types"; +import { detectZoomCandidates, detectZoomClickCandidates } from "./zoomSuggestionUtils"; + +describe("detectZoomClickCandidates", () => { + it("returns no candidates when there are no click samples", () => { + const samples: CursorTelemetryPoint[] = [ + { timeMs: 0, cx: 0.1, cy: 0.1, interactionType: "move" }, + { timeMs: 100, cx: 0.2, cy: 0.2, interactionType: "move" }, + ]; + expect(detectZoomClickCandidates(samples)).toEqual([]); + }); + + it("creates one candidate per isolated click", () => { + const samples: CursorTelemetryPoint[] = [ + { timeMs: 1000, cx: 0.3, cy: 0.4, interactionType: "click" }, + { timeMs: 5000, cx: 0.7, cy: 0.8, interactionType: "click" }, + ]; + const candidates = detectZoomClickCandidates(samples); + expect(candidates).toHaveLength(2); + expect(candidates[0].focus).toEqual({ cx: 0.3, cy: 0.4 }); + expect(candidates[1].focus).toEqual({ cx: 0.7, cy: 0.8 }); + expect(candidates[0].source).toBe("click"); + }); + + it("clusters rapid successive clicks (double-click) into a single candidate", () => { + const samples: CursorTelemetryPoint[] = [ + { timeMs: 1000, cx: 0.5, cy: 0.5, interactionType: "click" }, + { timeMs: 1200, cx: 0.5, cy: 0.5, interactionType: "click" }, + { timeMs: 1400, cx: 0.5, cy: 0.5, interactionType: "click" }, + ]; + const candidates = detectZoomClickCandidates(samples); + expect(candidates).toHaveLength(1); + expect(candidates[0].centerTimeMs).toBe(1200); + }); + + it("treats double-click and right-click as click interactions", () => { + const samples: CursorTelemetryPoint[] = [ + { timeMs: 1000, cx: 0.2, cy: 0.2, interactionType: "double-click" }, + { timeMs: 5000, cx: 0.8, cy: 0.8, interactionType: "right-click" }, + ]; + expect(detectZoomClickCandidates(samples)).toHaveLength(2); + }); +}); + +describe("detectZoomCandidates", () => { 
+ it("returns click candidates ahead of dwell candidates", () => { + const samples: CursorTelemetryPoint[] = [ + { timeMs: 0, cx: 0.1, cy: 0.1, interactionType: "move" }, + { timeMs: 500, cx: 0.1, cy: 0.1, interactionType: "move" }, + { timeMs: 1000, cx: 0.1, cy: 0.1, interactionType: "move" }, + { timeMs: 2000, cx: 0.9, cy: 0.9, interactionType: "click" }, + ]; + const candidates = detectZoomCandidates(samples); + const clickIndex = candidates.findIndex((c) => c.source === "click"); + const dwellIndex = candidates.findIndex((c) => c.source === "dwell"); + expect(clickIndex).toBeGreaterThanOrEqual(0); + expect(dwellIndex).toBeGreaterThanOrEqual(0); + expect(clickIndex).toBeLessThan(dwellIndex); + }); +}); diff --git a/src/components/video-editor/timeline/zoomSuggestionUtils.ts b/src/components/video-editor/timeline/zoomSuggestionUtils.ts index 9f807d32c..0aa58fda1 100644 --- a/src/components/video-editor/timeline/zoomSuggestionUtils.ts +++ b/src/components/video-editor/timeline/zoomSuggestionUtils.ts @@ -4,10 +4,15 @@ export const MIN_DWELL_DURATION_MS = 450; export const MAX_DWELL_DURATION_MS = 2600; export const DWELL_MOVE_THRESHOLD = 0.02; +export const CLICK_CLUSTER_WINDOW_MS = 700; +export const CLICK_STRENGTH_BASE_MS = 3000; +export const CLICK_STRENGTH_PER_EVENT_MS = 600; + export interface ZoomDwellCandidate { centerTimeMs: number; focus: ZoomFocus; strength: number; + source?: "dwell" | "click"; } function normalizeTelemetrySample( @@ -77,5 +82,60 @@ export function detectZoomDwellCandidates(samples: CursorTelemetryPoint[]): Zoom } pushRunIfDwell(runStart, samples.length); - return dwellCandidates; + return dwellCandidates.map((candidate) => ({ ...candidate, source: "dwell" as const })); +} + +const CLICK_INTERACTIONS = new Set(["click", "double-click", "right-click", "middle-click"]); + +export function detectZoomClickCandidates(samples: CursorTelemetryPoint[]): ZoomDwellCandidate[] { + if (samples.length === 0) { + return []; + } + + const clickSamples = 
samples.filter( + (sample) => sample.interactionType && CLICK_INTERACTIONS.has(sample.interactionType), + ); + + if (clickSamples.length === 0) { + return []; + } + + const clusters: CursorTelemetryPoint[][] = []; + let currentCluster: CursorTelemetryPoint[] = []; + + for (const click of clickSamples) { + if (currentCluster.length === 0) { + currentCluster.push(click); + continue; + } + const lastClick = currentCluster[currentCluster.length - 1]; + if (click.timeMs - lastClick.timeMs <= CLICK_CLUSTER_WINDOW_MS) { + currentCluster.push(click); + } else { + clusters.push(currentCluster); + currentCluster = [click]; + } + } + if (currentCluster.length > 0) { + clusters.push(currentCluster); + } + + return clusters.map((cluster) => { + const centerTimeMs = Math.round(cluster.reduce((sum, c) => sum + c.timeMs, 0) / cluster.length); + const avgCx = cluster.reduce((sum, c) => sum + c.cx, 0) / cluster.length; + const avgCy = cluster.reduce((sum, c) => sum + c.cy, 0) / cluster.length; + const strength = CLICK_STRENGTH_BASE_MS + cluster.length * CLICK_STRENGTH_PER_EVENT_MS; + return { + centerTimeMs, + focus: { cx: avgCx, cy: avgCy }, + strength, + source: "click" as const, + }; + }); +} + +export function detectZoomCandidates(samples: CursorTelemetryPoint[]): ZoomDwellCandidate[] { + const clickCandidates = detectZoomClickCandidates(samples); + const dwellCandidates = detectZoomDwellCandidates(samples); + return [...clickCandidates, ...dwellCandidates]; } diff --git a/src/lib/nativeMacRecording.ts b/src/lib/nativeMacRecording.ts index 4202132f9..e7149f9d2 100644 --- a/src/lib/nativeMacRecording.ts +++ b/src/lib/nativeMacRecording.ts @@ -46,6 +46,10 @@ export type NativeMacRecordingRequest = { screenPath: string; manifestPath?: string; }; + excludedApps?: Array<{ + bundleIdentifier?: string; + processID?: number; + }>; }; export type NativeMacHelperReadyEvent = { diff --git a/src/native/contracts.ts b/src/native/contracts.ts index 6836095ac..8282c005a 100644 --- 
a/src/native/contracts.ts +++ b/src/native/contracts.ts @@ -25,6 +25,7 @@ export interface CursorTelemetryPoint { timeMs: number; cx: number; cy: number; + interactionType?: "move" | "click" | "double-click" | "right-click" | "middle-click" | "mouseup"; } export interface CursorRecordingSample extends CursorTelemetryPoint { From 4e4a8abb9daab8b906652cd6adacf4ea26909c91 Mon Sep 17 00:00:00 2001 From: realkim93 Date: Fri, 22 May 2026 20:19:42 +0900 Subject: [PATCH 2/2] fix: preserve interactionType through normalization pipeline Codex flagged that normalizeCursorTelemetry was stripping interactionType in normalizeTelemetrySample, so the click-based detector never saw any clicks in the real UI flow (TimelineEditor always normalizes before detecting). CodeRabbit additionally pointed out that normalizeCursorSample in handlers.ts was coercing anything outside "click" | "mouseup" | "move" down to "move", so "double-click", "right-click", and "middle-click" were being lost upstream before they could ever reach the renderer. - Pass interactionType through normalizeTelemetrySample. - Widen normalizeCursorSample's allow-list to also accept "double-click", "right-click", and "middle-click". - Widen CursorRecordingSample.interactionType to the same six-value union as CursorTelemetryPoint. - Add a normalize -> detect integration test so a future stripping regression fails fast. 
--- electron/ipc/handlers.ts | 3 +++ .../timeline/zoomSuggestionUtils.test.ts | 17 ++++++++++++++++- .../timeline/zoomSuggestionUtils.ts | 1 + src/native/contracts.ts | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 5d2815fb5..2738dad67 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -404,6 +404,9 @@ function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { const point = sample as Partial; const interactionType = point.interactionType === "click" || + point.interactionType === "double-click" || + point.interactionType === "right-click" || + point.interactionType === "middle-click" || point.interactionType === "mouseup" || point.interactionType === "move" ? point.interactionType diff --git a/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts b/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts index 18fb44b82..429f114c2 100644 --- a/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts +++ b/src/components/video-editor/timeline/zoomSuggestionUtils.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it } from "vitest"; import type { CursorTelemetryPoint } from "../types"; -import { detectZoomCandidates, detectZoomClickCandidates } from "./zoomSuggestionUtils"; +import { + detectZoomCandidates, + detectZoomClickCandidates, + normalizeCursorTelemetry, +} from "./zoomSuggestionUtils"; describe("detectZoomClickCandidates", () => { it("returns no candidates when there are no click samples", () => { @@ -44,6 +48,17 @@ describe("detectZoomClickCandidates", () => { }); describe("detectZoomCandidates", () => { + it("preserves click interactions through normalizeCursorTelemetry", () => { + const raw: CursorTelemetryPoint[] = [ + { timeMs: 100, cx: 0.4, cy: 0.4, interactionType: "click" }, + { timeMs: 700, cx: 0.4, cy: 0.4, interactionType: "move" }, + ]; + const normalized = normalizeCursorTelemetry(raw, 2000); + 
expect(normalized[0].interactionType).toBe("click"); + const candidates = detectZoomCandidates(normalized); + expect(candidates.some((c) => c.source === "click")).toBe(true); + }); + it("returns click candidates ahead of dwell candidates", () => { const samples: CursorTelemetryPoint[] = [ { timeMs: 0, cx: 0.1, cy: 0.1, interactionType: "move" }, diff --git a/src/components/video-editor/timeline/zoomSuggestionUtils.ts b/src/components/video-editor/timeline/zoomSuggestionUtils.ts index 0aa58fda1..3d7e5e8dd 100644 --- a/src/components/video-editor/timeline/zoomSuggestionUtils.ts +++ b/src/components/video-editor/timeline/zoomSuggestionUtils.ts @@ -23,6 +23,7 @@ function normalizeTelemetrySample( timeMs: Math.max(0, Math.min(sample.timeMs, totalMs)), cx: Math.max(0, Math.min(sample.cx, 1)), cy: Math.max(0, Math.min(sample.cy, 1)), + ...(sample.interactionType ? { interactionType: sample.interactionType } : {}), }; } diff --git a/src/native/contracts.ts b/src/native/contracts.ts index 8282c005a..94ce2cd56 100644 --- a/src/native/contracts.ts +++ b/src/native/contracts.ts @@ -32,7 +32,7 @@ export interface CursorRecordingSample extends CursorTelemetryPoint { assetId?: string | null; visible?: boolean; cursorType?: NativeCursorType | null; - interactionType?: "move" | "click" | "mouseup"; + interactionType?: "move" | "click" | "double-click" | "right-click" | "middle-click" | "mouseup"; } export interface NativeCursorAsset {