From 32ed4b5c9c876136a743f57d36ec00ca101bba74 Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Tue, 27 Jan 2026 23:51:16 -0800 Subject: [PATCH 01/13] feat: add 1280x800 viewport support and update Yutori templates default - Add 1280x800@60 viewport option to browser create/update commands - Update Yutori computer-use templates (TypeScript & Python) to use 1280x800 as default viewport - Update documentation and help text to reflect new viewport option --- cmd/browsers.go | 5 +++-- cmd/browsers_test.go | 2 +- pkg/templates/python/yutori-computer-use/README.md | 4 ++-- pkg/templates/python/yutori-computer-use/session.py | 2 +- pkg/templates/python/yutori-computer-use/tools/computer.py | 2 +- .../python/yutori-computer-use/tools/playwright_computer.py | 2 +- pkg/templates/typescript/yutori-computer-use/README.md | 4 ++-- pkg/templates/typescript/yutori-computer-use/loop.ts | 5 ++--- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- .../typescript/yutori-computer-use/tools/computer.ts | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cmd/browsers.go b/cmd/browsers.go index 7555d917..904de0f6 100644 --- a/cmd/browsers.go +++ b/cmd/browsers.go @@ -126,6 +126,7 @@ func getAvailableViewports() []string { "1440x900@25", "1024x768@60", "1200x800@60", + "1280x800@60", } } @@ -2069,7 +2070,7 @@ func init() { browsersUpdateCmd.Flags().String("profile-id", "", "Profile ID to load into the browser session (mutually exclusive with --profile-name)") browsersUpdateCmd.Flags().String("profile-name", "", "Profile name to load into the browser session (mutually exclusive with --profile-id)") browsersUpdateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends") - browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). 
Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60") + browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60") browsersCmd.AddCommand(browsersListCmd) browsersCmd.AddCommand(browsersCreateCmd) @@ -2304,7 +2305,7 @@ func init() { browsersCreateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends") browsersCreateCmd.Flags().String("proxy-id", "", "Proxy ID to use for the browser session") browsersCreateCmd.Flags().StringSlice("extension", []string{}, "Extension IDs or names to load (repeatable; may be passed multiple times or comma-separated)") - browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60") + browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). 
Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60") browsersCreateCmd.Flags().Bool("viewport-interactive", false, "Interactively select viewport size from list") browsersCreateCmd.Flags().String("pool-id", "", "Browser pool ID to acquire from (mutually exclusive with --pool-name)") browsersCreateCmd.Flags().String("pool-name", "", "Browser pool name to acquire from (mutually exclusive with --pool-id)") diff --git a/cmd/browsers_test.go b/cmd/browsers_test.go index 447b6bda..eb3fa003 100644 --- a/cmd/browsers_test.go +++ b/cmd/browsers_test.go @@ -280,7 +280,6 @@ func TestBrowsersDelete_Failure(t *testing.T) { assert.True(t, strings.Contains(errMsg, "right failed") || strings.Contains(errMsg, "left failed"), "expected error message to contain either 'right failed' or 'left failed', got: %s", errMsg) } - func TestBrowsersView_ByID_PrintsURL(t *testing.T) { // Capture both pterm output and raw stdout setupStdoutCapture(t) @@ -1153,6 +1152,7 @@ func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) { assert.Contains(t, viewports, "1920x1200@25") assert.Contains(t, viewports, "1440x900@25") assert.Contains(t, viewports, "1200x800@60") + assert.Contains(t, viewports, "1280x800@60") assert.Contains(t, viewports, "1024x768@60") } diff --git a/pkg/templates/python/yutori-computer-use/README.md b/pkg/templates/python/yutori-computer-use/README.md index 2f8ec2fa..2b52fd01 100644 --- a/pkg/templates/python/yutori-computer-use/README.md +++ b/pkg/templates/python/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. 
-> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index f4f2d011..bffa17bc 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -32,7 +32,7 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 - viewport_width: int = 1200 + viewport_width: int = 1280 viewport_height: int = 800 # Replay recording options diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index 44601616..e72f191a 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False): class ComputerTool: - def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800): + def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800): self.kernel = kernel self.session_id = session_id self.width = width diff --git a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py index df98628a..c0b88390 100644 --- a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py @@ -38,7 +38,7 @@ class PlaywrightComputerTool: - def __init__(self, cdp_ws_url: str, width: 
int = 1200, height: int = 800): + def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800): self.cdp_ws_url = cdp_ws_url self.width = width self.height = height diff --git a/pkg/templates/typescript/yutori-computer-use/README.md b/pkg/templates/typescript/yutori-computer-use/README.md index 625c94df..60bcd255 100644 --- a/pkg/templates/typescript/yutori-computer-use/README.md +++ b/pkg/templates/typescript/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. 
diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 351aa9c1..c53fd3cd 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,7 +53,7 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling (default: 1200, closest to Yutori's 1280 recommendation) */ + /** Viewport width for coordinate scaling (default: 1280 per Yutori recommendation) */ viewportWidth?: number; /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ viewportHeight?: number; @@ -80,8 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, - // Default viewport: 1200x800 (closest Kernel-supported size to Yutori's recommended 1280x800) - viewportWidth = 1200, + viewportWidth = 1280, viewportHeight = 800, mode = 'computer_use', }: SamplingLoopOptions): Promise { diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 3a3c5675..24b1b9ba 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,7 +16,7 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width (default: 1200, closest to Yutori's 1280 recommendation) */ + /** Viewport width (default: 1280 per Yutori recommendation) */ viewportWidth?: number; /** Viewport height (default: 800 per Yutori recommendation) */ viewportHeight?: number; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index 46fd76ef..e9cdaf35 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ 
b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -98,7 +98,7 @@ export class ComputerTool { private width: number; private height: number; - constructor(kernel: Kernel, sessionId: string, width = 1200, height = 800) { + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.width = width; From a17c0b57bfaa3b8ab26248f1bf168a03e1b0ee95 Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Tue, 27 Jan 2026 23:59:09 -0800 Subject: [PATCH 02/13] test: update viewport count expectation to 7 --- cmd/browsers_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/browsers_test.go b/cmd/browsers_test.go index eb3fa003..49d770d8 100644 --- a/cmd/browsers_test.go +++ b/cmd/browsers_test.go @@ -1146,7 +1146,7 @@ func TestParseViewport_InvalidFormats(t *testing.T) { func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) { viewports := getAvailableViewports() - assert.Len(t, viewports, 6) + assert.Len(t, viewports, 7) assert.Contains(t, viewports, "2560x1440@10") assert.Contains(t, viewports, "1920x1080@25") assert.Contains(t, viewports, "1920x1200@25") From 99d10e29b610fc42b708fb851b754038d948bbaf Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Wed, 28 Jan 2026 00:12:31 -0800 Subject: [PATCH 03/13] missed some more changes --- pkg/templates/python/yutori-computer-use/loop.py | 2 +- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- .../typescript/yutori-computer-use/tools/playwright-computer.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index 236d4ad1..ef5748ce 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -49,7 +49,7 @@ async def sampling_loop( cdp_ws_url: Optional[str] = None, max_tokens: int = 4096, max_iterations: int = 50, - 
viewport_width: int = 1200, + viewport_width: int = 1280, viewport_height: int = 800, mode: BrowserMode = "computer_use", ) -> dict[str, Any]: diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 24b1b9ba..0621aad2 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -37,7 +37,7 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, - viewportWidth: 1200, + viewportWidth: 1280, viewportHeight: 800, }; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts index d6ce229d..3062c44d 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts @@ -36,7 +36,7 @@ export class PlaywrightComputerTool { private context: BrowserContext | null = null; private page: Page | null = null; - constructor(cdpWsUrl: string, width = 1200, height = 800) { + constructor(cdpWsUrl: string, width = 1280, height = 800) { this.cdpWsUrl = cdpWsUrl; this.width = width; this.height = height; From b582c8193cdd6f5b6db6b145f23410193b344fd2 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:25:33 -0500 Subject: [PATCH 04/13] refactor: remove refresh_rate from viewport settings in TypeScript and Python templates - Removed the `refresh_rate` property from the viewport configuration in both TypeScript and Python templates for the Anthropic and Yutori computer use sessions. - This change simplifies the viewport settings and aligns with the current requirements. 
--- pkg/templates/python/anthropic-computer-use/session.py | 1 - pkg/templates/python/yutori-computer-use/session.py | 1 - pkg/templates/typescript/anthropic-computer-use/session.ts | 1 - pkg/templates/typescript/yutori-computer-use/session.ts | 1 - 4 files changed, 4 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/session.py b/pkg/templates/python/anthropic-computer-use/session.py index 3227b283..e01ab4b5 100644 --- a/pkg/templates/python/anthropic-computer-use/session.py +++ b/pkg/templates/python/anthropic-computer-use/session.py @@ -54,7 +54,6 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": 1024, "height": 768, - "refresh_rate": 60, }, ) diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index bffa17bc..42dc0177 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,7 +56,6 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, - "refresh_rate": 25, }, ) diff --git a/pkg/templates/typescript/anthropic-computer-use/session.ts b/pkg/templates/typescript/anthropic-computer-use/session.ts index 06e30a64..96bbdbb4 100644 --- a/pkg/templates/typescript/anthropic-computer-use/session.ts +++ b/pkg/templates/typescript/anthropic-computer-use/session.ts @@ -96,7 +96,6 @@ export class KernelBrowserSession { viewport: { width: 1024, height: 768, - refresh_rate: 60, }, }); diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 0621aad2..e1064615 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -117,7 +117,6 @@ export class KernelBrowserSession { viewport: { width: this.options.viewportWidth, height: this.options.viewportHeight, - refresh_rate: 25, }, 
}); From 152c9615883e00a3db1f13eb0063c3f252071eec Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:44:14 -0500 Subject: [PATCH 05/13] Remove default viewport size comments from options Updated comments in SamplingLoopOptions and SessionOptions to remove references to default viewport width and height values, clarifying that these fields are for coordinate scaling and viewport size without specifying defaults. --- pkg/templates/typescript/yutori-computer-use/loop.ts | 4 ++-- pkg/templates/typescript/yutori-computer-use/session.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index c53fd3cd..34cdc23b 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,9 +53,9 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling (default: 1280 per Yutori recommendation) */ + /** Viewport width for coordinate scaling */ viewportWidth?: number; - /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ + /** Viewport height for coordinate scaling */ viewportHeight?: number; /** * Browser interaction mode: diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index e1064615..d3324f0a 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,9 +16,9 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width (default: 1280 per Yutori recommendation) */ + /** Viewport width */ viewportWidth?: number; - /** Viewport height (default: 800 per Yutori recommendation) */ + /** Viewport height 
*/ viewportHeight?: number; } From 5c060e2f6709d6d426f393ccddbca1d3fa81dfed Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:45:57 -0500 Subject: [PATCH 06/13] Add configurable viewport sizes + new defaults for Anthropic Templates Introduces viewportWidth and viewportHeight parameters to both Python and TypeScript anthropic templates, allowing the viewport size to be set when initializing sessions and tools. Updates default values to 1280x800 and ensures these values are used throughout session creation and tool instantiation. --- .../python/anthropic-computer-use/loop.py | 4 +++- .../python/anthropic-computer-use/session.py | 7 ++++-- .../anthropic-computer-use/tools/computer.py | 6 ++--- .../typescript/anthropic-computer-use/loop.ts | 6 ++++- .../anthropic-computer-use/session.ts | 22 +++++++++++++++++-- .../anthropic-computer-use/tools/computer.ts | 18 +++++++++------ 6 files changed, 47 insertions(+), 16 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/loop.py b/pkg/templates/python/anthropic-computer-use/loop.py index 40620881..afee6f61 100644 --- a/pkg/templates/python/anthropic-computer-use/loop.py +++ b/pkg/templates/python/anthropic-computer-use/loop.py @@ -78,6 +78,8 @@ async def sampling_loop( tool_version: ToolVersion = "computer_use_20250124", thinking_budget: int | None = None, token_efficient_tools_beta: bool = False, + viewport_width: int = 1280, + viewport_height: int = 800, ): """ Agentic sampling loop for the assistant/tool interaction of computer use. 
@@ -99,7 +101,7 @@ async def sampling_loop( tool_group = TOOL_GROUPS_BY_VERSION[tool_version] tool_collection = ToolCollection( *( - ToolCls(kernel=kernel, session_id=session_id) if ToolCls.__name__.startswith("ComputerTool") else ToolCls() + ToolCls(kernel=kernel, session_id=session_id, width=viewport_width, height=viewport_height) if ToolCls.__name__.startswith("ComputerTool") else ToolCls() for ToolCls in tool_group.tools ) ) diff --git a/pkg/templates/python/anthropic-computer-use/session.py b/pkg/templates/python/anthropic-computer-use/session.py index e01ab4b5..4718dbd7 100644 --- a/pkg/templates/python/anthropic-computer-use/session.py +++ b/pkg/templates/python/anthropic-computer-use/session.py @@ -32,6 +32,9 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 + viewport_width: int = 1280 + viewport_height: int = 800 + # Replay recording options record_replay: bool = False replay_grace_period: float = 5.0 # Seconds to wait before stopping replay @@ -52,8 +55,8 @@ async def __aenter__(self) -> "KernelBrowserSession": stealth=self.stealth, timeout_seconds=self.timeout_seconds, viewport={ - "width": 1024, - "height": 768, + "width": self.viewport_width, + "height": self.viewport_height, }, ) diff --git a/pkg/templates/python/anthropic-computer-use/tools/computer.py b/pkg/templates/python/anthropic-computer-use/tools/computer.py index 654a289b..d4a46d7d 100644 --- a/pkg/templates/python/anthropic-computer-use/tools/computer.py +++ b/pkg/templates/python/anthropic-computer-use/tools/computer.py @@ -107,8 +107,6 @@ class BaseComputerTool: """ name: Literal["computer"] = "computer" - width: int = 1024 - height: int = 768 display_num: int | None = None # Kernel client and session @@ -127,10 +125,12 @@ def options(self) -> ComputerToolOptions: "display_number": self.display_num, } - def __init__(self, kernel: Kernel | None = None, session_id: str | None = None): + def __init__(self, kernel: Kernel | None = None, session_id: str | None = 
None, width: int = 1280, height: int = 800): super().__init__() self.kernel = kernel self.session_id = session_id + self.width = width + self.height = height def validate_coordinates(self, coordinate: tuple[int, int] | list[int] | None = None) -> tuple[int, int] | None: """Validate that coordinates are non-negative integers and convert lists to tuples if needed.""" diff --git a/pkg/templates/typescript/anthropic-computer-use/loop.ts b/pkg/templates/typescript/anthropic-computer-use/loop.ts index fa775d92..cc209d1d 100644 --- a/pkg/templates/typescript/anthropic-computer-use/loop.ts +++ b/pkg/templates/typescript/anthropic-computer-use/loop.ts @@ -57,6 +57,8 @@ export async function samplingLoop({ tokenEfficientToolsBeta = false, kernel, sessionId, + viewportWidth = 1280, + viewportHeight = 800, }: { model: string; systemPromptSuffix?: string; @@ -69,10 +71,12 @@ export async function samplingLoop({ tokenEfficientToolsBeta?: boolean; kernel: Kernel; sessionId: string; + viewportWidth?: number; + viewportHeight?: number; }): Promise { const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION; const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion]; - const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId))); + const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId, viewportWidth, viewportHeight))); const system: BetaTextBlock = { type: 'text', diff --git a/pkg/templates/typescript/anthropic-computer-use/session.ts b/pkg/templates/typescript/anthropic-computer-use/session.ts index 96bbdbb4..fed3dd47 100644 --- a/pkg/templates/typescript/anthropic-computer-use/session.ts +++ b/pkg/templates/typescript/anthropic-computer-use/session.ts @@ -16,6 +16,10 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping 
replay */ replayGracePeriod?: number; + /** Viewport width */ + viewportWidth?: number; + /** Viewport height */ + viewportHeight?: number; } export interface SessionInfo { @@ -23,6 +27,8 @@ export interface SessionInfo { liveViewUrl: string; replayId?: string; replayViewUrl?: string; + viewportWidth: number; + viewportHeight: number; } const DEFAULT_OPTIONS: Required = { @@ -30,6 +36,8 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, + viewportWidth: 1280, + viewportHeight: 800, }; /** @@ -76,12 +84,22 @@ export class KernelBrowserSession { return this._replayViewUrl; } + get viewportWidth(): number { + return this.options.viewportWidth; + } + + get viewportHeight(): number { + return this.options.viewportHeight; + } + get info(): SessionInfo { return { sessionId: this.sessionId, liveViewUrl: this._liveViewUrl || '', replayId: this._replayId || undefined, replayViewUrl: this._replayViewUrl || undefined, + viewportWidth: this.options.viewportWidth, + viewportHeight: this.options.viewportHeight, }; } @@ -94,8 +112,8 @@ export class KernelBrowserSession { stealth: this.options.stealth, timeout_seconds: this.options.timeoutSeconds, viewport: { - width: 1024, - height: 768, + width: this.options.viewportWidth, + height: this.options.viewportHeight, }, }); diff --git a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts index 8e415ade..580ea238 100644 --- a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts @@ -12,6 +12,8 @@ export class ComputerTool implements BaseAnthropicTool { protected sessionId: string; protected _screenshotDelay = 2.0; protected version: '20241022' | '20250124'; + protected width: number; + protected height: number; private lastMousePosition: [number, number] = [0, 0]; @@ -39,10 +41,12 @@ export class ComputerTool implements 
BaseAnthropicTool { Action.WAIT, ]); - constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124') { + constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124', width = 1280, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.version = version; + this.width = width; + this.height = height; } get apiType(): 'computer_20241022' | 'computer_20250124' { @@ -53,8 +57,8 @@ export class ComputerTool implements BaseAnthropicTool { const params = { name: this.name, type: this.apiType, - display_width_px: 1024, - display_height_px: 768, + display_width_px: this.width, + display_height_px: this.height, display_number: null, }; return params; @@ -380,13 +384,13 @@ export class ComputerTool implements BaseAnthropicTool { // For backward compatibility export class ComputerTool20241022 extends ComputerTool { - constructor(kernel: Kernel, sessionId: string) { - super(kernel, sessionId, '20241022'); + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + super(kernel, sessionId, '20241022', width, height); } } export class ComputerTool20250124 extends ComputerTool { - constructor(kernel: Kernel, sessionId: string) { - super(kernel, sessionId, '20250124'); + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + super(kernel, sessionId, '20250124', width, height); } } From be44da4daf15e132ae715d7eb3a44ad3381efd46 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 17:22:08 -0500 Subject: [PATCH 07/13] fix: update model version in Yutori computer use template Changed the model parameter in the Yutori computer use template from "n1-preview-2025-11" to "n1-latest" to ensure the latest model is utilized for tasks. 
--- pkg/templates/python/yutori-computer-use/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/templates/python/yutori-computer-use/main.py b/pkg/templates/python/yutori-computer-use/main.py index d909c67f..91633288 100644 --- a/pkg/templates/python/yutori-computer-use/main.py +++ b/pkg/templates/python/yutori-computer-use/main.py @@ -59,7 +59,7 @@ async def cua_task( print("Kernel browser live view url:", session.live_view_url) loop_result = await sampling_loop( - model="n1-preview-2025-11", + model="n1-latest", task=payload["query"], api_key=str(api_key), kernel=session.kernel, From 3df3a3b9ab014bf437ae9f2aab0f97fb78268bf8 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 17:23:39 -0500 Subject: [PATCH 08/13] Update Yutori Template model used to latest --- pkg/templates/python/yutori-computer-use/loop.py | 2 +- pkg/templates/typescript/yutori-computer-use/index.ts | 2 +- pkg/templates/typescript/yutori-computer-use/loop.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index ef5748ce..aa7f6b24 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -41,7 +41,7 @@ async def screenshot(self) -> ToolResult: async def sampling_loop( *, - model: str = "n1-preview-2025-11", + model: str = "n1-latest", task: str, api_key: str, kernel: Kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/index.ts b/pkg/templates/typescript/yutori-computer-use/index.ts index afe51bab..a4aee2f8 100644 --- a/pkg/templates/typescript/yutori-computer-use/index.ts +++ b/pkg/templates/typescript/yutori-computer-use/index.ts @@ -50,7 +50,7 @@ app.action( // Run the sampling loop const mode = payload.mode ?? 
'computer_use'; const { finalAnswer, messages } = await samplingLoop({ - model: 'n1-preview-2025-11', + model: 'n1-latest', task: payload.query, apiKey: YUTORI_API_KEY, kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 34cdc23b..0fcfba9b 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -72,7 +72,7 @@ interface SamplingLoopResult { } export async function samplingLoop({ - model = 'n1-preview-2025-11', + model = 'n1-latest', task, apiKey, kernel, From 871d1738cf18f9c81346eb5e590b942dbb7fbf59 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 19:24:26 -0500 Subject: [PATCH 09/13] fix: update URLs in QA commands to use HTTPS Replaced HTTP links with HTTPS in various kernel invoke commands within the QA documentation to ensure secure connections. This includes updates for the Yutori and Anthropic tasks related to the Magnitasks website. --- .cursor/commands/qa.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.cursor/commands/qa.md b/.cursor/commands/qa.md index 63d7d048..7f5a540e 100644 --- a/.cursor/commands/qa.md +++ b/.cursor/commands/qa.md @@ -270,25 +270,25 @@ Once all deployments are complete, present the human with these invoke commands kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com"}' kernel invoke ts-captcha-solver test-captcha-solver kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}' -kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}' kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}' -kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' kernel invoke ts-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}' -kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' -kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. 
You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' +kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' +kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' # Python apps kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}' kernel invoke python-captcha-solver test-captcha-solver kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}' -kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' kernel invoke python-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}' kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button"}' kernel invoke py-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}' -kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' -kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' -kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' +kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' +kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' ``` ## Step 7: Automated Runtime Testing (Optional) From e9453f049c37a04e023f235918d64e88b679b872 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:13:40 -0500 Subject: [PATCH 10/13] WIP: Yutori template debugging - revert to n1-preview-2025-11 - Reverted model from n1-latest to n1-preview-2025-11 - Reverted viewport to 1200x800 (API issues with 1280) - Reverted message format to observation role - These changes are pending Yutori API stability fixes --- pkg/templates/python/yutori-computer-use/loop.py | 3 ++- pkg/templates/python/yutori-computer-use/main.py | 2 +- pkg/templates/python/yutori-computer-use/session.py | 2 +- pkg/templates/typescript/yutori-computer-use/index.ts | 2 +- pkg/templates/typescript/yutori-computer-use/loop.ts | 5 +++-- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index aa7f6b24..a2dfd9a8 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -41,7 
+41,7 @@ async def screenshot(self) -> ToolResult: async def sampling_loop( *, - model: str = "n1-latest", + model: str = "n1-preview-2025-11", task: str, api_key: str, kernel: Kernel, @@ -86,6 +86,7 @@ async def sampling_loop( } ] + # Add initial screenshot as observation (n1's required format) if initial_screenshot.get("base64_image"): conversation_messages.append({ "role": "observation", diff --git a/pkg/templates/python/yutori-computer-use/main.py b/pkg/templates/python/yutori-computer-use/main.py index 91633288..d909c67f 100644 --- a/pkg/templates/python/yutori-computer-use/main.py +++ b/pkg/templates/python/yutori-computer-use/main.py @@ -59,7 +59,7 @@ async def cua_task( print("Kernel browser live view url:", session.live_view_url) loop_result = await sampling_loop( - model="n1-latest", + model="n1-preview-2025-11", task=payload["query"], api_key=str(api_key), kernel=session.kernel, diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index 42dc0177..1c449ec9 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -32,7 +32,7 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 - viewport_width: int = 1280 + viewport_width: int = 1200 viewport_height: int = 800 # Replay recording options diff --git a/pkg/templates/typescript/yutori-computer-use/index.ts b/pkg/templates/typescript/yutori-computer-use/index.ts index a4aee2f8..afe51bab 100644 --- a/pkg/templates/typescript/yutori-computer-use/index.ts +++ b/pkg/templates/typescript/yutori-computer-use/index.ts @@ -50,7 +50,7 @@ app.action( // Run the sampling loop const mode = payload.mode ?? 
'computer_use'; const { finalAnswer, messages } = await samplingLoop({ - model: 'n1-latest', + model: 'n1-preview-2025-11', task: payload.query, apiKey: YUTORI_API_KEY, kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 0fcfba9b..a4b73744 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -72,7 +72,7 @@ interface SamplingLoopResult { } export async function samplingLoop({ - model = 'n1-latest', + model = 'n1-preview-2025-11', task, apiKey, kernel, @@ -80,7 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, - viewportWidth = 1280, + viewportWidth = 1200, viewportHeight = 800, mode = 'computer_use', }: SamplingLoopOptions): Promise { @@ -118,6 +118,7 @@ export async function samplingLoop({ }, ]; + // Add initial screenshot as observation (n1's required format) if (initialScreenshot.base64Image) { conversationMessages.push({ role: 'observation', diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index d3324f0a..644f8226 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -37,7 +37,7 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, - viewportWidth: 1280, + viewportWidth: 1200, viewportHeight: 800, }; From 70423d5d18200b5365895d779cb6e3c27217ecf9 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:14:04 -0500 Subject: [PATCH 11/13] Revert Yutori template changes pending API stability Reverting Yutori template modifications made in this branch. 
The Yutori API has stability issues with: - 1280x800 screenshots (500 errors) - n1-latest model with observation message format - Multi-turn conversations failing after 2-3 iterations Yutori work preserved in branch: tanmay/yutori-template-fixes This PR now focuses only on: - Adding 1280x800 viewport support to CLI - Updating Anthropic computer-use templates --- pkg/templates/python/yutori-computer-use/README.md | 4 ++-- pkg/templates/python/yutori-computer-use/loop.py | 3 +-- pkg/templates/python/yutori-computer-use/session.py | 1 + pkg/templates/python/yutori-computer-use/tools/computer.py | 2 +- .../python/yutori-computer-use/tools/playwright_computer.py | 2 +- pkg/templates/typescript/yutori-computer-use/README.md | 4 ++-- pkg/templates/typescript/yutori-computer-use/loop.ts | 6 +++--- pkg/templates/typescript/yutori-computer-use/session.ts | 5 +++-- .../typescript/yutori-computer-use/tools/computer.ts | 2 +- .../yutori-computer-use/tools/playwright-computer.ts | 2 +- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/README.md b/pkg/templates/python/yutori-computer-use/README.md index 2b52fd01..2f8ec2fa 100644 --- a/pkg/templates/python/yutori-computer-use/README.md +++ b/pkg/templates/python/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 60Hz**, which this template uses by default. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. 
The slight width difference (1200 vs 1280) should have minimal impact on accuracy. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index a2dfd9a8..236d4ad1 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -49,7 +49,7 @@ async def sampling_loop( cdp_ws_url: Optional[str] = None, max_tokens: int = 4096, max_iterations: int = 50, - viewport_width: int = 1280, + viewport_width: int = 1200, viewport_height: int = 800, mode: BrowserMode = "computer_use", ) -> dict[str, Any]: @@ -86,7 +86,6 @@ async def sampling_loop( } ] - # Add initial screenshot as observation (n1's required format) if initial_screenshot.get("base64_image"): conversation_messages.append({ "role": "observation", diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index 1c449ec9..f4f2d011 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,6 +56,7 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, + "refresh_rate": 25, }, ) diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index e72f191a..44601616 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False): class ComputerTool: - def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800): + def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800): self.kernel = kernel self.session_id = session_id self.width = 
width diff --git a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py index c0b88390..df98628a 100644 --- a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py @@ -38,7 +38,7 @@ class PlaywrightComputerTool: - def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800): + def __init__(self, cdp_ws_url: str, width: int = 1200, height: int = 800): self.cdp_ws_url = cdp_ws_url self.width = width self.height = height diff --git a/pkg/templates/typescript/yutori-computer-use/README.md b/pkg/templates/typescript/yutori-computer-use/README.md index 60bcd255..625c94df 100644 --- a/pkg/templates/typescript/yutori-computer-use/README.md +++ b/pkg/templates/typescript/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 60Hz**, which this template uses by default. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. 
diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index a4b73744..351aa9c1 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,9 +53,9 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling */ + /** Viewport width for coordinate scaling (default: 1200, closest to Yutori's 1280 recommendation) */ viewportWidth?: number; - /** Viewport height for coordinate scaling */ + /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ viewportHeight?: number; /** * Browser interaction mode: @@ -80,6 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, + // Default viewport: 1200x800 (closest Kernel-supported size to Yutori's recommended 1280x800) viewportWidth = 1200, viewportHeight = 800, mode = 'computer_use', @@ -118,7 +119,6 @@ export async function samplingLoop({ }, ]; - // Add initial screenshot as observation (n1's required format) if (initialScreenshot.base64Image) { conversationMessages.push({ role: 'observation', diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 644f8226..3a3c5675 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,9 +16,9 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width */ + /** Viewport width (default: 1200, closest to Yutori's 1280 recommendation) */ viewportWidth?: number; - /** Viewport height */ + /** Viewport height (default: 800 per Yutori recommendation) */ viewportHeight?: number; } @@ -117,6 +117,7 @@ export class KernelBrowserSession { viewport: { width: 
this.options.viewportWidth, height: this.options.viewportHeight, + refresh_rate: 25, }, }); diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index e9cdaf35..46fd76ef 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -98,7 +98,7 @@ export class ComputerTool { private width: number; private height: number; - constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + constructor(kernel: Kernel, sessionId: string, width = 1200, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.width = width; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts index 3062c44d..d6ce229d 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts @@ -36,7 +36,7 @@ export class PlaywrightComputerTool { private context: BrowserContext | null = null; private page: Page | null = null; - constructor(cdpWsUrl: string, width = 1280, height = 800) { + constructor(cdpWsUrl: string, width = 1200, height = 800) { this.cdpWsUrl = cdpWsUrl; this.width = width; this.height = height; From 510fadbb47fa9d9f06f0bb997d3c5d20fc28c462 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:19:00 -0500 Subject: [PATCH 12/13] remove refresh rate for yutori template --- pkg/templates/python/yutori-computer-use/session.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index f4f2d011..1c449ec9 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,7 +56,6 @@ async def 
__aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, - "refresh_rate": 25, }, ) From 8e3233890500377e782da5511e267a602d2ea069 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:38:21 -0500 Subject: [PATCH 13/13] Remove refresh_rate from TypeScript Yutori template viewport Aligns TypeScript template with Python template and other templates where refresh_rate was removed from viewport settings. --- pkg/templates/typescript/yutori-computer-use/session.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 3a3c5675..2ba59697 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -117,7 +117,6 @@ export class KernelBrowserSession { viewport: { width: this.options.viewportWidth, height: this.options.viewportHeight, - refresh_rate: 25, }, });