Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pkg/create/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const (
TemplateOpenAGIComputerUse = "openagi-computer-use"
TemplateClaudeAgentSDK = "claude-agent-sdk"
TemplateYutoriComputerUse = "yutori"
TemplateUnifiedCUA = "cua"
)

type TemplateInfo struct {
Expand Down Expand Up @@ -90,6 +91,11 @@ var Templates = map[string]TemplateInfo{
Description: "Implements a Yutori n1 computer use agent",
Languages: []string{LanguageTypeScript, LanguagePython},
},
TemplateUnifiedCUA: {
Name: "Unified CUA",
Description: "Multi-provider computer use agent with Anthropic/OpenAI/Gemini fallback",
Languages: []string{LanguageTypeScript, LanguagePython},
},
}

// GetSupportedTemplatesForLanguage returns a list of all supported template names for a given language
Expand Down Expand Up @@ -213,6 +219,11 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-yutori-cua cua-task --payload '{"query": "Navigate to https://example.com and describe the page"}'`,
},
TemplateUnifiedCUA: {
EntryPoint: "index.ts",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'`,
},
},
LanguagePython: {
TemplateSampleApp: {
Expand Down Expand Up @@ -260,6 +271,11 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-yutori-cua cua-task --payload '{"query": "Navigate to https://example.com and describe the page"}'`,
},
TemplateUnifiedCUA: {
EntryPoint: "main.py",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'`,
},
},
}

Expand Down
14 changes: 14 additions & 0 deletions pkg/templates/python/cua/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copy this file to .env and fill in your API keys.
# Only the key for your chosen provider is required.

# Primary provider: "anthropic", "openai", or "gemini"
CUA_PROVIDER=anthropic

# Comma-separated fallback order (optional).
# If the primary provider fails, these are tried in order.
# CUA_FALLBACK_PROVIDERS=openai,gemini

# Provider API keys — set the one(s) you plan to use
ANTHROPIC_API_KEY=your_anthropic_api_key_here
OPENAI_API_KEY=your_openai_api_key_here
GOOGLE_API_KEY=your_google_api_key_here
88 changes: 88 additions & 0 deletions pkg/templates/python/cua/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Unified CUA Template

A multi-provider Computer Use Agent (CUA) template for [Kernel](https://kernel.sh). Supports **Anthropic**, **OpenAI**, and **Google Gemini** as interchangeable backends with automatic fallback.

## Quick start

### 1. Install dependencies

```bash
uv sync
```

### 2. Configure environment

Copy the example env file and add your API keys:

```bash
cp .env.example .env
```

Set `CUA_PROVIDER` to your preferred provider and add the matching API key:


| Provider | Env var for key | Model used |
| ----------- | ------------------- | ----------------------------------------- |
| `anthropic` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
| `openai` | `OPENAI_API_KEY` | `gpt-5.4` |
| `gemini` | `GOOGLE_API_KEY` | `gemini-2.5-computer-use-preview-10-2025` |


### 3. Deploy to Kernel

```bash
kernel deploy main.py --env-file .env
```

### 4. Invoke

```bash
kernel invoke python-cua cua-task --payload '{"query": "Go to https://news.ycombinator.com and get the top 5 stories"}'
```

## Multi-provider fallback

Set `CUA_FALLBACK_PROVIDERS` to automatically try another provider if the primary fails:

```env
CUA_PROVIDER=anthropic
CUA_FALLBACK_PROVIDERS=openai,gemini
```

This will try Anthropic first, then OpenAI, then Gemini. Only providers with valid API keys are used.

## Replay recording

Pass `record_replay: true` in the payload to capture a video replay of the browser session:

```bash
kernel invoke python-cua cua-task --payload '{"query": "Navigate to example.com", "record_replay": true}'
```

The response will include a `replay_url` you can open in your browser.

## Project structure

```
main.py — Kernel app entrypoint
session.py — Browser session lifecycle with replay support
providers/
__init__.py — Provider factory and fallback logic
anthropic.py — Anthropic Claude adapter
openai.py — OpenAI GPT adapter
gemini.py — Google Gemini adapter
```

## Customization

Each provider adapter is self-contained. To customize a provider's behavior (system prompt, model, tool handling), edit the corresponding file in `providers/`.

To add a new provider, create a new file that implements the `CuaProvider` protocol and register it in `providers/__init__.py`.

## Resources

- [Kernel Docs](https://docs.kernel.sh)
- [Anthropic Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use)
- [OpenAI Computer Use](https://platform.openai.com/docs/guides/computer-use)
- [Google Gemini Computer Use](https://ai.google.dev/gemini-api/docs/computer-use)

31 changes: 31 additions & 0 deletions pkg/templates/python/cua/_gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg-info/
dist/
build/

# Virtual environments
.venv/
venv/
env/

# Environment
.env
.env.local
.env.*.local

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
*.log
105 changes: 105 additions & 0 deletions pkg/templates/python/cua/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""
Unified CUA (Computer Use Agent) template with multi-provider support.

Supports Anthropic, OpenAI, and Gemini as interchangeable providers.
Configure via environment variables:
CUA_PROVIDER — primary provider ("anthropic", "openai", or "gemini")
CUA_FALLBACK_PROVIDERS — comma-separated fallback order (optional)

Each provider requires its own API key:
ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY
"""

from __future__ import annotations

import asyncio
from typing import Literal, TypedDict

import kernel
from kernel import Kernel

from providers import resolve_providers, run_with_fallback, TaskOptions
from session import KernelBrowserSession, SessionOptions

# Shared Kernel API client, reused by the browser session and providers.
kernel_client = Kernel()
# App registration; the name here is the `kernel invoke` target ("python-cua").
app = kernel.App("python-cua")


class CuaInput(TypedDict, total=False):
    """Payload accepted by the "cua-task" action.

    total=False makes every key optional at the type level; the handler
    raises ValueError at runtime when "query" is absent.
    """

    query: str  # natural-language task description (required at runtime)
    provider: Literal["anthropic", "openai", "gemini"]  # per-request provider override
    model: str  # provider-specific model override, passed through to the adapter
    record_replay: bool  # when true, record a video replay of the session


class CuaOutput(TypedDict, total=False):
    """Result returned by the "cua-task" action."""

    result: str  # final answer text from the provider
    provider: str  # name of the provider that completed the task
    replay_url: str  # present only when a replay was recorded


# Provider resolution is deferred to the action handler because env vars
# are not available during Hypeman's build/discovery phase.
_providers: list | None = None


def _get_providers():
    """Resolve the provider chain once, then reuse the cached list."""
    global _providers
    if _providers is not None:
        return _providers
    _providers = resolve_providers()
    chain = " -> ".join(p.name for p in _providers)
    print(f"Configured providers: {chain}")
    return _providers


@app.action("cua-task")
async def cua_task(ctx: kernel.KernelContext, payload: CuaInput | None = None) -> CuaOutput:
    """Run a browser computer-use task, falling back across providers.

    Args:
        ctx: Kernel invocation context; supplies the invocation id used to
            tie the browser session to this invocation.
        payload: Must contain "query"; may also override "provider" and
            "model", and may request "record_replay".

    Returns:
        CuaOutput with the task result text, the provider that completed
        it, and a replay URL when a replay was recorded.

    Raises:
        ValueError: if the payload is missing "query".
    """
    if not payload or not payload.get("query"):
        raise ValueError('Query is required. Payload must include: {"query": "your task description"}')

    providers = _get_providers()

    # Per-request provider override: move the requested provider to the
    # front of the chain while keeping the others as fallbacks.
    if payload.get("provider"):
        requested = next((p for p in providers if p.name == payload["provider"]), None)
        if requested:
            providers = [requested] + [p for p in providers if p is not requested]

    session = KernelBrowserSession(
        kernel_client,
        SessionOptions(
            invocation_id=ctx.invocation_id,
            stealth=True,
            record_replay=payload.get("record_replay", False),
        ),
    )

    await session.start()
    print(f"Live view: {session.live_view_url}")

    # try/finally guarantees session.stop() runs exactly once on both the
    # success and failure paths. (The previous try/except shape could call
    # stop() twice when stop() itself raised on the success path.)
    try:
        task_result = await run_with_fallback(
            providers,
            TaskOptions(
                query=payload["query"],
                kernel=kernel_client,
                session_id=session.session_id,
                model=payload.get("model"),
                viewport_width=session.opts.viewport_width,
                viewport_height=session.opts.viewport_height,
            ),
        )
    finally:
        session_info = await session.stop()

    output: CuaOutput = {
        "result": task_result.result,
        "provider": task_result.provider,
    }
    if session_info.replay_view_url:
        output["replay_url"] = session_info.replay_view_url

    return output
108 changes: 108 additions & 0 deletions pkg/templates/python/cua/providers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
Provider factory with automatic fallback.

Resolution order:
1. CUA_PROVIDER env var (required)
2. CUA_FALLBACK_PROVIDERS env var (optional, comma-separated)
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from typing import Protocol

from kernel import Kernel


@dataclass
class TaskOptions:
    """Inputs handed to a provider's run_task call."""

    query: str  # natural-language task description
    kernel: Kernel  # Kernel API client shared with the browser session
    session_id: str  # id of the live Kernel browser session to drive
    model: str | None = None  # optional provider-specific model override
    viewport_width: int = 1280  # browser viewport width in pixels
    viewport_height: int = 800  # browser viewport height in pixels


@dataclass
class TaskResult:
    """Outcome of a successfully completed CUA task."""

    result: str  # final answer text produced by the provider
    provider: str  # name of the provider that produced the result


class CuaProvider(Protocol):
    """Structural (duck-typed) interface each provider adapter implements."""

    @property
    def name(self) -> str: ...  # short identifier, e.g. "anthropic"
    def is_configured(self) -> bool: ...  # True when the provider's API key is set
    async def run_task(self, options: TaskOptions) -> TaskResult: ...


def _build_provider(name: str) -> CuaProvider | None:
    """Instantiate the adapter for *name*, or return None if unrecognized.

    Adapter modules are imported lazily so that an unused provider's SDK
    never has to be importable.
    """
    if name == "anthropic":
        from .anthropic import AnthropicProvider as provider_cls
    elif name == "openai":
        from .openai import OpenAIProvider as provider_cls
    elif name == "gemini":
        from .gemini import GeminiProvider as provider_cls
    else:
        return None
    return provider_cls()


def resolve_providers() -> list[CuaProvider]:
    """Build the ordered, de-duplicated list of usable providers.

    Order is CUA_PROVIDER first, then each CUA_FALLBACK_PROVIDERS entry.
    Unknown names and providers without an API key are skipped with a
    warning; a RuntimeError is raised if nothing usable remains.
    """
    primary = os.environ.get("CUA_PROVIDER", "").strip().lower()
    fallbacks = [
        part.strip().lower()
        for part in os.environ.get("CUA_FALLBACK_PROVIDERS", "").split(",")
        if part.strip()
    ]
    candidates = ([primary] if primary else []) + fallbacks

    providers: list[CuaProvider] = []
    # dict.fromkeys de-duplicates while preserving first-seen order.
    for name in dict.fromkeys(candidates):
        provider = _build_provider(name)
        if provider is None:
            print(f'Warning: Unknown provider "{name}", skipping.')
        elif not provider.is_configured():
            print(f'Warning: Provider "{name}" missing API key, skipping.')
        else:
            providers.append(provider)

    if not providers:
        raise RuntimeError(
            "No CUA provider is configured. "
            "Set CUA_PROVIDER to one of: anthropic, openai, gemini, "
            "and provide the matching API key."
        )

    return providers


async def run_with_fallback(
    providers: list[CuaProvider],
    options: TaskOptions,
) -> TaskResult:
    """Run a CUA task, trying each provider in order until one succeeds.

    Args:
        providers: Ordered provider chain (first is preferred).
        options: Task parameters forwarded to each provider's run_task.

    Returns:
        The first successful provider's TaskResult.

    Raises:
        RuntimeError: if every provider fails (the last provider's
            exception is chained as the cause) or the chain is empty.
    """
    errors: list[tuple[str, Exception]] = []

    for provider in providers:
        try:
            print(f"Attempting provider: {provider.name}")
            return await provider.run_task(options)
        except Exception as exc:
            print(f'Provider "{provider.name}" failed: {exc}')
            errors.append((provider.name, exc))

    # Empty chain: give a message that is not a bare "All providers failed:".
    if not errors:
        raise RuntimeError("No providers were supplied to run the task.")

    summary = "\n".join(f"  {name}: {exc}" for name, exc in errors)
    # Chain the last failure so the root cause survives in the traceback.
    raise RuntimeError(f"All providers failed:\n{summary}") from errors[-1][1]
Loading
Loading