Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ Create an API key from the [Kernel dashboard](https://dashboard.onkernel.com).
- `advanced-sample` - Sample apps using advanced Kernel configs
- `stagehand` - Template with Stagehand SDK (TypeScript only)
- `browser-use` - Template with Browser Use SDK (Python only)
- `computer-use` - Anthropic Computer Use prompt loop
- `cua` - OpenAI Computer Using Agent (CUA) sample
- `gemini-cua` - Google Gemini CUA sample (TypeScript only)
- `anthropic-computer-use` - Anthropic Computer Use prompt loop
- `openai-computer-use` - OpenAI Computer Use Agent sample
- `gemini-computer-use` - Gemini Computer Use Agent sample (TypeScript only)
- `openagi-computer-use` - OpenAGI Lux computer-use models (Python only)
- `magnitude` - Magnitude framework sample (TypeScript only)

### App Deployment
Expand Down Expand Up @@ -393,7 +394,7 @@ kernel create --name my-scraper --language python --template browser-use
kernel create --name my-agent --language ts --template stagehand

# Create a Python Computer Use app
kernel create --name my-cu-app --language py --template computer-use
kernel create --name my-cu-app --language py --template anthropic-computer-use
```

### Deploy with environment variables
Expand Down
8 changes: 4 additions & 4 deletions cmd/create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -440,10 +440,10 @@ func TestCreateCommand_InvalidLanguageTemplateCombinations(t *testing.T) {
errContains: "template not found: python/magnitude",
},
{
name: "gemini-cua not available for python",
name: "gemini-computer-use not available for python",
language: create.LanguagePython,
template: create.TemplateGeminiCUA,
errContains: "template not found: python/gemini-cua",
template: create.TemplateGeminiComputerUse,
errContains: "template not found: python/gemini-computer-use",
},
{
name: "invalid language",
Expand Down Expand Up @@ -558,7 +558,7 @@ func TestCreateCommand_TemplateNotAvailableForLanguage(t *testing.T) {
create.TemplateBrowserUse: {create.LanguageTypeScript},
create.TemplateStagehand: {create.LanguagePython},
create.TemplateMagnitude: {create.LanguagePython},
create.TemplateGeminiCUA: {create.LanguagePython},
create.TemplateGeminiComputerUse: {create.LanguagePython},
}

for template, unavailableLanguages := range unavailableCombinations {
Expand Down
4 changes: 2 additions & 2 deletions pkg/create/copy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ func TestCopyTemplateFiles_PreservesDirectoryStructure(t *testing.T) {
require.NoError(t, err)

// Use a template that has subdirectories
err = CopyTemplateFiles(appPath, LanguageTypeScript, TemplateComputerUse)
err = CopyTemplateFiles(appPath, LanguageTypeScript, TemplateAnthropicComputerUse)
require.NoError(t, err)

// Verify that subdirectories are created (computer-use has src/ directory)
// Verify that subdirectories are created (anthropic-computer-use has src/ directory)
srcDir := filepath.Join(appPath, "src")
if _, err := os.Stat(srcDir); err == nil {
assert.DirExists(t, srcDir, "Subdirectories should be preserved")
Expand Down
84 changes: 53 additions & 31 deletions pkg/create/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ import (

// Template key constants
const (
TemplateSampleApp = "sample-app"
TemplateCaptchaSolver = "captcha-solver"
TemplateComputerUse = "computer-use"
TemplateCUA = "cua"
TemplateMagnitude = "magnitude"
TemplateGeminiCUA = "gemini-cua"
TemplateBrowserUse = "browser-use"
TemplateStagehand = "stagehand"
TemplateSampleApp = "sample-app"
TemplateCaptchaSolver = "captcha-solver"
TemplateAnthropicComputerUse = "anthropic-computer-use"
TemplateOpenAIComputerUse = "openai-computer-use"
TemplateMagnitude = "magnitude"
TemplateGeminiComputerUse = "gemini-computer-use"
TemplateBrowserUse = "browser-use"
TemplateStagehand = "stagehand"
TemplateOpenAGIComputerUse = "openagi-computer-use"
)

type TemplateInfo struct {
Expand All @@ -42,24 +43,24 @@ var Templates = map[string]TemplateInfo{
Description: "Demo of Kernel's auto-CAPTCHA solving capability",
Languages: []string{LanguageTypeScript, LanguagePython},
},
TemplateComputerUse: {
Name: "Computer Use",
Description: "Implements the Anthropic Computer Use SDK",
TemplateAnthropicComputerUse: {
Name: "Anthropic Computer Use",
Description: "Implements an Anthropic computer use agent",
Languages: []string{LanguageTypeScript, LanguagePython},
},
TemplateCUA: {
Name: "CUA Sample",
Description: "Implements a Computer Use Agent (OpenAI CUA) sample",
TemplateOpenAIComputerUse: {
Name: "OpenAI Computer Use",
Description: "Implements an OpenAI computer use agent",
Languages: []string{LanguageTypeScript, LanguagePython},
},
TemplateMagnitude: {
Name: "Magnitude",
Description: "Implements the Magnitude.run SDK",
Languages: []string{LanguageTypeScript},
},
TemplateGeminiCUA: {
Name: "Gemini CUA",
Description: "Implements Gemini 2.5 Computer Use Agent",
TemplateGeminiComputerUse: {
Name: "Gemini Computer Use",
Description: "Implements a Gemini computer use agent",
Languages: []string{LanguageTypeScript},
},
TemplateBrowserUse: {
Expand All @@ -72,6 +73,11 @@ var Templates = map[string]TemplateInfo{
Description: "Implements the Stagehand v3 SDK",
Languages: []string{LanguageTypeScript},
},
TemplateOpenAGIComputerUse: {
Name: "OpenAGI Computer Use",
Description: "Implements an OpenAGI computer use agent",
Languages: []string{LanguagePython},
},
}

// GetSupportedTemplatesForLanguage returns a list of all supported template names for a given language
Expand All @@ -87,12 +93,23 @@ func GetSupportedTemplatesForLanguage(language string) TemplateKeyValues {
}

sort.Slice(templates, func(i, j int) bool {
// Put computer-use first, then sort alphabetically
if templates[i].Key == TemplateComputerUse {
return true
// Put computer-use templates first (Anthropic/OpenAI/Gemini), then sort alphabetically.
priority := func(key string) int {
switch key {
case TemplateAnthropicComputerUse:
return 0
case TemplateOpenAIComputerUse:
return 1
case TemplateGeminiComputerUse:
return 2
default:
return 10
}
}
if templates[j].Key == TemplateComputerUse {
return false

pi, pj := priority(templates[i].Key), priority(templates[j].Key)
if pi != pj {
return pi < pj
}
return templates[i].Key < templates[j].Key
})
Expand Down Expand Up @@ -152,22 +169,22 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}'`,
},
TemplateComputerUse: {
TemplateAnthropicComputerUse: {
EntryPoint: "index.ts",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-cu cu-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
InvokeCommand: `kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
},
TemplateMagnitude: {
EntryPoint: "index.ts",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}'`,
},
TemplateCUA: {
TemplateOpenAIComputerUse: {
EntryPoint: "index.ts",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke ts-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'`,
InvokeCommand: `kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'`,
},
TemplateGeminiCUA: {
TemplateGeminiComputerUse: {
EntryPoint: "index.ts",
NeedsEnvFile: true,
InvokeCommand: "kernel invoke ts-gemini-cua gemini-cua-task",
Expand All @@ -189,15 +206,20 @@ var Commands = map[string]map[string]DeployConfig{
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'`,
},
TemplateComputerUse: {
TemplateAnthropicComputerUse: {
EntryPoint: "main.py",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
},
TemplateOpenAIComputerUse: {
EntryPoint: "main.py",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-cu cu-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
InvokeCommand: `kernel invoke python-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'`,
},
Comment thread
dprevoznik marked this conversation as resolved.
TemplateCUA: {
TemplateOpenAGIComputerUse: {
EntryPoint: "main.py",
NeedsEnvFile: true,
InvokeCommand: `kernel invoke python-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'`,
InvokeCommand: `kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button", "record_replay": "True"}'`,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: JSON string instead of boolean for record_replay parameter

The record_replay parameter is passed as a JSON string "True" instead of a JSON boolean true. The TypedDict and dataclass both expect a boolean type. While "True" (string) happens to be truthy so recording is enabled, using "False" to disable recording will fail silently — the string "False" is also truthy in Python, so recording will still be enabled. The correct JSON syntax would be "record_replay": true or "record_replay": false (lowercase, no quotes).

Additional Locations (1)

Fix in Cursor Fix in Web

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh wait this might be valid

},
},
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Kernel Python Sample App - Computer Use
# Kernel Python Sample App - Anthropic Computer Use

This is a simple Kernel application that implements a prompt loop using Anthropic Computer Use.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ class QueryOutput(TypedDict):
raise ValueError("ANTHROPIC_API_KEY is not set")

client = Kernel()
app = kernel.App("python-cu")
app = kernel.App("python-anthropic-cua")


@app.action("cu-task")
async def cu_task(
@app.action("cua-task")
async def cua_task(
ctx: kernel.KernelContext,
payload: QueryInput,
) -> QueryOutput:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[project]
name = "python-cu"
name = "python-anthropic-cua"
version = "0.1.0"
description = "Kernel reference app for Anthropic Computer Use"
requires-python = ">=3.9"
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/templates/python/openagi-computer-use/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## OpenAGI API key (required)
OAGI_API_KEY=

## Optional override (defaults to https://api.agiopen.org)
OAGI_BASE_URL=https://api.agiopen.org
79 changes: 79 additions & 0 deletions pkg/templates/python/openagi-computer-use/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Kernel Python Sample App - OpenAGI Computer Use
Comment thread
dprevoznik marked this conversation as resolved.

This is a Kernel application that demonstrates using OpenAGI's Lux computer-use models for browser automation.

## Overview

This template provides two agent types from the [OpenAGI SDK](https://github.com/onkernel/kernel-oagi):

### AsyncDefaultAgent
Best for high-level tasks with immediate execution. Supports two models:
- `lux-actor-1`: Fast execution (~1s/step), simple linear tasks
- `lux-thinker-1`: Complex planning, comparison tasks, handling ambiguity

### TaskerAgent
Best for structured workflows with predefined steps (todos).

## Setup

1. Get your API keys:
- **Kernel**: [dashboard.onkernel.com](https://dashboard.onkernel.com)
- **OpenAGI**: [developer.agiopen.org](https://developer.agiopen.org)

2. Deploy the app:
```bash
kernel login
cp .env.example .env
kernel deploy main.py --env-file .env
```

## Usage

### AsyncDefaultAgent

Execute high-level tasks with optional model selection:

```bash
# Default model (lux-actor-1)
kernel invoke python-openagi-cua openagi-default-task \
-p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button"}'

# With specific model
kernel invoke python-openagi-cua openagi-default-task \
-p '{"instruction": "Navigate to https://developer.agiopen.org/docs and find the Lux model pricing page.", "model": "lux-thinker-1"}'
```

### TaskerAgent

Execute structured workflows with predefined steps:

```bash
kernel invoke python-openagi-cua openagi-tasker-task \
-p '{"task": "Navigate to OAGI documentation and navigate to the What is Computer Use? section", "todos": ["Go to https://agiopen.org", "Click on the What is Computer Use? button", "Highlight point number 2 about computer use."]}'
```

## Recording Replays

> **Note:** Replay recording is only available to Kernel users on paid plans.

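Both actions support optional video replay recording. Add `"record_replay": true` to your payload to capture a video of the browser session. Pass a JSON boolean (`true` or `false`), not a quoted string: a quoted value such as `"False"` is a non-empty string, which Python treats as truthy, so recording would stay enabled.

```bash
kernel invoke python-openagi-cua openagi-default-task \
-p '{"instruction": "Navigate to https://agiopen.org", "record_replay": true}'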
```

When enabled, the response will include a `replay_url` field with a link to view the recorded session.

## Model Selection Guide

| Model | Best For | Avoid When |
|-------|----------|------------|
| `lux-actor-1` | Fast execution, simple linear tasks (10-20 steps) | Complex reasoning, comparison tasks |
| `lux-thinker-1` | Complex planning, comparison tasks, handling ambiguity | Low latency needs, simple click-paths |

## Resources

- [OpenAGI Documentation](https://developer.agiopen.org)
- [Kernel Documentation](https://onkernel.com/docs/quickstart)
- [Kernel + OpenAGI Template Repository](https://github.com/onkernel/kernel-oagi)
Loading