Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions apps/agent/entrypoints/sidepanel/index/useNotifyActiveTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,33 @@ export const useNotifyActiveTab = ({
status: ChatStatus
conversationId: string
}) => {
// Ref to store the last active tab ID
const lastTabIdRef = useRef<number | null>(null)
const pageToTabRef = useRef<Map<number, number>>(new Map())

const lastMessage = messages?.[messages.length - 1]

const latestTool =
lastMessage?.parts?.findLast((part) => part?.type?.startsWith('tool-')) ??
null

const latestTabId = (
latestTool as ToolUIPart & { input?: { tabId?: number } }
)?.input?.tabId
const latestInput = (
latestTool as ToolUIPart & {
input?: { tabId?: number; page?: number; pageId?: number }
}
)?.input

const latestPageId = latestInput?.pageId ?? latestInput?.page
const latestTabId =
latestInput?.tabId ??
(latestPageId !== undefined
? pageToTabRef.current.get(latestPageId)
: undefined)

useEffect(() => {
if (latestInput?.tabId && latestPageId !== undefined) {
pageToTabRef.current.set(latestPageId, latestInput.tabId)
}

const isStreaming = status === 'streaming'
const previousTabId = lastTabIdRef.current

Expand Down Expand Up @@ -65,7 +78,7 @@ export const useNotifyActiveTab = ({
if (latestTabId) {
lastTabIdRef.current = latestTabId
}
}, [conversationId, status, latestTabId])
}, [conversationId, status, latestInput?.tabId, latestPageId, latestTabId])

return
}
20 changes: 19 additions & 1 deletion apps/server/src/agent/gemini-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ import {
} from '@google/gemini-cli-core'
import type { Content, Part } from '@google/genai'
import type { BrowserContext } from '../api/types'
import type { Browser } from '../browser/browser'
import { logger } from '../lib/logger'
import { Sentry } from '../lib/sentry'
import { enrichToolInputWithTabId } from '../tools/framework'
import { registry } from '../tools/registry'
import { AgentExecutionError } from './errors'
import { buildSystemPrompt } from './prompt'
Expand Down Expand Up @@ -63,6 +65,7 @@ export class GeminiAgent {
private client: GeminiClient,
private geminiConfig: GeminiConfig,
private contentGenerator: VercelAIContentGenerator,
private browser: Browser,
private conversationId: string,
) {}

Expand All @@ -77,6 +80,7 @@ export class GeminiAgent {
static async create(
config: ResolvedAgentConfig,
mcpServers: Record<string, MCPServerConfig>,
browser: Browser,
): Promise<GeminiAgent> {
// Build model string with upstream provider if available
const modelString = config.upstreamProvider
Expand Down Expand Up @@ -183,6 +187,7 @@ export class GeminiAgent {
client,
geminiConfig,
contentGenerator,
browser,
config.conversationId,
)
}
Expand Down Expand Up @@ -338,9 +343,22 @@ export class GeminiAgent {
for (const requestInfo of toolCallRequests) {
if (abortSignal.aborted) break

const toolInput = await enrichToolInputWithTabId(
requestInfo.args,
this.browser,
)

if (uiStream && toolInput !== requestInfo.args) {
await uiStream.writeToolCall(
requestInfo.callId,
requestInfo.name,
toolInput,
)
}

await this.toolHooks?.onBeforeToolCall?.(
requestInfo.name,
requestInfo.args,
toolInput,
browserContext,
)

Expand Down
4 changes: 3 additions & 1 deletion apps/server/src/agent/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { MCPServerConfig } from '@google/gemini-cli-core'
import type { Browser } from '../browser/browser'
import { logger } from '../lib/logger'

import { GeminiAgent } from './gemini-agent'
Expand All @@ -19,6 +20,7 @@ export class SessionManager {
async getOrCreate(
config: ResolvedAgentConfig,
mcpServers: Record<string, MCPServerConfig>,
browser: Browser,
): Promise<Session> {
const existing = this.sessions.get(config.conversationId)

Expand All @@ -30,7 +32,7 @@ export class SessionManager {
return existing
}

const agent = await GeminiAgent.create(config, mcpServers)
const agent = await GeminiAgent.create(config, mcpServers, browser)
const session: Session = { agent }
this.sessions.set(config.conversationId, session)

Expand Down
30 changes: 30 additions & 0 deletions apps/server/src/agent/tool-loop/glow-enrichment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import type { UIMessageChunk } from 'ai'
import type { Browser } from '../../browser/browser'
import { enrichToolInputWithTabId } from '../../tools/framework'

/**
 * A `UIMessageChunk` narrowed to the `'tool-input-available'` variant, with
 * `input` widened to `unknown` so it can be inspected and rewritten without
 * knowing the concrete tool's argument schema.
 */
type ToolInputAvailableChunk = UIMessageChunk & {
type: 'tool-input-available'
input?: unknown
}

/**
 * Type guard that recognises stream chunks whose `type` discriminant is
 * `'tool-input-available'`.
 *
 * @param chunk - Any UI message stream chunk.
 * @returns `true` when the chunk is a `ToolInputAvailableChunk`.
 */
function isToolInputAvailableChunk(
  chunk: UIMessageChunk,
): chunk is ToolInputAvailableChunk {
  const { type: chunkType } = chunk
  return chunkType === 'tool-input-available'
}

/**
 * Adds a resolved `tabId` to the input of a `'tool-input-available'` chunk.
 *
 * Chunks of any other type, and chunks whose input needs no enrichment, are
 * returned as the very same object so callers can detect "unchanged" by
 * identity.
 *
 * The enrichment is best-effort: if resolving the tab fails, the original
 * chunk is passed through untouched rather than rejecting. A rejection here
 * would propagate out of the stream transform that calls this function and
 * error the whole response stream.
 *
 * @param chunk - Chunk taken from the agent UI stream.
 * @param browser - Browser used to resolve a page id to a tab id.
 * @returns The original chunk, or a shallow copy whose `input` carries `tabId`.
 */
export async function enrichToolInputChunkForGlow(
  chunk: UIMessageChunk,
  browser: Browser,
): Promise<UIMessageChunk> {
  if (!isToolInputAvailableChunk(chunk)) {
    return chunk
  }

  let enrichedInput: unknown
  try {
    enrichedInput = await enrichToolInputWithTabId(chunk.input, browser)
  } catch {
    // Deliberate best-effort fallback: a missing tabId is harmless, while a
    // thrown error would tear down the stream the user is reading.
    return chunk
  }

  // Same reference back means nothing was added; keep the original chunk.
  if (enrichedInput === chunk.input) {
    return chunk
  }

  return { ...chunk, input: enrichedInput } as UIMessageChunk
}
27 changes: 24 additions & 3 deletions apps/server/src/agent/tool-loop/service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { mkdir } from 'node:fs/promises'
import path from 'node:path'
import { createAgentUIStreamResponse, type UIMessage } from 'ai'
import {
createAgentUIStream,
createUIMessageStreamResponse,
type UIMessage,
type UIMessageChunk,
} from 'ai'
import type { ChatRequest } from '../../api/types'
import type { Browser } from '../../browser/browser'
import type { KlavisClient } from '../../lib/clients/klavis/klavis-client'
Expand All @@ -10,6 +15,7 @@ import type { ToolRegistry } from '../../tools/tool-registry'
import type { ResolvedAgentConfig } from '../types'
import { AiSdkAgent } from './ai-sdk-agent'
import { formatUserMessage } from './format-message'
import { enrichToolInputChunkForGlow } from './glow-enrichment'
import type { SessionStore } from './session-store'

export interface ChatV2ServiceDeps {
Expand Down Expand Up @@ -128,8 +134,7 @@ export class ChatV2Service {
const userContent = formatUserMessage(request.message, messageContext)
session.agent.appendUserMessage(userContent)

// Stream the agent response
return createAgentUIStreamResponse({
const stream = await createAgentUIStream({
agent: session.agent.toolLoopAgent,
uiMessages: session.agent.messages,
abortSignal,
Expand All @@ -149,6 +154,22 @@ export class ChatV2Service {
}
},
})

const enrichedStream = stream.pipeThrough(
new TransformStream<UIMessageChunk, UIMessageChunk>({
transform: async (chunk, controller) => {
const enrichedChunk = await enrichToolInputChunkForGlow(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the CDP connection drops, `enrichToolInputChunkForGlow` will throw, which errors the transform and breaks the response stream.

Suggested change
const enrichedChunk = await enrichToolInputChunkForGlow(
const enrichedChunk = await enrichToolInputChunkForGlow(
chunk,
this.deps.browser,
).catch(() => chunk)
Prompt To Fix With AI
This is a comment left during a code review.
Path: apps/server/src/agent/tool-loop/service.ts
Line: 161

Comment:
If the CDP connection drops, `enrichToolInputChunkForGlow` will throw, which errors the transform and breaks the response stream.

```suggestion
          const enrichedChunk = await enrichToolInputChunkForGlow(
            chunk,
            this.deps.browser,
          ).catch(() => chunk)
```

How can I resolve this? If you propose a fix, please make it concise.

chunk,
this.deps.browser,
)
controller.enqueue(enrichedChunk)
},
}),
)

return createUIMessageStreamResponse({
stream: enrichedStream,
})
}

async deleteSession(
Expand Down
6 changes: 5 additions & 1 deletion apps/server/src/api/services/chat-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,11 @@ export class ChatService {

const browserContext = await this.resolvePageIds(request.browserContext)

const session = await sessionManager.getOrCreate(agentConfig, mcpServers)
const session = await sessionManager.getOrCreate(
agentConfig,
mcpServers,
this.deps.browser,
)
await session.agent.execute(
request.message,
rawStream,
Expand Down
5 changes: 5 additions & 0 deletions apps/server/src/browser/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ export class Browser {
return tabToPage
}

/**
 * Looks up the tab id recorded for a page id.
 *
 * Awaits `listPages()` before reading `this.pages`.
 * NOTE(review): presumably `listPages()` refreshes `this.pages` — confirm.
 *
 * @param pageId - Page id to look up.
 * @returns The `tabId` of the matching page entry, or `undefined` when
 * `this.pages` has no entry for `pageId`.
 */
async resolvePageIdToTabId(pageId: number): Promise<number | undefined> {
  await this.listPages()
  const pageEntry = this.pages.get(pageId)
  return pageEntry?.tabId
}

async getActivePage(): Promise<PageInfo | null> {
const result = await this.cdp.Browser.getActiveTab()

Expand Down
38 changes: 38 additions & 0 deletions apps/server/src/tools/framework.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,44 @@ export function defineTool<T extends z.ZodType>(config: {
return config as ToolDefinition
}

/**
 * Reads `key` from `value` and returns it only when it is a finite number.
 *
 * @param value - Object to read the property from.
 * @param key - Name of the property to read.
 * @returns The property value when it is a `number` accepted by
 * `Number.isFinite`; otherwise `undefined`.
 */
function getNumberField(
  value: Record<string, unknown>,
  key: string,
): number | undefined {
  const field = value[key]
  // NaN and ±Infinity are typeof 'number' but are not usable ids.
  const isFiniteNumber = typeof field === 'number' && Number.isFinite(field)
  return isFiniteNumber ? field : undefined
}

/**
 * Adds a `tabId` to a tool input that identifies its target only by page.
 *
 * The input is returned unchanged, as the very same reference, when any of
 * these hold:
 * - `args` is not a plain (non-array) object;
 * - it already has a finite numeric `tabId`;
 * - it has neither a finite numeric `pageId` nor `page` (`pageId` wins when
 *   both are present);
 * - the page id cannot be resolved to a tab, or the lookup fails.
 *
 * Callers compare the result to `args` by identity to detect whether anything
 * was added, so the unchanged paths must not return a copy.
 *
 * The lookup is best-effort and this function never rejects because of it.
 * The embedded review note reports that the lookup throws when the CDP
 * connection drops.
 *
 * @param args - Raw tool-call arguments of unknown shape.
 * @param browser - Browser used to resolve a page id to a tab id.
 * @returns `args` itself, or a shallow copy with `tabId` added.
 */
export async function enrichToolInputWithTabId(
  args: unknown,
  browser: Browser,
): Promise<unknown> {
  if (!args || typeof args !== 'object' || Array.isArray(args)) {
    return args
  }

  const input = args as Record<string, unknown>
  if (getNumberField(input, 'tabId') !== undefined) {
    return args
  }

  const pageId =
    getNumberField(input, 'pageId') ?? getNumberField(input, 'page')
  if (pageId === undefined) {
    return args
  }

  let tabId: number | undefined
  try {
    tabId = await browser.resolvePageIdToTabId(pageId)
  } catch {
    // Deliberate best-effort fallback: the tabId is an optional extra, so a
    // failed lookup must not abort the tool call or the stream around it.
    return args
  }

  if (tabId === undefined) {
    return args
  }

  return { ...input, tabId }
}

export async function executeTool(
tool: ToolDefinition,
args: unknown,
Expand Down
65 changes: 65 additions & 0 deletions apps/server/tests/agent/tool-loop/glow-enrichment.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { describe, expect, it } from 'bun:test'
import type { UIMessageChunk } from 'ai'
import { enrichToolInputChunkForGlow } from '../../../src/agent/tool-loop/glow-enrichment'
import type { Browser } from '../../../src/browser/browser'

describe('enrichToolInputChunkForGlow', () => {
  // Stub browser: only page 5 is known, and it maps to tab 99.
  const browser = {
    resolvePageIdToTabId: async (pageId: number) =>
      pageId === 5 ? 99 : undefined,
  } as unknown as Browser

  // Builds a 'tool-input-available' chunk for the `click` tool.
  const clickChunk = (
    toolCallId: string,
    input: Record<string, unknown>,
  ): UIMessageChunk =>
    ({
      type: 'tool-input-available',
      toolCallId,
      toolName: 'click',
      input,
    }) as UIMessageChunk

  it('returns non-tool-input chunk unchanged', async () => {
    const textChunk = {
      type: 'text-delta',
      id: '0',
      delta: 'hello',
    } as UIMessageChunk

    // Identity check: the exact same object must come back.
    expect(await enrichToolInputChunkForGlow(textChunk, browser)).toBe(
      textChunk,
    )
  })

  it('enriches tool-input-available chunk with tabId from page', async () => {
    const original = clickChunk('call_1', { page: 5, element: 12 })

    const enriched = await enrichToolInputChunkForGlow(original, browser)

    expect(enriched).toEqual({
      type: 'tool-input-available',
      toolCallId: 'call_1',
      toolName: 'click',
      input: { page: 5, element: 12, tabId: 99 },
    })
  })

  it('keeps chunk unchanged when tabId already exists', async () => {
    const original = clickChunk('call_2', { tabId: 7, page: 5 })

    expect(await enrichToolInputChunkForGlow(original, browser)).toBe(original)
  })

  it('keeps chunk unchanged when page cannot be resolved', async () => {
    const original = clickChunk('call_3', { page: 404 })

    expect(await enrichToolInputChunkForGlow(original, browser)).toBe(original)
  })
})
43 changes: 43 additions & 0 deletions apps/server/tests/tools/framework.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, it } from 'bun:test'
import type { Browser } from '../../src/browser/browser'
import { enrichToolInputWithTabId } from '../../src/tools/framework'

describe('enrichToolInputWithTabId', () => {
  // Known page -> tab pairs; every other page id resolves to undefined.
  const tabIdByPageId = new Map<number, number>([
    [7, 42],
    [8, 84],
  ])

  const browser = {
    resolvePageIdToTabId: async (pageId: number) => tabIdByPageId.get(pageId),
  } as unknown as Browser

  it('returns non-object input unchanged', async () => {
    const raw = 'not-an-object'

    expect(await enrichToolInputWithTabId(raw, browser)).toBe(raw)
  })

  it('returns input unchanged when tabId is already present', async () => {
    const args = { tabId: 11, page: 7 }

    // Identity check: no copy may be made when nothing is added.
    expect(await enrichToolInputWithTabId(args, browser)).toBe(args)
  })

  it('adds tabId when page is present', async () => {
    const enriched = await enrichToolInputWithTabId(
      { page: 7, element: 13 },
      browser,
    )

    expect(enriched).toEqual({ page: 7, element: 13, tabId: 42 })
  })

  it('adds tabId when pageId is present', async () => {
    const enriched = await enrichToolInputWithTabId({ pageId: 8 }, browser)

    expect(enriched).toEqual({ pageId: 8, tabId: 84 })
  })

  it('returns input unchanged when page cannot be resolved', async () => {
    const args = { page: 999 }

    expect(await enrichToolInputWithTabId(args, browser)).toBe(args)
  })
})