Merge origin/main into brandon/cli-env-filter

brandonkachen · brandonkachen · commit cd324a25fda7 · 2025-10-17T14:44:22.000-07:00
diff --git a/.agents/base2/base2-gpt-5-planner.ts b/.agents/base2/base2-gpt-5-planner.ts
@@ -15,8 +15,10 @@ const definition: SecretAgentDefinition = {
   toolNames: ['spawn_agents', 'read_files'],
 
   spawnableAgents: buildArray(
-    'file-picker',
-    'find-all-referencer',
+    'file-picker-max',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
     'researcher-web',
     'researcher-docs',
     'commander',
diff --git a/.agents/base2/base2-gpt-5-worker.ts b/.agents/base2/base2-gpt-5-worker.ts
@@ -10,7 +10,9 @@ const definition: SecretAgentDefinition = {
   model: 'openai/gpt-5',
   spawnableAgents: buildArray(
     'file-picker',
-    'find-all-referencer',
+    'code-searcher',
+    'directory-lister',
+    'glob-matcher',
     'researcher-web',
     'researcher-docs',
     'commander',
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -134,7 +134,7 @@ The user asks you to implement a new feature. You respond in multiple steps:
 6. Spawn a validator to run validation commands (tests, typechecks, etc.) to ensure the changes are correct.
 7. Inform the user that you have completed the task in one sentence without a final summary.`,
 
-    stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, the generate-plan agent to create a plan, and the reviewer to review changes.`,
+    stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, the generate-plan agent to create a plan, code-reviewer to review changes, and the validator to run validation commands.`,
 
     handleSteps: function* ({ prompt, params }) {
       let steps = 0
diff --git a/backend/src/tools/batch-str-replace.ts b/backend/src/tools/batch-str-replace.ts
@@ -344,6 +344,7 @@ async function executeSingleStrReplace(
       toolResults.push(toolResultPart)
       onResponseChunk({
         type: 'tool_result',
+        toolName: toolResultPart.toolName,
         toolCallId: toolCall.toolCallId,
         toolName: 'str_replace',
         output: toolResult,
@@ -491,6 +492,7 @@ function handleStrReplaceError(params: {
   toolResults.push(errorResult)
   onResponseChunk({
     type: 'tool_result',
+    toolName: errorResult.toolName,
     toolCallId: toolCall.toolCallId,
     toolName: 'str_replace',
     output: errorResult.output,
@@ -878,6 +880,7 @@ async function applyBenchifyResultSafely(params: {
     // Notify client about the benchify update
     onResponseChunk({
       type: 'tool_result',
+      toolName: benchifyToolResult.toolName,
       toolCallId: relatedToolCall.toolCallId,
       toolName: 'str_replace',
       output: benchifyToolResult.output,
diff --git a/backend/src/tools/tool-executor.ts b/backend/src/tools/tool-executor.ts
@@ -283,6 +283,7 @@ export function executeToolCall<T extends ToolName>(
     onResponseChunk({
       type: 'tool_result',
       toolCallId: toolResult.toolCallId,
+      toolName: toolResult.toolName,
       output: toolResult.output,
     })
 
@@ -508,6 +509,7 @@ export async function executeCustomToolCall(
 
       onResponseChunk({
         type: 'tool_result',
+        toolName: toolResult.toolName,
         toolCallId: toolResult.toolCallId,
         output: toolResult.output,
       })
diff --git a/common/src/constants/analytics-events.ts b/common/src/constants/analytics-events.ts
@@ -96,8 +96,8 @@ export enum AnalyticsEvent {
   CHAT_COMPLETIONS_AUTH_ERROR = 'api.chat_completions_auth_error',
   CHAT_COMPLETIONS_VALIDATION_ERROR = 'api.chat_completions_validation_error',
   CHAT_COMPLETIONS_INSUFFICIENT_CREDITS = 'api.chat_completions_insufficient_credits',
+  CHAT_COMPLETIONS_GENERATION_STARTED = 'api.chat_completions_generation_started',
   CHAT_COMPLETIONS_STREAM_STARTED = 'api.chat_completions_stream_started',
-  CHAT_COMPLETIONS_STREAM_ERROR = 'api.chat_completions_stream_error',
   CHAT_COMPLETIONS_ERROR = 'api.chat_completions_error',
 
   // Common
diff --git a/common/src/types/contracts/bigquery.ts b/common/src/types/contracts/bigquery.ts
@@ -0,0 +1,23 @@
+import type { Logger } from './logger'
+
+export type MessageRow = {
+  id: string
+  user_id: string
+  finished_at: Date
+  created_at: Date
+  request: unknown
+  reasoning_text: string
+  response: string
+  output_tokens?: number | null
+  reasoning_tokens?: number | null
+  cost?: number | null
+  upstream_inference_cost?: number | null
+  input_tokens?: number | null
+  cache_read_input_tokens?: number | null
+}
+
+export type InsertMessageBigqueryFn = (params: {
+  row: MessageRow
+  dataset?: string
+  logger: Logger
+}) => Promise<boolean>
diff --git a/common/src/types/contracts/database.ts b/common/src/types/contracts/database.ts
@@ -35,7 +35,7 @@ export type GetAgentRunFromIdOutput<T extends AgentRunColumn> = Promise<
   | {
       [K in T]: AgentRun[K]
     }
-  | undefined
+  | null
 >
 export type GetAgentRunFromIdFn = <T extends AgentRunColumn>(
   params: GetAgentRunFromIdInput<T>,
diff --git a/common/src/types/print-mode.ts b/common/src/types/print-mode.ts
@@ -37,7 +37,7 @@ export type PrintModeToolCall = z.infer<typeof printModeToolCallSchema>
 export const printModeToolResultSchema = z.object({
   type: z.literal('tool_result'),
   toolCallId: z.string(),
-  toolName: z.string().optional(),
+  toolName: z.string(),
   output: toolResultOutputSchema.array(),
   parentAgentId: z.string().optional(),
 })
diff --git a/evals/buffbench/agent-runner.ts b/evals/buffbench/agent-runner.ts
@@ -4,13 +4,10 @@ import { withTimeout } from '@codebuff/common/util/promise'
 import { CodebuffClient } from '../../sdk/src/client'
 import { withTestRepo } from '../subagents/test-repo-utils'
 
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type { EvalCommitV2 } from './types'
 
-export interface AgentStep {
-  response: string
-  toolCalls: any[]
-  toolResults: any[]
-}
+export type AgentStep = PrintModeEvent
 
 export async function runAgentOnCommit({
   client,
@@ -50,23 +47,6 @@ export async function runAgentOnCommit({
         initCommand,
       },
       async (repoDir) => {
-        let responseText = ''
-        let toolCalls: any[] = []
-        let toolResults: any[] = []
-
-        function flushStep() {
-          if (
-            responseText.length > 0 ||
-            toolCalls.length > 0 ||
-            toolResults.length > 0
-          ) {
-            trace.push({ response: responseText, toolCalls, toolResults })
-            responseText = ''
-            toolCalls = []
-            toolResults = []
-          }
-        }
-
         const timeoutMs = 30 * 60 * 1000 // 30 minutes
         const result = await withTimeout(
           client.run({
@@ -75,30 +55,18 @@ export async function runAgentOnCommit({
             agentDefinitions: localAgentDefinitions,
             cwd: repoDir,
             handleEvent: (event) => {
-              if (event.type === 'text') {
-                if (toolResults.length > 0) {
-                  flushStep()
-                }
-                responseText += event.text
-              } else if (event.type === 'tool_call') {
-                if (event.toolName === 'set_messages') {
-                  return
-                }
-                toolCalls.push(event)
-              } else if (event.type === 'tool_result') {
-                toolResults.push(event)
-              } else if (event.type === 'finish') {
-                flushStep()
-              } else if (event.type === 'error') {
+              if (event.type === 'tool_call' && event.toolName === 'set_messages') {
+                return
+              }
+              if (event.type === 'error') {
                 console.error(`[${agentId}] Error event:`, event.message)
               }
+              trace.push(event)
             },
           }),
           timeoutMs,
           `Agent ${agentId} timed out after ${timeoutMs / 1000} seconds`,
         )
-
-        flushStep()
         cost = result.sessionState.mainAgentState.creditsUsed / 100
 
         execSync('git add .', { cwd: repoDir, stdio: 'ignore' })
diff --git a/evals/buffbench/trace-analyzer.ts b/evals/buffbench/trace-analyzer.ts
@@ -19,17 +19,15 @@ export interface AgentTraceData {
 }
 
 function truncateTrace(trace: AgentStep[]): AgentStep[] {
-  return trace.map((step) => ({
-    ...step,
-    toolResults: step.toolResults.map((result) => {
-      // Truncate read_files, run_terminal_command, and code_search results to save tokens
-      if (result.toolName === 'read_files' && result.output) {
-        const output = Array.isArray(result.output)
-          ? result.output
-          : [result.output]
+  return trace.map((step) => {
+    // Handle tool_result events
+    if (step.type === 'tool_result') {
+      const output = Array.isArray(step.output) ? step.output : [step.output]
+      
+      // Truncate read_files results
+      if (step.toolName === 'read_files') {
         const truncatedOutput = output.map((item: any) => {
           if (item.type === 'json' && Array.isArray(item.value)) {
-            // Truncate file contents in read_files results
             return {
               ...item,
               value: item.value.map((file: any) => {
@@ -47,16 +45,13 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
           return item
         })
         return {
-          ...result,
+          ...step,
           output: truncatedOutput,
         }
       }
 
       // Truncate run_terminal_command results (keep first 500 chars)
-      if (result.toolName === 'run_terminal_command' && result.output) {
-        const output = Array.isArray(result.output)
-          ? result.output
-          : [result.output]
+      if (step.toolName === 'run_terminal_command') {
         const truncatedOutput = output.map((item: any) => {
           if (item.type === 'json' && item.value?.stdout) {
             return {
@@ -73,16 +68,13 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
           return item
         })
         return {
-          ...result,
+          ...step,
           output: truncatedOutput,
         }
       }
 
       // Truncate code_search results (keep first 500 chars)
-      if (result.toolName === 'code_search' && result.output) {
-        const output = Array.isArray(result.output)
-          ? result.output
-          : [result.output]
+      if (step.toolName === 'code_search') {
         const truncatedOutput = output.map((item: any) => {
           if (item.type === 'json' && item.value?.stdout) {
             return {
@@ -99,14 +91,14 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
           return item
         })
         return {
-          ...result,
+          ...step,
           output: truncatedOutput,
         }
       }
-
-      return result
-    }),
-  }))
+    }
+    
+    return step
+  })
 }
 
 const traceAnalyzerAgent: AgentDefinition = {
diff --git a/packages/bigquery/src/client.ts b/packages/bigquery/src/client.ts
@@ -3,13 +3,8 @@ import { BigQuery } from '@google-cloud/bigquery'
 
 import { MESSAGE_SCHEMA, RELABELS_SCHEMA, TRACES_SCHEMA } from './schema'
 
-import type {
-  BaseTrace,
-  GetRelevantFilesTrace,
-  MessageRow,
-  Relabel,
-  Trace,
-} from './schema'
+import type { BaseTrace, GetRelevantFilesTrace, Relabel, Trace } from './schema'
+import type { MessageRow } from '@codebuff/common/types/contracts/bigquery'
 import type { Logger } from '@codebuff/common/types/contracts/logger'
 
 const DATASET =
@@ -99,7 +94,7 @@ export async function setupBigQuery({
   }
 }
 
-export async function insertMessage({
+export async function insertMessageBigquery({
   row,
   dataset = DATASET,
   logger,
diff --git a/packages/bigquery/src/schema.ts b/packages/bigquery/src/schema.ts
@@ -126,22 +126,6 @@ export const RELABELS_SCHEMA: TableSchema = {
   ],
 }
 
-export type MessageRow = {
-  id: string
-  user_id: string
-  finished_at: Date
-  created_at: Date
-  request: unknown
-  reasoning_text: string
-  response: string
-  output_tokens?: number | null
-  reasoning_tokens?: number | null
-  cost?: number | null
-  upstream_inference_cost?: number | null
-  input_tokens?: number | null
-  cache_read_input_tokens?: number | null
-}
-
 export const MESSAGE_SCHEMA: TableSchema = {
   fields: [
     { name: 'id', type: 'STRING', mode: 'REQUIRED' },
diff --git a/scripts/fat-sdk-openrouter-example.ts b/scripts/fat-sdk-openrouter-example.ts
@@ -1,14 +1,15 @@
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible'
 import { websiteUrl } from '@codebuff/npm-app/config'
-import { streamText } from 'ai'
+import { generateText } from 'ai'
 
 const codebuffBackendProvider = createOpenAICompatible({
   name: 'codebuff',
   apiKey: '12345',
   baseURL: websiteUrl + '/api/v1',
 })
 
-const response = streamText({
+// const response = await streamText({
+const response = await generateText({
   model: codebuffBackendProvider('anthropic/claude-sonnet-4.5'),
   messages: [
     {
@@ -44,6 +45,8 @@ const response = streamText({
     },
   },
 })
-for await (const chunk of response.fullStream) {
-  console.log({ chunk })
-}
+
+console.dir({ response }, { depth: null })
+// for await (const chunk of response.fullStream) {
+//   console.dir({ chunk }, { depth: null })
+// }
diff --git a/sdk/src/__tests__/run-text-emission.test.ts b/sdk/src/__tests__/run-text-emission.test.ts
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
diff --git a/web/src/api/v1/chat/__tests__/completions.test.ts b/web/src/api/v1/chat/__tests__/completions.test.ts
diff --git a/web/src/api/v1/chat/completions.ts b/web/src/api/v1/chat/completions.ts
diff --git a/web/src/app/api/v1/chat/completions/route.ts b/web/src/app/api/v1/chat/completions/route.ts
diff --git a/web/src/db/agent-run.ts b/web/src/db/agent-run.ts
diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -35,7 +35,7 @@ export type GetAgentRunFromIdOutput<T extends AgentRunColumn> = Promise<` |
| `35` | `35` | `\| {` |
| `36` | `36` | `[K in T]: AgentRun[K]` |
| `37` | `37` | `}` |
| `38` |  | `- \| undefined` |
|  | `38` | `+ \| null` |
| `39` | `39` | `>` |
| `40` | `40` | `export type GetAgentRunFromIdFn = <T extends AgentRunColumn>(` |
| `41` | `41` | `params: GetAgentRunFromIdInput<T>,` |