Skip to content

Commit cd324a2

Browse files
committed
Merge origin/main into brandon/cli-env-filter
2 parents 16c7613 + 72490f3 commit cd324a2

File tree

21 files changed: +898 additions, -351 deletions (lines changed)

21 files changed

+898
-351
lines changed

.agents/base2/base2-gpt-5-planner.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ const definition: SecretAgentDefinition = {
1515
toolNames: ['spawn_agents', 'read_files'],
1616

1717
spawnableAgents: buildArray(
18-
'file-picker',
19-
'find-all-referencer',
18+
'file-picker-max',
19+
'code-searcher',
20+
'directory-lister',
21+
'glob-matcher',
2022
'researcher-web',
2123
'researcher-docs',
2224
'commander',

.agents/base2/base2-gpt-5-worker.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ const definition: SecretAgentDefinition = {
1010
model: 'openai/gpt-5',
1111
spawnableAgents: buildArray(
1212
'file-picker',
13-
'find-all-referencer',
13+
'code-searcher',
14+
'directory-lister',
15+
'glob-matcher',
1416
'researcher-web',
1517
'researcher-docs',
1618
'commander',

.agents/base2/base2.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ The user asks you to implement a new feature. You respond in multiple steps:
134134
6. Spawn a validator to run validation commands (tests, typechecks, etc.) to ensure the changes are correct.
135135
7. Inform the user that you have completed the task in one sentence without a final summary.`,
136136

137-
stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, the generate-plan agent to create a plan, and the reviewer to review changes.`,
137+
stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, the generate-plan agent to create a plan, code-reviewer to review changes, and the validator to run validation commands.`,
138138

139139
handleSteps: function* ({ prompt, params }) {
140140
let steps = 0

backend/src/tools/batch-str-replace.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ async function executeSingleStrReplace(
344344
toolResults.push(toolResultPart)
345345
onResponseChunk({
346346
type: 'tool_result',
347+
toolName: toolResultPart.toolName,
347348
toolCallId: toolCall.toolCallId,
348349
toolName: 'str_replace',
349350
output: toolResult,
@@ -491,6 +492,7 @@ function handleStrReplaceError(params: {
491492
toolResults.push(errorResult)
492493
onResponseChunk({
493494
type: 'tool_result',
495+
toolName: errorResult.toolName,
494496
toolCallId: toolCall.toolCallId,
495497
toolName: 'str_replace',
496498
output: errorResult.output,
@@ -878,6 +880,7 @@ async function applyBenchifyResultSafely(params: {
878880
// Notify client about the benchify update
879881
onResponseChunk({
880882
type: 'tool_result',
883+
toolName: benchifyToolResult.toolName,
881884
toolCallId: relatedToolCall.toolCallId,
882885
toolName: 'str_replace',
883886
output: benchifyToolResult.output,

backend/src/tools/tool-executor.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ export function executeToolCall<T extends ToolName>(
283283
onResponseChunk({
284284
type: 'tool_result',
285285
toolCallId: toolResult.toolCallId,
286+
toolName: toolResult.toolName,
286287
output: toolResult.output,
287288
})
288289

@@ -508,6 +509,7 @@ export async function executeCustomToolCall(
508509

509510
onResponseChunk({
510511
type: 'tool_result',
512+
toolName: toolResult.toolName,
511513
toolCallId: toolResult.toolCallId,
512514
output: toolResult.output,
513515
})

common/src/constants/analytics-events.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ export enum AnalyticsEvent {
9696
CHAT_COMPLETIONS_AUTH_ERROR = 'api.chat_completions_auth_error',
9797
CHAT_COMPLETIONS_VALIDATION_ERROR = 'api.chat_completions_validation_error',
9898
CHAT_COMPLETIONS_INSUFFICIENT_CREDITS = 'api.chat_completions_insufficient_credits',
99+
CHAT_COMPLETIONS_GENERATION_STARTED = 'api.chat_completions_generation_started',
99100
CHAT_COMPLETIONS_STREAM_STARTED = 'api.chat_completions_stream_started',
100-
CHAT_COMPLETIONS_STREAM_ERROR = 'api.chat_completions_stream_error',
101101
CHAT_COMPLETIONS_ERROR = 'api.chat_completions_error',
102102

103103
// Common
(new file; its path header is missing from this capture)

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import type { Logger } from './logger'
2+
3+
export type MessageRow = {
4+
id: string
5+
user_id: string
6+
finished_at: Date
7+
created_at: Date
8+
request: unknown
9+
reasoning_text: string
10+
response: string
11+
output_tokens?: number | null
12+
reasoning_tokens?: number | null
13+
cost?: number | null
14+
upstream_inference_cost?: number | null
15+
input_tokens?: number | null
16+
cache_read_input_tokens?: number | null
17+
}
18+
19+
export type InsertMessageBigqueryFn = (params: {
20+
row: MessageRow
21+
dataset?: string
22+
logger: Logger
23+
}) => Promise<boolean>

common/src/types/contracts/database.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ export type GetAgentRunFromIdOutput<T extends AgentRunColumn> = Promise<
3535
| {
3636
[K in T]: AgentRun[K]
3737
}
38-
| undefined
38+
| null
3939
>
4040
export type GetAgentRunFromIdFn = <T extends AgentRunColumn>(
4141
params: GetAgentRunFromIdInput<T>,

common/src/types/print-mode.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export type PrintModeToolCall = z.infer<typeof printModeToolCallSchema>
3737
export const printModeToolResultSchema = z.object({
3838
type: z.literal('tool_result'),
3939
toolCallId: z.string(),
40-
toolName: z.string().optional(),
40+
toolName: z.string(),
4141
output: toolResultOutputSchema.array(),
4242
parentAgentId: z.string().optional(),
4343
})

evals/buffbench/agent-runner.ts

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,10 @@ import { withTimeout } from '@codebuff/common/util/promise'
44
import { CodebuffClient } from '../../sdk/src/client'
55
import { withTestRepo } from '../subagents/test-repo-utils'
66

7+
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
78
import type { EvalCommitV2 } from './types'
89

9-
export interface AgentStep {
10-
response: string
11-
toolCalls: any[]
12-
toolResults: any[]
13-
}
10+
export type AgentStep = PrintModeEvent
1411

1512
export async function runAgentOnCommit({
1613
client,
@@ -50,23 +47,6 @@ export async function runAgentOnCommit({
5047
initCommand,
5148
},
5249
async (repoDir) => {
53-
let responseText = ''
54-
let toolCalls: any[] = []
55-
let toolResults: any[] = []
56-
57-
function flushStep() {
58-
if (
59-
responseText.length > 0 ||
60-
toolCalls.length > 0 ||
61-
toolResults.length > 0
62-
) {
63-
trace.push({ response: responseText, toolCalls, toolResults })
64-
responseText = ''
65-
toolCalls = []
66-
toolResults = []
67-
}
68-
}
69-
7050
const timeoutMs = 30 * 60 * 1000 // 30 minutes
7151
const result = await withTimeout(
7252
client.run({
@@ -75,30 +55,18 @@ export async function runAgentOnCommit({
7555
agentDefinitions: localAgentDefinitions,
7656
cwd: repoDir,
7757
handleEvent: (event) => {
78-
if (event.type === 'text') {
79-
if (toolResults.length > 0) {
80-
flushStep()
81-
}
82-
responseText += event.text
83-
} else if (event.type === 'tool_call') {
84-
if (event.toolName === 'set_messages') {
85-
return
86-
}
87-
toolCalls.push(event)
88-
} else if (event.type === 'tool_result') {
89-
toolResults.push(event)
90-
} else if (event.type === 'finish') {
91-
flushStep()
92-
} else if (event.type === 'error') {
58+
if (event.type === 'tool_call' && event.toolName === 'set_messages') {
59+
return
60+
}
61+
if (event.type === 'error') {
9362
console.error(`[${agentId}] Error event:`, event.message)
9463
}
64+
trace.push(event)
9565
},
9666
}),
9767
timeoutMs,
9868
`Agent ${agentId} timed out after ${timeoutMs / 1000} seconds`,
9969
)
100-
101-
flushStep()
10270
cost = result.sessionState.mainAgentState.creditsUsed / 100
10371

10472
execSync('git add .', { cwd: repoDir, stdio: 'ignore' })

0 commit comments

Comments
 (0)