Skip to content

Commit 9c1c32b

Browse files
committed
Improve SDK E2E test assertions
- stream-chunks: Fix vacuous timeSpread >= 0 assertion, make content assertions unconditional
- concurrent-streams: Replace object identity check with proper content validation
- subagent-streaming: Require subagent events instead of silently skipping assertions
- max-agent-steps: Add finish event assertion and new maxAgentSteps=1 test case
1 parent 763b6dd commit 9c1c32b

File tree

4 files changed

+136
-50
lines changed

4 files changed

+136
-50
lines changed

sdk/e2e/features/max-agent-steps.e2e.test.ts

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ describe('Features: Max Agent Steps', () => {
3030
test(
3131
'run completes with maxAgentSteps set',
3232
async () => {
33-
3433
const collector = new EventCollector()
3534

3635
const result = await client.run({
@@ -51,7 +50,6 @@ describe('Features: Max Agent Steps', () => {
5150
test(
5251
'low maxAgentSteps still allows simple responses',
5352
async () => {
54-
5553
const collector = new EventCollector()
5654

5755
const result = await client.run({
@@ -65,7 +63,65 @@ describe('Features: Max Agent Steps', () => {
6563

6664
// Should still complete for simple prompts
6765
expect(collector.hasEventType('start')).toBe(true)
66+
expect(collector.hasEventType('finish')).toBe(true)
6867
},
6968
DEFAULT_TIMEOUT,
7069
)
70+
71+
test(
72+
'maxAgentSteps=1 limits multi-step tasks',
73+
async () => {
74+
const collectorLimited = new EventCollector()
75+
const collectorUnlimited = new EventCollector()
76+
77+
// Run the same multi-step prompt with different step limits
78+
// A task requiring search + read should behave differently with maxAgentSteps=1
79+
const prompt = 'Search for files named package.json and read the first one you find'
80+
81+
const [limitedResult, unlimitedResult] = await Promise.all([
82+
client.run({
83+
agent: 'base2-max',
84+
prompt,
85+
maxAgentSteps: 1,
86+
handleEvent: collectorLimited.handleEvent,
87+
cwd: process.cwd(),
88+
}),
89+
client.run({
90+
agent: 'base2-max',
91+
prompt,
92+
maxAgentSteps: 10,
93+
handleEvent: collectorUnlimited.handleEvent,
94+
cwd: process.cwd(),
95+
}),
96+
])
97+
98+
assertNoAuthError(limitedResult.output)
99+
assertNoAuthError(unlimitedResult.output)
100+
101+
// Both runs should complete
102+
expect(collectorLimited.hasEventType('start')).toBe(true)
103+
expect(collectorLimited.hasEventType('finish')).toBe(true)
104+
expect(collectorUnlimited.hasEventType('start')).toBe(true)
105+
expect(collectorUnlimited.hasEventType('finish')).toBe(true)
106+
107+
// The limited run should spawn no more subagents than the unlimited run
108+
// This verifies the step limit actually constrains execution
109+
const limitedSubagents = collectorLimited.getEventsByType('subagent_start').length
110+
const unlimitedSubagents = collectorUnlimited.getEventsByType('subagent_start').length
111+
112+
// With maxAgentSteps=1, the agent should spawn fewer subagents
113+
// or complete fewer operations than with maxAgentSteps=10
114+
expect(limitedSubagents).toBeLessThanOrEqual(unlimitedSubagents)
115+
116+
// Additionally verify the limited response is shorter/less complete
117+
// (a properly limited run can't do as much work)
118+
const limitedText = collectorLimited.getFullText()
119+
const unlimitedText = collectorUnlimited.getFullText()
120+
121+
// The unlimited run's text should be at least half as long as the limited run's
122+
// This is a soft check - the key assertion is the subagent count above
123+
expect(unlimitedText.length).toBeGreaterThanOrEqual(limitedText.length * 0.5)
124+
},
125+
DEFAULT_TIMEOUT * 3,
126+
)
71127
})

sdk/e2e/integration/stream-chunks.integration.test.ts

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,33 +62,20 @@ describe('Integration: Stream Chunks', () => {
6262
test(
6363
'stream chunks arrive incrementally (not all at once)',
6464
async () => {
65-
66-
const chunkTimestamps: number[] = []
6765
const collector = new EventCollector()
6866

69-
const customChunkHandler = (chunk: typeof collector.streamChunks[0]) => {
70-
chunkTimestamps.push(Date.now())
71-
collector.handleStreamChunk(chunk)
72-
}
73-
7467
const result = await client.run({
7568
agent: DEFAULT_AGENT,
7669
prompt: 'Write a detailed explanation of async/await in JavaScript (at least 100 words)',
7770
handleEvent: collector.handleEvent,
78-
handleStreamChunk: customChunkHandler,
71+
handleStreamChunk: collector.handleStreamChunk,
7972
})
8073

8174
assertNoAuthError(result.output)
8275

83-
// Should have multiple chunks
84-
expect(chunkTimestamps.length).toBeGreaterThan(1)
85-
86-
// Verify chunks arrived over time (not all at the same millisecond)
87-
if (chunkTimestamps.length > 2) {
88-
const timeSpread = chunkTimestamps[chunkTimestamps.length - 1] - chunkTimestamps[0]
89-
// The spread should be at least some milliseconds for a longer response
90-
expect(timeSpread).toBeGreaterThanOrEqual(0)
91-
}
76+
// Should have multiple chunks - this validates incremental delivery
77+
// If content arrived all at once, there would only be 1 chunk
78+
expect(collector.streamChunks.length).toBeGreaterThan(1)
9279
},
9380
DEFAULT_TIMEOUT,
9481
)
@@ -111,13 +98,20 @@ describe('Integration: Stream Chunks', () => {
11198
const eventText = collector.getFullText()
11299
const streamText = collector.getFullStreamText()
113100

114-
// Both should contain meaningful content
115-
// Note: They may not be exactly equal due to filtering, but should overlap
116-
if (eventText.length > 0 && streamText.length > 0) {
117-
// At least some content should be present in both
118-
expect(eventText.length).toBeGreaterThan(0)
119-
expect(streamText.length).toBeGreaterThan(0)
120-
}
101+
// Both should contain meaningful content - verify they're not empty
102+
// This ensures the streaming actually worked and delivered content
103+
expect(eventText.length).toBeGreaterThan(0)
104+
expect(streamText.length).toBeGreaterThan(0)
105+
106+
// The stream text and event text should have some overlap
107+
// (they come from the same response, just different callbacks)
108+
// We check that at least one of them contains 'hello' or 'world'
109+
const hasOverlap =
110+
eventText.toLowerCase().includes('hello') ||
111+
streamText.toLowerCase().includes('hello') ||
112+
eventText.toLowerCase().includes('world') ||
113+
streamText.toLowerCase().includes('world')
114+
expect(hasOverlap).toBe(true)
121115
},
122116
DEFAULT_TIMEOUT,
123117
)

sdk/e2e/streaming/concurrent-streams.e2e.test.ts

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ describe('Streaming: Concurrent Streams', () => {
3535
const collector1 = new EventCollector()
3636
const collector2 = new EventCollector()
3737

38-
// Run two prompts concurrently
38+
// Run two prompts concurrently with distinctive keywords
3939
const [result1, result2] = await Promise.all([
4040
client.run({
4141
agent: DEFAULT_AGENT,
@@ -64,9 +64,17 @@ describe('Streaming: Concurrent Streams', () => {
6464
expect(collector2.hasEventType('start')).toBe(true)
6565
expect(collector2.hasEventType('finish')).toBe(true)
6666

67-
// Event counts should be independent
68-
expect(collector1.events.length).toBeGreaterThan(0)
69-
expect(collector2.events.length).toBeGreaterThan(0)
67+
// Verify streams contain expected content and aren't mixed
68+
const text1 = collector1.getFullStreamText().toUpperCase()
69+
const text2 = collector2.getFullStreamText().toUpperCase()
70+
71+
// Each stream should contain its expected keyword
72+
expect(text1).toContain('ALPHA')
73+
expect(text2).toContain('BETA')
74+
75+
// Streams should NOT contain the other stream's keyword (no mixing)
76+
expect(text1).not.toContain('BETA')
77+
expect(text2).not.toContain('ALPHA')
7078
},
7179
DEFAULT_TIMEOUT * 2,
7280
)
@@ -125,10 +133,26 @@ describe('Streaming: Concurrent Streams', () => {
125133
}),
126134
])
127135

128-
// Each collector should have independent chunks
129-
// The chunks shouldn't be identical (different prompts)
130-
// Note: We can't guarantee exact output, but they should be independent
131-
expect(collector1.streamChunks).not.toBe(collector2.streamChunks)
136+
// Each collector should have independent chunks with different content
137+
// Verify both collectors received content
138+
expect(collector1.streamChunks.length).toBeGreaterThan(0)
139+
expect(collector2.streamChunks.length).toBeGreaterThan(0)
140+
141+
// Get the full text from each stream
142+
const text1 = collector1.getFullStreamText().toUpperCase()
143+
const text2 = collector2.getFullStreamText().toUpperCase()
144+
145+
// Both should have content
146+
expect(text1.length).toBeGreaterThan(0)
147+
expect(text2.length).toBeGreaterThan(0)
148+
149+
// Verify each stream contains its expected keyword
150+
expect(text1).toContain('FIRST')
151+
expect(text2).toContain('SECOND')
152+
153+
// Verify streams are NOT mixed - each should only have its own content
154+
expect(text1).not.toContain('SECOND')
155+
expect(text2).not.toContain('FIRST')
132156
},
133157
DEFAULT_TIMEOUT * 2,
134158
)

sdk/e2e/streaming/subagent-streaming.e2e.test.ts

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,20 @@ describe('Streaming: Subagent Streaming', () => {
3939
const subagentStarts = collector.getEventsByType('subagent_start')
4040
const subagentFinishes = collector.getEventsByType('subagent_finish')
4141

42-
// If subagents were spawned, starts and finishes should match
43-
if (subagentStarts.length > 0) {
44-
// Each started subagent should have a finish
45-
for (const start of subagentStarts) {
46-
const matchingFinish = subagentFinishes.find(
47-
(f) => f.agentId === start.agentId,
48-
)
49-
// Subagent should eventually finish (or the run ends)
50-
expect(start.agentId).toBeDefined()
51-
expect(start.agentType).toBeDefined()
52-
expect(start.displayName).toBeDefined()
53-
}
42+
// The prompt should trigger file search which spawns a subagent
43+
// If no subagents were spawned, the test isn't validating what we intend
44+
expect(subagentStarts.length).toBeGreaterThan(0)
45+
46+
// Each started subagent should have a finish
47+
for (const start of subagentStarts) {
48+
const matchingFinish = subagentFinishes.find(
49+
(f) => f.agentId === start.agentId,
50+
)
51+
// Subagent should eventually finish
52+
expect(matchingFinish).toBeDefined()
53+
expect(start.agentId).toBeDefined()
54+
expect(start.agentType).toBeDefined()
55+
expect(start.displayName).toBeDefined()
5456
}
5557
},
5658
DEFAULT_TIMEOUT * 2,
@@ -72,6 +74,9 @@ describe('Streaming: Subagent Streaming', () => {
7274

7375
const subagentStarts = collector.getEventsByType('subagent_start')
7476

77+
// Ensure we actually got subagent events to validate
78+
expect(subagentStarts.length).toBeGreaterThan(0)
79+
7580
for (const event of subagentStarts) {
7681
// Required fields
7782
expect(typeof event.agentId).toBe('string')
@@ -105,22 +110,26 @@ describe('Streaming: Subagent Streaming', () => {
105110
cwd: process.cwd(),
106111
})
107112

113+
// Verify we got subagent events (prompt should trigger file exploration)
114+
const subagentStarts = collector.getEventsByType('subagent_start')
115+
expect(subagentStarts.length).toBeGreaterThan(0)
116+
108117
// Check for subagent chunks in stream
109118
const subagentChunks = collector.streamChunks.filter(
110119
(c): c is Extract<typeof c, { type: 'subagent_chunk' }> =>
111120
typeof c !== 'string' && c.type === 'subagent_chunk',
112121
)
113122

114-
// If there are subagent events, there might be subagent chunks
115-
const subagentStarts = collector.getEventsByType('subagent_start')
116-
if (subagentStarts.length > 0 && subagentChunks.length > 0) {
117-
// Verify chunk structure
123+
// If there are subagent chunks, verify their structure
124+
if (subagentChunks.length > 0) {
118125
for (const chunk of subagentChunks) {
119126
expect(chunk.agentId).toBeDefined()
120127
expect(chunk.agentType).toBeDefined()
121128
expect(typeof chunk.chunk).toBe('string')
122129
}
123130
}
131+
// Note: Subagent chunks may not always be present even with subagent events
132+
// (e.g., if the subagent completes very quickly without streaming)
124133
},
125134
DEFAULT_TIMEOUT * 2,
126135
)
@@ -140,6 +149,9 @@ describe('Streaming: Subagent Streaming', () => {
140149

141150
const subagentStarts = collector.getEventsByType('subagent_start')
142151

152+
// Ensure we got subagent events to validate uniqueness
153+
expect(subagentStarts.length).toBeGreaterThan(0)
154+
143155
// Check for duplicates by agentId
144156
const agentIds = subagentStarts.map((s) => s.agentId)
145157
const uniqueIds = new Set(agentIds)

0 commit comments

Comments
 (0)