Improve SDK E2E test assertions

brandonkachen · brandonkachen · commit 9c1c32b5490a · 2025-12-10T11:36:29.000-08:00
- stream-chunks: Fix vacuous timeSpread &gt;= 0 assertion, make content assertions unconditional
- concurrent-streams: Replace object identity check with proper content validation
- subagent-streaming: Require subagent events instead of silently skipping assertions
- max-agent-steps: Add finish event assertion and new maxAgentSteps=1 test case
diff --git a/sdk/e2e/features/max-agent-steps.e2e.test.ts b/sdk/e2e/features/max-agent-steps.e2e.test.ts
@@ -30,7 +30,6 @@ describe('Features: Max Agent Steps', () => {
   test(
     'run completes with maxAgentSteps set',
     async () => {
-
       const collector = new EventCollector()
 
       const result = await client.run({
@@ -51,7 +50,6 @@ describe('Features: Max Agent Steps', () => {
   test(
     'low maxAgentSteps still allows simple responses',
     async () => {
-
       const collector = new EventCollector()
 
       const result = await client.run({
@@ -65,7 +63,65 @@ describe('Features: Max Agent Steps', () => {
 
       // Should still complete for simple prompts
       expect(collector.hasEventType('start')).toBe(true)
+      expect(collector.hasEventType('finish')).toBe(true)
     },
     DEFAULT_TIMEOUT,
   )
+
+  test(
+    'maxAgentSteps=1 limits multi-step tasks',
+    async () => {
+      const collectorLimited = new EventCollector()
+      const collectorUnlimited = new EventCollector()
+
+      // Run the same multi-step prompt with different step limits
+      // A task requiring search + read should behave differently with maxAgentSteps=1
+      const prompt = 'Search for files named package.json and read the first one you find'
+
+      const [limitedResult, unlimitedResult] = await Promise.all([
+        client.run({
+          agent: 'base2-max',
+          prompt,
+          maxAgentSteps: 1,
+          handleEvent: collectorLimited.handleEvent,
+          cwd: process.cwd(),
+        }),
+        client.run({
+          agent: 'base2-max',
+          prompt,
+          maxAgentSteps: 10,
+          handleEvent: collectorUnlimited.handleEvent,
+          cwd: process.cwd(),
+        }),
+      ])
+
+      assertNoAuthError(limitedResult.output)
+      assertNoAuthError(unlimitedResult.output)
+
+      // Both runs should complete
+      expect(collectorLimited.hasEventType('start')).toBe(true)
+      expect(collectorLimited.hasEventType('finish')).toBe(true)
+      expect(collectorUnlimited.hasEventType('start')).toBe(true)
+      expect(collectorUnlimited.hasEventType('finish')).toBe(true)
+
+      // The limited run should have fewer subagent spawns than unlimited
+      // This verifies the step limit actually constrains execution
+      const limitedSubagents = collectorLimited.getEventsByType('subagent_start').length
+      const unlimitedSubagents = collectorUnlimited.getEventsByType('subagent_start').length
+
+      // With maxAgentSteps=1, the agent should spawn fewer subagents
+      // or complete fewer operations than with maxAgentSteps=10
+      expect(limitedSubagents).toBeLessThanOrEqual(unlimitedSubagents)
+
+      // Additionally verify the limited response is shorter/less complete
+      // (a properly limited run can't do as much work)
+      const limitedText = collectorLimited.getFullText()
+      const unlimitedText = collectorUnlimited.getFullText()
+      
+      // The unlimited run should have more content (did more work)
+      // This is a soft check - the key assertion is the subagent count above
+      expect(unlimitedText.length).toBeGreaterThanOrEqual(limitedText.length * 0.5)
+    },
+    DEFAULT_TIMEOUT * 3,
+  )
 })
diff --git a/sdk/e2e/integration/stream-chunks.integration.test.ts b/sdk/e2e/integration/stream-chunks.integration.test.ts
@@ -62,33 +62,20 @@ describe('Integration: Stream Chunks', () => {
   test(
     'stream chunks arrive incrementally (not all at once)',
     async () => {
-
-      const chunkTimestamps: number[] = []
       const collector = new EventCollector()
 
-      const customChunkHandler = (chunk: typeof collector.streamChunks[0]) => {
-        chunkTimestamps.push(Date.now())
-        collector.handleStreamChunk(chunk)
-      }
-
       const result = await client.run({
         agent: DEFAULT_AGENT,
         prompt: 'Write a detailed explanation of async/await in JavaScript (at least 100 words)',
         handleEvent: collector.handleEvent,
-        handleStreamChunk: customChunkHandler,
+        handleStreamChunk: collector.handleStreamChunk,
       })
 
       assertNoAuthError(result.output)
 
-      // Should have multiple chunks
-      expect(chunkTimestamps.length).toBeGreaterThan(1)
-
-      // Verify chunks arrived over time (not all at the same millisecond)
-      if (chunkTimestamps.length > 2) {
-        const timeSpread = chunkTimestamps[chunkTimestamps.length - 1] - chunkTimestamps[0]
-        // The spread should be at least some milliseconds for a longer response
-        expect(timeSpread).toBeGreaterThanOrEqual(0)
-      }
+      // Should have multiple chunks - this validates incremental delivery
+      // If content arrived all at once, there would only be 1 chunk
+      expect(collector.streamChunks.length).toBeGreaterThan(1)
     },
     DEFAULT_TIMEOUT,
   )
@@ -111,13 +98,20 @@ describe('Integration: Stream Chunks', () => {
       const eventText = collector.getFullText()
       const streamText = collector.getFullStreamText()
 
-      // Both should contain meaningful content
-      // Note: They may not be exactly equal due to filtering, but should overlap
-      if (eventText.length > 0 && streamText.length > 0) {
-        // At least some content should be present in both
-        expect(eventText.length).toBeGreaterThan(0)
-        expect(streamText.length).toBeGreaterThan(0)
-      }
+      // Both should contain meaningful content - verify they're not empty
+      // This ensures the streaming actually worked and delivered content
+      expect(eventText.length).toBeGreaterThan(0)
+      expect(streamText.length).toBeGreaterThan(0)
+
+      // The stream text and event text should have some overlap
+      // (they come from the same response, just different callbacks)
+      // We check that at least one contains content from the other
+      const hasOverlap =
+        eventText.toLowerCase().includes('hello') ||
+        streamText.toLowerCase().includes('hello') ||
+        eventText.toLowerCase().includes('world') ||
+        streamText.toLowerCase().includes('world')
+      expect(hasOverlap).toBe(true)
     },
     DEFAULT_TIMEOUT,
   )
diff --git a/sdk/e2e/streaming/concurrent-streams.e2e.test.ts b/sdk/e2e/streaming/concurrent-streams.e2e.test.ts
@@ -35,7 +35,7 @@ describe('Streaming: Concurrent Streams', () => {
       const collector1 = new EventCollector()
       const collector2 = new EventCollector()
 
-      // Run two prompts concurrently
+      // Run two prompts concurrently with distinctive keywords
       const [result1, result2] = await Promise.all([
         client.run({
           agent: DEFAULT_AGENT,
@@ -64,9 +64,17 @@ describe('Streaming: Concurrent Streams', () => {
       expect(collector2.hasEventType('start')).toBe(true)
       expect(collector2.hasEventType('finish')).toBe(true)
 
-      // Event counts should be independent
-      expect(collector1.events.length).toBeGreaterThan(0)
-      expect(collector2.events.length).toBeGreaterThan(0)
+      // Verify streams contain expected content and aren't mixed
+      const text1 = collector1.getFullStreamText().toUpperCase()
+      const text2 = collector2.getFullStreamText().toUpperCase()
+
+      // Each stream should contain its expected keyword
+      expect(text1).toContain('ALPHA')
+      expect(text2).toContain('BETA')
+
+      // Streams should NOT contain the other stream's keyword (no mixing)
+      expect(text1).not.toContain('BETA')
+      expect(text2).not.toContain('ALPHA')
     },
     DEFAULT_TIMEOUT * 2,
   )
@@ -125,10 +133,26 @@ describe('Streaming: Concurrent Streams', () => {
         }),
       ])
 
-      // Each collector should have independent chunks
-      // The chunks shouldn't be identical (different prompts)
-      // Note: We can't guarantee exact output, but they should be independent
-      expect(collector1.streamChunks).not.toBe(collector2.streamChunks)
+      // Each collector should have independent chunks with different content
+      // Verify both collectors received content
+      expect(collector1.streamChunks.length).toBeGreaterThan(0)
+      expect(collector2.streamChunks.length).toBeGreaterThan(0)
+
+      // Get the full text from each stream
+      const text1 = collector1.getFullStreamText().toUpperCase()
+      const text2 = collector2.getFullStreamText().toUpperCase()
+
+      // Both should have content
+      expect(text1.length).toBeGreaterThan(0)
+      expect(text2.length).toBeGreaterThan(0)
+
+      // Verify each stream contains its expected keyword
+      expect(text1).toContain('FIRST')
+      expect(text2).toContain('SECOND')
+
+      // Verify streams are NOT mixed - each should only have its own content
+      expect(text1).not.toContain('SECOND')
+      expect(text2).not.toContain('FIRST')
     },
     DEFAULT_TIMEOUT * 2,
   )
diff --git a/sdk/e2e/streaming/subagent-streaming.e2e.test.ts b/sdk/e2e/streaming/subagent-streaming.e2e.test.ts
@@ -39,18 +39,20 @@ describe('Streaming: Subagent Streaming', () => {
       const subagentStarts = collector.getEventsByType('subagent_start')
       const subagentFinishes = collector.getEventsByType('subagent_finish')
 
-      // If subagents were spawned, starts and finishes should match
-      if (subagentStarts.length > 0) {
-        // Each started subagent should have a finish
-        for (const start of subagentStarts) {
-          const matchingFinish = subagentFinishes.find(
-            (f) => f.agentId === start.agentId,
-          )
-          // Subagent should eventually finish (or the run ends)
-          expect(start.agentId).toBeDefined()
-          expect(start.agentType).toBeDefined()
-          expect(start.displayName).toBeDefined()
-        }
+      // The prompt should trigger file search which spawns a subagent
+      // If no subagents were spawned, the test isn't validating what we intend
+      expect(subagentStarts.length).toBeGreaterThan(0)
+
+      // Each started subagent should have a finish
+      for (const start of subagentStarts) {
+        const matchingFinish = subagentFinishes.find(
+          (f) => f.agentId === start.agentId,
+        )
+        // Subagent should eventually finish
+        expect(matchingFinish).toBeDefined()
+        expect(start.agentId).toBeDefined()
+        expect(start.agentType).toBeDefined()
+        expect(start.displayName).toBeDefined()
       }
     },
     DEFAULT_TIMEOUT * 2,
@@ -72,6 +74,9 @@ describe('Streaming: Subagent Streaming', () => {
 
       const subagentStarts = collector.getEventsByType('subagent_start')
 
+      // Ensure we actually got subagent events to validate
+      expect(subagentStarts.length).toBeGreaterThan(0)
+
       for (const event of subagentStarts) {
         // Required fields
         expect(typeof event.agentId).toBe('string')
@@ -105,22 +110,26 @@ describe('Streaming: Subagent Streaming', () => {
         cwd: process.cwd(),
       })
 
+      // Verify we got subagent events (prompt should trigger file exploration)
+      const subagentStarts = collector.getEventsByType('subagent_start')
+      expect(subagentStarts.length).toBeGreaterThan(0)
+
       // Check for subagent chunks in stream
       const subagentChunks = collector.streamChunks.filter(
         (c): c is Extract<typeof c, { type: 'subagent_chunk' }> =>
           typeof c !== 'string' && c.type === 'subagent_chunk',
       )
 
-      // If there are subagent events, there might be subagent chunks
-      const subagentStarts = collector.getEventsByType('subagent_start')
-      if (subagentStarts.length > 0 && subagentChunks.length > 0) {
-        // Verify chunk structure
+      // If there are subagent chunks, verify their structure
+      if (subagentChunks.length > 0) {
         for (const chunk of subagentChunks) {
           expect(chunk.agentId).toBeDefined()
           expect(chunk.agentType).toBeDefined()
           expect(typeof chunk.chunk).toBe('string')
         }
       }
+      // Note: Subagent chunks may not always be present even with subagent events
+      // (e.g., if the subagent completes very quickly without streaming)
     },
     DEFAULT_TIMEOUT * 2,
   )
@@ -140,6 +149,9 @@ describe('Streaming: Subagent Streaming', () => {
 
       const subagentStarts = collector.getEventsByType('subagent_start')
 
+      // Ensure we got subagent events to validate uniqueness
+      expect(subagentStarts.length).toBeGreaterThan(0)
+
       // Check for duplicates by agentId
       const agentIds = subagentStarts.map((s) => s.agentId)
       const uniqueIds = new Set(agentIds)