From ce950c27eaa4f17972d5025ff5c17b5d4c345741 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Wed, 19 Nov 2025 22:48:53 +0000 Subject: [PATCH 1/4] Add composer workflow integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests cover core DAG execution patterns: - Sequential two-step workflow - Parallel fan-out with aggregation - Diamond DAG (diverge/converge) - Parameter template expansion Uses table-driven approach for maintainability. Additional tests for conditionals and nested data blocked by issue #2669. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- test/integration/vmcp/composer_basic_test.go | 252 +++++++++++++++++++ test/integration/vmcp/helpers/vmcp_server.go | 27 +- 2 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 test/integration/vmcp/composer_basic_test.go diff --git a/test/integration/vmcp/composer_basic_test.go b/test/integration/vmcp/composer_basic_test.go new file mode 100644 index 000000000..507b856d3 --- /dev/null +++ b/test/integration/vmcp/composer_basic_test.go @@ -0,0 +1,252 @@ +package vmcp_test + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/stacklok/toolhive/pkg/vmcp/composer" + "github.com/stacklok/toolhive/test/integration/vmcp/helpers" +) + +// composerTestCase defines a declarative test case for composer workflows. +type composerTestCase struct { + name string + tools []helpers.BackendTool // All tools (will be grouped by backend automatically) + workflow *composer.WorkflowDefinition + params map[string]any + wantStrings []string // Strings that must appear in output +} + +// runComposerTest executes a single composer test case. +func runComposerTest(t *testing.T, tc composerTestCase) { + t.Helper() + t.Parallel() + + ctx := context.Background() + + // Create a single backend with all tools + backend := helpers.CreateBackendServer(t, tc.tools, helpers.WithBackendName("test-backend")) + defer backend.Close() + + // Create vMCP server + vmcpServer := helpers.NewVMCPServer(ctx, t, helpers.BackendsFromURL("test", backend.URL+"/mcp"), + helpers.WithPrefixConflictResolution("{workload}_"), + helpers.WithWorkflows([]*composer.WorkflowDefinition{tc.workflow}), + ) + + // Create client and execute workflow + client := helpers.NewMCPClient(ctx, t, "http://"+vmcpServer.Address()+"/mcp") + defer client.Close() + + result := client.CallTool(ctx, tc.workflow.Name, tc.params) + text := helpers.AssertToolCallSuccess(t, result) + + // Verify expected strings + helpers.AssertTextContains(t, text, tc.wantStrings...) + + t.Logf("Test %q completed: %s", tc.name, text) +} + +// TestVMCPComposer runs table-driven tests for composer workflow patterns. +// Note: Tests use plain text output instead of structured JSON fields due to +// a known bug where MCP tool results aren't parsed into structured data. +// See: https://github.com/stacklok/toolhive/issues/2669 +func TestVMCPComposer(t *testing.T) { + t.Parallel() + + tests := []composerTestCase{ + { + name: "SequentialTwoSteps", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("generate", "Generate message", + func(_ context.Context, args map[string]any) string { + prefix, _ := args["prefix"].(string) + return prefix + " - generated" + }), + helpers.NewBackendTool("process", "Process message", + func(_ context.Context, args map[string]any) string { + msg, _ := args["message"].(string) + suffix, _ := args["suffix"].(string) + return strings.ToUpper(msg) + " " + suffix + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "sequential", + Description: "Two-step sequential workflow", + Parameters: map[string]any{ + "prefix": map[string]any{"type": "string"}, + "suffix": map[string]any{"type": "string"}, + }, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "gen", Type: composer.StepTypeTool, Tool: "test_generate", + Arguments: map[string]any{"prefix": "{{.params.prefix}}"}}, + {ID: "proc", Type: composer.StepTypeTool, Tool: "test_process", + Arguments: map[string]any{ + "message": "{{.steps.gen.output.text}}", + "suffix": "{{.params.suffix}}", + }, + DependsOn: []string{"gen"}}, + }, + }, + params: map[string]any{"prefix": "Hello", "suffix": "[DONE]"}, + wantStrings: []string{"HELLO", "GENERATED", "[DONE]"}, + }, + { + name: "ParallelFanOut", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("task_a", "Task A", func(_ context.Context, _ map[string]any) string { return "A" }), + helpers.NewBackendTool("task_b", "Task B", func(_ context.Context, _ map[string]any) string { return "B" }), + helpers.NewBackendTool("task_c", "Task C", func(_ context.Context, _ map[string]any) string { return "C" }), + helpers.NewBackendTool("aggregate", "Aggregate", + func(_ context.Context, args map[string]any) string { + a, _ := args["a"].(string) + b, _ := args["b"].(string) + c, _ := args["c"].(string) + return "COMBINED:" + a + "," + b + "," + c + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "fanout", + Description: "Parallel fan-out workflow", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "a", Type: composer.StepTypeTool, Tool: "test_task_a", Arguments: map[string]any{}}, + {ID: "b", Type: composer.StepTypeTool, Tool: "test_task_b", Arguments: map[string]any{}}, + {ID: "c", Type: composer.StepTypeTool, Tool: "test_task_c", Arguments: map[string]any{}}, + {ID: "agg", Type: composer.StepTypeTool, Tool: "test_aggregate", + Arguments: map[string]any{ + "a": "{{.steps.a.output.text}}", + "b": "{{.steps.b.output.text}}", + "c": "{{.steps.c.output.text}}", + }, + DependsOn: []string{"a", "b", "c"}}, + }, + }, + params: map[string]any{}, + wantStrings: []string{"COMBINED", "A", "B", "C"}, + }, + { + name: "DiamondDAG", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("start", "Start", + func(_ context.Context, args map[string]any) string { + input, _ := args["input"].(string) + return "started:" + input + }), + helpers.NewBackendTool("left", "Left branch", + func(_ context.Context, args map[string]any) string { + data, _ := args["data"].(string) + return "L(" + data + ")" + }), + helpers.NewBackendTool("right", "Right branch", + func(_ context.Context, args map[string]any) string { + data, _ := args["data"].(string) + return "R(" + data + ")" + }), + helpers.NewBackendTool("merge", "Merge", + func(_ context.Context, args map[string]any) string { + l, _ := args["left"].(string) + r, _ := args["right"].(string) + return "MERGED[" + l + "+" + r + "]" + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "diamond", + Description: "Diamond-shaped DAG", + Parameters: map[string]any{"input": map[string]any{"type": "string"}}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "start", Type: composer.StepTypeTool, Tool: "test_start", + Arguments: map[string]any{"input": "{{.params.input}}"}}, + {ID: "left", Type: composer.StepTypeTool, Tool: "test_left", + Arguments: map[string]any{"data": "{{.steps.start.output.text}}"}, + DependsOn: []string{"start"}}, + {ID: "right", Type: composer.StepTypeTool, Tool: "test_right", + Arguments: map[string]any{"data": "{{.steps.start.output.text}}"}, + DependsOn: []string{"start"}}, + {ID: "merge", Type: composer.StepTypeTool, Tool: "test_merge", + Arguments: map[string]any{ + "left": "{{.steps.left.output.text}}", + "right": "{{.steps.right.output.text}}", + }, + DependsOn: []string{"left", "right"}}, + }, + }, + params: map[string]any{"input": "test"}, + wantStrings: []string{"MERGED", "L(", "R(", "started:test"}, + }, + { + name: "ParameterPassing", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("echo", "Echo params", + func(_ context.Context, args map[string]any) string { + name, _ := args["name"].(string) + count, _ := args["count"].(string) + return "name=" + name + ",count=" + count + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "params", + Description: "Parameter passing test", + Parameters: map[string]any{ + "name": map[string]any{"type": "string"}, + "count": map[string]any{"type": "string"}, + }, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "echo", Type: composer.StepTypeTool, Tool: "test_echo", + Arguments: map[string]any{ + "name": "{{.params.name}}", + "count": "{{.params.count}}", + }}, + }, + }, + params: map[string]any{"name": "testuser", "count": "42"}, + wantStrings: []string{"name=testuser", "count=42"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + runComposerTest(t, tc) + }) + } +} + +// TestVMCPComposer_WorkflowListed verifies workflow appears in tool list. +func TestVMCPComposer_WorkflowListed(t *testing.T) { + t.Parallel() + ctx := context.Background() + + backend := helpers.CreateBackendServer(t, []helpers.BackendTool{ + helpers.NewBackendTool("noop", "No-op", func(_ context.Context, _ map[string]any) string { return "ok" }), + }, helpers.WithBackendName("test")) + defer backend.Close() + + workflow := &composer.WorkflowDefinition{ + Name: "my_workflow", + Description: "Test workflow", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "step1", Type: composer.StepTypeTool, Tool: "test_noop", Arguments: map[string]any{}}, + }, + } + + vmcpServer := helpers.NewVMCPServer(ctx, t, helpers.BackendsFromURL("test", backend.URL+"/mcp"), + helpers.WithPrefixConflictResolution("{workload}_"), + helpers.WithWorkflows([]*composer.WorkflowDefinition{workflow}), + ) + + client := helpers.NewMCPClient(ctx, t, "http://"+vmcpServer.Address()+"/mcp") + defer client.Close() + + toolsResp := client.ListTools(ctx) + toolNames := helpers.GetToolNames(toolsResp) + assert.Contains(t, toolNames, "my_workflow", "Workflow should be exposed as a tool") +} diff --git a/test/integration/vmcp/helpers/vmcp_server.go b/test/integration/vmcp/helpers/vmcp_server.go index f001a575c..12b6bc570 100644 --- a/test/integration/vmcp/helpers/vmcp_server.go +++ b/test/integration/vmcp/helpers/vmcp_server.go @@ -15,6 +15,7 @@ import ( "github.com/stacklok/toolhive/pkg/vmcp/auth/factory" authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types" vmcpclient "github.com/stacklok/toolhive/pkg/vmcp/client" + "github.com/stacklok/toolhive/pkg/vmcp/composer" "github.com/stacklok/toolhive/pkg/vmcp/discovery" "github.com/stacklok/toolhive/pkg/vmcp/router" vmcpserver "github.com/stacklok/toolhive/pkg/vmcp/server" @@ -58,6 +59,11 @@ func WithMetadata(key, value string) func(*vmcptypes.Backend) { } } +// BackendsFromURL creates a single-element backend slice for simple test cases. +func BackendsFromURL(id, url string) []vmcptypes.Backend { + return []vmcptypes.Backend{NewBackend(id, WithURL(url))} +} + // VMCPServerOption is a functional option for configuring a vMCP test server. type VMCPServerOption func(*vmcpServerConfig) @@ -65,6 +71,7 @@ type VMCPServerOption func(*vmcpServerConfig) type vmcpServerConfig struct { conflictStrategy string prefixFormat string + workflows []*composer.WorkflowDefinition } // WithPrefixConflictResolution configures prefix-based conflict resolution. @@ -75,6 +82,13 @@ func WithPrefixConflictResolution(format string) VMCPServerOption { } } +// WithWorkflows configures workflow definitions for the vMCP server. +func WithWorkflows(workflows []*composer.WorkflowDefinition) VMCPServerOption { + return func(c *vmcpServerConfig) { + c.workflows = workflows + } +} + // getFreePort returns an available TCP port on localhost. // This is used for parallel test execution to avoid port conflicts. func getFreePort(tb testing.TB) int { @@ -146,6 +160,17 @@ func NewVMCPServer( // Create router rtr := router.NewDefaultRouter() + // Convert workflows slice to map keyed by workflow name + // Note: This is a simple conversion for test-created WorkflowDefinitions. + // For config-based workflows, use server.ConvertConfigToWorkflowDefinitions() instead. + var workflowsMap map[string]*composer.WorkflowDefinition + if len(config.workflows) > 0 { + workflowsMap = make(map[string]*composer.WorkflowDefinition, len(config.workflows)) + for _, wf := range config.workflows { + workflowsMap[wf.Name] = wf + } + } + // Create vMCP server with test-specific defaults vmcpServer, err := vmcpserver.New(&vmcpserver.Config{ Name: "test-vmcp", @@ -153,7 +178,7 @@ func NewVMCPServer( Host: "127.0.0.1", Port: getFreePort(tb), // Get a random available port for parallel test execution AuthMiddleware: auth.AnonymousMiddleware, - }, rtr, backendClient, discoveryMgr, backends, nil) // nil for workflowDefs in tests + }, rtr, backendClient, discoveryMgr, backends, workflowsMap) require.NoError(tb, err, "failed to create vMCP server") // Start server automatically From e7831a4d7f0c10f62a83c8376abd53dd2056d844 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Mon, 24 Nov 2025 15:10:50 +0000 Subject: [PATCH 2/4] Add error handling tests for composer workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the vMCP composer integration tests with error handling scenarios: - SingleStepToolError: basic error propagation - AbortMode_StopsOnFirstError: abort mode stops on first failure - ContinueMode_PartialSuccess: continue mode returns partial results - MultipleParallelErrors: handles multiple concurrent failures Also adds test infrastructure for error scenarios: - ErrorHandler field on BackendTool for returning errors - Helper functions: NewErrorBackendTool, NewSlowBackendTool, NewTransientBackendTool - AssertToolCallError helper for asserting error responses 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- test/integration/vmcp/composer_basic_test.go | 96 ++++++++++++++++++++ test/integration/vmcp/helpers/backend.go | 76 +++++++++++++++- test/integration/vmcp/helpers/mcp_client.go | 30 ++++++ 3 files changed, 199 insertions(+), 3 deletions(-) diff --git a/test/integration/vmcp/composer_basic_test.go b/test/integration/vmcp/composer_basic_test.go index 507b856d3..914c076a7 100644 --- a/test/integration/vmcp/composer_basic_test.go +++ b/test/integration/vmcp/composer_basic_test.go @@ -18,6 +18,7 @@ type composerTestCase struct { workflow *composer.WorkflowDefinition params map[string]any wantStrings []string // Strings that must appear in output + wantError bool // Expect workflow to return an error } // runComposerTest executes a single composer test case. @@ -42,6 +43,17 @@ func runComposerTest(t *testing.T, tc composerTestCase) { defer client.Close() result := client.CallTool(ctx, tc.workflow.Name, tc.params) + + // Handle error vs success expectations + if tc.wantError { + text := helpers.AssertToolCallError(t, result) + if len(tc.wantStrings) > 0 { + helpers.AssertTextContains(t, text, tc.wantStrings...) + } + t.Logf("Test %q completed with expected error: %s", tc.name, text) + return + } + text := helpers.AssertToolCallSuccess(t, result) // Verify expected strings @@ -209,6 +221,90 @@ func TestVMCPComposer(t *testing.T) { params: map[string]any{"name": "testuser", "count": "42"}, wantStrings: []string{"name=testuser", "count=42"}, }, + // Error handling tests + { + name: "SingleStepToolError", + tools: []helpers.BackendTool{ + helpers.NewErrorBackendTool("failing_tool", "Always fails", "TOOL_ERROR: operation failed"), + }, + workflow: &composer.WorkflowDefinition{ + Name: "single_error", + Description: "Single step that errors", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "fail", Type: composer.StepTypeTool, Tool: "test_failing_tool", Arguments: map[string]any{}}, + }, + }, + params: map[string]any{}, + wantError: true, + wantStrings: []string{"TOOL_ERROR"}, + }, + { + name: "AbortMode_StopsOnFirstError", + tools: []helpers.BackendTool{ + helpers.NewErrorBackendTool("error_step", "Fails", "STEP_FAILED"), + helpers.NewBackendTool("success_step", "Succeeds", func(_ context.Context, _ map[string]any) string { return "SUCCESS" }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "abort_test", + Description: "Abort on first error", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "error", Type: composer.StepTypeTool, Tool: "test_error_step", Arguments: map[string]any{}}, + {ID: "success", Type: composer.StepTypeTool, Tool: "test_success_step", Arguments: map[string]any{}, DependsOn: []string{"error"}}, + }, + }, + params: map[string]any{}, + wantError: true, + wantStrings: []string{"STEP_FAILED"}, + }, + { + name: "ContinueMode_PartialSuccess", + tools: []helpers.BackendTool{ + helpers.NewErrorBackendTool("fail_a", "Fails A", "ERROR_A"), + helpers.NewBackendTool("ok_b", "OK B", func(_ context.Context, _ map[string]any) string { return "SUCCESS_B" }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "continue_test", + Description: "Continue despite errors", + Parameters: map[string]any{}, + FailureMode: "continue", + Steps: []composer.WorkflowStep{ + // Independent parallel steps + {ID: "a", Type: composer.StepTypeTool, Tool: "test_fail_a", Arguments: map[string]any{}}, + {ID: "b", Type: composer.StepTypeTool, Tool: "test_ok_b", Arguments: map[string]any{}}, + }, + }, + params: map[string]any{}, + // Continue mode returns success with partial results + wantError: false, + wantStrings: []string{"SUCCESS_B"}, + }, + { + name: "MultipleParallelErrors", + tools: []helpers.BackendTool{ + helpers.NewErrorBackendTool("fail_1", "Fails 1", "ERROR_1"), + helpers.NewErrorBackendTool("fail_2", "Fails 2", "ERROR_2"), + helpers.NewErrorBackendTool("fail_3", "Fails 3", "ERROR_3"), + }, + workflow: &composer.WorkflowDefinition{ + Name: "multi_error", + Description: "Multiple parallel failures", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + {ID: "f1", Type: composer.StepTypeTool, Tool: "test_fail_1", Arguments: map[string]any{}}, + {ID: "f2", Type: composer.StepTypeTool, Tool: "test_fail_2", Arguments: map[string]any{}}, + {ID: "f3", Type: composer.StepTypeTool, Tool: "test_fail_3", Arguments: map[string]any{}}, + }, + }, + params: map[string]any{}, + wantError: true, + // At least one error message should appear + wantStrings: []string{"ERROR"}, + }, } for _, tc := range tests { diff --git a/test/integration/vmcp/helpers/backend.go b/test/integration/vmcp/helpers/backend.go index 3bcfa2a4a..b4d81526f 100644 --- a/test/integration/vmcp/helpers/backend.go +++ b/test/integration/vmcp/helpers/backend.go @@ -5,7 +5,9 @@ import ( "context" "net/http" "net/http/httptest" + "sync/atomic" "testing" + "time" "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" @@ -31,6 +33,11 @@ type BackendTool struct { // The handler receives the tool arguments as a map and should return // a string representation of the result (typically JSON). Handler func(ctx context.Context, args map[string]any) string + + // ErrorHandler is an alternative handler that can return errors. + // If set, this takes precedence over Handler. + // Returns (result, isError). + ErrorHandler func(ctx context.Context, args map[string]any) (string, bool) } // NewBackendTool creates a new BackendTool with sensible defaults. @@ -58,6 +65,60 @@ func NewBackendTool(name, description string, handler func(ctx context.Context, } } +// NewErrorBackendTool creates a BackendTool that always returns an error. +// This is useful for testing error handling and failure modes. +func NewErrorBackendTool(name, description, errorMsg string) BackendTool { + return BackendTool{ + Name: name, + Description: description, + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{}, + }, + ErrorHandler: func(_ context.Context, _ map[string]any) (string, bool) { + return errorMsg, true + }, + } +} + +// NewSlowBackendTool creates a BackendTool that delays before returning. +// This is useful for testing timeout behavior. +func NewSlowBackendTool(name, description string, delay time.Duration, result string) BackendTool { + return BackendTool{ + Name: name, + Description: description, + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{}, + }, + Handler: func(_ context.Context, _ map[string]any) string { + time.Sleep(delay) + return result + }, + } +} + +// NewTransientBackendTool creates a BackendTool that fails a specified number of times before succeeding. +// This is useful for testing retry behavior. +func NewTransientBackendTool(name, description string, failCount int32, errorMsg, successMsg string) BackendTool { + var attempts int32 + return BackendTool{ + Name: name, + Description: description, + InputSchema: mcp.ToolInputSchema{ + Type: "object", + Properties: map[string]any{}, + }, + ErrorHandler: func(_ context.Context, _ map[string]any) (string, bool) { + current := atomic.AddInt32(&attempts, 1) + if current <= failCount { + return errorMsg, true + } + return successMsg, false + }, + } +} + // contextKey is a private type for context keys to avoid collisions. type contextKey string @@ -198,14 +259,23 @@ func CreateBackendServer(tb testing.TB, tools []BackendTool, opts ...BackendServ args = make(map[string]any) } - // Invoke the tool handler - result := tool.Handler(ctx, args) + // Invoke the appropriate handler + var result string + var isError bool + + if tool.ErrorHandler != nil { + result, isError = tool.ErrorHandler(ctx, args) + } else { + result = tool.Handler(ctx, args) + isError = false + } - // Return successful result with text content + // Return result with text content return &mcp.CallToolResult{ Content: []mcp.Content{ mcp.NewTextContent(result), }, + IsError: isError, }, nil }, ) diff --git a/test/integration/vmcp/helpers/mcp_client.go b/test/integration/vmcp/helpers/mcp_client.go index 10a5a54b4..82fd531a7 100644 --- a/test/integration/vmcp/helpers/mcp_client.go +++ b/test/integration/vmcp/helpers/mcp_client.go @@ -196,6 +196,36 @@ func AssertToolCallSuccess(tb testing.TB, result *mcp.CallToolResult) string { return text } +// AssertToolCallError asserts that a tool call failed (IsError=true) +// and returns the concatenated text content from all content items. +// +// The function uses require assertions, so it will fail the test immediately +// if the tool call was successful. +// +// Example: +// +// result := client.CallTool(ctx, "failing_workflow", map[string]any{}) +// text := helpers.AssertToolCallError(t, result) +// assert.Contains(t, text, "error message") +func AssertToolCallError(tb testing.TB, result *mcp.CallToolResult) string { + tb.Helper() + + require.NotNil(tb, result, "tool call result should not be nil") + require.True(tb, result.IsError, "tool call should return an error, got success: %v", result.Content) + + var textParts []string + for _, content := range result.Content { + if textContent, ok := mcp.AsTextContent(content); ok { + textParts = append(textParts, textContent.Text) + } + } + + text := strings.Join(textParts, "\n") + tb.Logf("Tool call failed as expected with %d content items, total text length: %d", len(result.Content), len(text)) + + return text +} + // AssertTextContains asserts that text contains all expected substrings. // This is a variadic helper for checking multiple content expectations in tool results. // From a92662ab00c1d9b3d65f30969a5ea729d8cf2b80 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Wed, 26 Nov 2025 15:22:16 +0000 Subject: [PATCH 3/4] Use OutputConfig for structured outputs in composer tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates integration tests to use the proper OutputConfig feature instead of relying on the plain text workaround. This aligns with the new structured output support added in commit 51ac7d80. - Add Output config to all successful workflow test cases - Remove outdated note about issue #2669 workaround - Error test cases intentionally left without Output config 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- test/integration/vmcp/composer_basic_test.go | 58 +++++++++++++++++++- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/test/integration/vmcp/composer_basic_test.go b/test/integration/vmcp/composer_basic_test.go index 914c076a7..4c84ec15e 100644 --- a/test/integration/vmcp/composer_basic_test.go +++ b/test/integration/vmcp/composer_basic_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stacklok/toolhive/pkg/vmcp/composer" + "github.com/stacklok/toolhive/pkg/vmcp/config" "github.com/stacklok/toolhive/test/integration/vmcp/helpers" ) @@ -63,9 +64,6 @@ func runComposerTest(t *testing.T, tc composerTestCase) { } // TestVMCPComposer runs table-driven tests for composer workflow patterns. -// Note: Tests use plain text output instead of structured JSON fields due to -// a known bug where MCP tool results aren't parsed into structured data. -// See: https://github.com/stacklok/toolhive/issues/2669 func TestVMCPComposer(t *testing.T) { t.Parallel() @@ -103,6 +101,15 @@ func TestVMCPComposer(t *testing.T) { }, DependsOn: []string{"gen"}}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "result": { + Type: "string", + Description: "Processed result", + Value: "{{.steps.proc.output.text}}", + }, + }, + }, }, params: map[string]any{"prefix": "Hello", "suffix": "[DONE]"}, wantStrings: []string{"HELLO", "GENERATED", "[DONE]"}, @@ -138,6 +145,15 @@ func TestVMCPComposer(t *testing.T) { }, DependsOn: []string{"a", "b", "c"}}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "combined": { + Type: "string", + Description: "Combined result from all tasks", + Value: "{{.steps.agg.output.text}}", + }, + }, + }, }, params: map[string]any{}, wantStrings: []string{"COMBINED", "A", "B", "C"}, @@ -188,6 +204,15 @@ func TestVMCPComposer(t *testing.T) { }, DependsOn: []string{"left", "right"}}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "merged": { + Type: "string", + Description: "Merged result from diamond DAG", + Value: "{{.steps.merge.output.text}}", + }, + }, + }, }, params: map[string]any{"input": "test"}, wantStrings: []string{"MERGED", "L(", "R(", "started:test"}, @@ -217,6 +242,15 @@ func TestVMCPComposer(t *testing.T) { "count": "{{.params.count}}", }}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "echo_result": { + Type: "string", + Description: "Echo result with parameters", + Value: "{{.steps.echo.output.text}}", + }, + }, + }, }, params: map[string]any{"name": "testuser", "count": "42"}, wantStrings: []string{"name=testuser", "count=42"}, @@ -276,6 +310,15 @@ func TestVMCPComposer(t *testing.T) { {ID: "a", Type: composer.StepTypeTool, Tool: "test_fail_a", Arguments: map[string]any{}}, {ID: "b", Type: composer.StepTypeTool, Tool: "test_ok_b", Arguments: map[string]any{}}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "result_b": { + Type: "string", + Description: "Result from successful step B", + Value: "{{.steps.b.output.text}}", + }, + }, + }, }, params: map[string]any{}, // Continue mode returns success with partial results @@ -332,6 +375,15 @@ func TestVMCPComposer_WorkflowListed(t *testing.T) { Steps: []composer.WorkflowStep{ {ID: "step1", Type: composer.StepTypeTool, Tool: "test_noop", Arguments: map[string]any{}}, }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "result": { + Type: "string", + Description: "Noop result", + Value: "{{.steps.step1.output.text}}", + }, + }, + }, } vmcpServer := helpers.NewVMCPServer(ctx, t, helpers.BackendsFromURL("test", backend.URL+"/mcp"), From ecd6480d771e3c5593bba09555417cad1783e657 Mon Sep 17 00:00:00 2001 From: Jakub Hrozek Date: Wed, 26 Nov 2025 22:48:42 +0000 Subject: [PATCH 4/4] Add edge case tests for composer workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 4 new integration tests covering edge cases identified during QE analysis: - StepTimeoutDuringRetry: Verifies timeout is respected during retry attempts - ConditionalStepReferencingSkippedStep: Documents behavior when template references output from a skipped step (returns ) - OutputMissingRequiredField: Documents that Required field doesn't validate templated values are non-empty (returns ) - ContinueModeWithFailedDependency: Documents that dependent steps still run when their dependency fails in continue mode (receives ) Tests include DISCUSSION comments noting behaviors that may warrant review. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- test/integration/vmcp/composer_basic_test.go | 255 +++++++++++++++++++ 1 file changed, 255 insertions(+) diff --git a/test/integration/vmcp/composer_basic_test.go b/test/integration/vmcp/composer_basic_test.go index 4c84ec15e..3e5ba36be 100644 --- a/test/integration/vmcp/composer_basic_test.go +++ b/test/integration/vmcp/composer_basic_test.go @@ -4,6 +4,7 @@ import ( "context" "strings" "testing" + "time" "github.com/stretchr/testify/assert" @@ -348,6 +349,260 @@ func TestVMCPComposer(t *testing.T) { // At least one error message should appear wantStrings: []string{"ERROR"}, }, + // Advanced error handling and edge case tests + // + // Test: StepTimeoutDuringRetry + // Verifies that step timeout is respected even when retry is configured. + // The slow tool takes longer than the step timeout, so it should fail due + // to timeout rather than exhausting all retry attempts. + { + name: "StepTimeoutDuringRetry", + tools: []helpers.BackendTool{ + // Tool that delays 500ms before returning - longer than the 50ms step timeout + helpers.NewSlowBackendTool("slow_tool", "Slow tool that times out", 500*time.Millisecond, "SUCCESS"), + }, + workflow: &composer.WorkflowDefinition{ + Name: "timeout_retry", + Description: "Tests that timeout is respected during retry attempts", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + { + ID: "slow_step", + Type: composer.StepTypeTool, + Tool: "test_slow_tool", + Arguments: map[string]any{}, + // Step timeout is shorter than the tool delay + Timeout: 50 * time.Millisecond, + OnError: &composer.ErrorHandler{ + Action: "retry", + RetryCount: 3, + RetryDelay: 10 * time.Millisecond, + }, + }, + }, + }, + params: map[string]any{}, + wantError: true, + // Should fail due to timeout - the error message should indicate timeout or context deadline + wantStrings: []string{}, + }, + // Test: ConditionalStepReferencingSkippedStep + // Tests template expansion when a step references output from a skipped step. + // Step 1 has a condition that evaluates to false (gets skipped). + // Step 2 depends on Step 1 and tries to use its output in arguments. + // + // DISCUSSION: When a step is skipped, referencing its output via template + // (e.g., {{.steps.step1.output.text}}) returns "" (Go template default + // for missing map keys). The workflow succeeds and the dependent step runs with + // this placeholder value. + // + // This may or may not be the desired behavior. Alternatives to consider: + // - Fail the workflow when referencing skipped step output + // - Skip dependent steps automatically when their dependency is skipped + // - Provide an explicit "default" value mechanism for skipped step references + { + name: "ConditionalStepReferencingSkippedStep", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("producer", "Produces data", + func(_ context.Context, _ map[string]any) string { + return "PRODUCED_DATA" + }), + helpers.NewBackendTool("consumer", "Consumes data", + func(_ context.Context, args map[string]any) string { + data, _ := args["data"].(string) + // The data will be "" from the skipped step's template expansion + return "RECEIVED:" + data + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "skipped_ref", + Description: "Tests referencing output from a skipped step", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + { + ID: "step1", + Type: composer.StepTypeTool, + Tool: "test_producer", + Arguments: map[string]any{}, + // Condition that always evaluates to false - step will be skipped + Condition: "false", + }, + { + ID: "step2", + Type: composer.StepTypeTool, + Tool: "test_consumer", + Arguments: map[string]any{ + // Reference output from skipped step - will expand to "" + "data": "{{.steps.step1.output.text}}", + }, + DependsOn: []string{"step1"}, + }, + }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "result": { + Type: "string", + Description: "Consumer result", + Value: "{{.steps.step2.output.text}}", + }, + }, + }, + }, + params: map[string]any{}, + // The workflow succeeds. Step2 receives "" from the skipped step's + // template expansion (Go template default for missing keys). + // Note: "" is JSON-escaped as "\u003cno value\u003e" in the response. + wantError: false, + wantStrings: []string{"RECEIVED:", "no value"}, + }, + // Test: OutputMissingRequiredField + // Tests output construction when a required output field template references + // a non-existent field. + // + // NOTE: The `Required` field in OutputConfig does NOT validate that the + // templated value is non-empty. When a template references a non-existent + // field (e.g., {{.steps.step1.output.nonexistent_field}}), it expands to + // "" (Go template default) WITHOUT failing. + // + // DISCUSSION: This is potentially surprising behavior that may warrant a fix. + // Users may expect `Required` to ensure meaningful values are present. + // Consider: + // - Adding validation that required fields don't contain "" + // - Using Go template's "missingkey=error" option for strict mode + // - Documenting this behavior clearly if it's intentional + { + name: "OutputMissingRequiredField", + tools: []helpers.BackendTool{ + helpers.NewBackendTool("simple_tool", "Returns simple output", + func(_ context.Context, _ map[string]any) string { + // Returns text output but not a "special_field" that the output requires + return "simple_result" + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "missing_required", + Description: "Tests output with template referencing non-existent field", + Parameters: map[string]any{}, + FailureMode: "abort", + Steps: []composer.WorkflowStep{ + { + ID: "step1", + Type: composer.StepTypeTool, + Tool: "test_simple_tool", + Arguments: map[string]any{}, + }, + }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "result": { + Type: "string", + Description: "Result field referencing non-existent step output field", + // Reference a field that doesn't exist in the step output + // This will expand to "" instead of failing + Value: "{{.steps.step1.output.nonexistent_field}}", + }, + }, + // Required does not validate that the value is non-empty + Required: []string{"result"}, + }, + }, + params: map[string]any{}, + // The workflow succeeds - Required does not enforce non-empty values. + // The result field will contain "" from the template expansion. + // Note: "" is JSON-escaped as "\u003cno value\u003e" in the response. + wantError: false, + wantStrings: []string{"no value"}, + }, + // Test: ContinueModeWithFailedDependency + // Tests what happens in "continue" failure mode when a dependent step + // tries to use output from a failed step. + // - Step A fails with an error + // - Step B depends on Step A and tries to use its output + // - Step C is independent (no dependencies) + // + // NOTE: In "continue" mode, dependent steps still run even when their + // dependency failed. They receive "" placeholder for the failed + // step's output. Step B is NOT skipped - it executes with the placeholder. + // + // DISCUSSION: This may or may not be desired behavior. Alternative approaches: + // - Skip dependent steps automatically when their dependency fails + // - Allow steps to declare "skip_on_dependency_failure: true" + // - Provide a way to detect failed dependencies in conditions + // + // Current behavior means dependent steps must handle "" gracefully + // or the workflow may produce unexpected results. + { + name: "ContinueModeWithFailedDependency", + tools: []helpers.BackendTool{ + helpers.NewErrorBackendTool("fail_first", "Always fails", "DEPENDENCY_FAILED"), + helpers.NewBackendTool("use_output", "Uses output from dependency", + func(_ context.Context, args map[string]any) string { + data, _ := args["data"].(string) + // data will be "" from the failed dependency + return "PROCESSED:" + data + }), + helpers.NewBackendTool("independent", "Independent step", + func(_ context.Context, _ map[string]any) string { + return "INDEPENDENT_SUCCESS" + }), + }, + workflow: &composer.WorkflowDefinition{ + Name: "continue_failed_dep", + Description: "Tests continue mode when a step depends on a failed step", + Parameters: map[string]any{}, + FailureMode: "continue", + Steps: []composer.WorkflowStep{ + { + ID: "step_a", + Type: composer.StepTypeTool, + Tool: "test_fail_first", + Arguments: map[string]any{}, + }, + { + ID: "step_b", + Type: composer.StepTypeTool, + Tool: "test_use_output", + Arguments: map[string]any{ + // Reference output from failed step - will expand to "" + "data": "{{.steps.step_a.output.text}}", + }, + DependsOn: []string{"step_a"}, + }, + { + // Independent step that succeeds regardless + ID: "step_c", + Type: composer.StepTypeTool, + Tool: "test_independent", + Arguments: map[string]any{}, + }, + }, + Output: &config.OutputConfig{ + Properties: map[string]config.OutputProperty{ + "independent_result": { + Type: "string", + Description: "Result from independent step", + Value: "{{.steps.step_c.output.text}}", + }, + "dependent_result": { + Type: "string", + Description: "Result from dependent step (runs with placeholder from failed dep)", + Value: "{{.steps.step_b.output.text}}", + Default: "SKIPPED_DUE_TO_DEPENDENCY", + }, + }, + }, + }, + params: map[string]any{}, + // In continue mode, the workflow succeeds with partial results. + // Step B runs (not skipped!) with "" from failed step_a. + // Step C (independent) also succeeds. + // Both outputs are present in the result. + // Note: "" is JSON-escaped as "\u003cno value\u003e" in the response. + wantError: false, + wantStrings: []string{"INDEPENDENT_SUCCESS", "PROCESSED:", "no value"}, + }, } for _, tc := range tests {