From 71ed02b183f1c84044fb9e104928f7d2e609a858 Mon Sep 17 00:00:00 2001 From: AnantKumar17 Date: Wed, 6 May 2026 17:10:45 +0530 Subject: [PATCH 1/4] fix(controller): add timeout to RemoteMCPServer registration Fixes the issue 1785 Signed-off-by: AnantKumar17 --- .../controller/reconciler/reconciler.go | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/go/core/internal/controller/reconciler/reconciler.go b/go/core/internal/controller/reconciler/reconciler.go index a4bdbe18a..817f69bb6 100644 --- a/go/core/internal/controller/reconciler/reconciler.go +++ b/go/core/internal/controller/reconciler/reconciler.go @@ -10,6 +10,7 @@ import ( "reflect" "slices" "strings" + "time" "github.com/hashicorp/go-multierror" reconcilerutils "github.com/kagent-dev/kagent/go/core/internal/controller/reconciler/utils" @@ -45,6 +46,13 @@ var ( const ( AgentReadyReasonDeploymentReady = "DeploymentReady" AgentReadyReasonWorkloadReady = "WorkloadReady" + + // mcpRegistrationTimeout is the deadline applied to each RemoteMCPServer + // registration attempt (header resolution + MCP connect + tool listing). + // A hung or unreachable endpoint is bounded to this duration, ensuring the + // reconciler goroutine is always released and does not block subsequent + // RemoteMCPServer reconciliations. + mcpRegistrationTimeout = 30 * time.Second ) type KagentReconciler interface { @@ -576,9 +584,11 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r GroupKind: server.GroupVersionKind().GroupKind().String(), } + l.Info("registering remote MCP server", "url", server.Spec.URL, "protocol", server.Spec.Protocol) + start := time.Now() tools, err := a.upsertToolServerForRemoteMCPServer(ctx, dbServer, server) if err != nil { - l.Error(err, "failed to upsert tool server for remote mcp server") + l.Error(err, "failed to upsert tool server for remote mcp server", "duration", time.Since(start)) // Fetch previously discovered tools from database if possible var discoveryErr error @@ -586,6 +596,8 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r if discoveryErr != nil { err = multierror.Append(err, discoveryErr) } + } else { + l.Info("successfully registered remote MCP server", "url", server.Spec.URL, "toolCount", len(tools), "duration", time.Since(start)) } // update the tool server status as the agents depend on it @@ -968,12 +980,19 @@ func (a *kagentReconciler) upsertToolServerForRemoteMCPServer(ctx context.Contex return nil, fmt.Errorf("failed to store toolServer %s: %w", toolServer.Name, err) } - tsp, err := a.createMcpTransport(ctx, remoteMcpServer) + // Bound the entire registration sequence (header resolution + MCP connect + + // tool listing) to mcpRegistrationTimeout so that a hung or unreachable + // endpoint cannot block this goroutine — and therefore all subsequent + // RemoteMCPServer reconciliations — indefinitely. + tCtx, cancel := context.WithTimeout(ctx, mcpRegistrationTimeout) + defer cancel() + + tsp, err := a.createMcpTransport(tCtx, remoteMcpServer) if err != nil { return nil, fmt.Errorf("failed to create client for toolServer %s: %w", toolServer.Name, err) } - tools, err := a.listTools(ctx, tsp, toolServer) + tools, err := a.listTools(tCtx, tsp, toolServer) if err != nil { return nil, fmt.Errorf("failed to fetch tools for toolServer %s: %w", toolServer.Name, err) } @@ -1019,9 +1038,12 @@ func (a *kagentReconciler) createMcpTransport(ctx context.Context, s *v1alpha2.R // so we need to create a custom HTTP client that adds headers to all requests. func newHTTPClient(headers map[string]string) *http.Client { if len(headers) == 0 { - return http.DefaultClient + return &http.Client{ + Timeout: mcpRegistrationTimeout, + } } return &http.Client{ + Timeout: mcpRegistrationTimeout, Transport: &headerTransport{ headers: headers, base: http.DefaultTransport, From 73233961396c4f465af2fe2cad694ca822d099b4 Mon Sep 17 00:00:00 2001 From: AnantKumar17 Date: Wed, 6 May 2026 17:44:42 +0530 Subject: [PATCH 2/4] fix(controller): use spec.timeout per RemoteMCPServer when set Signed-off-by: AnantKumar17 --- .../controller/reconciler/reconciler.go | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/go/core/internal/controller/reconciler/reconciler.go b/go/core/internal/controller/reconciler/reconciler.go index 817f69bb6..0b2d93e28 100644 --- a/go/core/internal/controller/reconciler/reconciler.go +++ b/go/core/internal/controller/reconciler/reconciler.go @@ -47,14 +47,24 @@ const ( AgentReadyReasonDeploymentReady = "DeploymentReady" AgentReadyReasonWorkloadReady = "WorkloadReady" - // mcpRegistrationTimeout is the deadline applied to each RemoteMCPServer - // registration attempt (header resolution + MCP connect + tool listing). - // A hung or unreachable endpoint is bounded to this duration, ensuring the - // reconciler goroutine is always released and does not block subsequent - // RemoteMCPServer reconciliations. + // mcpRegistrationTimeout is the default deadline applied to a RemoteMCPServer + // registration attempt (header resolution + MCP connect + tool listing) when + // .spec.timeout is not set. A hung or unreachable endpoint is bounded to this + // duration, ensuring the reconciler goroutine is always released and does not + // block subsequent RemoteMCPServer reconciliations. mcpRegistrationTimeout = 30 * time.Second ) +// remoteMCPRegistrationTimeout returns the effective registration deadline for +// a RemoteMCPServer. It uses .spec.timeout when set, and falls back to the +// package-level default otherwise. +func remoteMCPRegistrationTimeout(s *v1alpha2.RemoteMCPServer) time.Duration { + if s != nil && s.Spec.Timeout != nil { + return s.Spec.Timeout.Duration + } + return mcpRegistrationTimeout +} + type KagentReconciler interface { ReconcileKagentAgent(ctx context.Context, req ctrl.Request) error ReconcileKagentSandboxAgent(ctx context.Context, req ctrl.Request) error @@ -981,10 +991,10 @@ func (a *kagentReconciler) upsertToolServerForRemoteMCPServer(ctx context.Contex } // Bound the entire registration sequence (header resolution + MCP connect + - // tool listing) to mcpRegistrationTimeout so that a hung or unreachable - // endpoint cannot block this goroutine — and therefore all subsequent - // RemoteMCPServer reconciliations — indefinitely. - tCtx, cancel := context.WithTimeout(ctx, mcpRegistrationTimeout) + // tool listing) to the effective per-resource timeout so that a hung or + // unreachable endpoint cannot block this goroutine — and therefore all + // subsequent RemoteMCPServer reconciliations — indefinitely. + tCtx, cancel := context.WithTimeout(ctx, remoteMCPRegistrationTimeout(remoteMcpServer)) defer cancel() tsp, err := a.createMcpTransport(tCtx, remoteMcpServer) @@ -1018,7 +1028,7 @@ func (a *kagentReconciler) createMcpTransport(ctx context.Context, s *v1alpha2.R return nil, err } - httpClient := newHTTPClient(headers) + httpClient := newHTTPClient(headers, remoteMCPRegistrationTimeout(s)) switch s.Spec.Protocol { case v1alpha2.RemoteMCPServerProtocolSse: @@ -1036,14 +1046,14 @@ func (a *kagentReconciler) createMcpTransport(ctx context.Context, s *v1alpha2.R // go-sdk does not have a WithHeaders option when initializing transport // so we need to create a custom HTTP client that adds headers to all requests. -func newHTTPClient(headers map[string]string) *http.Client { +func newHTTPClient(headers map[string]string, timeout time.Duration) *http.Client { if len(headers) == 0 { return &http.Client{ - Timeout: mcpRegistrationTimeout, + Timeout: timeout, } } return &http.Client{ - Timeout: mcpRegistrationTimeout, + Timeout: timeout, Transport: &headerTransport{ headers: headers, base: http.DefaultTransport, From 4512e1381ac37c60f765d97328098025914d5a7c Mon Sep 17 00:00:00 2001 From: AnantKumar17 Date: Wed, 6 May 2026 17:50:18 +0530 Subject: [PATCH 3/4] test(controller): add unit tests for registration timeout and HTTP client Signed-off-by: AnantKumar17 --- .../controller/reconciler/reconciler_test.go | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/go/core/internal/controller/reconciler/reconciler_test.go b/go/core/internal/controller/reconciler/reconciler_test.go index b4b22a62a..d9a4860a9 100644 --- a/go/core/internal/controller/reconciler/reconciler_test.go +++ b/go/core/internal/controller/reconciler/reconciler_test.go @@ -3,6 +3,7 @@ package reconciler import ( "context" "testing" + "time" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/utils" @@ -672,3 +673,63 @@ func TestValidateCrossNamespaceReferences(t *testing.T) { }) } } + +// TestRemoteMCPRegistrationTimeout verifies that remoteMCPRegistrationTimeout +// returns spec.timeout when set and falls back to the package default otherwise. +func TestRemoteMCPRegistrationTimeout(t *testing.T) { + custom := 10 * time.Second + + tests := []struct { + name string + server *v1alpha2.RemoteMCPServer + want time.Duration + }{ + { + name: "nil server returns default", + server: nil, + want: mcpRegistrationTimeout, + }, + { + name: "nil spec.timeout returns default", + server: &v1alpha2.RemoteMCPServer{}, + want: mcpRegistrationTimeout, + }, + { + name: "spec.timeout overrides default", + server: &v1alpha2.RemoteMCPServer{ + Spec: v1alpha2.RemoteMCPServerSpec{ + Timeout: &metav1.Duration{Duration: custom}, + }, + }, + want: custom, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, remoteMCPRegistrationTimeout(tt.server)) + }) + } +} + +// TestNewHTTPClient verifies that newHTTPClient always produces a client with +// the supplied timeout, regardless of whether custom headers are present. +func TestNewHTTPClient(t *testing.T) { + timeout := 5 * time.Second + + t.Run("no headers", func(t *testing.T) { + c := newHTTPClient(nil, timeout) + assert.Equal(t, timeout, c.Timeout) + }) + + t.Run("empty headers", func(t *testing.T) { + c := newHTTPClient(map[string]string{}, timeout) + assert.Equal(t, timeout, c.Timeout) + }) + + t.Run("with headers sets timeout and custom transport", func(t *testing.T) { + c := newHTTPClient(map[string]string{"X-Key": "val"}, timeout) + assert.Equal(t, timeout, c.Timeout) + _, ok := c.Transport.(*headerTransport) + assert.True(t, ok, "expected headerTransport") + }) +} From f3f12823d1acb6b1f77af6e791d6ec524a205b11 Mon Sep 17 00:00:00 2001 From: AnantKumar17 Date: Thu, 7 May 2026 11:19:42 +0530 Subject: [PATCH 4/4] fix(api): set kubebuilder default of 30s on spec.timeout Signed-off-by: AnantKumar17 --- go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml | 1 + go/api/v1alpha2/remotemcpserver_types.go | 1 + helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml b/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml index b51a8ab41..33c24ba17 100644 --- a/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml +++ b/go/api/config/crd/bases/kagent.dev_remotemcpservers.yaml @@ -177,6 +177,7 @@ spec: default: true type: boolean timeout: + default: 30s type: string url: minLength: 1 diff --git a/go/api/v1alpha2/remotemcpserver_types.go b/go/api/v1alpha2/remotemcpserver_types.go index f8a355894..95f0923ca 100644 --- a/go/api/v1alpha2/remotemcpserver_types.go +++ b/go/api/v1alpha2/remotemcpserver_types.go @@ -46,6 +46,7 @@ type RemoteMCPServerSpec struct { // +optional HeadersFrom []ValueRef `json:"headersFrom,omitempty"` // +optional + // +kubebuilder:default="30s" Timeout *metav1.Duration `json:"timeout,omitempty"` // +optional SseReadTimeout *metav1.Duration `json:"sseReadTimeout,omitempty"` diff --git a/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml b/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml index b51a8ab41..33c24ba17 100644 --- a/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml +++ b/helm/kagent-crds/templates/kagent.dev_remotemcpservers.yaml @@ -177,6 +177,7 @@ spec: default: true type: boolean timeout: + default: 30s type: string url: minLength: 1