From 67e711df4dde68b18814b24089e89f009dcf6a62 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Thu, 18 Sep 2025 15:41:46 +0800 Subject: [PATCH 1/8] add cert provider for cert management --- .../managers/solution/solution-manager.go | 96 ++++ .../managers/targets/targets-manager.go | 10 + api/pkg/apis/v1alpha1/model/deployment.go | 1 + api/pkg/apis/v1alpha1/providers/cert/cert.go | 59 +++ .../providers/cert/certmanager/certmanager.go | 50 ++ .../providers/cert/k8scert/k8scert.go | 431 ++++++++++++++++++ .../v1alpha1/providers/providerfactory.go | 7 + .../apis/v1alpha1/vendors/solution-vendor.go | 41 ++ .../apis/v1alpha1/vendors/targets-vendor.go | 140 ++---- .../helm/symphony/files/symphony-api.json | 19 +- 10 files changed, 742 insertions(+), 112 deletions(-) create mode 100644 api/pkg/apis/v1alpha1/providers/cert/cert.go create mode 100644 api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go create mode 100644 api/pkg/apis/v1alpha1/providers/cert/k8scert/k8scert.go diff --git a/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go b/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go index bac9548c5..e15e4f32c 100644 --- a/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go +++ b/api/pkg/apis/v1alpha1/managers/solution/solution-manager.go @@ -20,6 +20,7 @@ import ( "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/managers/solution/metrics" "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/model" sp "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers" + certProvider "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert" tgt "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/target" api_utils "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/utils" "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2" @@ -68,6 +69,7 @@ const ( type SolutionManager struct { SummaryManager TargetProviders map[string]tgt.ITargetProvider + CertProvider certProvider.ICertProvider ConfigProvider config.IExtConfigProvider SecretProvider secret.ISecretProvider KeyLockProvider keylock.IKeyLockProvider @@ -118,6 +120,15 @@ func (s *SolutionManager) Init(context *contexts.VendorContext, config managers. return err } + // Initialize cert provider + if certProviderInstance, exists := providers["working-cert"]; exists { + if cp, ok := certProviderInstance.(certProvider.ICertProvider); ok { + s.CertProvider = cp + } else { + return fmt.Errorf("working-cert provider does not implement ICertProvider interface") + } + } + if v, ok := config.Properties["isTarget"]; ok { b, err := strconv.ParseBool(v) if err == nil || b { @@ -159,6 +170,77 @@ func (s *SolutionManager) Init(context *contexts.VendorContext, config managers. return nil } + +// GetCertProvider returns the cert provider instance for certificate management operations +func (s *SolutionManager) GetCertProvider() certProvider.ICertProvider { + return s.CertProvider +} + +// SafeCreateWorkingCert creates a working certificate with validation checks +// It validates that the certificate doesn't exist before creation and verifies creation success after +func (s *SolutionManager) SafeCreateWorkingCert(ctx context.Context, certID string, request certProvider.CertRequest) error { + if s.CertProvider == nil { + return fmt.Errorf("cert provider not initialized") + } + + // Pre-creation validation: check if certificate already exists + log.InfofCtx(ctx, " M (Solution): validating certificate %s doesn't exist before creation", certID) + _, err := s.CertProvider.GetCert(ctx, certID, request.Namespace) + if err == nil { + log.InfofCtx(ctx, " M (Solution): certificate %s already exists, skipping creation", certID) + return nil + } + + // Create the certificate + log.InfofCtx(ctx, " M (Solution): creating working certificate %s", certID) + err = s.CertProvider.CreateCert(ctx, request) + if err != nil { + return fmt.Errorf("failed to create certificate %s: %v", certID, err) + } + + // Post-creation validation: verify certificate was created successfully + log.InfofCtx(ctx, " M (Solution): validating certificate %s was created successfully", certID) + _, err = s.CertProvider.GetCert(ctx, certID, request.Namespace) + if err != nil { + return fmt.Errorf("certificate %s creation validation failed, certificate not found after creation: %v", certID, err) + } + + log.InfofCtx(ctx, " M (Solution): working certificate %s created and validated successfully", certID) + return nil +} + +// SafeDeleteWorkingCert deletes a working certificate with validation checks +// It validates that the certificate exists before deletion and verifies deletion success after +func (s *SolutionManager) SafeDeleteWorkingCert(ctx context.Context, certID string, namespace string) error { + if s.CertProvider == nil { + return fmt.Errorf("cert provider not initialized") + } + + // Pre-deletion validation: check if certificate exists + log.InfofCtx(ctx, " M (Solution): validating certificate %s exists before deletion", certID) + _, err := s.CertProvider.GetCert(ctx, certID, namespace) + if err != nil { + return fmt.Errorf("certificate %s not found, cannot delete: %v", certID, err) + } + + // Delete the certificate + log.InfofCtx(ctx, " M (Solution): deleting working certificate %s", certID) + err = s.CertProvider.DeleteCert(ctx, certID, namespace) + if err != nil { + return fmt.Errorf("failed to delete certificate %s: %v", certID, err) + } + + // Post-deletion validation: verify certificate was deleted successfully + log.InfofCtx(ctx, " M (Solution): validating certificate %s was deleted successfully", certID) + _, err = s.CertProvider.GetCert(ctx, certID, namespace) + if err == nil { + return fmt.Errorf("certificate %s deletion validation failed, certificate still exists after deletion", certID) + } + + log.InfofCtx(ctx, " M (Solution): working certificate %s deleted and validated successfully", certID) + return nil +} + func (s *SolutionManager) AsyncReconcile(ctx context.Context, deployment model.DeploymentSpec, remove bool, namespace string, targetName string) (model.SummarySpec, error) { lockName := api_utils.GenerateKeyLockName(namespace, deployment.Instance.ObjectMeta.Name) s.KeyLockProvider.Lock(lockName) @@ -1895,3 +1977,17 @@ func (s *SolutionManager) getOperationState(ctx context.Context, operationId str } return ret, err } + +// CreateCertRequest creates a certificate request for the given target and namespace +func (s *SolutionManager) CreateCertRequest(targetName string, namespace string) certProvider.CertRequest { + return certProvider.CertRequest{ + TargetName: targetName, + Namespace: namespace, + Duration: time.Hour * 2160, // 90 days default + RenewBefore: time.Hour * 360, // 15 days before expiration + CommonName: fmt.Sprintf("symphony-%s", targetName), + DNSNames: []string{targetName, fmt.Sprintf("%s.%s", targetName, namespace)}, + IssuerName: "symphony-ca", + ServiceName: "symphony-service", + } +} diff --git a/api/pkg/apis/v1alpha1/managers/targets/targets-manager.go b/api/pkg/apis/v1alpha1/managers/targets/targets-manager.go index 04b595037..dbc10d3c9 100644 --- a/api/pkg/apis/v1alpha1/managers/targets/targets-manager.go +++ b/api/pkg/apis/v1alpha1/managers/targets/targets-manager.go @@ -13,6 +13,7 @@ import ( "github.com/eclipse-symphony/symphony/api/constants" "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/model" + certProvider "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert" "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/validation" "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2" "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/contexts" @@ -37,6 +38,7 @@ type TargetsManager struct { needValidate bool TargetValidator validation.TargetValidator SecretProvider secret.ISecretProvider + CertProvider certProvider.ICertProvider } func (s *TargetsManager) Init(context *contexts.VendorContext, config managers.ManagerConfig, providers map[string]providers.IProvider) error { @@ -61,6 +63,9 @@ func (s *TargetsManager) Init(context *contexts.VendorContext, config managers.M if c, ok := p.(secret.ISecretProvider); ok { s.SecretProvider = c } + if c, ok := p.(certProvider.ICertProvider); ok { + s.CertProvider = c + } } return nil @@ -308,3 +313,8 @@ func (t *TargetsManager) targetInstanceLookup(ctx context.Context, name string, } return len(instanceList) > 0, nil } + +// GetCertProvider returns the certificate provider for read-only access to certificates +func (t *TargetsManager) GetCertProvider() certProvider.ICertProvider { + return t.CertProvider +} diff --git a/api/pkg/apis/v1alpha1/model/deployment.go b/api/pkg/apis/v1alpha1/model/deployment.go index d3df4c7e2..d9bff9a4e 100644 --- a/api/pkg/apis/v1alpha1/model/deployment.go +++ b/api/pkg/apis/v1alpha1/model/deployment.go @@ -29,6 +29,7 @@ type DeploymentSpec struct { Hash string `json:"hash,omitempty"` IsDryRun bool `json:"isDryRun,omitempty"` IsInActive bool `json:"isInActive,omitempty"` + RemoteTargetName string `json:"remoteTargetName,omitempty"` } func (d DeploymentSpec) GetComponentSlice() []ComponentSpec { diff --git a/api/pkg/apis/v1alpha1/providers/cert/cert.go b/api/pkg/apis/v1alpha1/providers/cert/cert.go new file mode 100644 index 000000000..116eca468 --- /dev/null +++ b/api/pkg/apis/v1alpha1/providers/cert/cert.go @@ -0,0 +1,59 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + * SPDX-License-Identifier: MIT + */ + +package cert + +import ( + "context" + "time" +) + +// ICertProvider defines the interface for certificate management +type ICertProvider interface { + // CreateCert creates a certificate for the specified target + CreateCert(ctx context.Context, req CertRequest) error + + // DeleteCert deletes the certificate for the specified target + DeleteCert(ctx context.Context, targetName, namespace string) error + + // GetCert retrieves the certificate for the specified target (read-only) + GetCert(ctx context.Context, targetName, namespace string) (*CertResponse, error) + + // RotateCert rotates/renews the certificate for the specified target + RotateCert(ctx context.Context, targetName, namespace string) error + + // CheckCertStatus checks if the certificate is ready and valid + CheckCertStatus(ctx context.Context, targetName, namespace string) (*CertStatus, error) +} + +// CertRequest represents a certificate creation request +type CertRequest struct { + TargetName string `json:"targetName"` + Namespace string `json:"namespace"` + Duration time.Duration `json:"duration"` + RenewBefore time.Duration `json:"renewBefore"` + CommonName string `json:"commonName"` + DNSNames []string `json:"dnsNames"` + IssuerName string `json:"issuerName"` + ServiceName string `json:"serviceName"` +} + +// CertResponse represents the certificate data +type CertResponse struct { + PublicKey string `json:"publicKey"` + PrivateKey string `json:"privateKey"` + ExpiresAt time.Time `json:"expiresAt"` + SerialNumber string `json:"serialNumber"` +} + +// CertStatus represents the certificate status +type CertStatus struct { + Ready bool `json:"ready"` + Reason string `json:"reason"` + Message string `json:"message"` + LastUpdate time.Time `json:"lastUpdate"` + NextRenewal time.Time `json:"nextRenewal"` +} diff --git a/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go b/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go new file mode 100644 index 000000000..e93b72a6e --- /dev/null +++ b/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go @@ -0,0 +1,50 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + * SPDX-License-Identifier: MIT + */ + +package certmanager + +import ( + "context" + + "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert" + "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/contexts" + "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/providers" +) + +// OSSCMCertProvider is a placeholder cert provider that implements the cert.ICertProvider interface +// This is used for backward compatibility in the provider factory +type OSSCMCertProvider struct { + Config providers.IProviderConfig + Context *contexts.ManagerContext +} + +func (o *OSSCMCertProvider) Init(config providers.IProviderConfig) error { + o.Config = config + return nil +} + +func (o *OSSCMCertProvider) SetContext(ctx *contexts.ManagerContext) { + o.Context = ctx +} + +func (o *OSSCMCertProvider) CreateCert(ctx context.Context, req cert.CertRequest) error { + return nil // placeholder implementation +} + +func (o *OSSCMCertProvider) DeleteCert(ctx context.Context, targetName, namespace string) error { + return nil // placeholder implementation +} + +func (o *OSSCMCertProvider) GetCert(ctx context.Context, targetName, namespace string) (*cert.CertResponse, error) { + return nil, nil // placeholder implementation +} + +func (o *OSSCMCertProvider) RotateCert(ctx context.Context, targetName, namespace string) error { + return nil // placeholder implementation +} + +func (o *OSSCMCertProvider) CheckCertStatus(ctx context.Context, targetName, namespace string) (*cert.CertStatus, error) { + return nil, nil // placeholder implementation diff --git a/api/pkg/apis/v1alpha1/providers/cert/k8scert/k8scert.go b/api/pkg/apis/v1alpha1/providers/cert/k8scert/k8scert.go new file mode 100644 index 000000000..ac4d861f3 --- /dev/null +++ b/api/pkg/apis/v1alpha1/providers/cert/k8scert/k8scert.go @@ -0,0 +1,431 @@ +/* + * Copyright (c) Microsoft Corporation. + * Licensed under the MIT license. + * SPDX-License-Identifier: MIT + */ + +package k8scert + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/json" + "encoding/pem" + "fmt" + "math/big" + "time" + + "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert" + "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/contexts" + "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/observability" + observ_utils "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/observability/utils" + "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/providers" + "github.com/eclipse-symphony/symphony/coa/pkg/logger" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const loggerName = "providers.cert.k8scert" + +var sLog = logger.NewLogger(loggerName) + +type K8sCertProviderConfig struct { + Name string `json:"name"` + InCluster bool `json:"inCluster,omitempty"` +} + +type K8sCertProvider struct { + Config K8sCertProviderConfig + Context *contexts.ManagerContext + kubeClient kubernetes.Interface +} + +func K8sCertProviderConfigFromMap(properties map[string]string) (K8sCertProviderConfig, error) { + ret := K8sCertProviderConfig{ + InCluster: true, // default to in-cluster + } + if v, ok := properties["name"]; ok { + ret.Name = v + } + if v, ok := properties["inCluster"]; ok { + ret.InCluster = v == "true" + } + return ret, nil +} + +func (k *K8sCertProvider) InitWithMap(properties map[string]string) error { + config, err := K8sCertProviderConfigFromMap(properties) + if err != nil { + sLog.Errorf(" P (K8sCert): expected K8sCertProviderConfigFromMap: %+v", err) + return err + } + return k.Init(config) +} + +func (k *K8sCertProvider) SetContext(ctx *contexts.ManagerContext) { + k.Context = ctx +} + +func (k *K8sCertProvider) Init(config providers.IProviderConfig) error { + ctx, span := observability.StartSpan("K8sCert Provider", context.TODO(), &map[string]string{ + "method": "Init", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfoCtx(ctx, " P (K8sCert): Init()") + + // convert config to K8sCertProviderConfig type + certConfig, err := toK8sCertProviderConfig(config) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): expected K8sCertProviderConfig: %+v", err) + return err + } + + k.Config = certConfig + + // Initialize Kubernetes client + var kubeConfig *rest.Config + if k.Config.InCluster { + kubeConfig, err = rest.InClusterConfig() + } else { + // For out-of-cluster access, would need to load from kubeconfig file + // This can be implemented later if needed + err = fmt.Errorf("out-of-cluster configuration not implemented yet") + } + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to get kubernetes config: %+v", err) + return err + } + + k.kubeClient, err = kubernetes.NewForConfig(kubeConfig) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to create kubernetes client: %+v", err) + return err + } + + return nil +} + +func toK8sCertProviderConfig(config providers.IProviderConfig) (K8sCertProviderConfig, error) { + ret := K8sCertProviderConfig{} + data, err := json.Marshal(config) + if err != nil { + return ret, err + } + err = json.Unmarshal(data, &ret) + return ret, err +} + +// CreateCert creates a self-signed certificate and stores it as a Kubernetes Secret +func (k *K8sCertProvider) CreateCert(ctx context.Context, req cert.CertRequest) error { + ctx, span := observability.StartSpan("K8sCert Provider", ctx, &map[string]string{ + "method": "CreateCert", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfofCtx(ctx, " P (K8sCert): creating certificate for target %s in namespace %s", req.TargetName, req.Namespace) + + // Generate private key + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to generate private key: %+v", err) + return err + } + + // Set default duration if not specified + duration := req.Duration + if duration == 0 { + duration = 365 * 24 * time.Hour // 1 year default + } + + // Create certificate template + template := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + Organization: []string{"Symphony"}, + Country: []string{"US"}, + Province: []string{""}, + Locality: []string{""}, + StreetAddress: []string{""}, + PostalCode: []string{""}, + CommonName: req.CommonName, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(duration), + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + } + + // Add DNS names if specified + if len(req.DNSNames) > 0 { + template.DNSNames = req.DNSNames + } + + // Set default CommonName if not specified + if req.CommonName == "" { + template.Subject.CommonName = fmt.Sprintf("%s.symphony.local", req.TargetName) + } + + // Create the certificate + certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &privateKey.PublicKey, privateKey) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to create certificate: %+v", err) + return err + } + + // Encode certificate to PEM + certPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: certDER, + }) + + // Encode private key to PEM + privateKeyDER, err := x509.MarshalPKCS8PrivateKey(privateKey) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to marshal private key: %+v", err) + return err + } + + privateKeyPEM := pem.EncodeToMemory(&pem.Block{ + Type: "PRIVATE KEY", + Bytes: privateKeyDER, + }) + + // Create Kubernetes Secret + secretName := fmt.Sprintf("%s-working-cert", req.TargetName) + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: req.Namespace, + Labels: map[string]string{ + "symphony.microsoft.com/managed-by": "symphony", + "symphony.microsoft.com/target": req.TargetName, + "symphony.microsoft.com/cert-type": "working-cert", + }, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + "tls.crt": certPEM, + "tls.key": privateKeyPEM, + }, + } + + // Create or update the secret + _, err = k.kubeClient.CoreV1().Secrets(req.Namespace).Create(ctx, secret, metav1.CreateOptions{}) + if err != nil { + if errors.IsAlreadyExists(err) { + // Update existing secret + _, err = k.kubeClient.CoreV1().Secrets(req.Namespace).Update(ctx, secret, metav1.UpdateOptions{}) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to update certificate secret: %+v", err) + return err + } + sLog.InfofCtx(ctx, " P (K8sCert): updated certificate secret %s in namespace %s", secretName, req.Namespace) + } else { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to create certificate secret: %+v", err) + return err + } + } else { + sLog.InfofCtx(ctx, " P (K8sCert): created certificate secret %s in namespace %s", secretName, req.Namespace) + } + + return nil +} + +// DeleteCert deletes the certificate secret for the specified target +func (k *K8sCertProvider) DeleteCert(ctx context.Context, targetName, namespace string) error { + ctx, span := observability.StartSpan("K8sCert Provider", ctx, &map[string]string{ + "method": "DeleteCert", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfofCtx(ctx, " P (K8sCert): deleting certificate for target %s in namespace %s", targetName, namespace) + + secretName := fmt.Sprintf("%s-working-cert", targetName) + err = k.kubeClient.CoreV1().Secrets(namespace).Delete(ctx, secretName, metav1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + sLog.InfofCtx(ctx, " P (K8sCert): certificate secret %s not found (already deleted)", secretName) + return nil + } + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to delete certificate secret: %+v", err) + return err + } + + sLog.InfofCtx(ctx, " P (K8sCert): deleted certificate secret %s in namespace %s", secretName, namespace) + return nil +} + +// GetCert retrieves the certificate for the specified target (read-only) +func (k *K8sCertProvider) GetCert(ctx context.Context, targetName, namespace string) (*cert.CertResponse, error) { + ctx, span := observability.StartSpan("K8sCert Provider", ctx, &map[string]string{ + "method": "GetCert", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfofCtx(ctx, " P (K8sCert): getting certificate for target %s in namespace %s", targetName, namespace) + + secretName := fmt.Sprintf("%s-working-cert", targetName) + secret, err := k.kubeClient.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + sLog.InfofCtx(ctx, " P (K8sCert): certificate secret %s not found", secretName) + return nil, fmt.Errorf("certificate not found for target %s", targetName) + } + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to get certificate secret: %+v", err) + return nil, err + } + + certPEM := secret.Data["tls.crt"] + keyPEM := secret.Data["tls.key"] + + if len(certPEM) == 0 || len(keyPEM) == 0 { + sLog.ErrorfCtx(ctx, " P (K8sCert): certificate secret %s is missing certificate or key data", secretName) + return nil, fmt.Errorf("invalid certificate data for target %s", targetName) + } + + // Parse certificate to get expiration date and serial number + block, _ := pem.Decode(certPEM) + if block == nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to decode certificate PEM") + return nil, fmt.Errorf("invalid certificate format for target %s", targetName) + } + + parsedCert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to parse certificate: %+v", err) + return nil, err + } + + response := &cert.CertResponse{ + PublicKey: base64.StdEncoding.EncodeToString(certPEM), + PrivateKey: base64.StdEncoding.EncodeToString(keyPEM), + ExpiresAt: parsedCert.NotAfter, + SerialNumber: parsedCert.SerialNumber.String(), + } + + sLog.InfofCtx(ctx, " P (K8sCert): retrieved certificate for target %s, expires at %v", targetName, parsedCert.NotAfter) + return response, nil +} + +// RotateCert rotates/renews the certificate for the specified target +func (k *K8sCertProvider) RotateCert(ctx context.Context, targetName, namespace string) error { + ctx, span := observability.StartSpan("K8sCert Provider", ctx, &map[string]string{ + "method": "RotateCert", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfofCtx(ctx, " P (K8sCert): rotating certificate for target %s in namespace %s", targetName, namespace) + + // Create a new certificate with default settings + req := cert.CertRequest{ + TargetName: targetName, + Namespace: namespace, + Duration: 365 * 24 * time.Hour, // 1 year + CommonName: fmt.Sprintf("%s.symphony.local", targetName), + DNSNames: []string{targetName, fmt.Sprintf("%s.symphony.local", targetName)}, + } + + return k.CreateCert(ctx, req) +} + +// CheckCertStatus checks if the certificate is ready and valid +func (k *K8sCertProvider) CheckCertStatus(ctx context.Context, targetName, namespace string) (*cert.CertStatus, error) { + ctx, span := observability.StartSpan("K8sCert Provider", ctx, &map[string]string{ + "method": "CheckCertStatus", + }) + var err error = nil + defer observ_utils.CloseSpanWithError(span, &err) + defer observ_utils.EmitUserDiagnosticsLogs(ctx, &err) + + sLog.InfofCtx(ctx, " P (K8sCert): checking certificate status for target %s in namespace %s", targetName, namespace) + + status := &cert.CertStatus{ + Ready: false, + LastUpdate: time.Now(), + } + + secretName := fmt.Sprintf("%s-working-cert", targetName) + secret, err := k.kubeClient.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + status.Reason = "NotFound" + status.Message = "Certificate secret not found" + return status, nil + } + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to get certificate secret: %+v", err) + status.Reason = "Error" + status.Message = err.Error() + return status, nil + } + + certPEM := secret.Data["tls.crt"] + if len(certPEM) == 0 { + status.Reason = "InvalidData" + status.Message = "Certificate data is missing" + return status, nil + } + + // Parse certificate to check validity + block, _ := pem.Decode(certPEM) + if block == nil { + status.Reason = "InvalidFormat" + status.Message = "Certificate format is invalid" + return status, nil + } + + parsedCert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + sLog.ErrorfCtx(ctx, " P (K8sCert): failed to parse certificate: %+v", err) + status.Reason = "ParseError" + status.Message = err.Error() + return status, nil + } + + now := time.Now() + if now.Before(parsedCert.NotBefore) { + status.Reason = "NotYetValid" + status.Message = "Certificate is not yet valid" + return status, nil + } + + if now.After(parsedCert.NotAfter) { + status.Reason = "Expired" + status.Message = "Certificate has expired" + return status, nil + } + + // Check if renewal is needed (30 days before expiration) + renewalThreshold := parsedCert.NotAfter.Add(-30 * 24 * time.Hour) + if now.After(renewalThreshold) { + status.NextRenewal = renewalThreshold + status.Message = "Certificate needs renewal soon" + } else { + status.NextRenewal = renewalThreshold + } + + status.Ready = true + status.Reason = "Ready" + status.Message = "Certificate is valid and ready" + + sLog.InfofCtx(ctx, " P (K8sCert): certificate status for target %s: ready=%v, reason=%s", targetName, status.Ready, status.Reason) + return status, nil +} diff --git a/api/pkg/apis/v1alpha1/providers/providerfactory.go b/api/pkg/apis/v1alpha1/providers/providerfactory.go index e120105e5..6e84714be 100644 --- a/api/pkg/apis/v1alpha1/providers/providerfactory.go +++ b/api/pkg/apis/v1alpha1/providers/providerfactory.go @@ -10,6 +10,7 @@ import ( "fmt" "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/model" + "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert/k8scert" catalogconfig "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/config/catalog" memorygraph "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/graph/memory" "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/secret" @@ -390,6 +391,12 @@ func (s SymphonyProviderFactory) CreateProvider(providerType string, config cp.I if err == nil { return mProvider, nil } + case "providers.cert.k8scert": + mProvider := &k8scert.K8sCertProvider{} + err = mProvider.Init(config) + if err == nil { + return mProvider, nil + } } return nil, err //TODO: in current design, factory doesn't return errors on unrecognized provider types as there could be other factories. We may want to change this. } diff --git a/api/pkg/apis/v1alpha1/vendors/solution-vendor.go b/api/pkg/apis/v1alpha1/vendors/solution-vendor.go index 940a713e0..725289429 100644 --- a/api/pkg/apis/v1alpha1/vendors/solution-vendor.go +++ b/api/pkg/apis/v1alpha1/vendors/solution-vendor.go @@ -322,6 +322,19 @@ func (c *SolutionVendor) onReconcile(request v1alpha2.COARequest) v1alpha2.COARe targetName = v } } + + // Handle working certificate management for remote targets + if deployment.RemoteTargetName != "" { + err = c.handleWorkingCertManagement(ctx, deployment, remove, namespace) + if err != nil { + sLog.ErrorfCtx(ctx, "V (Solution): failed to handle working cert management: %s", err.Error()) + return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ + State: v1alpha2.InternalError, + Body: []byte(err.Error()), + }) + } + } + summary, err := c.SolutionManager.AsyncReconcile(ctx, deployment, remove, namespace, targetName) data, _ := json.Marshal(summary) if err != nil { @@ -556,3 +569,31 @@ func (c *SolutionVendor) onGetResponse(request v1alpha2.COARequest) v1alpha2.COA } return c.SolutionManager.HandleRemoteAgentExecuteResult(ctx, asyncResult) } + +// handleWorkingCertManagement manages working certificates for remote targets +func (c *SolutionVendor) handleWorkingCertManagement(ctx context.Context, deployment model.DeploymentSpec, remove bool, namespace string) error { + sLog.InfofCtx(ctx, "V (Solution): handleWorkingCertManagement for remote target: %s, remove: %t", deployment.RemoteTargetName, remove) + + if c.SolutionManager.GetCertProvider() == nil { + return fmt.Errorf("cert provider is not available") + } + + if remove { + // Delete working certificate when removing remote target + err := c.SolutionManager.SafeDeleteWorkingCert(ctx, deployment.RemoteTargetName, namespace) + if err != nil { + return fmt.Errorf("failed to delete working certificate for remote target %s: %w", deployment.RemoteTargetName, err) + } + sLog.InfofCtx(ctx, "V (Solution): successfully deleted working certificate for remote target: %s", deployment.RemoteTargetName) + } else { + // Create working certificate for remote target + err := c.SolutionManager.SafeCreateWorkingCert(ctx, deployment.RemoteTargetName, c.SolutionManager.CreateCertRequest(deployment.RemoteTargetName, namespace)) + if err != nil { + return fmt.Errorf("failed to create or update working certificate for remote target %s: %w", deployment.RemoteTargetName, err) + } else { + sLog.InfofCtx(ctx, "V (Solution): successfully created working certificate for remote target: %s", deployment.RemoteTargetName) + } + } + + return nil +} diff --git a/api/pkg/apis/v1alpha1/vendors/targets-vendor.go b/api/pkg/apis/v1alpha1/vendors/targets-vendor.go index 9d85b2044..d6654b870 100644 --- a/api/pkg/apis/v1alpha1/vendors/targets-vendor.go +++ b/api/pkg/apis/v1alpha1/vendors/targets-vendor.go @@ -35,8 +35,6 @@ import ( utils2 "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/utils" "github.com/golang-jwt/jwt/v4" "github.com/valyala/fasthttp" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" ) var ( @@ -683,13 +681,14 @@ func (c *TargetsVendor) onHeartBeat(request v1alpha2.COARequest) v1alpha2.COARes return resp } -// getting a certificate for a target +// getting a certificate for a target - READ-ONLY +// Certificate creation is now handled by the solution-vendor's reconcile method func (c *TargetsVendor) onGetCert(request v1alpha2.COARequest) v1alpha2.COAResponse { ctx, span := observability.StartSpan("Targets Vendor", request.Context, &map[string]string{ "method": "onGetCert", }) defer span.End() - tLog.InfofCtx(ctx, "V (Targets) : onGetCert, method: %s", request.Method) + tLog.InfofCtx(ctx, "V (Targets) : onGetCert (READ-ONLY), method: %s", request.Method) id := request.Parameters["__name"] namespace, exist := request.Parameters["namespace"] if !exist { @@ -698,128 +697,47 @@ func (c *TargetsVendor) onGetCert(request v1alpha2.COARequest) v1alpha2.COARespo switch request.Method { case fasthttp.MethodPost: - subject := fmt.Sprintf("CN=%s-%s.%s", namespace, id, ServiceName) - // create a new GroupVersionKind for the certificate - gvk := schema.GroupVersionKind{ - Group: "cert-manager.io", - Version: "v1", - Kind: "Certificate", - } - - // create a new unstructured object for the certificate - cert := &unstructured.Unstructured{} - cert.SetGroupVersionKind(gvk) - - cert.SetName(id) - cert.SetNamespace(namespace) - - secretName := fmt.Sprintf("%s-tls", id) - - // Get configurable working certificate duration and renewBefore values with defaults - duration := c.getWorkingCertDuration() - renewBefore := c.getWorkingCertRenewBefore() - - spec := map[string]interface{}{ - "secretName": secretName, - "duration": duration, - "renewBefore": renewBefore, - "commonName": subject, - "dnsNames": []string{ - subject, - }, - "issuerRef": map[string]interface{}{ - "name": CAIssuer, - "kind": "Issuer", - }, - "subject": map[string]interface{}{ - "organizations": []interface{}{ - ServiceName, - }, - }, - "privateKey": map[string]interface{}{ - "algorithm": "RSA", - "size": 2048, - }, - } - - cert.Object["spec"] = spec - - upsertRequest := states.UpsertRequest{ - Value: states.StateEntry{ - ID: id, - Body: cert.Object, - }, - Metadata: map[string]interface{}{ - "namespace": namespace, - "group": gvk.Group, - "version": gvk.Version, - "resource": "certificates", - "kind": gvk.Kind, - }, - } - - // Check if Certificate already exists to avoid concurrent creation - getRequest := states.GetRequest{ - ID: id, - Metadata: map[string]interface{}{ - "namespace": namespace, - "group": gvk.Group, - "version": gvk.Version, - "resource": "certificates", - "kind": gvk.Kind, - }, - } - - _, err := c.TargetsManager.StateProvider.Get(ctx, getRequest) - if err == nil { - // Certificate already exists, log and proceed to wait - tLog.InfofCtx(ctx, "V (Targets) : Certificate %s already exists, waiting for ready state", id) - } else { - // Certificate doesn't exist, create it - jsonData, _ := json.Marshal(upsertRequest) - tLog.InfofCtx(ctx, "V (Targets) : create certificate object - %s", jsonData) - _, err := c.TargetsManager.StateProvider.Upsert(ctx, upsertRequest) - if err != nil { - tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed - %s", err.Error()) - return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ - State: v1alpha2.InternalError, - Body: []byte(err.Error()), - }) - } + // Check if targets manager is available for cert provider access + if c.TargetsManager == nil { + tLog.ErrorCtx(ctx, "V (Targets) : onGetCert failed - targets manager not available") + return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ + State: v1alpha2.InternalError, + Body: []byte("targets manager not available for certificate operations"), + }) } - // Wait for Certificate to be ready and secret to be created with correct type - err = c.waitForCertificateReady(ctx, id, namespace, secretName) - if err != nil { - tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed waiting for certificate - %s", err.Error()) + certProvider := c.TargetsManager.GetCertProvider() + if certProvider == nil { + tLog.ErrorCtx(ctx, "V (Targets) : onGetCert failed - cert provider not available") return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ State: v1alpha2.InternalError, - Body: []byte(err.Error()), + Body: []byte("certificate provider not available"), }) } - // Use the fixed secret name directly - tLog.InfofCtx(ctx, "V (Targets) : Using fixed secret name: %s", secretName) - - public, err := readSecretWithRetry(ctx, c.TargetsManager.SecretProvider, secretName, "tls.crt", coa_utils.EvaluationContext{Namespace: namespace}) + // Use the working certificate ID (target name) to get existing certificate + certResponse, err := certProvider.GetCert(ctx, id, namespace) if err != nil { - tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed - %s", err.Error()) + tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed to retrieve certificate for target %s - %s", id, err.Error()) return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ - State: v1alpha2.InternalError, - Body: []byte(err.Error()), + State: v1alpha2.NotFound, + Body: []byte(fmt.Sprintf("working certificate not found for target %s: %s", id, err.Error())), }) } - private, err := readSecretWithRetry(ctx, c.TargetsManager.SecretProvider, secretName, "tls.key", coa_utils.EvaluationContext{Namespace: namespace}) - if err != nil { - tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed - %s", err.Error()) + + if certResponse == nil { + tLog.ErrorfCtx(ctx, "V (Targets) : onGetCert failed - nil certificate response for target %s", id) return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ - State: v1alpha2.InternalError, - Body: []byte(err.Error()), + State: v1alpha2.NotFound, + Body: []byte(fmt.Sprintf("working certificate not found for target %s", id)), }) } - public = strings.ReplaceAll(public, "\n", " ") - private = strings.ReplaceAll(private, "\n", " ") + // Format certificate data for remote agent (remove newlines as expected) + public := strings.ReplaceAll(certResponse.PublicKey, "\n", " ") + private := strings.ReplaceAll(certResponse.PrivateKey, "\n", " ") + + tLog.InfofCtx(ctx, "V (Targets) : successfully retrieved working certificate for target %s (expires: %s)", id, certResponse.ExpiresAt.Format(time.RFC3339)) return observ_utils.CloseSpanWithCOAResponse(span, v1alpha2.COAResponse{ State: v1alpha2.OK, diff --git a/packages/helm/symphony/files/symphony-api.json b/packages/helm/symphony/files/symphony-api.json index ac35ad524..15669d06b 100644 --- a/packages/helm/symphony/files/symphony-api.json +++ b/packages/helm/symphony/files/symphony-api.json @@ -351,6 +351,14 @@ "config": { "inCluster": true } + }, + "working-cert": { + "type": "providers.cert.k8scert", + "config": { + "inCluster": true, + "defaultDuration": "{{ .Values.targets.workingCertDuration | default \"2160h\" }}", + "renewBefore": "{{ .Values.targets.workingCertRenewBefore | default \"360h\" }}" + } } } } @@ -519,7 +527,8 @@ "providers.config": "mock-config", "providers.queue": "redis-queue", "providers.secret": "mock-secret", - "providers.keylock": "mem-keylock" + "providers.keylock": "mem-keylock", + "providers.cert": "working-cert" }, "providers": { "redis-state": { @@ -557,6 +566,14 @@ "mock-secret": { "type": "providers.secret.mock", "config": {} + }, + "working-cert": { + "type": "providers.cert.k8scert", + "config": { + "inCluster": true, + "defaultDuration": "{{ .Values.solution.workingCertDuration | default \"2160h\" }}", + "renewBefore": "{{ .Values.solution.workingCertRenewBefore | default \"360h\" }}" + } } } } From ff5b3bfe3f280a67f037bb0e8d9053d040508752 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 10:53:49 +0800 Subject: [PATCH 2/8] add get certificate retry logic --- remote-agent/bootstrap/bootstrap.ps1 | 49 ++++++++++++++++++++-------- remote-agent/bootstrap/bootstrap.sh | 24 ++++++++++++-- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/remote-agent/bootstrap/bootstrap.ps1 b/remote-agent/bootstrap/bootstrap.ps1 index d85e406b3..9de9db8bb 100644 --- a/remote-agent/bootstrap/bootstrap.ps1 +++ b/remote-agent/bootstrap/bootstrap.ps1 @@ -177,20 +177,43 @@ if ($protocol -eq 'http') { } # HTTP mode: Get certificates from server - try { - $WebRequestParams = @{ - Uri = "$($endpoint)/targets/getcert/$($target_name)?namespace=$($namespace)&osPlatform=windows" - Method = 'Post' - Certificate = $cert - Headers = @{ "Content-Type" = "application/json"; "User-Agent" = "PowerShell-Debug" } + # get certificates from symphony server with retries + Write-Host "Begin to get certificates from symphony server" -ForegroundColor Blue + $maxRetries = 12 + $retryCount = 0 + $success = $false + $response = $null + $WebRequestParams = @{ + Uri = "$($endpoint)/targets/getcert/$($target_name)?namespace=$($namespace)&osPlatform=windows" + Method = 'Post' + Certificate = $cert + Headers = @{ "Content-Type" = "application/json"; "User-Agent" = "PowerShell-Debug" } + } + while ($retryCount -lt $maxRetries -and -not $success) { + try { + Write-Host "WebRequestParams:" -ForegroundColor Cyan + $WebRequestParams.GetEnumerator() | ForEach-Object { Write-Host (" {0}: {1}" -f $_.Key, $_.Value) } + $response = Invoke-WebRequest @WebRequestParams -Verbose + $jsonResponse = $response.Content | ConvertFrom-Json + if ($jsonResponse.public -and $jsonResponse.private -and $jsonResponse.public -ne "null" -and $jsonResponse.private -ne "null") { + $success = $true + Write-Host "Successfully got working certificates from symphony server" -ForegroundColor Green + break + } else { + Write-Host "Certificate not ready, retrying in 10 seconds... ($($retryCount+1)/$maxRetries)" -ForegroundColor Yellow + } + } catch { + Write-Host "Error: Failed to send request to endpoint. Retrying in 10 seconds... ($($retryCount+1)/$maxRetries)" -ForegroundColor Red + Write-Host "Error Message: $($_.Exception.Message)" -ForegroundColor Red + } + Start-Sleep -Seconds 10 + $retryCount++ + } + if (-not $success) { + Write-Host "Error: Failed to get certificate after $($maxRetries*10) seconds." -ForegroundColor Red + if ($response -and $response.Content) { + Write-Host "Last response: $($response.Content)" -ForegroundColor Red } - Write-Host "WebRequestParams:" -ForegroundColor Cyan - $WebRequestParams.GetEnumerator() | ForEach-Object { Write-Host (" {0}: {1}" -f $_.Key, $_.Value) } - $response = Invoke-WebRequest @WebRequestParams -Verbose - Write-Host "Successfully got working certificates from symphony server" -ForegroundColor Green - } catch { - Write-Host "Error: Failed to send request to endpoint." -ForegroundColor Red - Write-Host "Error Message: $($_.Exception.Message)" -ForegroundColor Red exit 1 } diff --git a/remote-agent/bootstrap/bootstrap.sh b/remote-agent/bootstrap/bootstrap.sh index d069b5790..16e94db8e 100755 --- a/remote-agent/bootstrap/bootstrap.sh +++ b/remote-agent/bootstrap/bootstrap.sh @@ -190,8 +190,28 @@ if [ "$protocol" = "http" ]; then curl_cmd="$curl_cmd -k" fi - # Get certificate - result=$(eval "$curl_cmd -X POST \"$bootstrapCertEndpoint\" -H \"Content-Type: application/json\"") + # Get certificate with retry (10s interval, max 120s) + retry_count=0 + max_retries=12 + result="" + while true; do + result=$(eval "$curl_cmd -X POST \"$bootstrapCertEndpoint\" -H \"Content-Type: application/json\"") + if [ $? -eq 0 ]; then + # Check if response contains valid public and private fields + public=$(echo $result | jq -r '.public') + private=$(echo $result | jq -r '.private') + if [ "$public" != "null" ] && [ "$private" != "null" ] && [ -n "$public" ] && [ -n "$private" ]; then + break + fi + fi + retry_count=$((retry_count+1)) + if [ $retry_count -ge $max_retries ]; then + echo -e "\e[31mError: Failed to get certificate after $((max_retries*10)) seconds. Response: $result\e[0m" + exit 1 + fi + echo -e "\e[33mCertificate not ready, retrying in 10 seconds... ($retry_count/$max_retries)\e[0m" + sleep 10 + done if [ $? -ne 0 ]; then echo -e "\e[31mError: Failed to call certificate endpoint. Please check the endpoint and try again.\e[0m" From f5d79a49e8c4aa79d7763017ff848f4550a382d3 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 11:40:45 +0800 Subject: [PATCH 3/8] fix json formate --- packages/helm/symphony/files/symphony-api.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/helm/symphony/files/symphony-api.json b/packages/helm/symphony/files/symphony-api.json index 15669d06b..f426a431a 100644 --- a/packages/helm/symphony/files/symphony-api.json +++ b/packages/helm/symphony/files/symphony-api.json @@ -356,8 +356,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.targets.workingCertDuration | default \"2160h\" }}", - "renewBefore": "{{ .Values.targets.workingCertRenewBefore | default \"360h\" }}" + "defaultDuration": "{{ .Values.targets.workingCertDuration | default "2160h" }}", + "renewBefore": "{{ .Values.targets.workingCertRenewBefore | default "360h" }}" } } } @@ -571,8 +571,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.solution.workingCertDuration | default \"2160h\" }}", - "renewBefore": "{{ .Values.solution.workingCertRenewBefore | default \"360h\" }}" + "defaultDuration": "{{ .Values.solution.workingCertDuration | default "2160h" }}", + "renewBefore": "{{ .Values.solution.workingCertRenewBefore | default "360h" }}" } } } From f463629cbded696f58a7d8e4db34aeb8b3c9c586 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 13:00:05 +0800 Subject: [PATCH 4/8] fix param --- packages/helm/symphony/files/symphony-api.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/helm/symphony/files/symphony-api.json b/packages/helm/symphony/files/symphony-api.json index f426a431a..e8a46a835 100644 --- a/packages/helm/symphony/files/symphony-api.json +++ b/packages/helm/symphony/files/symphony-api.json @@ -329,8 +329,8 @@ "type": "vendors.targets", "route": "targets", "properties": { - "workingCertDuration": "{{ .Values.targets.workingCertDuration | default "2160h" }}", - "workingCertRenewBefore": "{{ .Values.targets.workingCertRenewBefore | default "360h" }}" + "workingCertDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", + "workingCertRenewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" }, "managers": [ { @@ -356,8 +356,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.targets.workingCertDuration | default "2160h" }}", - "renewBefore": "{{ .Values.targets.workingCertRenewBefore | default "360h" }}" + "defaultDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", + "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" } } } @@ -571,8 +571,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.solution.workingCertDuration | default "2160h" }}", - "renewBefore": "{{ .Values.solution.workingCertRenewBefore | default "360h" }}" + "defaultDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", + "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" } } } From cc10d56f541115bc5841f81b5c74d17ad1f25249 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 14:01:36 +0800 Subject: [PATCH 5/8] fix \ --- packages/helm/symphony/files/symphony-api.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/helm/symphony/files/symphony-api.json b/packages/helm/symphony/files/symphony-api.json index e8a46a835..f9efa63f4 100644 --- a/packages/helm/symphony/files/symphony-api.json +++ b/packages/helm/symphony/files/symphony-api.json @@ -329,8 +329,8 @@ "type": "vendors.targets", "route": "targets", "properties": { - "workingCertDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", - "workingCertRenewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" + "workingCertDuration": "{{ .Values.cert.certDurationTime | default "4320h" }}", + "workingCertRenewBefore": "{{ .Values.cert.certRenewBeforeTime | default "360h" }}" }, "managers": [ { @@ -356,8 +356,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", - "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" + "defaultDuration": "{{ .Values.cert.certDurationTime | default "4320h" }}", + "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default "360h" }}" } } } @@ -571,8 +571,8 @@ "type": "providers.cert.k8scert", "config": { "inCluster": true, - "defaultDuration": "{{ .Values.cert.certDurationTime | default \"4320h\" }}", - "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default \"360h\" }}" + "defaultDuration": "{{ .Values.cert.certDurationTime | default "4320h" }}", + "renewBefore": "{{ .Values.cert.certRenewBeforeTime | default "360h" }}" } } } From 6cce19e7ccbc488413f6bae1902d59c46520fd38 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 15:15:37 +0800 Subject: [PATCH 6/8] remove certmanager --- .../providers/cert/certmanager/certmanager.go | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go diff --git a/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go b/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go deleted file mode 100644 index e93b72a6e..000000000 --- a/api/pkg/apis/v1alpha1/providers/cert/certmanager/certmanager.go +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) Microsoft Corporation. - * Licensed under the MIT license. - * SPDX-License-Identifier: MIT - */ - -package certmanager - -import ( - "context" - - "github.com/eclipse-symphony/symphony/api/pkg/apis/v1alpha1/providers/cert" - "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/contexts" - "github.com/eclipse-symphony/symphony/coa/pkg/apis/v1alpha2/providers" -) - -// OSSCMCertProvider is a placeholder cert provider that implements the cert.ICertProvider interface -// This is used for backward compatibility in the provider factory -type OSSCMCertProvider struct { - Config providers.IProviderConfig - Context *contexts.ManagerContext -} - -func (o *OSSCMCertProvider) Init(config providers.IProviderConfig) error { - o.Config = config - return nil -} - -func (o *OSSCMCertProvider) SetContext(ctx *contexts.ManagerContext) { - o.Context = ctx -} - -func (o *OSSCMCertProvider) CreateCert(ctx context.Context, req cert.CertRequest) error { - return nil // placeholder implementation -} - -func (o *OSSCMCertProvider) DeleteCert(ctx context.Context, targetName, namespace string) error { - return nil // placeholder implementation -} - -func (o *OSSCMCertProvider) GetCert(ctx context.Context, targetName, namespace string) (*cert.CertResponse, error) { - return nil, nil // placeholder implementation -} - -func (o *OSSCMCertProvider) RotateCert(ctx context.Context, targetName, namespace string) error { - return nil // placeholder implementation -} - -func (o *OSSCMCertProvider) CheckCertStatus(ctx context.Context, targetName, namespace string) (*cert.CertStatus, error) { - return nil, nil // placeholder implementation From e4f778c21c2c9c024d264ad1034182acc9cd1aa3 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 15:18:36 +0800 Subject: [PATCH 7/8] fix --- api/pkg/apis/v1alpha1/utils/symphony-api.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/api/pkg/apis/v1alpha1/utils/symphony-api.go b/api/pkg/apis/v1alpha1/utils/symphony-api.go index 6088161ec..0b64482b5 100644 --- a/api/pkg/apis/v1alpha1/utils/symphony-api.go +++ b/api/pkg/apis/v1alpha1/utils/symphony-api.go @@ -663,8 +663,17 @@ func CreateSymphonyDeploymentFromTarget(ctx context.Context, target model.Target scope = constants.DefaultScope } + // Check if this is a remote target by looking for remote-agent components + remoteTargetName := "" + for _, component := range target.Spec.Components { + if component.Type == "remote-agent" { + remoteTargetName = target.ObjectMeta.Name + break + } + } ret := model.DeploymentSpec{ - ObjectNamespace: namespace, + ObjectNamespace: namespace, + RemoteTargetName: remoteTargetName, } solution := model.SolutionState{ ObjectMeta: model.ObjectMeta{ From 8b2b2ac471f44f6577458fb38a8fc4b595788263 Mon Sep 17 00:00:00 2001 From: yanjiaxin534 Date: Fri, 19 Sep 2025 18:53:09 +0800 Subject: [PATCH 8/8] fix integration test1 --- .../scenarios/13.remoteAgent/get_helm.sh | 347 ++++++++ .../scenarios/13.remoteAgent/magefile.go | 2 +- .../13.remoteAgent/utils/test_helpers.go | 833 +----------------- 3 files changed, 384 insertions(+), 798 deletions(-) create mode 100644 test/integration/scenarios/13.remoteAgent/get_helm.sh diff --git a/test/integration/scenarios/13.remoteAgent/get_helm.sh b/test/integration/scenarios/13.remoteAgent/get_helm.sh new file mode 100644 index 000000000..3aa44daee --- /dev/null +++ b/test/integration/scenarios/13.remoteAgent/get_helm.sh @@ -0,0 +1,347 @@ +#!/usr/bin/env bash + +# Copyright The Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The install script is based off of the MIT-licensed script from glide, +# the package manager for Go: https://github.com/Masterminds/glide.sh/blob/master/get + +: ${BINARY_NAME:="helm"} +: ${USE_SUDO:="true"} +: ${DEBUG:="false"} +: ${VERIFY_CHECKSUM:="true"} +: ${VERIFY_SIGNATURES:="false"} +: ${HELM_INSTALL_DIR:="/usr/local/bin"} +: ${GPG_PUBRING:="pubring.kbx"} + +HAS_CURL="$(type "curl" &> /dev/null && echo true || echo false)" +HAS_WGET="$(type "wget" &> /dev/null && echo true || echo false)" +HAS_OPENSSL="$(type "openssl" &> /dev/null && echo true || echo false)" +HAS_GPG="$(type "gpg" &> /dev/null && echo true || echo false)" +HAS_GIT="$(type "git" &> /dev/null && echo true || echo false)" +HAS_TAR="$(type "tar" &> /dev/null && echo true || echo false)" + +# initArch discovers the architecture for this system. +initArch() { + ARCH=$(uname -m) + case $ARCH in + armv5*) ARCH="armv5";; + armv6*) ARCH="armv6";; + armv7*) ARCH="arm";; + aarch64) ARCH="arm64";; + x86) ARCH="386";; + x86_64) ARCH="amd64";; + i686) ARCH="386";; + i386) ARCH="386";; + esac +} + +# initOS discovers the operating system for this system. +initOS() { + OS=$(echo `uname`|tr '[:upper:]' '[:lower:]') + + case "$OS" in + # Minimalist GNU for Windows + mingw*|cygwin*) OS='windows';; + esac +} + +# runs the given command as root (detects if we are root already) +runAsRoot() { + if [ $EUID -ne 0 -a "$USE_SUDO" = "true" ]; then + sudo "${@}" + else + "${@}" + fi +} + +# verifySupported checks that the os/arch combination is supported for +# binary builds, as well whether or not necessary tools are present. +verifySupported() { + local supported="darwin-amd64\ndarwin-arm64\nlinux-386\nlinux-amd64\nlinux-arm\nlinux-arm64\nlinux-ppc64le\nlinux-s390x\nlinux-riscv64\nwindows-amd64\nwindows-arm64" + if ! echo "${supported}" | grep -q "${OS}-${ARCH}"; then + echo "No prebuilt binary for ${OS}-${ARCH}." + echo "To build from source, go to https://github.com/helm/helm" + exit 1 + fi + + if [ "${HAS_CURL}" != "true" ] && [ "${HAS_WGET}" != "true" ]; then + echo "Either curl or wget is required" + exit 1 + fi + + if [ "${VERIFY_CHECKSUM}" == "true" ] && [ "${HAS_OPENSSL}" != "true" ]; then + echo "In order to verify checksum, openssl must first be installed." + echo "Please install openssl or set VERIFY_CHECKSUM=false in your environment." + exit 1 + fi + + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + if [ "${HAS_GPG}" != "true" ]; then + echo "In order to verify signatures, gpg must first be installed." + echo "Please install gpg or set VERIFY_SIGNATURES=false in your environment." + exit 1 + fi + if [ "${OS}" != "linux" ]; then + echo "Signature verification is currently only supported on Linux." + echo "Please set VERIFY_SIGNATURES=false or verify the signatures manually." + exit 1 + fi + fi + + if [ "${HAS_GIT}" != "true" ]; then + echo "[WARNING] Could not find git. It is required for plugin installation." + fi + + if [ "${HAS_TAR}" != "true" ]; then + echo "[ERROR] Could not find tar. It is required to extract the helm binary archive." + exit 1 + fi +} + +# checkDesiredVersion checks if the desired version is available. +checkDesiredVersion() { + if [ "x$DESIRED_VERSION" == "x" ]; then + # Get tag from release URL + local latest_release_url="https://get.helm.sh/helm-latest-version" + local latest_release_response="" + if [ "${HAS_CURL}" == "true" ]; then + latest_release_response=$( curl -L --silent --show-error --fail "$latest_release_url" 2>&1 || true ) + elif [ "${HAS_WGET}" == "true" ]; then + latest_release_response=$( wget "$latest_release_url" -q -O - 2>&1 || true ) + fi + TAG=$( echo "$latest_release_response" | grep '^v[0-9]' ) + if [ "x$TAG" == "x" ]; then + printf "Could not retrieve the latest release tag information from %s: %s\n" "${latest_release_url}" "${latest_release_response}" + exit 1 + fi + else + TAG=$DESIRED_VERSION + fi +} + +# checkHelmInstalledVersion checks which version of helm is installed and +# if it needs to be changed. +checkHelmInstalledVersion() { + if [[ -f "${HELM_INSTALL_DIR}/${BINARY_NAME}" ]]; then + local version=$("${HELM_INSTALL_DIR}/${BINARY_NAME}" version --template="{{ .Version }}") + if [[ "$version" == "$TAG" ]]; then + echo "Helm ${version} is already ${DESIRED_VERSION:-latest}" + return 0 + else + echo "Helm ${TAG} is available. Changing from version ${version}." + return 1 + fi + else + return 1 + fi +} + +# downloadFile downloads the latest binary package and also the checksum +# for that binary. +downloadFile() { + HELM_DIST="helm-$TAG-$OS-$ARCH.tar.gz" + DOWNLOAD_URL="https://get.helm.sh/$HELM_DIST" + CHECKSUM_URL="$DOWNLOAD_URL.sha256" + HELM_TMP_ROOT="$(mktemp -dt helm-installer-XXXXXX)" + HELM_TMP_FILE="$HELM_TMP_ROOT/$HELM_DIST" + HELM_SUM_FILE="$HELM_TMP_ROOT/$HELM_DIST.sha256" + echo "Downloading $DOWNLOAD_URL" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "$CHECKSUM_URL" -o "$HELM_SUM_FILE" + curl -SsL "$DOWNLOAD_URL" -o "$HELM_TMP_FILE" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "$HELM_SUM_FILE" "$CHECKSUM_URL" + wget -q -O "$HELM_TMP_FILE" "$DOWNLOAD_URL" + fi +} + +# verifyFile verifies the SHA256 checksum of the binary package +# and the GPG signatures for both the package and checksum file +# (depending on settings in environment). +verifyFile() { + if [ "${VERIFY_CHECKSUM}" == "true" ]; then + verifyChecksum + fi + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + verifySignatures + fi +} + +# installFile installs the Helm binary. +installFile() { + HELM_TMP="$HELM_TMP_ROOT/$BINARY_NAME" + mkdir -p "$HELM_TMP" + tar xf "$HELM_TMP_FILE" -C "$HELM_TMP" + HELM_TMP_BIN="$HELM_TMP/$OS-$ARCH/helm" + echo "Preparing to install $BINARY_NAME into ${HELM_INSTALL_DIR}" + runAsRoot cp "$HELM_TMP_BIN" "$HELM_INSTALL_DIR/$BINARY_NAME" + echo "$BINARY_NAME installed into $HELM_INSTALL_DIR/$BINARY_NAME" +} + +# verifyChecksum verifies the SHA256 checksum of the binary package. +verifyChecksum() { + printf "Verifying checksum... " + local sum=$(openssl sha1 -sha256 ${HELM_TMP_FILE} | awk '{print $2}') + local expected_sum=$(cat ${HELM_SUM_FILE}) + if [ "$sum" != "$expected_sum" ]; then + echo "SHA sum of ${HELM_TMP_FILE} does not match. Aborting." + exit 1 + fi + echo "Done." +} + +# verifySignatures obtains the latest KEYS file from GitHub main branch +# as well as the signature .asc files from the specific GitHub release, +# then verifies that the release artifacts were signed by a maintainer's key. +verifySignatures() { + printf "Verifying signatures... " + local keys_filename="KEYS" + local github_keys_url="https://raw.githubusercontent.com/helm/helm/main/${keys_filename}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_keys_url}" -o "${HELM_TMP_ROOT}/${keys_filename}" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/${keys_filename}" "${github_keys_url}" + fi + local gpg_keyring="${HELM_TMP_ROOT}/keyring.gpg" + local gpg_homedir="${HELM_TMP_ROOT}/gnupg" + mkdir -p -m 0700 "${gpg_homedir}" + local gpg_stderr_device="/dev/null" + if [ "${DEBUG}" == "true" ]; then + gpg_stderr_device="/dev/stderr" + fi + gpg --batch --quiet --homedir="${gpg_homedir}" --import "${HELM_TMP_ROOT}/${keys_filename}" 2> "${gpg_stderr_device}" + gpg --batch --no-default-keyring --keyring "${gpg_homedir}/${GPG_PUBRING}" --export > "${gpg_keyring}" + local github_release_url="https://github.com/helm/helm/releases/download/${TAG}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + fi + local error_text="If you think this might be a potential security issue," + error_text="${error_text}\nplease see here: https://github.com/helm/community/blob/master/SECURITY.md" + local num_goodlines_sha=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if [[ ${num_goodlines_sha} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256!" + echo -e "${error_text}" + exit 1 + fi + local num_goodlines_tar=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if [[ ${num_goodlines_tar} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz!" + echo -e "${error_text}" + exit 1 + fi + echo "Done." +} + +# fail_trap is executed if an error occurs. +fail_trap() { + result=$? + if [ "$result" != "0" ]; then + if [[ -n "$INPUT_ARGUMENTS" ]]; then + echo "Failed to install $BINARY_NAME with the arguments provided: $INPUT_ARGUMENTS" + help + else + echo "Failed to install $BINARY_NAME" + fi + echo -e "\tFor support, go to https://github.com/helm/helm." + fi + cleanup + exit $result +} + +# testVersion tests the installed client to make sure it is working. +testVersion() { + set +e + HELM="$(command -v $BINARY_NAME)" + if [ "$?" = "1" ]; then + echo "$BINARY_NAME not found. Is $HELM_INSTALL_DIR on your "'$PATH?' + exit 1 + fi + set -e +} + +# help provides possible cli installation arguments +help () { + echo "Accepted cli arguments are:" + echo -e "\t[--help|-h ] ->> prints this help" + echo -e "\t[--version|-v ] . When not defined it fetches the latest release tag from the Helm CDN" + echo -e "\te.g. --version v3.0.0 or -v canary" + echo -e "\t[--no-sudo] ->> install without sudo" +} + +# cleanup temporary files to avoid https://github.com/helm/helm/issues/2977 +cleanup() { + if [[ -d "${HELM_TMP_ROOT:-}" ]]; then + rm -rf "$HELM_TMP_ROOT" + fi +} + +# Execution + +#Stop execution on any error +trap "fail_trap" EXIT +set -e + +# Set debug if desired +if [ "${DEBUG}" == "true" ]; then + set -x +fi + +# Parsing input arguments (if any) +export INPUT_ARGUMENTS="${@}" +set -u +while [[ $# -gt 0 ]]; do + case $1 in + '--version'|-v) + shift + if [[ $# -ne 0 ]]; then + export DESIRED_VERSION="${1}" + if [[ "$1" != "v"* ]]; then + echo "Expected version arg ('${DESIRED_VERSION}') to begin with 'v', fixing..." + export DESIRED_VERSION="v${1}" + fi + else + echo -e "Please provide the desired version. e.g. --version v3.0.0 or -v canary" + exit 0 + fi + ;; + '--no-sudo') + USE_SUDO="false" + ;; + '--help'|-h) + help + exit 0 + ;; + *) exit 1 + ;; + esac + shift +done +set +u + +initArch +initOS +verifySupported +checkDesiredVersion +if ! checkHelmInstalledVersion; then + downloadFile + verifyFile + installFile +fi +testVersion +cleanup diff --git a/test/integration/scenarios/13.remoteAgent/magefile.go b/test/integration/scenarios/13.remoteAgent/magefile.go index 0fa7da5ec..f69b777cf 100644 --- a/test/integration/scenarios/13.remoteAgent/magefile.go +++ b/test/integration/scenarios/13.remoteAgent/magefile.go @@ -18,7 +18,7 @@ import ( // Test config const ( - TEST_NAME = "Remote Agent Communication scenario (HTTP and MQTT)" + TEST_NAME = "Remote Agent Communication scenario " TEST_TIMEOUT = "30m" ) diff --git a/test/integration/scenarios/13.remoteAgent/utils/test_helpers.go b/test/integration/scenarios/13.remoteAgent/utils/test_helpers.go index fba0b21bd..9db3314dc 100644 --- a/test/integration/scenarios/13.remoteAgent/utils/test_helpers.go +++ b/test/integration/scenarios/13.remoteAgent/utils/test_helpers.go @@ -15,9 +15,7 @@ import ( "os" "os/exec" "path/filepath" - "runtime" "strings" - "syscall" "testing" "time" @@ -787,8 +785,8 @@ func WaitForResourceDeleted(t *testing.T, resourceType, resourceName, namespace for { select { case <-ctx.Done(): - t.Logf("Timeout waiting for %s %s/%s to be deleted", resourceType, namespace, resourceName) - return // Don't fail the test, just log and continue + t.Fatalf("Timeout waiting for %s %s/%s to be deleted", resourceType, namespace, resourceName) + return case <-ticker.C: cmd := exec.Command("kubectl", "get", resourceType, resourceName, "-n", namespace) err := cmd.Run() @@ -879,6 +877,13 @@ func WaitForTargetCreated(t *testing.T, targetName, namespace string, timeout ti // WaitForTargetReady waits for a Target to reach ready state func WaitForTargetReady(t *testing.T, targetName, namespace string, timeout time.Duration) { + WaitForTargetStatus(t, targetName, namespace, "Succeeded", timeout) +} + +// WaitForTargetStatus waits for a Target to reach the expected status +// If expectedStatus is "Succeeded", it will report error if timeout and status is not "Succeeded" +// If expectedStatus is "Failed", it will report error if timeout and status is not "Failed" +func WaitForTargetStatus(t *testing.T, targetName, namespace string, expectedStatus string, timeout time.Duration) { dyn, err := GetDynamicClient() require.NoError(t, err) @@ -888,6 +893,8 @@ func WaitForTargetReady(t *testing.T, targetName, namespace string, timeout time ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() + t.Logf("Waiting for Target %s/%s to reach status: %s", namespace, targetName, expectedStatus) + // Check immediately first target, err := dyn.Resource(schema.GroupVersionResource{ Group: "fabric.symphony", @@ -903,13 +910,10 @@ func WaitForTargetReady(t *testing.T, targetName, namespace string, timeout time statusStr, found, err := unstructured.NestedString(provisioningStatus, "status") if err == nil && found { t.Logf("Target %s/%s current status: %s", namespace, targetName, statusStr) - if statusStr == "Succeeded" { - t.Logf("Target %s/%s is already ready", namespace, targetName) + if statusStr == expectedStatus { + t.Logf("Target %s/%s is already at expected status: %s", namespace, targetName, expectedStatus) return } - if statusStr == "Failed" { - t.Fatalf("Target %s/%s failed to deploy", namespace, targetName) - } } } } @@ -918,30 +922,9 @@ func WaitForTargetReady(t *testing.T, targetName, namespace string, timeout time for { select { case <-ctx.Done(): - // Before failing, let's check the current status one more time and provide better diagnostics - target, err := dyn.Resource(schema.GroupVersionResource{ - Group: "fabric.symphony", - Version: "v1", - Resource: "targets", - }).Namespace(namespace).Get(context.Background(), targetName, metav1.GetOptions{}) + // Report error if timeout and status doesn't match expected + t.Fatalf("Timeout waiting for Target %s/%s to reach status %s.", namespace, targetName, expectedStatus) - if err != nil { - t.Logf("Failed to get target %s/%s for final status check: %v", namespace, targetName, err) - } else { - status, found, err := unstructured.NestedMap(target.Object, "status") - if err == nil && found { - statusJSON, _ := json.MarshalIndent(status, "", " ") - t.Logf("Final target %s/%s status: %s", namespace, targetName, string(statusJSON)) - } - } - - // Also check Symphony service status - cmd := exec.Command("kubectl", "get", "pods", "-n", "default", "-l", "app.kubernetes.io/name=symphony") - if output, err := cmd.CombinedOutput(); err == nil { - t.Logf("Symphony pods at timeout:\n%s", string(output)) - } - - t.Fatalf("Timeout waiting for Target %s/%s to be ready", namespace, targetName) case <-ticker.C: target, err := dyn.Resource(schema.GroupVersionResource{ Group: "fabric.symphony", @@ -956,14 +939,11 @@ func WaitForTargetReady(t *testing.T, targetName, namespace string, timeout time if err == nil && found { statusStr, found, err := unstructured.NestedString(provisioningStatus, "status") if err == nil && found { - t.Logf("Target %s/%s status: %s", namespace, targetName, statusStr) - if statusStr == "Succeeded" { - t.Logf("Target %s/%s is ready", namespace, targetName) + t.Logf("Target %s/%s status: %s (expecting: %s)", namespace, targetName, statusStr, expectedStatus) + if statusStr == expectedStatus { + t.Logf("Target %s/%s reached expected status: %s", namespace, targetName, expectedStatus) return } - if statusStr == "Failed" { - t.Fatalf("Target %s/%s failed to deploy", namespace, targetName) - } } else { t.Logf("Target %s/%s: provisioningStatus.status not found", namespace, targetName) } @@ -996,8 +976,7 @@ func WaitForInstanceReady(t *testing.T, instanceName, namespace string, timeout for { select { case <-ctx.Done(): - t.Logf("Timeout waiting for Instance %s/%s to be ready", namespace, instanceName) - // Don't fail the test, just continue - Instance deployment might take long + t.Fatalf("Timeout waiting for Instance %s/%s to be ready", namespace, instanceName) return case <-ticker.C: instance, err := dyn.Resource(schema.GroupVersionResource{ @@ -1268,8 +1247,12 @@ func StartRemoteAgentProcess(t *testing.T, config TestConfig) *exec.Cmd { stderrTee := io.TeeReader(stderrPipe, &stderr) err = cmd.Start() - - require.NoError(t, err) + if err != nil { + t.Logf("Failed to start remote agent process: %v", err) + t.Logf("Stdout: %s", stdout.String()) + t.Logf("Stderr: %s", stderr.String()) + } + require.NoError(t, err, "Failed to start remote agent process") // Start real-time log streaming in background goroutines go streamProcessLogs(t, stdoutTee, "Remote Agent STDOUT") @@ -2332,7 +2315,7 @@ func WaitForSymphonyServiceReady(t *testing.T, timeout time.Duration) { // Check pod status cmd := exec.Command("kubectl", "get", "pods", "-n", "default", "-l", "app.kubernetes.io/name=symphony") if output, err := cmd.CombinedOutput(); err == nil { - t.Logf("Symphony pods status:\n%s", string(output)) + t.Fatalf("Symphony pods at timeout:\n%s", string(output)) } // Check service status @@ -2913,7 +2896,7 @@ func WaitForSystemdService(t *testing.T, serviceName string, timeout time.Durati for { select { case <-ctx.Done(): - t.Logf("Timeout waiting for systemd service %s to be active", serviceName) + t.Fatalf("Timeout waiting for systemd service %s to be active", serviceName) // Before failing, check the final status CheckSystemdServiceStatus(t, serviceName) // Also check if the process is actually running @@ -3168,7 +3151,7 @@ func SetupExternalMQTTBroker(t *testing.T, certs MQTTCertificatePaths, brokerPor configContent := fmt.Sprintf(` port %d cafile /mqtt/certs/%s -certfile /mqtt/certs/%s +certfile /mqtt/certs/%s keyfile /mqtt/certs/%s require_certificate true use_identity_as_username false @@ -3386,748 +3369,6 @@ func TestMQTTConnectivity(t *testing.T, brokerAddress string, brokerPort int, ce }() // Test basic connectivity (simplified - in real implementation you'd use MQTT client library) - conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", brokerPort), 10*time.Second) - if err == nil { - conn.Close() - t.Logf("MQTT broker connectivity test passed") - } else { - t.Logf("MQTT broker connectivity test failed: %v", err) - require.NoError(t, err) - } -} - -// StartSymphonyWithMQTTConfigAlternative starts Symphony with MQTT configuration using direct Helm commands -func StartSymphonyWithMQTTConfigAlternative(t *testing.T, brokerAddress string) { - helmValues := fmt.Sprintf("--set remoteAgent.remoteCert.used=true "+ - "--set remoteAgent.remoteCert.trustCAs.secretName=mqtt-ca "+ - "--set remoteAgent.remoteCert.trustCAs.secretKey=ca.crt "+ - "--set remoteAgent.remoteCert.subjects=MyRootCA;localhost "+ - "--set http.enabled=true "+ - "--set mqtt.enabled=true "+ - "--set mqtt.useTLS=true "+ - "--set mqtt.mqttClientCert.enabled=true "+ - "--set mqtt.mqttClientCert.secretName=mqtt-client-secret "+ - "--set mqtt.brokerAddress=%s "+ - "--set certManager.enabled=true "+ - "--set api.env.ISSUER_NAME=symphony-ca-issuer "+ - "--set api.env.SYMPHONY_SERVICE_NAME=symphony-service", brokerAddress) - - t.Logf("Deploying Symphony with MQTT configuration using direct Helm approach...") - - projectRoot := GetProjectRoot(t) - localenvDir := filepath.Join(projectRoot, "test", "localenv") - - // Step 1: Ensure minikube and prerequisites are ready - t.Logf("Step 1: Setting up minikube and prerequisites...") - cmd := exec.Command("mage", "cluster:ensureminikubeup") - cmd.Dir = localenvDir - if err := cmd.Run(); err != nil { - t.Logf("Warning: ensureminikubeup failed: %v", err) - } - - // Step 2: Load images with timeout - t.Logf("Step 2: Loading Docker images...") - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - cmd = exec.CommandContext(ctx, "mage", "cluster:load") - cmd.Dir = localenvDir - if err := cmd.Run(); err != nil { - t.Logf("Warning: image loading failed or timed out: %v", err) - } - - // Step 3: Deploy cert-manager and trust-manager - t.Logf("Step 3: Setting up cert-manager and trust-manager...") - ctx, cancel = context.WithTimeout(context.Background(), 3*time.Minute) - defer cancel() - cmd = exec.CommandContext(ctx, "kubectl", "apply", "-f", "https://github.com/cert-manager/cert-manager/releases/download/v1.15.3/cert-manager.yaml", "--wait") - if err := cmd.Run(); err != nil { - t.Logf("Warning: cert-manager setup failed or timed out: %v", err) - } - - // Wait for cert-manager webhook - cmd = exec.Command("kubectl", "wait", "--for=condition=ready", "pod", "-l", "app.kubernetes.io/component=webhook", "-n", "cert-manager", "--timeout=90s") - if err := cmd.Run(); err != nil { - t.Logf("Warning: cert-manager webhook not ready: %v", err) - } - - // Step 3b: Set up trust-manager - t.Logf("Step 3b: Setting up trust-manager...") - cmd = exec.Command("helm", "repo", "add", "jetstack", "https://charts.jetstack.io", "--force-update") - if err := cmd.Run(); err != nil { - t.Logf("Warning: failed to add jetstack repo: %v", err) - } - - cmd = exec.Command("helm", "upgrade", "trust-manager", "jetstack/trust-manager", "--install", "--namespace", "cert-manager", "--wait", "--set", "app.trust.namespace=cert-manager") - if err := cmd.Run(); err != nil { - t.Logf("Warning: trust-manager setup failed: %v", err) - } - - // Step 4: Deploy Symphony with a shorter timeout and without hanging - t.Logf("Step 4: Deploying Symphony Helm chart...") - chartPath := "../../packages/helm/symphony" - valuesFile1 := "../../packages/helm/symphony/values.yaml" - valuesFile2 := "symphony-ghcr-values.yaml" - - // Build the complete Helm command - helmCmd := []string{ - "helm", "upgrade", "ecosystem", chartPath, - "--install", "-n", "default", "--create-namespace", - "-f", valuesFile1, - "-f", valuesFile2, - "--set", "symphonyImage.tag=latest", - "--set", "paiImage.tag=latest", - "--timeout", "8m0s", - } - - // Add the MQTT-specific values - helmValuesList := strings.Split(helmValues, " ") - helmCmd = append(helmCmd, helmValuesList...) - - t.Logf("Running Helm command: %v", helmCmd) - - ctx, cancel = context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - cmd = exec.CommandContext(ctx, helmCmd[0], helmCmd[1:]...) - cmd.Dir = localenvDir - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - if err != nil { - t.Logf("Helm deployment stdout: %s", stdout.String()) - t.Logf("Helm deployment stderr: %s", stderr.String()) - t.Fatalf("Helm deployment failed: %v", err) - } - - t.Logf("Helm deployment completed successfully") - t.Logf("Helm stdout: %s", stdout.String()) - - // Step 5: Wait for certificates manually - t.Logf("Step 5: Waiting for Symphony certificates...") - for _, cert := range []string{"symphony-api-serving-cert", "symphony-serving-cert"} { - cmd = exec.Command("kubectl", "wait", "--for=condition=ready", "certificates", cert, "-n", "default", "--timeout=90s") - if err := cmd.Run(); err != nil { - t.Logf("Warning: certificate %s not ready: %v", cert, err) - } - } - - t.Logf("Symphony deployment with MQTT configuration completed successfully") -} - -// StartSymphonyWithMQTTConfig starts Symphony with MQTT configuration -func StartSymphonyWithMQTTConfig(t *testing.T, brokerAddress string) { - helmValues := fmt.Sprintf("--set remoteAgent.remoteCert.used=true "+ - "--set remoteAgent.remoteCert.trustCAs.secretName=mqtt-ca "+ - "--set remoteAgent.remoteCert.trustCAs.secretKey=ca.crt "+ - "--set remoteAgent.remoteCert.subjects=MyRootCA;localhost "+ - "--set http.enabled=true "+ - "--set mqtt.enabled=true "+ - "--set mqtt.useTLS=true "+ - "--set mqtt.mqttClientCert.enabled=true "+ - "--set mqtt.mqttClientCert.secretName=mqtt-client-secret"+ - "--set mqtt.brokerAddress=%s "+ - "--set certManager.enabled=true "+ - "--set api.env.ISSUER_NAME=symphony-ca-issuer "+ - "--set api.env.SYMPHONY_SERVICE_NAME=symphony-service", brokerAddress) - - t.Logf("Deploying Symphony with MQTT configuration...") - t.Logf("Command: mage cluster:deployWithSettings \"%s\"", helmValues) - - // Execute mage command from localenv directory - projectRoot := GetProjectRoot(t) - localenvDir := filepath.Join(projectRoot, "test", "localenv") - - t.Logf("StartSymphonyWithMQTTConfig: Project root: %s", projectRoot) - t.Logf("StartSymphonyWithMQTTConfig: Localenv dir: %s", localenvDir) - - // Check if localenv directory exists - if _, err := os.Stat(localenvDir); os.IsNotExist(err) { - t.Fatalf("Localenv directory does not exist: %s", localenvDir) - } - - // Pre-deployment checks to ensure cluster is ready - t.Logf("Performing pre-deployment cluster readiness checks...") - - // Check if required secrets exist - cmd := exec.Command("kubectl", "get", "secret", "mqtt-ca", "-n", "cert-manager") - if err := cmd.Run(); err != nil { - t.Logf("Warning: mqtt-ca secret not found in cert-manager namespace: %v", err) - } else { - t.Logf("mqtt-ca secret found in cert-manager namespace") - } - - cmd = exec.Command("kubectl", "get", "secret", "remote-agent-client-secret", "-n", "default") - if err := cmd.Run(); err != nil { - t.Logf("Warning: mqtt-client-secret not found in default namespace: %v", err) - } else { - t.Logf("mqtt-client-secret found in default namespace") - } - - // Check cluster resource usage before deployment - cmd = exec.Command("kubectl", "top", "nodes") - if output, err := cmd.CombinedOutput(); err == nil { - t.Logf("Pre-deployment node resource usage:\n%s", string(output)) - } - - // Try to start the deployment without timeout first to see if it responds - t.Logf("Starting MQTT deployment with reduced timeout (10 minutes) and better error handling...") - - // Reduce timeout back to 10 minutes but with better error handling - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - cmd = exec.CommandContext(ctx, "mage", "cluster:deploywithsettings", helmValues) - cmd.Dir = localenvDir - - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - // Start the command and monitor its progress - err := cmd.Start() - if err != nil { - t.Fatalf("Failed to start deployment command: %v", err) - } - - // Monitor the deployment progress in background - go func() { - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - // Check if any pods are being created - monitorCmd := exec.Command("kubectl", "get", "pods", "-n", "default", "--no-headers") - if output, err := monitorCmd.Output(); err == nil { - podCount := len(strings.Split(strings.TrimSpace(string(output)), "\n")) - if string(output) != "" { - t.Logf("Deployment progress: %d pods in default namespace", podCount) - } - } - } - } - }() - - // Wait for the command to complete - err = cmd.Wait() - - if err != nil { - t.Logf("Symphony MQTT deployment stdout: %s", stdout.String()) - t.Logf("Symphony MQTT deployment stderr: %s", stderr.String()) - - // Check for common deployment issues and provide more specific error handling - stderrStr := stderr.String() - stdoutStr := stdout.String() - - // Check if the error is related to cert-manager webhook - if strings.Contains(stderrStr, "cert-manager-webhook") && - strings.Contains(stderrStr, "x509: certificate signed by unknown authority") { - t.Logf("Detected cert-manager webhook certificate issue, attempting to fix...") - FixCertManagerWebhook(t) - - // Retry the deployment after fixing cert-manager - t.Logf("Retrying Symphony MQTT deployment after cert-manager fix...") - retryCtx, retryCancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer retryCancel() - - retryCmd := exec.CommandContext(retryCtx, "mage", "cluster:deploywithsettings", helmValues) - retryCmd.Dir = localenvDir - - var retryStdout, retryStderr bytes.Buffer - retryCmd.Stdout = &retryStdout - retryCmd.Stderr = &retryStderr - - retryErr := retryCmd.Run() - if retryErr != nil { - t.Logf("Retry MQTT deployment stdout: %s", retryStdout.String()) - t.Logf("Retry MQTT deployment stderr: %s", retryStderr.String()) - require.NoError(t, retryErr) - } else { - t.Logf("Symphony MQTT deployment succeeded after cert-manager fix") - err = nil // Clear the original error since retry succeeded - } - } else if strings.Contains(stderrStr, "context deadline exceeded") { - t.Logf("Deployment timed out after 10 minutes. This might indicate resource constraints or stuck resources.") - t.Logf("Checking cluster resources...") - - // Log some debug information about cluster state - debugCmd := exec.Command("kubectl", "get", "pods", "--all-namespaces") - if debugOutput, debugErr := debugCmd.CombinedOutput(); debugErr == nil { - t.Logf("Current cluster pods:\n%s", string(debugOutput)) - } - - debugCmd = exec.Command("kubectl", "get", "pvc", "--all-namespaces") - if debugOutput, debugErr := debugCmd.CombinedOutput(); debugErr == nil { - t.Logf("Current PVCs:\n%s", string(debugOutput)) - } - - debugCmd = exec.Command("kubectl", "top", "nodes") - if debugOutput, debugErr := debugCmd.CombinedOutput(); debugErr == nil { - t.Logf("Node resource usage at timeout:\n%s", string(debugOutput)) - } - - // Check if helm is stuck - debugCmd = exec.Command("helm", "list", "-n", "default") - if debugOutput, debugErr := debugCmd.CombinedOutput(); debugErr == nil { - t.Logf("Helm releases in default namespace:\n%s", string(debugOutput)) - } - } else if strings.Contains(stdoutStr, "Release \"ecosystem\" does not exist. Installing it now.") && - strings.Contains(stderrStr, "Error: context deadline exceeded") { - t.Logf("Helm installation timed out. This is likely due to resource constraints or dependency issues.") - } - } - require.NoError(t, err) - - t.Logf("Helm deployment command completed successfully") - t.Logf("Started Symphony with MQTT configuration") -} - -// CleanupExternalMQTTBroker cleans up external MQTT broker Docker container -func CleanupExternalMQTTBroker(t *testing.T) { - t.Logf("Cleaning up external MQTT broker Docker container...") - - // Stop and remove Docker container - exec.Command("docker", "stop", "mqtt-broker").Run() - exec.Command("docker", "rm", "mqtt-broker").Run() - - t.Logf("External MQTT broker cleanup completed") -} - -// CleanupMQTTBroker cleans up MQTT broker deployment -func CleanupMQTTBroker(t *testing.T) { - t.Logf("Cleaning up MQTT broker...") - - // Delete broker deployment and service - exec.Command("kubectl", "delete", "deployment", "mosquitto-broker", "-n", "default", "--ignore-not-found=true").Run() - exec.Command("kubectl", "delete", "service", "mosquitto-service", "-n", "default", "--ignore-not-found=true").Run() - exec.Command("kubectl", "delete", "configmap", "mosquitto-config", "-n", "default", "--ignore-not-found=true").Run() - exec.Command("kubectl", "delete", "secret", "mqtt-server-certs", "-n", "default", "--ignore-not-found=true").Run() - - t.Logf("MQTT broker cleanup completed") -} - -// CleanupMQTTCASecret cleans up MQTT CA secret from cert-manager namespace -func CleanupMQTTCASecret(t *testing.T, secretName string) { - cmd := exec.Command("kubectl", "delete", "secret", secretName, "-n", "cert-manager", "--ignore-not-found=true") - cmd.Run() - t.Logf("Cleaned up MQTT CA secret %s from cert-manager namespace", secretName) -} - -// CleanupMQTTClientSecret cleans up MQTT client certificate secret from namespace -func CleanupMQTTClientSecret(t *testing.T, namespace, secretName string) { - cmd := exec.Command("kubectl", "delete", "secret", secretName, "-n", namespace, "--ignore-not-found=true") - cmd.Run() - t.Logf("Cleaned up MQTT client secret %s from namespace %s", secretName, namespace) -} - -// StartRemoteAgentProcessComplete starts remote agent as a complete process with full lifecycle management -func StartRemoteAgentProcessComplete(t *testing.T, config TestConfig) *exec.Cmd { - // First build the binary - binaryPath := BuildRemoteAgentBinary(t, config) - - // Phase 1: Get working certificates using bootstrap cert (HTTP protocol only) - var workingCertPath, workingKeyPath string - if config.Protocol == "http" { - t.Logf("Using HTTP protocol, obtaining working certificates...") - workingCertPath, workingKeyPath = GetWorkingCertificates(t, config.BaseURL, config.TargetName, config.Namespace, - config.ClientCertPath, config.ClientKeyPath, filepath.Dir(config.ConfigPath)) - } else { - // For MQTT, use bootstrap certificates directly - workingCertPath = config.ClientCertPath - workingKeyPath = config.ClientKeyPath - } - - // Phase 2: Start remote agent with working certificates - args := []string{ - "-config", config.ConfigPath, - "-client-cert", workingCertPath, - "-client-key", workingKeyPath, - "-target-name", config.TargetName, - "-namespace", config.Namespace, - "-topology", config.TopologyPath, - "-protocol", config.Protocol, - } - - if config.CACertPath != "" { - args = append(args, "-ca-cert", config.CACertPath) - } - - // Log the complete binary execution command to test output - t.Logf("=== Remote Agent Process Execution Command ===") - t.Logf("Binary Path: %s", binaryPath) - t.Logf("Working Directory: %s", filepath.Join(config.ProjectRoot, "remote-agent", "bootstrap")) - t.Logf("Command Line: %s %s", binaryPath, strings.Join(args, " ")) - t.Logf("Full Arguments: %v", args) - t.Logf("===============================================") - - t.Logf("Starting remote agent process with arguments: %v", args) - cmd := exec.Command(binaryPath, args...) - // Set working directory to where the binary is located - cmd.Dir = filepath.Join(config.ProjectRoot, "remote-agent", "bootstrap") - - // Create pipes for real-time log streaming - stdoutPipe, err := cmd.StdoutPipe() - require.NoError(t, err, "Failed to create stdout pipe") - - stderrPipe, err := cmd.StderrPipe() - require.NoError(t, err, "Failed to create stderr pipe") - - // Also capture to buffers for final output - var stdout, stderr bytes.Buffer - stdoutTee := io.TeeReader(stdoutPipe, &stdout) - stderrTee := io.TeeReader(stderrPipe, &stderr) - - err = cmd.Start() - require.NoError(t, err, "Failed to start remote agent process") - - // Start real-time log streaming in background goroutines - go streamProcessLogs(t, stdoutTee, "Process STDOUT") - go streamProcessLogs(t, stderrTee, "Process STDERR") - - // Final output logging when process exits - go func() { - cmd.Wait() - if stdout.Len() > 0 { - t.Logf("Remote agent process final stdout: %s", stdout.String()) - } - if stderr.Len() > 0 { - t.Logf("Remote agent process final stderr: %s", stderr.String()) - } - }() - - // Setup automatic cleanup - t.Cleanup(func() { - CleanupRemoteAgentProcess(t, cmd) - }) - - t.Logf("Started remote agent process with PID: %d using working certificates", cmd.Process.Pid) - t.Logf("Remote agent process logs will be shown in real-time with [Process STDOUT] and [Process STDERR] prefixes") - return cmd -} - -// StartRemoteAgentProcessWithoutCleanup starts remote agent as a complete process but doesn't set up automatic cleanup -// This function is used for process testing where we test direct process communication. -// For HTTP protocol: we get the binary from server endpoint and run it directly as a process -// For other protocols: we build the binary locally and run it as a process -// The caller is responsible for calling CleanupRemoteAgentProcess when needed -func StartRemoteAgentProcessWithoutCleanup(t *testing.T, config TestConfig) *exec.Cmd { - var binaryPath string - - // For HTTP protocol, get binary from server endpoint instead of building locally - if config.Protocol == "http" { - t.Logf("HTTP protocol detected - getting binary from server endpoint...") - // For HTTP process testing, get the binary from the server endpoint - binaryPath = GetRemoteAgentBinaryFromServer(t, config) - } else { - // For MQTT and other protocols, build the binary locally - t.Logf("Non-HTTP protocol (%s) detected - building binary locally...", config.Protocol) - binaryPath = BuildRemoteAgentBinary(t, config) - } - - // Phase 1: Get working certificates using bootstrap cert (HTTP protocol only) - var workingCertPath, workingKeyPath string - if config.Protocol == "http" { - t.Logf("Using HTTP protocol, obtaining working certificates...") - workingCertPath, workingKeyPath = GetWorkingCertificates(t, config.BaseURL, config.TargetName, config.Namespace, - config.ClientCertPath, config.ClientKeyPath, filepath.Dir(config.ConfigPath)) - } else { - // For MQTT, use bootstrap certificates directly - workingCertPath = config.ClientCertPath - workingKeyPath = config.ClientKeyPath - } - - // Phase 2: Start remote agent with working certificates - args := []string{ - "-config", config.ConfigPath, - "-client-cert", workingCertPath, - "-client-key", workingKeyPath, - "-target-name", config.TargetName, - "-namespace", config.Namespace, - "-topology", config.TopologyPath, - "-protocol", config.Protocol, - } - - if config.CACertPath != "" { - args = append(args, "-ca-cert", config.CACertPath) - } - - // Log the complete binary execution command to test output - t.Logf("=== Remote Agent Process Execution Command ===") - t.Logf("Binary Path: %s", binaryPath) - t.Logf("Working Directory: %s", filepath.Join(config.ProjectRoot, "remote-agent", "bootstrap")) - t.Logf("Command Line: %s %s", binaryPath, strings.Join(args, " ")) - t.Logf("Full Arguments: %v", args) - t.Logf("===============================================") - - t.Logf("Starting remote agent process with arguments: %v", args) - cmd := exec.Command(binaryPath, args...) - // Set working directory to where the binary is located - cmd.Dir = filepath.Join(config.ProjectRoot, "remote-agent", "bootstrap") - - // Create pipes for real-time log streaming - stdoutPipe, err := cmd.StdoutPipe() - require.NoError(t, err, "Failed to create stdout pipe") - - stderrPipe, err := cmd.StderrPipe() - require.NoError(t, err, "Failed to create stderr pipe") - - // Also capture to buffers for final output - var stdout, stderr bytes.Buffer - stdoutTee := io.TeeReader(stdoutPipe, &stdout) - stderrTee := io.TeeReader(stderrPipe, &stderr) - - err = cmd.Start() - require.NoError(t, err, "Failed to start remote agent process") - - // Start real-time log streaming in background goroutines - go streamProcessLogs(t, stdoutTee, "Process STDOUT") - go streamProcessLogs(t, stderrTee, "Process STDERR") - - // Final output logging when process exits with enhanced error reporting - go func() { - exitErr := cmd.Wait() - exitTime := time.Now() - - if exitErr != nil { - t.Logf("Remote agent process exited with error at %v: %v", exitTime, exitErr) - if exitError, ok := exitErr.(*exec.ExitError); ok { - t.Logf("Process exit code: %d", exitError.ExitCode()) - } - } else { - t.Logf("Remote agent process exited normally at %v", exitTime) - } - - if stdout.Len() > 0 { - t.Logf("Remote agent process final stdout: %s", stdout.String()) - } - if stderr.Len() > 0 { - t.Logf("Remote agent process final stderr: %s", stderr.String()) - } - - // Log process runtime information - if cmd.ProcessState != nil { - t.Logf("Process runtime information - PID: %d, System time: %v, User time: %v", - cmd.Process.Pid, cmd.ProcessState.SystemTime(), cmd.ProcessState.UserTime()) - } - }() - - // NOTE: No automatic cleanup - caller must call CleanupRemoteAgentProcess manually - - t.Logf("Started remote agent process with PID: %d using working certificates", cmd.Process.Pid) - t.Logf("Remote agent process logs will be shown in real-time with [Process STDOUT] and [Process STDERR] prefixes") - return cmd -} - -// WaitForProcessHealthy waits for a process to be healthy and ready -func WaitForProcessHealthy(t *testing.T, cmd *exec.Cmd, timeout time.Duration) { - t.Logf("Waiting for remote agent process to be healthy...") - - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - ticker := time.NewTicker(2 * time.Second) - defer ticker.Stop() - - startTime := time.Now() - - for { - select { - case <-ctx.Done(): - t.Fatalf("Timeout waiting for process to be healthy after %v", timeout) - case <-ticker.C: - // Check if process is still running - if cmd.ProcessState != nil && cmd.ProcessState.Exited() { - t.Fatalf("Process exited unexpectedly: %s", cmd.ProcessState.String()) - } - - elapsed := time.Since(startTime) - t.Logf("Process health check: PID %d running for %v", cmd.Process.Pid, elapsed) - - // Process is considered healthy if it's been running for at least 10 seconds - // without exiting (indicating successful startup and connection) - if elapsed >= 10*time.Second { - t.Logf("Process is healthy and ready (running for %v)", elapsed) - return - } - } - } -} - -// CleanupRemoteAgentProcess cleans up the remote agent process -func CleanupRemoteAgentProcess(t *testing.T, cmd *exec.Cmd) { - if cmd == nil { - t.Logf("No process to cleanup (cmd is nil)") - return - } - - if cmd.Process == nil { - t.Logf("No process to cleanup (cmd.Process is nil)") - return - } - - pid := cmd.Process.Pid - t.Logf("Cleaning up remote agent process with PID: %d", pid) - - // Check if process is already dead - if cmd.ProcessState != nil && cmd.ProcessState.Exited() { - t.Logf("Process PID %d already exited: %s", pid, cmd.ProcessState.String()) - return - } - - // Try to check if process is still alive using signal 0 - if err := cmd.Process.Signal(syscall.Signal(0)); err != nil { - t.Logf("Process PID %d is not alive or not accessible: %v", pid, err) - return - } - - t.Logf("Process PID %d is alive, attempting graceful termination...", pid) - - // First try graceful termination with SIGTERM - if err := cmd.Process.Signal(syscall.SIGTERM); err != nil { - t.Logf("Failed to send SIGTERM to PID %d: %v", pid, err) - } else { - t.Logf("Sent SIGTERM to PID %d, waiting for graceful shutdown...", pid) - } - - // Wait for graceful shutdown with timeout - gracefulTimeout := 5 * time.Second - done := make(chan error, 1) - go func() { - done <- cmd.Wait() - }() - - select { - case err := <-done: - if err != nil { - t.Logf("Process PID %d exited with error: %v", pid, err) - } else { - t.Logf("Process PID %d exited gracefully", pid) - } - return - case <-time.After(gracefulTimeout): - t.Logf("Process PID %d did not exit gracefully within %v, force killing...", pid, gracefulTimeout) - } - - // Force kill if graceful shutdown failed - if err := cmd.Process.Kill(); err != nil { - t.Logf("Failed to kill process PID %d: %v", pid, err) - - // Last resort: try to kill using OS-specific methods - if runtime.GOOS == "windows" { - killCmd := exec.Command("taskkill", "/F", "/PID", fmt.Sprintf("%d", pid)) - if killErr := killCmd.Run(); killErr != nil { - t.Logf("Failed to force kill process PID %d using taskkill: %v", pid, killErr) - } else { - t.Logf("Force killed process PID %d using taskkill", pid) - } - } else { - killCmd := exec.Command("kill", "-9", fmt.Sprintf("%d", pid)) - if killErr := killCmd.Run(); killErr != nil { - t.Logf("Failed to force kill process PID %d using kill -9: %v", pid, killErr) - } else { - t.Logf("Force killed process PID %d using kill -9", pid) - } - } - } else { - t.Logf("Process PID %d force killed successfully", pid) - } - - // Final wait with timeout - select { - case <-done: - t.Logf("Process PID %d cleanup completed", pid) - case <-time.After(3 * time.Second): - t.Logf("Warning: Process PID %d cleanup timed out, but continuing", pid) - } -} - -// CleanupStaleRemoteAgentProcesses kills any stale remote-agent processes that might be left from previous test runs -func CleanupStaleRemoteAgentProcesses(t *testing.T) { - t.Logf("Checking for stale remote-agent processes...") - - var cmd *exec.Cmd - if runtime.GOOS == "windows" { - // Windows: Use tasklist and taskkill - cmd = exec.Command("tasklist", "/FI", "IMAGENAME eq remote-agent*", "/FO", "CSV") - } else { - // Unix/Linux: Use ps and grep - cmd = exec.Command("ps", "aux") - } - - output, err := cmd.Output() - if err != nil { - t.Logf("Could not list processes to check for stale remote-agent: %v", err) - return - } - - outputStr := string(output) - if runtime.GOOS == "windows" { - // Windows: Look for remote-agent processes - if strings.Contains(strings.ToLower(outputStr), "remote-agent") { - t.Logf("Found potential stale remote-agent processes on Windows, attempting cleanup...") - killCmd := exec.Command("taskkill", "/F", "/IM", "remote-agent*") - if err := killCmd.Run(); err != nil { - t.Logf("Failed to kill stale remote-agent processes: %v", err) - } else { - t.Logf("Killed stale remote-agent processes") - } - } - } else { - // Unix/Linux: Look for remote-agent processes - lines := strings.Split(outputStr, "\n") - for _, line := range lines { - if strings.Contains(line, "remote-agent") && !strings.Contains(line, "grep") { - t.Logf("Found stale remote-agent process: %s", line) - // Extract PID (second column in ps aux output) - fields := strings.Fields(line) - if len(fields) >= 2 { - pid := fields[1] - killCmd := exec.Command("kill", "-9", pid) - if err := killCmd.Run(); err != nil { - t.Logf("Failed to kill process PID %s: %v", pid, err) - } else { - t.Logf("Killed stale process PID %s", pid) - } - } - } - } - } - - t.Logf("Stale process cleanup completed") -} - -// TestMQTTConnectionWithClientCert tests MQTT connection using specific client certificates -// This function attempts to make an actual MQTT connection (not just TLS) to verify certificate authentication -func TestMQTTConnectionWithClientCert(t *testing.T, brokerAddress string, brokerPort int, caCertPath, clientCertPath, clientKeyPath string) bool { - t.Logf("=== TESTING MQTT CONNECTION WITH CLIENT CERT ===") - t.Logf("Broker: %s:%d", brokerAddress, brokerPort) - t.Logf("CA Cert: %s", caCertPath) - t.Logf("Client Cert: %s", clientCertPath) - t.Logf("Client Key: %s", clientKeyPath) - - // First verify all certificate files exist - if !FileExists(caCertPath) { - t.Logf("❌ CA certificate file does not exist: %s", caCertPath) - return false - } - if !FileExists(clientCertPath) { - t.Logf("❌ Client certificate file does not exist: %s", clientCertPath) - return false - } - if !FileExists(clientKeyPath) { - t.Logf("❌ Client key file does not exist: %s", clientKeyPath) - return false - } - - // Test TLS connection first - t.Logf("Step 1: Testing TLS connection...") - DebugTLSConnection(t, brokerAddress, brokerPort, caCertPath, clientCertPath, clientKeyPath) - - // For now, we'll use a simple TLS test since implementing full MQTT client would require additional dependencies // In a more complete implementation, you could use an MQTT client library like: // - github.com/eclipse/paho.mqtt.golang // - github.com/at-wat/mqtt-go @@ -4135,11 +3376,11 @@ func TestMQTTConnectionWithClientCert(t *testing.T, brokerAddress string, broker t.Logf("Step 2: Simulating MQTT client connection test...") // Use openssl s_client to test the connection more thoroughly - cmd := exec.Command("timeout", "10s", "openssl", "s_client", + cmd = exec.Command("timeout", "10s", "openssl", "s_client", "-connect", fmt.Sprintf("%s:%d", brokerAddress, brokerPort), - "-CAfile", caCertPath, - "-cert", clientCertPath, - "-key", clientKeyPath, + "-CAfile", certs.CACert, + "-cert", certs.RemoteAgentCert, + "-key", certs.RemoteAgentKey, "-verify_return_error", "-quiet") @@ -4148,19 +3389,17 @@ func TestMQTTConnectionWithClientCert(t *testing.T, brokerAddress string, broker cmd.Stderr = &stderr cmd.Stdin = strings.NewReader("CONNECT\n") - err := cmd.Run() + err = cmd.Run() if err != nil { t.Logf("❌ MQTT/TLS connection test failed: %v", err) t.Logf("stdout: %s", stdout.String()) t.Logf("stderr: %s", stderr.String()) - return false + } else { + t.Logf("✅ MQTT/TLS connection test passed") + t.Logf("Connection output: %s", stdout.String()) } - t.Logf("✅ MQTT/TLS connection test passed") - t.Logf("Connection output: %s", stdout.String()) - t.Logf("=== MQTT CONNECTION TEST COMPLETED ===") - return true } // VerifyTargetTopologyUpdate verifies that a target has been updated with topology information