diff --git a/Makefile b/Makefile index 528bb0ce7..be2a4467d 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,7 @@ export LOKI_OPERATOR_CHANNEL?=stable-6.4 IMAGE_LOGGING_VECTOR?=quay.io/openshift-logging/vector:v0.54.0 IMAGE_LOGFILEMETRICEXPORTER?=quay.io/openshift-logging/log-file-metric-exporter:latest IMAGE_LOGGING_EVENTROUTER?=quay.io/openshift-logging/eventrouter:v0.5.0 +IMAGE_TLS_SCANNER?=quay.io/jcantril/tls-scanner:latest REPLICAS?=0 export E2E_TEST_EXCLUDES?=flowcontrol @@ -224,6 +225,7 @@ test-env: ## Echo test environment, useful for running tests outside of the Make @echo \ RELATED_IMAGE_VECTOR=$(IMAGE_LOGGING_VECTOR) \ RELATED_IMAGE_LOG_FILE_METRIC_EXPORTER=$(IMAGE_LOGFILEMETRICEXPORTER) \ + IMAGE_TLS_SCANNER=$(IMAGE_TLS_SCANNER) \ .PHONY: test-functional test-functional: test-functional-benchmarker-vector @@ -320,6 +322,7 @@ test-e2e: $(JUNITREPORT) RELATED_IMAGE_VECTOR=$(IMAGE_LOGGING_VECTOR) \ RELATED_IMAGE_LOG_FILE_METRIC_EXPORTER=$(IMAGE_LOGFILEMETRICEXPORTER) \ IMAGE_LOGGING_EVENTROUTER=$(IMAGE_LOGGING_EVENTROUTER) \ + IMAGE_TLS_SCANNER=$(IMAGE_TLS_SCANNER) \ EXCLUDES="$(E2E_TEST_EXCLUDES)" CLF_EXCLUDES="$(CLF_TEST_EXCLUDES)" LOG_LEVEL=3 hack/test-e2e-olm.sh .PHONY: test-e2e-local @@ -329,6 +332,7 @@ test-e2e-local: $(JUNITREPORT) deploy-image RELATED_IMAGE_VECTOR=$(IMAGE_LOGGING_VECTOR) \ RELATED_IMAGE_LOG_FILE_METRIC_EXPORTER=$(IMAGE_LOGFILEMETRICEXPORTER) \ IMAGE_LOGGING_EVENTROUTER=$(IMAGE_LOGGING_EVENTROUTER) \ + IMAGE_TLS_SCANNER=$(IMAGE_TLS_SCANNER) \ CLF_INCLUDES=$(CLF_TEST_INCLUDES) \ EXCLUDES=$(E2E_TEST_EXCLUDES) \ IMAGE_CLUSTER_LOGGING_OPERATOR=image-registry.openshift-image-registry.svc:5000/openshift/origin-cluster-logging-operator:$(CURRENT_BRANCH) \ diff --git a/bundle/manifests/cluster-logging.clusterserviceversion.yaml b/bundle/manifests/cluster-logging.clusterserviceversion.yaml index 849cd8c3f..89dce6f35 100644 --- a/bundle/manifests/cluster-logging.clusterserviceversion.yaml +++ b/bundle/manifests/cluster-logging.clusterserviceversion.yaml @@ -2539,6 +2539,7 @@ spec: kubectl.kubernetes.io/default-container: cluster-logging-operator target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' labels: + app.kubernetes.io/name: cluster-logging-operator control-plane: controller-manager name: cluster-logging-operator spec: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index ded083b6b..0a6016197 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -13,6 +13,7 @@ spec: kubectl.kubernetes.io/default-container: cluster-logging-operator target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' labels: + app.kubernetes.io/name: cluster-logging-operator name: cluster-logging-operator control-plane: controller-manager spec: diff --git a/olm_deploy/scripts/env.sh b/olm_deploy/scripts/env.sh index 93c4058e4..2ccc403d7 100755 --- a/olm_deploy/scripts/env.sh +++ b/olm_deploy/scripts/env.sh @@ -4,7 +4,7 @@ set -eou pipefail LOGGING_VERSION=${LOGGING_VERSION:-6.1} LOGGING_VECTOR_VERSION=${LOGGING_VECTOR_VERSION:-v0.54.0} -LOGGING_LOG_FILE_METRIC_EXPORTER_VERSION=${LOGGING_LOG_FILE_METRIC_EXPORTER_VERSION:-6.1} +LOGGING_LOG_FILE_METRIC_EXPORTER_VERSION=${LOGGING_LOG_FILE_METRIC_EXPORTER_VERSION:-latest} LOGGING_IS=${LOGGING_IS:-openshift-logging} export IMAGE_CLUSTER_LOGGING_OPERATOR_REGISTRY=${IMAGE_CLUSTER_LOGGING_OPERATOR_REGISTRY:-quay.io/${LOGGING_IS}/cluster-logging-operator-registry:${LOGGING_VERSION}} diff --git a/test/e2e/operator/tls/e2e_test.go 
b/test/e2e/operator/tls/e2e_test.go new file mode 100644 index 000000000..719019972 --- /dev/null +++ b/test/e2e/operator/tls/e2e_test.go @@ -0,0 +1,217 @@ +package tls + +import ( + "context" + "fmt" + + clolog "github.com/ViaQ/logerr/v2/log/static" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + configv1 "github.com/openshift/api/config/v1" + obs "github.com/openshift/cluster-logging-operator/api/observability/v1" + "github.com/openshift/cluster-logging-operator/internal/constants" + internalruntime "github.com/openshift/cluster-logging-operator/internal/runtime" + obsruntime "github.com/openshift/cluster-logging-operator/internal/runtime/observability" + internaltls "github.com/openshift/cluster-logging-operator/internal/tls" + "github.com/openshift/cluster-logging-operator/internal/utils/sets" + "github.com/openshift/cluster-logging-operator/test/client" + framework "github.com/openshift/cluster-logging-operator/test/framework/e2e" + tlsscanner "github.com/openshift/cluster-logging-operator/test/framework/e2e/tls" + "github.com/openshift/cluster-logging-operator/test/helpers/loki" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + crclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +var _ = Describe("[E2E][Operator][TLS] TLS Scanner Validation", func() { + const ( + forwarderName = "tls-test-collector" + ) + + var ( + e2e *framework.E2ETestFramework + err error + k8sClient crclient.Client + profileSpec configv1.TLSProfileSpec + ) + + BeforeEach(func() { + e2e = framework.NewE2ETestFramework() + + // Create a controller-runtime client with configv1 scheme for fetching APIServer TLS profile + // This matches the production scheme setup in cmd/main.go + scheme := runtime.NewScheme() + Expect(configv1.AddToScheme(scheme)).To(Succeed()) + k8sClient, err = crclient.New(e2e.RestConfig, crclient.Options{Scheme: scheme}) + Expect(err).To(BeNil()) + + tlsProfile, err := internaltls.FetchAPIServerTlsProfile(k8sClient) + Expect(err).To(BeNil(), "Failed to fetch APIServer TLS profile") + + By("Fetching the cluster TLS profile") + profileSpec = internaltls.GetClusterTLSProfileSpec(tlsProfile) + clolog.Info("Cluster TLS Profile", "spec", profileSpec) + }) + + AfterEach(func() { + e2e.Cleanup() + }, framework.DefaultCleanUpTimeout) + + var ( + runTlsScanner = func(e2e *framework.E2ETestFramework, scanNS string) (results []tlsscanner.ScanResult, err error) { + By("Deploying TLS Scanner") + scanner := tlsscanner.NewScanner(e2e.KubeClient, &e2e.CleanupFns) + e2e.AddCleanup(func() error { + return e2e.KubeClient.BatchV1().Jobs(scanNS).Delete(context.TODO(), tlsscanner.Name, metav1.DeleteOptions{}) + }) + job, err := scanner.Deploy(scanNS, scanNS) + Expect(err).To(BeNil(), "Failed to deploy TLS Scanner") + Expect(job).NotTo(BeNil()) + + By("Waiting for TLS Scanner to complete") + err = scanner.WaitForCompletion(job, tlsscanner.JobTimeout) + Expect(err).To(BeNil(), "TLS Scanner job did not complete successfully. 
It may not have matched any components to scan")
+
+			By("Retrieving TLS scan results")
+			results, err = scanner.GetResults(job)
+			Expect(err).To(BeNil(), "Failed to retrieve TLS scan results")
+			Expect(results).NotTo(BeEmpty(), "TLS Scanner returned no results")
+			return results, err
+		}
+
+		verifyResultsHaveComponents = func(results []tlsscanner.ScanResult, expComponents ...string) {
+			components := sets.NewString()
+			for _, result := range results {
+				components.Insert(result.Component)
+			}
+			Expect(components.List()).To(ConsistOf(expComponents))
+		}
+	)
+
+	Context("when inspecting deployed ClusterLogForwarder", func() {
+
+		var (
+			testNS string
+			clf    *obs.ClusterLogForwarder
+			l      *loki.Receiver
+			sa     *corev1.ServiceAccount
+		)
+
+		BeforeEach(func() {
+
+			testNS = e2e.CreateTestNamespace(func(namespace *corev1.Namespace) {
+				namespace.Labels = map[string]string{
+					"pod-security.kubernetes.io/audit":   "privileged",
+					"pod-security.kubernetes.io/enforce": "privileged",
+					"pod-security.kubernetes.io/warn":    "privileged",
+				}
+			})
+
+			// Create a service account for the collector with permissions to collect application and infrastructure logs
+			sa, err = e2e.BuildAuthorizationFor(testNS, forwarderName).
+				AllowClusterRole(framework.ClusterRoleCollectApplicationLogs).
+				AllowClusterRole(framework.ClusterRoleCollectInfrastructureLogs).
+				Create()
+			Expect(err).To(BeNil())
+
+			// Deploy the Loki receiver
+			l = loki.NewReceiver(testNS, "loki-server")
+			Expect(l.Create(client.Get())).To(Succeed())
+
+			// Deploy a ClusterLogForwarder with both default inputs and receiver inputs
+			// to ensure all input receiver types are running for TLS scanning
+			clf = obsruntime.NewClusterLogForwarder(testNS, forwarderName, internalruntime.Initialize, func(clf *obs.ClusterLogForwarder) {
+				clf.Spec.ServiceAccount.Name = sa.Name
+				clf.Spec.Inputs = []obs.InputSpec{
+					{
+						Name: "http-receiver",
+						Type: obs.InputTypeReceiver,
+						Receiver: &obs.ReceiverSpec{
+							Type: obs.ReceiverTypeHTTP,
+							Port: 8080,
+							HTTP: &obs.HTTPReceiver{
+								Format: obs.HTTPReceiverFormatKubeAPIAudit,
+							},
+						},
+					},
+					{
+						Name: "syslog-receiver",
+						Type: obs.InputTypeReceiver,
+						Receiver: &obs.ReceiverSpec{
+							Type: obs.ReceiverTypeSyslog,
+							Port: 10514,
+						},
+					},
+				}
+				clf.Spec.Outputs = []obs.OutputSpec{
+					{
+						Name: "loki-output",
+						Type: obs.OutputTypeLoki,
+						Loki: &obs.Loki{
+							URLSpec: obs.URLSpec{
+								URL: l.InternalURL("").String(),
+							},
+						},
+					},
+				}
+				clf.Spec.Pipelines = []obs.PipelineSpec{
+					{
+						Name:       "test-app",
+						InputRefs:  []string{string(obs.InputTypeApplication)},
+						OutputRefs: []string{"loki-output"},
+					},
+					{
+						Name:       "test-receivers",
+						InputRefs:  []string{"http-receiver", "syslog-receiver"},
+						OutputRefs: []string{"loki-output"},
+					},
+				}
+			})
+
+			if err := e2e.CreateObservabilityClusterLogForwarder(clf); err != nil {
+				Fail(fmt.Sprintf("Unable to create ClusterLogForwarder: %v", err))
+			}
+
+			if err := e2e.WaitForDaemonSet(clf.Namespace, clf.Name); err != nil {
+				Fail(fmt.Sprintf("Failed waiting for collector DaemonSet to be ready: %v", err))
+			}
+		})
+
+		It("should validate the TLS server configurations match the cluster TLS profile", func() {
+			results, _ := runTlsScanner(e2e, testNS)
+			clolog.Info("TLS Scanner found endpoints", "count", len(results))
+			clolog.V(2).Info("TLS endpoint scanned", "result", results)
+			verifyResultsHaveComponents(results, constants.VectorName)
+
+			By("Validating TLS compliance")
+			err = tlsscanner.ValidateCompliance(results, profileSpec)
+			Expect(err).To(BeNil(), "TLS compliance validation failed")
+		})
+	})
+
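+	// The operator and the LogFileMetricExporter run in the openshift-logging
+	// namespace, so the following scenario scans constants.OpenshiftNS directly
+	// rather than a generated test namespace.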
Context("when inspecting the operator and LogFileMetricExporter", func() { + It("should validate the TLS configurations matches the cluster TLS profile", func() { + + // Deploy LFME + lfme := internalruntime.NewLogFileMetricExporter(constants.OpenshiftNS, constants.SingletonName) + e2e.AddCleanup(func() error { + return e2e.KubeClient.AppsV1().DaemonSets(constants.OpenshiftNS).Delete(context.TODO(), lfme.Name, metav1.DeleteOptions{}) + }) + if err := e2e.Create(lfme); err != nil { + Fail(fmt.Sprintf("Unable to create LogFileMetricExporter: %v", err)) + } + if err := e2e.WaitForDaemonSet(lfme.Namespace, constants.LogfilesmetricexporterName); err != nil { + Fail(fmt.Sprintf("Failed waiting for lfme DaemonSet to be ready: %v", err)) + } + + results, _ := runTlsScanner(e2e, constants.OpenshiftNS) + clolog.Info("TLS Scanner found endpoints", "count", len(results)) + verifyResultsHaveComponents(results, constants.ClusterLoggingOperator, constants.LogfilesmetricexporterName) + + By("Validating TLS compliance") + err = tlsscanner.ValidateCompliance(results, profileSpec) + Expect(err).To(BeNil(), "TLS compliance validation failed") + }) + }) +}) diff --git a/test/e2e/operator/tls/suite_test.go b/test/e2e/operator/tls/suite_test.go new file mode 100644 index 000000000..cd0ec8e97 --- /dev/null +++ b/test/e2e/operator/tls/suite_test.go @@ -0,0 +1,13 @@ +package tls + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestSuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "[e2e][operator][tls] Suite") +} diff --git a/test/framework/e2e/framework.go b/test/framework/e2e/framework.go index b81baf16c..32a43ef43 100644 --- a/test/framework/e2e/framework.go +++ b/test/framework/e2e/framework.go @@ -177,16 +177,22 @@ func (tc *E2ETestFramework) DeployCURLLogGeneratorWithNamespaceAndEndpoint(names return client.Get().WaitFor(pod, client.PodRunning) } -func (tc *E2ETestFramework) CreateTestNamespace() string { - return tc.CreateTestNamespaceWithPrefix("clo-test") +// CreateTestNamespace with a default prefix of 'clo-test'. Optionally include a list of functions to modify the object +// before it is created +func (tc *E2ETestFramework) CreateTestNamespace(visitors ...func(*corev1.Namespace)) string { + return tc.CreateTestNamespaceWithPrefix("clo-test", visitors...) } -func (tc *E2ETestFramework) CreateTestNamespaceWithPrefix(prefix string) string { +// CreateTestNamespaceWithPrefix using the given prefix. Optionally include a list of functions to modify the object +// before it is created +func (tc *E2ETestFramework) CreateTestNamespaceWithPrefix(prefix string, visitors ...func(*corev1.Namespace)) string { name := fmt.Sprintf("%s-%d", prefix, rand.Intn(10000)) //nolint:gosec - return tc.CreateNamespace(name) + return tc.CreateNamespace(name, visitors...) } -func (tc *E2ETestFramework) CreateNamespace(name string) string { +// CreateNamespace using the given name. 
+			results, _ := runTlsScanner(e2e, constants.OpenshiftNS)
+			clolog.Info("TLS Scanner found endpoints", "count", len(results))
+			verifyResultsHaveComponents(results, constants.ClusterLoggingOperator, constants.LogfilesmetricexporterName)
+
+			By("Validating TLS compliance")
+			err = tlsscanner.ValidateCompliance(results, profileSpec)
+			Expect(err).To(BeNil(), "TLS compliance validation failed")
+		})
+	})
+})
diff --git a/test/e2e/operator/tls/suite_test.go b/test/e2e/operator/tls/suite_test.go
new file mode 100644
index 000000000..cd0ec8e97
--- /dev/null
+++ b/test/e2e/operator/tls/suite_test.go
@@ -0,0 +1,13 @@
+package tls
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestSuite(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "[e2e][operator][tls] Suite")
+}
diff --git a/test/framework/e2e/framework.go b/test/framework/e2e/framework.go
index b81baf16c..32a43ef43 100644
--- a/test/framework/e2e/framework.go
+++ b/test/framework/e2e/framework.go
@@ -177,16 +177,22 @@ func (tc *E2ETestFramework) DeployCURLLogGeneratorWithNamespaceAndEndpoint(names
 	return client.Get().WaitFor(pod, client.PodRunning)
 }
 
-func (tc *E2ETestFramework) CreateTestNamespace() string {
-	return tc.CreateTestNamespaceWithPrefix("clo-test")
+// CreateTestNamespace creates a test namespace with the default prefix 'clo-test'. Optional visitor functions may
+// modify the object before it is created.
+func (tc *E2ETestFramework) CreateTestNamespace(visitors ...func(*corev1.Namespace)) string {
+	return tc.CreateTestNamespaceWithPrefix("clo-test", visitors...)
 }
 
-func (tc *E2ETestFramework) CreateTestNamespaceWithPrefix(prefix string) string {
+// CreateTestNamespaceWithPrefix creates a test namespace using the given prefix. Optional visitor functions may
+// modify the object before it is created.
+func (tc *E2ETestFramework) CreateTestNamespaceWithPrefix(prefix string, visitors ...func(*corev1.Namespace)) string {
 	name := fmt.Sprintf("%s-%d", prefix, rand.Intn(10000)) //nolint:gosec
-	return tc.CreateNamespace(name)
+	return tc.CreateNamespace(name, visitors...)
 }
 
-func (tc *E2ETestFramework) CreateNamespace(name string) string {
+// CreateNamespace creates a namespace with the given name. Optional visitor functions may modify the object
+// before it is created.
+func (tc *E2ETestFramework) CreateNamespace(name string, visitors ...func(*corev1.Namespace)) string {
 	if value, found := os.LookupEnv("GENERATOR_NS"); found {
 		name = value
 	} else {
@@ -200,6 +206,9 @@ func (tc *E2ETestFramework) CreateNamespace(name string) string {
 			Name: name,
 		},
 	}
+	for _, visitor := range visitors {
+		visitor(namespace)
+	}
 
 	if err := tc.Test.Recreate(namespace); err != nil {
 		clolog.Error(err, "Error")
diff --git a/test/framework/e2e/splunk.go b/test/framework/e2e/splunk.go
index 405b5b7e7..f7e2b79de 100644
--- a/test/framework/e2e/splunk.go
+++ b/test/framework/e2e/splunk.go
@@ -3,9 +3,10 @@ package e2e
 import (
 	"context"
 	"fmt"
-	"github.com/openshift/cluster-logging-operator/internal/runtime"
 	"net/url"
 
+	"github.com/openshift/cluster-logging-operator/internal/runtime"
+
 	"github.com/openshift/cluster-logging-operator/internal/constants"
 	"github.com/openshift/cluster-logging-operator/internal/utils"
 	"github.com/openshift/cluster-logging-operator/test/helpers/oc"
diff --git a/test/framework/e2e/tls/scanner.go b/test/framework/e2e/tls/scanner.go
new file mode 100644
index 000000000..b994ac2ea
--- /dev/null
+++ b/test/framework/e2e/tls/scanner.go
@@ -0,0 +1,451 @@
+package tls
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"time"
+
+	clolog "github.com/ViaQ/logerr/v2/log/static"
+	configv1 "github.com/openshift/api/config/v1"
+	"github.com/openshift/cluster-logging-operator/internal/constants"
+	"github.com/openshift/cluster-logging-operator/internal/runtime"
+	internaltls "github.com/openshift/cluster-logging-operator/internal/tls"
+	"github.com/openshift/cluster-logging-operator/internal/utils"
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+)
+
+const (
+	// JobTimeout is the timeout for the TLS Scanner Job to complete
+	JobTimeout = 10 * time.Minute
+	// ImageEnvVar is the environment variable for the TLS Scanner image
+	ImageEnvVar = "IMAGE_TLS_SCANNER"
+
+	// Name is the name given to the TLS Scanner Job
+	Name = "tls-scanner"
+)
+
+var (
+	// Components are the component names passed to the scanner's component filter
+	Components = strings.Join([]string{constants.ClusterLoggingOperator, "eventrouter", constants.LogfilesmetricexporterName, constants.VectorName}, ",")
+)
+
+// Scanner manages TLS Scanner deployment and result retrieval
+type Scanner struct {
+	KubeClient *kubernetes.Clientset
+	CleanupFns *[]func() error
+}
+
+// NewScanner creates a new TLS Scanner instance
+func NewScanner(kubeClient *kubernetes.Clientset, cleanupFns *[]func() error) *Scanner {
+	return &Scanner{
+		KubeClient: kubeClient,
+		CleanupFns: cleanupFns,
+	}
+}
+
+// GetImage returns the TLS Scanner image configured by the ImageEnvVar environment
+// variable. It exits the test process when no image is configured.
+func GetImage() (image string) {
+	if image = os.Getenv(ImageEnvVar); image == "" {
+		clolog.Error(nil, "No tls-scanner image provided", "variable", ImageEnvVar)
+		os.Exit(1)
+	}
+	return image
+}
+
+// Deploy deploys the TLS Scanner as a Job to scan the target namespace
+func (s *Scanner) Deploy(scannerNamespace, targetNamespace string) (*batchv1.Job, error) {
+	clolog.Info("Deploying TLS Scanner", "scannerNamespace", scannerNamespace, "targetNamespace", targetNamespace)
+
+	// Create ServiceAccount for the scanner
+	sa := runtime.NewServiceAccount(scannerNamespace, "tls-scanner")
+	if _, err := s.KubeClient.CoreV1().ServiceAccounts(scannerNamespace).Create(context.TODO(), sa, 
metav1.CreateOptions{}); err != nil { + if !apierrors.IsAlreadyExists(err) { + return nil, fmt.Errorf("failed to create ServiceAccount: %w", err) + } + } + s.addCleanup(func() error { + return s.KubeClient.CoreV1().ServiceAccounts(scannerNamespace).Delete(context.TODO(), sa.Name, metav1.DeleteOptions{}) + }) + + // Create ClusterRoleBindings + subject := rbacv1.Subject{ + Kind: "ServiceAccount", + Name: sa.Name, + Namespace: scannerNamespace, + } + for _, roleName := range []string{"cluster-reader", "system:openshift:scc:privileged", "dedicated-admins-project"} { + crb := runtime.NewClusterRoleBinding(fmt.Sprintf("tls-scanner-%s-%s", scannerNamespace, roleName), + rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: roleName, + }, + subject) + if _, err := s.KubeClient.RbacV1().ClusterRoleBindings().Create(context.TODO(), crb, metav1.CreateOptions{}); err != nil { + if !apierrors.IsAlreadyExists(err) { + return nil, fmt.Errorf("failed to create ClusterRoleBinding: %w", err) + } + } + s.addCleanup(func() error { + return s.KubeClient.RbacV1().ClusterRoleBindings().Delete(context.TODO(), crb.Name, metav1.DeleteOptions{}) + }) + } + + // Create the Job + backoffLimit := int32(0) + + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: Name, + Namespace: scannerNamespace, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: &backoffLimit, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: sa.Name, + RestartPolicy: corev1.RestartPolicyNever, + InitContainers: []corev1.Container{ + { + Name: "scanner", + Image: GetImage(), + Args: []string{ + "--all-pods", + "-component-filter", Components, + "-namespace-filter", targetNamespace, + "-json-file", "/tmp/scan-results.json", + "-j", "4", // Use 4 concurrent threads + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: utils.GetPtr(true), + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "results", + MountPath: "/tmp", + ReadOnly: false, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "results", + Image: GetImage(), + Command: []string{"cat"}, + Args: []string{ + "/tmp/scan-results.json", + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "results", + MountPath: "/tmp", + ReadOnly: true, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "results", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + }, + }, + }, + }, + } + + // Create the Job with bounded retry logic for AlreadyExists errors + const ( + maxRetries = 5 + retryInterval = 2 * time.Second + deletionTimeout = 30 * time.Second + deletionPollInterval = 1 * time.Second + ) + + deleteFn := func() error { + deletePolicy := metav1.DeletePropagationForeground + return s.KubeClient.BatchV1().Jobs(scannerNamespace).Delete(context.TODO(), job.Name, metav1.DeleteOptions{ + PropagationPolicy: &deletePolicy, + }) + } + + var lastErr error + for attempt := 0; attempt < maxRetries; attempt++ { + createdJob, err := s.KubeClient.BatchV1().Jobs(scannerNamespace).Create(context.TODO(), job, metav1.CreateOptions{}) + if err == nil { + // Success - add cleanup and return + s.addCleanup(deleteFn) + return createdJob, nil + } + + if !apierrors.IsAlreadyExists(err) { + // Non-retryable error + return nil, fmt.Errorf("failed to create TLS Scanner Job: %w", err) + } + + // Job already exists - delete it and wait for it to disappear + lastErr = err + clolog.V(2).Info("Job already exists, deleting and retrying", "job", job.Name, "attempt", attempt+1, "maxRetries", 
maxRetries)
+
+		if err := deleteFn(); err != nil && !apierrors.IsNotFound(err) {
+			clolog.V(2).Error(err, "failed to delete existing job", "job", job.Name)
+		}
+
+		// Wait for the job to be fully deleted before retrying
+		ctx, cancel := context.WithTimeout(context.TODO(), deletionTimeout)
+		pollErr := wait.PollUntilContextTimeout(ctx, deletionPollInterval, deletionTimeout, true, func(pollCtx context.Context) (bool, error) {
+			_, getErr := s.KubeClient.BatchV1().Jobs(scannerNamespace).Get(pollCtx, job.Name, metav1.GetOptions{})
+			if apierrors.IsNotFound(getErr) {
+				return true, nil
+			}
+			return false, nil
+		})
+		cancel()
+
+		if pollErr != nil {
+			clolog.V(2).Info("Job deletion did not complete within timeout", "job", job.Name, "timeout", deletionTimeout)
+		}
+
+		// Backoff before the next retry (except on the last attempt)
+		if attempt < maxRetries-1 {
+			time.Sleep(retryInterval)
+		}
+	}
+
+	return nil, fmt.Errorf("failed to create TLS Scanner Job after %d attempts: %w", maxRetries, lastErr)
+}
+
+// WaitForCompletion waits for the TLS Scanner Job to complete
+func (s *Scanner) WaitForCompletion(job *batchv1.Job, timeout time.Duration) error {
+	clolog.Info("Waiting for TLS Scanner Job to complete", "job", job.Name, "namespace", job.Namespace)
+
+	return wait.PollUntilContextTimeout(context.TODO(), 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
+		currentJob, err := s.KubeClient.BatchV1().Jobs(job.Namespace).Get(ctx, job.Name, metav1.GetOptions{})
+		if err != nil {
+			clolog.V(0).Error(err, "error polling TLS Scanner Job", "job", job.Name)
+			return false, nil
+		}
+
+		// Check if the job has completed successfully
+		if currentJob.Status.Succeeded > 0 {
+			clolog.Info("TLS Scanner Job completed successfully", "job", job.Name)
+			return true, nil
+		}
+
+		// Check if the job has failed
+		if currentJob.Status.Failed > 0 {
+			clolog.Error(nil, "TLS Scanner Job failed", "job", job.Name)
+			return false, fmt.Errorf("TLS Scanner Job failed")
+		}
+
+		clolog.V(3).Info("TLS Scanner Job still running", "job", job.Name, "active", currentJob.Status.Active)
+		return false, nil
+	})
+}
+
+// GetResults retrieves and parses the scan results from the TLS Scanner Job logs
+func (s *Scanner) GetResults(job *batchv1.Job) ([]ScanResult, error) {
+	clolog.Info("Retrieving TLS Scanner results", "job", job.Name, "namespace", job.Namespace)
+
+	// Get the pod for the job
+	pods, err := s.KubeClient.CoreV1().Pods(job.Namespace).List(context.TODO(), metav1.ListOptions{
+		LabelSelector: fmt.Sprintf("job-name=%s", job.Name),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to list pods for job: %w", err)
+	}
+	if len(pods.Items) == 0 {
+		return nil, fmt.Errorf("no pods found for job %s", job.Name)
+	}
+
+	pod := pods.Items[0]
+
+	// Stream the logs of the 'results' container, which cats the scan output file
+	logs, err := s.KubeClient.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{
+		Container: "results",
+	}).Stream(context.TODO())
+	if err != nil {
+		return nil, fmt.Errorf("failed to get logs from pod: %w", err)
+	}
+	defer func() { _ = logs.Close() }()
+
+	// Parse the results from the log stream; the raw content is logged at V(3) by parseResults
+	results, err := parseResults(logs)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse TLS scan results: %w", err)
+	}
+
+	clolog.Info("Retrieved TLS Scanner results", "count", len(results))
+	return results, nil
+}
+
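+// The scanner's JSON output mirrors the ScanOutput type in types.go. An
+// abbreviated, illustrative example (values are not real scan data):
+//
+//	{
+//	  "timestamp": "2025-01-01T00:00:00Z",
+//	  "ip_results": [{
+//	    "ip": "10.128.0.10",
+//	    "openshift_component": {"component": "vector"},
+//	    "port_results": [{"port": 24231, "protocol": "tcp", "status": "OK",
+//	      "tls_readiness": {"tls13_offered": true, "pqc_capable": false}}]
+//	  }]
+//	}
+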
+// parseResults parses the JSON output from the TLS Scanner
+func parseResults(r io.Reader) ([]ScanResult, error) {
+	// Read all content from the reader
+	content, err := io.ReadAll(r)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read scanner output: %w", err)
+	}
+	clolog.V(3).Info("Scanner output", "content", string(content))
+
+	// Parse JSON into the ScanOutput struct
+	var scanOutput ScanOutput
+	if err := json.Unmarshal(content, &scanOutput); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal scanner output: %w", err)
+	}
+
+	// Convert ScanOutput to the flat []ScanResult format
+	var results []ScanResult
+	for _, ipResult := range scanOutput.IPResults {
+		for _, portResult := range ipResult.PortResults {
+			result := ScanResult{
+				Component:    ipResult.OpenShiftComponent.Component,
+				IP:           ipResult.IP,
+				Port:         fmt.Sprintf("%d", portResult.Port),
+				Protocol:     portResult.Protocol,
+				Service:      portResult.Service,
+				Status:       portResult.Status,
+				TLSReadiness: portResult.TLSReadiness,
+			}
+
+			// Extract TLS versions from TLSReadiness if available. The scanner does not
+			// report TLS 1.2 explicitly, so infer it when the endpoint is TLS 1.2-only
+			// or does not offer TLS 1.3.
+			if portResult.TLSReadiness != nil {
+				if portResult.TLSReadiness.TLS13Offered {
+					result.TLSVersions = append(result.TLSVersions, "TLS 1.3")
+				}
+				if portResult.TLSReadiness.TLS12Only || !portResult.TLSReadiness.TLS13Offered {
+					result.TLSVersions = append(result.TLSVersions, "TLS 1.2")
+				}
+			}
+
+			results = append(results, result)
+		}
+	}
+
+	clolog.V(3).Info("Parsed TLS scanner results", "ipResults", len(scanOutput.IPResults), "portResults", len(results))
+	return results, nil
+}
+
+// ValidateCompliance validates that TLS scan results comply with the cluster TLS profile
+func ValidateCompliance(results []ScanResult, profileSpec configv1.TLSProfileSpec) error {
+	clolog.Info("Validating TLS compliance", "results", len(results), "profile", profileSpec.MinTLSVersion)
+
+	var failures []string
+	expectedMinTLS := internaltls.MinTLSVersion(profileSpec)
+
+	for _, result := range results {
+		// Skip non-TLS endpoints
+		if result.Status == "NO_TLS" || result.Status == "LOCALHOST_ONLY" {
+			clolog.V(2).Info("Skipping non-TLS endpoint", "pod", result.Pod, "port", result.Port, "status", result.Status)
+			continue
+		}
+
+		// Treat failed scans as compliance failures
+		if result.Status == "ERROR" || result.Status == "TIMEOUT" {
+			failures = append(failures, fmt.Sprintf("Pod %s/%s port %s: scan failed with status %s",
+				result.Namespace, result.Pod, result.Port, result.Status))
+			continue
+		}
+
+		// The TLS version cannot be validated when mutual TLS is required, but that is not necessarily a failure
+		if result.Status == "MTLS_REQUIRED" {
+			clolog.V(2).Info("Endpoint requires mutual TLS", "pod", result.Pod, "port", result.Port)
+			continue
+		}
+
+		// For successful scans, validate the TLS version
+		if result.Status == "OK" {
+			if !containsTLSVersion(result.TLSVersions, expectedMinTLS) {
+				failures = append(failures, fmt.Sprintf("Pod %s/%s port %s: expected min TLS version %s, got versions: %s",
+					result.Namespace, result.Pod, result.Port, expectedMinTLS, result.TLSVersions))
+			}
+
+			// Guard against endpoints that report OK without readiness details
+			if result.TLSReadiness != nil && !result.TLSReadiness.PQCCapable {
+				failures = append(failures, fmt.Sprintf("%s is not 'pqc_capable': %s", result.Component, result.TLSReadiness.Notes))
+			}
+
+			// Log successful validation
+			clolog.V(2).Info("TLS endpoint validated", "pod", result.Pod, "port", result.Port,
+				"tlsVersions", result.TLSVersions, "status", result.Status)
+		}
+	}
+
+	if len(failures) > 0 {
+		return fmt.Errorf("TLS compliance validation failed:\n%s", strings.Join(failures, "\n"))
+	}
+
+	clolog.Info("TLS compliance validation passed", "results", len(results))
+	return nil
+}
+
+// containsTLSVersion reports whether every detected TLS version meets the minimum required version.
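+// For example, with a minimum of TLS 1.2, versions ["TLS 1.2", "TLS 1.3"] pass
+// while ["TLS 1.0", "TLS 1.2"] fail because TLS 1.0 falls below the minimum.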
+func containsTLSVersion(tlsVersions []string, minVersion string) bool {
+	// If no TLS versions were detected, fail validation
+	if len(tlsVersions) == 0 {
+		return false
+	}
+
+	// Parse the minimum version
+	minVersionNum := parseTLSVersion(minVersion)
+	if minVersionNum == 0 {
+		// If the minimum version cannot be parsed, assume the versions are valid
+		return true
+	}
+
+	// Check that ALL supported versions meet the minimum
+	foundVersion := false
+	for _, v := range tlsVersions {
+		v = strings.TrimSpace(v)
+		vNum := parseTLSVersion(v)
+		if vNum == 0 {
+			// Skip unparseable versions
+			continue
+		}
+		foundVersion = true
+		if vNum < minVersionNum {
+			// Found a version below the minimum - fail validation
+			return false
+		}
+	}
+
+	// Return true only if at least one version was found and none were below the minimum
+	return foundVersion
+}
+
+// parseTLSVersion converts a TLS version string to a comparable number. It accepts
+// both the scanner's "TLS 1.x" form and configv1-style "VersionTLS1x" values
+// (e.g. VersionTLS12) so the cluster profile minimum can be compared directly.
+func parseTLSVersion(version string) int {
+	version = strings.TrimSpace(strings.ToLower(version))
+	switch {
+	case strings.Contains(version, "1.0"), strings.HasSuffix(version, "tls10"):
+		return 10
+	case strings.Contains(version, "1.1"), strings.HasSuffix(version, "tls11"):
+		return 11
+	case strings.Contains(version, "1.2"), strings.HasSuffix(version, "tls12"):
+		return 12
+	case strings.Contains(version, "1.3"), strings.HasSuffix(version, "tls13"):
+		return 13
+	default:
+		return 0
+	}
+}
+
+// addCleanup adds a cleanup function to the list
+func (s *Scanner) addCleanup(fn func() error) {
+	if s.CleanupFns != nil {
+		*s.CleanupFns = append(*s.CleanupFns, fn)
+	}
+}
diff --git a/test/framework/e2e/tls/types.go b/test/framework/e2e/tls/types.go
new file mode 100644
index 000000000..46f59211d
--- /dev/null
+++ b/test/framework/e2e/tls/types.go
@@ -0,0 +1,62 @@
+package tls
+
+import "time"
+
+// ScanResult represents a single TLS scan result
+type ScanResult struct {
+	IP           string
+	Port         string
+	Protocol     string
+	Service      string
+	Pod          string
+	Namespace    string
+	Status       string
+	TLSVersions  []string
+	Ciphers      string
+	Component    string
+	ListenAddr   string
+	TLSReadiness *TLSReadiness
+}
+
+// ScanOutput represents the top-level JSON output from the TLS scanner
+type ScanOutput struct {
+	Timestamp  time.Time  `json:"timestamp"`
+	TotalIPs   int        `json:"total_ips"`
+	ScannedIPs int        `json:"scanned_ips"`
+	IPResults  []IPResult `json:"ip_results"`
+}
+
+// IPResult represents scan results for a single IP address
+type IPResult struct {
+	IP                 string             `json:"ip"`
+	Status             string             `json:"status"`
+	OpenPorts          []int              `json:"open_ports"`
+	PortResults        []PortResult       `json:"port_results"`
+	OpenShiftComponent OpenShiftComponent `json:"openshift_component"`
+}
+
+// OpenShiftComponent identifies the OpenShift component that owns a scanned IP
+type OpenShiftComponent struct {
+	Component           string `json:"component"`
+	SourceLocation      string `json:"source_location"`
+	MaintainerComponent string `json:"maintainer_component"`
+	IsBundler           bool   `json:"is_bundle"`
+}
+
+// PortResult represents scan results for a single port
+type PortResult struct {
+	Port         int           `json:"port"`
+	Protocol     string        `json:"protocol"`
+	State        string        `json:"state"`
+	Service      string        `json:"service"`
+	Status       string        `json:"status"`
+	Reason       string        `json:"reason"`
+	TLSReadiness *TLSReadiness `json:"tls_readiness,omitempty"`
+}
+
+// TLSReadiness represents TLS capability information for a port
+type TLSReadiness struct {
+	TLS13Offered bool   `json:"tls13_offered"`
+	TLS12Only    bool   `json:"tls12_only"`
+	PQCCapable   bool   `json:"pqc_capable"`
+	Notes        string `json:"notes"`
+}