diff --git a/CHANGELOG.md b/CHANGELOG.md index 47d2bf5c..4f4015af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- Previously, the shared secret as well as the client spooling secret have been placed in immutable Kubernetes Secrets. + This caused problems, as they have been cached by Kubernetes, so re-creations of the mentioned Secrets (e.g. by deleting and re-creating the stacklet) + could cause Trino Pods to have different shared secrets, causing workers failing to join the coordinator. + This fix places the secrets in mutable Kubernetes Secrets going forward and migrates existing immutable Secrets to mutable by re-creating them ([#876]). + +[#876]: https://github.com/stackabletech/trino-operator/pull/876 + ## [26.3.0] - 2026-03-16 ## [26.3.0-rc1] - 2026-03-16 diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 33acaa97..f0bf3028 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -46,7 +46,8 @@ use stackable_operator::{ apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, }, kube::{ - Resource, ResourceExt, + Api, Resource, ResourceExt, + api::DeleteParams, core::{DeserializeGuard, error_boundary}, runtime::{controller::Action, reflector::ObjectRef}, }, @@ -186,6 +187,11 @@ pub enum Error { source: stackable_operator::client::Error, }, + #[snafu(display("failed to delete internal secret"))] + DeleteInternalSecret { + source: stackable_operator::kube::Error, + }, + #[snafu(display("invalid product config"))] InvalidProductConfig { source: stackable_operator::product_config_utils::Error, @@ -537,7 +543,7 @@ pub async fn reconcile_trino( None => None, }; - create_random_secret( + create_random_secret_if_not_exists( &shared_internal_secret_name(trino), ENV_INTERNAL_SECRET, 512, @@ -548,7 +554,7 @@ pub async fn reconcile_trino( // This secret is created even if spooling is not configured. // Trino currently requires the secret to be exactly 256 bits long. - create_random_secret( + create_random_secret_if_not_exists( &shared_spooling_secret_name(trino), ENV_SPOOLING_SECRET, 32, @@ -1525,44 +1531,93 @@ fn build_recommended_labels<'a>( } } -async fn create_random_secret( +/// This function creates a random Secret if it doesn't already exist. +/// +/// As this function generates random Secret contents, if the Secret already exists, it will *not* +/// be patched, as otherwise we would generate new Secret contents on every reconcile. Which would +/// in turn cause Pods restarts, which would cause reconciles ;) +/// +/// However, there is one special handling needed: +/// +/// We can't mark Secrets as immutable, as this caused problems, see https://github.com/stackabletech/issues/issues/843. +/// As Secrets have been created as immutable up to SDP release 26.3.0, we need to delete the, to be +/// able to re-create them as mutable. This function detects old (immutable) Secrets and re-creates +/// them as mutable. The contents of the Secret will be kept to prevent unnecessary Secret content +/// changes. +async fn create_random_secret_if_not_exists( secret_name: &str, secret_key: &str, secret_byte_size: usize, trino: &v1alpha1::TrinoCluster, client: &Client, ) -> Result<()> { - let mut internal_secret = BTreeMap::new(); - internal_secret.insert(secret_key.to_string(), get_random_base64(secret_byte_size)); + let secret_namespace = trino.namespace().context(ObjectHasNoNamespaceSnafu)?; + let existing_secret = client + .get_opt::(secret_name, &secret_namespace) + .await + .context(FailedToRetrieveInternalSecretSnafu)?; - let secret = Secret { - immutable: Some(true), - metadata: ObjectMetaBuilder::new() - .name(secret_name) - .namespace_opt(trino.namespace()) - .ownerreference_from_resource(trino, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .build(), - string_data: Some(internal_secret), - ..Secret::default() - }; + match existing_secret { + Some( + existing_secret @ Secret { + immutable: Some(true), + .. + }, + ) => { + tracing::info!( + k8s.secret.name = secret_name, + k8s.secret.namespace = secret_namespace, + "Old (immutable) Secret detected, re-creating it to be able to make it mutable. The contents will stay the same." + ); + Api::::namespaced(client.as_kube_client(), &secret_namespace) + .delete(secret_name, &DeleteParams::default()) + .await + .context(DeleteInternalSecretSnafu)?; - if client - .get_opt::( - &secret.name_any(), - secret - .namespace() - .as_deref() - .context(ObjectHasNoNamespaceSnafu)?, - ) - .await - .context(FailedToRetrieveInternalSecretSnafu)? - .is_none() - { - client - .apply_patch(CONTROLLER_NAME, &secret, &secret) - .await - .context(ApplyInternalSecretSnafu)?; + let mut mutable_secret = existing_secret; + mutable_secret.immutable = Some(false); + // Prevent "ApiError: resourceVersion should not be set on objects to be created" + mutable_secret.metadata.resource_version = None; + + client + .create(&mutable_secret) + .await + .context(ApplyInternalSecretSnafu)?; + + // Note: restart-controller will restart all Trino (coordinator and worker) Pods as the + // mounted Secret changed. + } + Some(_) => { + tracing::debug!( + k8s.secret.name = secret_name, + k8s.secret.namespace = secret_namespace, + "Existing (mutable) Secret detected, nothing to do" + ); + } + None => { + tracing::info!( + k8s.secret.name = secret_name, + k8s.secret.namespace = secret_namespace, + "Random Secret missing, creating it" + ); + let secret = Secret { + metadata: ObjectMetaBuilder::new() + .name(secret_name) + .namespace_opt(trino.namespace()) + .ownerreference_from_resource(trino, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .build(), + string_data: Some(BTreeMap::from([( + secret_key.to_string(), + get_random_base64(secret_byte_size), + )])), + ..Secret::default() + }; + client + .create(&secret) + .await + .context(ApplyInternalSecretSnafu)?; + } } Ok(())