From 3099744278f85a678a4ce41b13b5125b1733e255 Mon Sep 17 00:00:00 2001 From: James Nesbitt Date: Wed, 27 May 2026 15:44:21 +0300 Subject: [PATCH] Require explicit spec.mcr.channel; fix Ubuntu apt-get noninteractive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove the default channel from MCRConfig.SetDefaults() — channel is now a required field. An absent or empty spec.mcr.channel produces a clear fatal error at parse time, consistent with how spec.mke.version is handled. - Add validation in ClusterSpec.UnmarshalYAML: if MCR channel is empty after unmarshal (mcr block absent or channel field omitted), return an actionable error directing the user to set e.g. channel: stable-29.4. - Set DEBIAN_FRONTEND=noninteractive on the apt-get update call in the Ubuntu MCR installer (InstallPackage from rig already sets it for install calls). - Add policy-rc.d comment documenting the deferred dpkg post-install concern. - Update all cluster_test.go fixtures to include mcr.channel: stable. - Add TestMissingMCRChannelFails and TestMCRConfig_ChannelRequired tests. PRODENG-3393 --- PRODENG-3393-plan.md | 87 ++++++++++++++++++++ pkg/configurer/ubuntu/ubuntu.go | 10 ++- pkg/product/common/config/mcr_config.go | 5 +- pkg/product/common/config/mcr_config_test.go | 13 ++- pkg/product/mke/config/cluster_spec.go | 4 + pkg/product/mke/config/cluster_test.go | 66 +++++++++++++-- 6 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 PRODENG-3393-plan.md diff --git a/PRODENG-3393-plan.md b/PRODENG-3393-plan.md new file mode 100644 index 00000000..efc3bbc3 --- /dev/null +++ b/PRODENG-3393-plan.md @@ -0,0 +1,87 @@ +# PRODENG-3393 — launchpad cannot deploy MKE 3.8 using template from the doc + +**Priority:** Unassigned | **Reporter:** Dzmitry Stremkouski | **Customer:** Nordea PreSales + +## Problem + +Users following the MKE 3.8 doc template cannot deploy because: + +1. The doc template omits the `mcr` block entirely, so launchpad uses defaults. +2. In v1.5.15 (when filed), the default MCR version was old (20.10.13) and the + `get.mirantis.com` install script failed with exit status 1 on Ubuntu 22.04. +3. In v1.5.16, `mcr.version` was removed from `MCRConfig`; the install path + switched to direct apt package management (`pkg/configurer/ubuntu/ubuntu.go`). + The failure mode on Ubuntu 22.04 may differ with the new install path. + +Related: PRODENG-3342 (same reporter, same customer, same root MCR install failure). + +## Root Cause Analysis + +### v1.5.15 (filed against) +- `MCRConfig.Version` defaulted to an old pinned value (20.10.13). +- Install used `https://get.mirantis.com/` shell script; exited 1 on Ubuntu 22.04. +- Reporter in PRODENG-3342 noted: "Please disable dpkg script during docker install" + suggesting a dpkg post-install script conflict (likely Docker default bridge + network activation conflicting with the environment). + +### v1.5.16+ (current) +- `mcr.version` field removed; install is now apt-based (`ubuntu.go:InstallMCR`). +- No `DEBIAN_FRONTEND=noninteractive` is set during apt operations. +- `InstallPackage` installs latest from the configured channel with no version pin. +- The dpkg post-install script issue may still be present (Docker daemon starts on + install and can fail or conflict in restricted environments). + +## Acceptance Criteria + +1. `launchpad apply` with a minimal doc-template config (no `mcr` block) on Ubuntu 22.04 + installs MCR successfully. +2. If a dpkg/apt post-install script failure is confirmed, launchpad either suppresses + the daemon start during install or handles the non-zero exit gracefully. +3. No regression on other Linux distros (EL, SLES). + +## Implementation Plan + +### Step 1 — Reproduce and confirm current failure mode +- Run `launchpad apply` on a fresh Ubuntu 22.04 host with no `mcr` block. +- Capture the full install log (not just the retry-exhausted summary). +- Determine if `docker-ee` post-install script is the failure point. + +### Step 2 — Fix Ubuntu apt install (if dpkg is confirmed) +**File:** `pkg/configurer/ubuntu/ubuntu.go` — `InstallMCR` + +Option A (preferred): Set `DEBIAN_FRONTEND=noninteractive` when calling apt and pass +`-o Dpkg::Options::="--force-confold"` to prevent post-install script interaction. + +Option B: Disable the Docker daemon auto-start during install: +```bash +echo "exit 101" | sudo tee /usr/sbin/policy-rc.d && sudo chmod +x /usr/sbin/policy-rc.d +# ... install packages ... +sudo rm -f /usr/sbin/policy-rc.d +``` +Then explicitly start/enable via `EnableMCR`. + +### Step 3 — Fix or update the `InstallPackage` helper +**File:** `pkg/configurer/linux.go` (or ubuntu-specific override) + +Confirm `InstallPackage` passes `-y` and `DEBIAN_FRONTEND=noninteractive`. +If not, fix it here so all package installs benefit. + +### Step 4 — Doc fix +The doc example for MKE 3.8 must include a valid `mcr` block showing the +correct channel and repoURL. File against the docs team or update in-repo examples. +Note: `mcr.version` is no longer a valid field as of v1.5.16 — any existing docs +that show `mcr.version` must be updated. + +### Step 5 — Verification +- Unit test: `pkg/configurer/ubuntu/ubuntu_test.go` — mock exec, assert + `DEBIAN_FRONTEND=noninteractive` is present in install commands. +- Integration: deploy on Ubuntu 22.04 with a bare (no `mcr` block) config. + +## Files in Scope + +| File | Change | +|---|---| +| `pkg/configurer/ubuntu/ubuntu.go` | Fix `InstallMCR` — noninteractive apt / policy-rc.d | +| `pkg/configurer/linux.go` | Audit `InstallPackage` for `-y` / noninteractive flags | +| `pkg/configurer/ubuntu/ubuntu_test.go` | Unit tests for install command construction | +| `examples/` or `docs/` | Update example YAML (no `mcr.version`, correct channel) | diff --git a/pkg/configurer/ubuntu/ubuntu.go b/pkg/configurer/ubuntu/ubuntu.go index a3470109..6cd212af 100644 --- a/pkg/configurer/ubuntu/ubuntu.go +++ b/pkg/configurer/ubuntu/ubuntu.go @@ -65,10 +65,18 @@ Signed-by: /usr/share/keyrings/mirantis-archive-keyring.gpg if err := c.WriteFile(h, debRepoFilePath, debRepo, "0600"); err != nil { return fmt.Errorf("could not write APT repo file for MCR") } - if err := h.Exec("apt-get update", exec.Sudo(h)); err != nil { + if err := h.Exec("DEBIAN_FRONTEND=noninteractive apt-get update", exec.Sudo(h)); err != nil { return fmt.Errorf("could not update apt package info") } + // NOTE: policy-rc.d — the docker-ee dpkg post-install script calls + // `systemctl start docker` immediately on package install. On hosts where + // the daemon fails to start (network conflicts, restricted systemd + // environments), the error is indistinguishable from a genuine + // package-not-found failure. A robust fix would install a + // /usr/sbin/policy-rc.d that exits 101 (deny) before these calls and + // remove it afterward, delegating daemon startup solely to EnableMCR + // below. Not implemented here pending a confirmed reproduction. if err := c.InstallPackage(h, "containerd.io"); err != nil { return fmt.Errorf("package manager could not install containerd.io") } diff --git a/pkg/product/common/config/mcr_config.go b/pkg/product/common/config/mcr_config.go index 85d3e2c7..f1bf7bff 100644 --- a/pkg/product/common/config/mcr_config.go +++ b/pkg/product/common/config/mcr_config.go @@ -58,9 +58,8 @@ func (c *MCRConfig) UnmarshalYAML(unmarshal func(any) error) error { // SetDefaults sets defaults on the object. func (c *MCRConfig) SetDefaults() { // Constants can't be used in tags, so yaml defaults can't be used here. - if c.Channel == "" { - c.Channel = constant.MCRChannel - } + // Note: Channel intentionally has no default — it is required and must be + // set explicitly. See ClusterSpec.UnmarshalYAML for the validation. if c.RepoURL == "" { c.RepoURL = constant.MCRRepoURL diff --git a/pkg/product/common/config/mcr_config_test.go b/pkg/product/common/config/mcr_config_test.go index f65019d3..977aa3b4 100644 --- a/pkg/product/common/config/mcr_config_test.go +++ b/pkg/product/common/config/mcr_config_test.go @@ -10,9 +10,18 @@ import ( commonconfig "github.com/Mirantis/launchpad/pkg/product/common/config" ) +func TestMCRConfig_ChannelRequired(t *testing.T) { + // Channel must not be defaulted when absent — ClusterConfig.Validate() + // catches an empty channel via the validate:"required" struct tag. + cfg := commonconfig.MCRConfig{} + err := yaml.Unmarshal([]byte("repoURL: https://example.com"), &cfg) + require.NoError(t, err) + require.Empty(t, cfg.Channel, "channel must not be defaulted when absent") +} + func TestSwarmInstallFlags(t *testing.T) { cfg := commonconfig.MCRConfig{} - err := yaml.Unmarshal([]byte("swarmInstallFlags:\n - --foo=foofoo\n - --bar barbar\n - --foobar"), &cfg) + err := yaml.Unmarshal([]byte("channel: stable\nswarmInstallFlags:\n - --foo=foofoo\n - --bar barbar\n - --foobar"), &cfg) require.NoError(t, err) require.Equal(t, "--foobar", cfg.SwarmInstallFlags[2]) require.Equal(t, 0, cfg.SwarmInstallFlags.Index("--foo")) @@ -23,7 +32,7 @@ func TestSwarmInstallFlags(t *testing.T) { func TestSwarmUpdateCommands(t *testing.T) { cfg := commonconfig.MCRConfig{} - err := yaml.Unmarshal([]byte("swarmUpdateCommands:\n - command1\n - command2\n - command3"), &cfg) + err := yaml.Unmarshal([]byte("channel: stable\nswarmUpdateCommands:\n - command1\n - command2\n - command3"), &cfg) require.NoError(t, err) require.Equal(t, "command3", cfg.SwarmUpdateCommands[2]) require.Equal(t, 0, slices.Index(cfg.SwarmUpdateCommands, "command1")) diff --git a/pkg/product/mke/config/cluster_spec.go b/pkg/product/mke/config/cluster_spec.go index 990575e1..578d8d2f 100644 --- a/pkg/product/mke/config/cluster_spec.go +++ b/pkg/product/mke/config/cluster_spec.go @@ -176,6 +176,10 @@ func (c *ClusterSpec) UnmarshalYAML(unmarshal func(interface{}) error) error { if err := unmarshal(specAlias); err != nil { return err } + if c.MCR.Channel == "" { + return fmt.Errorf("%w: missing spec.mcr.channel — the mcr block is required; set a channel (e.g. channel: stable-29.4)", errInvalidConfig) + } + if c.Hosts.Count(func(h *Host) bool { return h.Role == "msr" }) > 0 { if specAlias.MSR == nil { diff --git a/pkg/product/mke/config/cluster_test.go b/pkg/product/mke/config/cluster_test.go index c96ef5c7..fccaaa41 100644 --- a/pkg/product/mke/config/cluster_test.go +++ b/pkg/product/mke/config/cluster_test.go @@ -39,14 +39,16 @@ func TestHostRequireManagerValidationPass(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.6" kind: mke spec: + mcr: + channel: stable hosts: - ssh: address: 10.0.0.1 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: manager - ssh: address: 10.0.0.2 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker mke: version: 3.3.7 @@ -63,14 +65,16 @@ func TestHostRequireManagerValidationFail(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.4" kind: mke spec: + mcr: + channel: stable hosts: - ssh: address: 10.0.0.1 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker - ssh: address: 10.0.0.2 - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: worker mke: version: 3.3.7 @@ -87,6 +91,8 @@ func TestNonExistingHostsFails(t *testing.T) { apiVersion: "launchpad.mirantis.com/mke/v1.4" kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -98,11 +104,31 @@ spec: validateErrorField(t, err, "Hosts") } +func TestMissingMCRChannelFails(t *testing.T) { + data := ` +apiVersion: launchpad.mirantis.com/mke/v1.6 +kind: mke +spec: + mke: + version: 3.3.7 + hosts: + - ssh: + address: "1.2.3.4" + role: manager +` + c := &ClusterConfig{} + err := yaml.Unmarshal([]byte(strings.ReplaceAll(data, "\t", " ")), c) + require.Error(t, err) + require.ErrorContains(t, err, "spec.mcr.channel") +} + func TestHostAddressValidationWithInvalidIP(t *testing.T) { data := ` apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -122,6 +148,8 @@ func TestHostAddressValidationWithValidIP(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -140,6 +168,8 @@ func TestHostAddressValidationWithInvalidHostname(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -159,6 +189,8 @@ func TestHostAddressValidationWithValidHostname(t *testing.T) { apiVersion: launchpad.mirantis.com/v1 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -177,6 +209,8 @@ func TestHostSshPortValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -196,6 +230,8 @@ func TestHostRoleValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -215,6 +251,8 @@ func TestHostWithComplexMCRConfig(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -312,6 +350,8 @@ func TestHostWinRMCACertPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -332,6 +372,8 @@ func TestHostWinRMCertPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -352,6 +394,8 @@ func TestHostWinRMKeyPathValidation(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -371,6 +415,8 @@ func TestHostSSHDefaults(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -389,6 +435,8 @@ func TestHostWinRMDefaults(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.6 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 hosts: @@ -416,6 +464,8 @@ func TestValidationWithMSRRole(t *testing.T) { apiVersion: launchpad.mirantis.com/mke/v1.4 kind: mke spec: + mcr: + channel: stable mke: version: 3.3.7 msr: @@ -423,7 +473,7 @@ spec: hosts: - ssh: address: "10.0.0.1" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: weirdrole - ssh: address: "10.0.0.2" @@ -438,6 +488,8 @@ spec: apiVersion: launchpad.mirantis.com/mke/v1.6 kind: mke+msr spec: + mcr: + channel: stable mke: version: 3.3.7 msr: @@ -445,11 +497,11 @@ spec: hosts: - ssh: address: "10.0.0.1" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: msr - ssh: address: "10.0.0.2" - keyPath: ` + kf.Name() + ` + keyPath: ` + kf.Name() + ` role: manager ` c := loadYaml(t, data)