From bd5edf543c3c5968735aa65d146d8b6969924dac Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Wed, 27 May 2026 09:35:16 -0700 Subject: [PATCH 1/8] Add Dpkg::Lock::Timeout to all apt-get commands in INBD --- .../internal/os_updater/ubuntu/update.go | 26 ++++++++++--------- .../internal/os_updater/ubuntu/update_test.go | 26 +++++++++---------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/in-band-manageability/internal/os_updater/ubuntu/update.go b/in-band-manageability/internal/os_updater/ubuntu/update.go index e677b680..4887a43b 100644 --- a/in-band-manageability/internal/os_updater/ubuntu/update.go +++ b/in-band-manageability/internal/os_updater/ubuntu/update.go @@ -25,6 +25,8 @@ import ( "google.golang.org/protobuf/encoding/protojson" ) +const aptLockTimeoutOption = "Dpkg::Lock::Timeout=300" + // Updater is the concrete implementation of the Updater interface // for the Ubuntu OS. type Updater struct { @@ -214,11 +216,11 @@ func GetEstimatedSize(cmdExec common.Executor, packageList []string) (bool, uint // If specific packages are requested, check those; otherwise check system-wide upgrade if len(packageList) > 0 { // For specific packages: apt-get install --dry-run - cmd = append([]string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", + cmd = append([]string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "-u", "install", "--assume-no"}, packageList...) } else { // For system-wide upgrade - cmd = []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", + cmd = []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"} } @@ -315,17 +317,17 @@ func noDownload(packages []string) [][]string { log.Println("No download mode") cmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "-yq", "-f", "install"}, } if len(packages) == 0 { - cmds = append(cmds, []string{common.AptGetCmd, "-o", + cmds = append(cmds, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "--fix-missing", "-yq", "upgrade"}) } else { - cmds = append(cmds, [][]string{append([]string{common.AptGetCmd, "-o", + cmds = append(cmds, [][]string{append([]string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--fix-missing", "-yq", @@ -429,17 +431,17 @@ func downloadOnly(packages []string) [][]string { cmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "update"}, + {common.AptGetCmd, "-o", aptLockTimeoutOption, "update"}, } if len(packages) == 0 { - cmds = append(cmds, []string{common.AptGetCmd, "-o", + cmds = append(cmds, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "--download-only", "--fix-missing", "-yq", "upgrade"}) } else { - cmds = append(cmds, [][]string{append([]string{common.AptGetCmd, "-o", + cmds = append(cmds, [][]string{append([]string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--download-only", "--fix-missing", "-yq", "install"}, packages...)}...) @@ -452,15 +454,15 @@ func fullInstall(packages []string) [][]string { log.Println("Download and install mode") cmds := [][]string{ - {common.AptGetCmd, "update"}, - {common.AptGetCmd, "-yq", "-f", "install"}, // Fix broken dependencies + {common.AptGetCmd, "-o", aptLockTimeoutOption, "update"}, + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-yq", "-f", "install"}, // Fix broken dependencies {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, } if len(packages) == 0 { - cmds = append(cmds, []string{common.AptGetCmd, "-yq", "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "upgrade"}) + cmds = append(cmds, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-yq", "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "upgrade"}) } else { - cmds = append(cmds, []string{common.AptGetCmd, "-yq", "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "install"}) + cmds = append(cmds, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-yq", "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "install"}) cmds = append(cmds, packages) } diff --git a/in-band-manageability/internal/os_updater/ubuntu/update_test.go b/in-band-manageability/internal/os_updater/ubuntu/update_test.go index 6c751438..717356c4 100644 --- a/in-band-manageability/internal/os_updater/ubuntu/update_test.go +++ b/in-band-manageability/internal/os_updater/ubuntu/update_test.go @@ -50,9 +50,9 @@ func TestNoDownload(t *testing.T) { t.Run("no packages", func(t *testing.T) { expectedCmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "-yq", "-f", "install"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "--fix-missing", "-yq", "upgrade"}, @@ -67,9 +67,9 @@ func TestNoDownload(t *testing.T) { packages := []string{"package1", "package2"} expectedCmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "-yq", "-f", "install"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--fix-missing", "-yq", "install", "package1", "package2"}, @@ -85,8 +85,8 @@ func TestDownloadOnly(t *testing.T) { t.Run("no packages", func(t *testing.T) { expectedCmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "update"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "update"}, + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "--download-only", "--fix-missing", "-yq", "upgrade"}, @@ -101,8 +101,8 @@ func TestDownloadOnly(t *testing.T) { packages := []string{"package1", "package2"} expectedCmds := [][]string{ {common.DpkgCmd, "--configure", "-a", "--force-confdef", "--force-confold"}, - {common.AptGetCmd, "update"}, - {common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", + {common.AptGetCmd, "-o", aptLockTimeoutOption, "update"}, + {common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--download-only", "--fix-missing", "-yq", "install", "package1", "package2"}, @@ -125,7 +125,7 @@ func TestGetEstimatedSize(t *testing.T) { assert.NoError(t, err) assert.Equal(t, uint64(0), size) assert.Equal(t, 1, len(mockExec.commands)) - assert.Equal(t, []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) + assert.Equal(t, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) }) t.Run("successful size estimation", func(t *testing.T) { @@ -140,7 +140,7 @@ func TestGetEstimatedSize(t *testing.T) { assert.True(t, isUpdateAvail) assert.Equal(t, uint64(524288000), size) assert.Equal(t, 1, len(mockExec.commands)) - assert.Equal(t, []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) + assert.Equal(t, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) }) t.Run("failed to get size estimation", func(t *testing.T) { @@ -155,7 +155,7 @@ func TestGetEstimatedSize(t *testing.T) { assert.False(t, isUpdateAvail) assert.Equal(t, uint64(0), size) assert.Equal(t, 1, len(mockExec.commands)) - assert.Equal(t, []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) + assert.Equal(t, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) }) t.Run("no size information in output", func(t *testing.T) { @@ -169,7 +169,7 @@ func TestGetEstimatedSize(t *testing.T) { assert.False(t, isUpdateAvail) assert.Equal(t, uint64(0), size) assert.Equal(t, 1, len(mockExec.commands)) - assert.Equal(t, []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) + assert.Equal(t, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) }) t.Run("command execution error but valid output", func(t *testing.T) { @@ -184,7 +184,7 @@ func TestGetEstimatedSize(t *testing.T) { assert.True(t, isUpdateAvail) assert.Equal(t, uint64(524288000), size) assert.Equal(t, 1, len(mockExec.commands)) - assert.Equal(t, []string{common.AptGetCmd, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) + assert.Equal(t, []string{common.AptGetCmd, "-o", aptLockTimeoutOption, "-o", "Dpkg::Options::=--force-confdef", "-o", "Dpkg::Options::=--force-confold", "--with-new-pkgs", "-u", "upgrade", "--assume-no"}, mockExec.commands[0]) }) } From 2eb221dc8b0b8c1da4397d9fd57be819e5a7b165 Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Wed, 27 May 2026 09:44:29 -0700 Subject: [PATCH 2/8] Increased the default INBC SOTA RPC timeout from 11 minutes to 30 minutes --- in-band-manageability/internal/inbc/commands/constants.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/in-band-manageability/internal/inbc/commands/constants.go b/in-band-manageability/internal/inbc/commands/constants.go index d87c2a45..2c574840 100644 --- a/in-band-manageability/internal/inbc/commands/constants.go +++ b/in-band-manageability/internal/inbc/commands/constants.go @@ -10,7 +10,7 @@ package commands const clientDialTimeoutInSeconds = 5 const sourceTimeoutInSeconds = 15 const emtSoftwareUpdateTimerInSeconds = 1200 -const defaultSoftwareUpdateTimerInSeconds = 660 +const defaultSoftwareUpdateTimerInSeconds = 1800 const configTimeoutInSeconds = 15 const firmwareUpdateTimerInSeconds = 90 const queryTimeoutInSeconds = 15 From f1cc1433ba6f134d14520d4ceb1526511698e70e Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Thu, 28 May 2026 07:27:59 -0700 Subject: [PATCH 3/8] Increased defaultSoftwareUpdateTimerInSeconds to avoid DeadlineExceeded error during long write/verify/snapshot steps during immutable OS update --- in-band-manageability/internal/inbc/commands/constants.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/in-band-manageability/internal/inbc/commands/constants.go b/in-band-manageability/internal/inbc/commands/constants.go index 2c574840..9646eacf 100644 --- a/in-band-manageability/internal/inbc/commands/constants.go +++ b/in-band-manageability/internal/inbc/commands/constants.go @@ -10,7 +10,7 @@ package commands const clientDialTimeoutInSeconds = 5 const sourceTimeoutInSeconds = 15 const emtSoftwareUpdateTimerInSeconds = 1200 -const defaultSoftwareUpdateTimerInSeconds = 1800 +const defaultSoftwareUpdateTimerInSeconds = 3600 const configTimeoutInSeconds = 15 const firmwareUpdateTimerInSeconds = 90 const queryTimeoutInSeconds = 15 From fc24f0191a73ae19ad6bf193a412732ea75fbd52 Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Thu, 28 May 2026 07:37:56 -0700 Subject: [PATCH 4/8] Avoid false download failure on non-zero exit after SOTA success marker during immutable OS update --- .../internal/downloader/real_downloader.go | 8 +++ .../downloader/real_downloader_test.go | 51 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/platform-update-agent/internal/downloader/real_downloader.go b/platform-update-agent/internal/downloader/real_downloader.go index b4b8be7a..2ff1d0b4 100644 --- a/platform-update-agent/internal/downloader/real_downloader.go +++ b/platform-update-agent/internal/downloader/real_downloader.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "os/exec" + "strings" pb "github.com/open-edge-platform/infra-managers/maintenance/pkg/api/maintmgr/v1" "github.com/sirupsen/logrus" @@ -65,6 +66,13 @@ func (r *RealDownloadExecutor) runInbcCommand(ctx context.Context, args ...strin cmdArgs := append([]string{"inbc"}, args...) output, err := r.commandRunner.RunCommand(ctx, "sudo", cmdArgs...) if err != nil { + if strings.Contains(string(output), "SOTA Command Response: 200-Success") { + r.log.WithError(err).Warn("DOWNLOAD: command returned non-zero exit after explicit success response; treating as successful") + if ctxErr := ctx.Err(); ctxErr != nil { + r.log.WithError(ctxErr).Debug("DOWNLOAD: context state when command returned non-zero") + } + return nil + } return fmt.Errorf("command failed. output: \n%s\nerror: %w", string(output), err) } return nil diff --git a/platform-update-agent/internal/downloader/real_downloader_test.go b/platform-update-agent/internal/downloader/real_downloader_test.go index f71b6294..e2674020 100644 --- a/platform-update-agent/internal/downloader/real_downloader_test.go +++ b/platform-update-agent/internal/downloader/real_downloader_test.go @@ -5,6 +5,7 @@ package downloader import ( "context" + "errors" "sync" "testing" @@ -86,3 +87,53 @@ func TestNewDownloadExecutor_NoError(t *testing.T) { executor := NewDownloadExecutor(nil) assert.NotNil(t, executor, "NewDownloadExecutor(nil) should not return nil") } + +func TestRealDownloadExecutor_Download_SuccessMarkerWithKilledError(t *testing.T) { + logger := logrus.New() + logEntry := logrus.NewEntry(logger) + mockRunner := &MockCommandRunner{} + + r := &RealDownloadExecutor{ + log: logEntry, + commandRunner: mockRunner, + } + + url := "https://example.com/image" + sha := "1234567890abcdef" + + source := &pb.OSProfileUpdateSource{ + OsImageUrl: url, + OsImageSha: sha, + ProfileName: "test-profile", + } + + mockRunner.SetResponse([]byte("SOTA INBC Command was invoked.\nSOTA Command Response: 200-Success\n"), errors.New("signal: killed")) + + err := r.Download(context.Background(), "", source) + assert.NoError(t, err, "Download() should treat explicit success marker as successful even if wrapper exits non-zero") +} + +func TestRealDownloadExecutor_Download_FailsWithoutSuccessMarker(t *testing.T) { + logger := logrus.New() + logEntry := logrus.NewEntry(logger) + mockRunner := &MockCommandRunner{} + + r := &RealDownloadExecutor{ + log: logEntry, + commandRunner: mockRunner, + } + + url := "https://example.com/image" + sha := "1234567890abcdef" + + source := &pb.OSProfileUpdateSource{ + OsImageUrl: url, + OsImageSha: sha, + ProfileName: "test-profile", + } + + mockRunner.SetResponse([]byte("SOTA INBC Command was invoked.\n"), errors.New("signal: killed")) + + err := r.Download(context.Background(), "", source) + assert.Error(t, err, "Download() should fail when command exits non-zero without explicit success marker") +} From 4e89ef1cbaa601023d9c7e939b59c312abebb6b0 Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Fri, 29 May 2026 03:15:08 -0700 Subject: [PATCH 5/8] Remove unrelated PUA changes --- .../internal/downloader/real_downloader.go | 8 --- .../downloader/real_downloader_test.go | 51 ------------------- 2 files changed, 59 deletions(-) diff --git a/platform-update-agent/internal/downloader/real_downloader.go b/platform-update-agent/internal/downloader/real_downloader.go index 2ff1d0b4..b4b8be7a 100644 --- a/platform-update-agent/internal/downloader/real_downloader.go +++ b/platform-update-agent/internal/downloader/real_downloader.go @@ -7,7 +7,6 @@ import ( "context" "fmt" "os/exec" - "strings" pb "github.com/open-edge-platform/infra-managers/maintenance/pkg/api/maintmgr/v1" "github.com/sirupsen/logrus" @@ -66,13 +65,6 @@ func (r *RealDownloadExecutor) runInbcCommand(ctx context.Context, args ...strin cmdArgs := append([]string{"inbc"}, args...) output, err := r.commandRunner.RunCommand(ctx, "sudo", cmdArgs...) if err != nil { - if strings.Contains(string(output), "SOTA Command Response: 200-Success") { - r.log.WithError(err).Warn("DOWNLOAD: command returned non-zero exit after explicit success response; treating as successful") - if ctxErr := ctx.Err(); ctxErr != nil { - r.log.WithError(ctxErr).Debug("DOWNLOAD: context state when command returned non-zero") - } - return nil - } return fmt.Errorf("command failed. output: \n%s\nerror: %w", string(output), err) } return nil diff --git a/platform-update-agent/internal/downloader/real_downloader_test.go b/platform-update-agent/internal/downloader/real_downloader_test.go index e2674020..f71b6294 100644 --- a/platform-update-agent/internal/downloader/real_downloader_test.go +++ b/platform-update-agent/internal/downloader/real_downloader_test.go @@ -5,7 +5,6 @@ package downloader import ( "context" - "errors" "sync" "testing" @@ -87,53 +86,3 @@ func TestNewDownloadExecutor_NoError(t *testing.T) { executor := NewDownloadExecutor(nil) assert.NotNil(t, executor, "NewDownloadExecutor(nil) should not return nil") } - -func TestRealDownloadExecutor_Download_SuccessMarkerWithKilledError(t *testing.T) { - logger := logrus.New() - logEntry := logrus.NewEntry(logger) - mockRunner := &MockCommandRunner{} - - r := &RealDownloadExecutor{ - log: logEntry, - commandRunner: mockRunner, - } - - url := "https://example.com/image" - sha := "1234567890abcdef" - - source := &pb.OSProfileUpdateSource{ - OsImageUrl: url, - OsImageSha: sha, - ProfileName: "test-profile", - } - - mockRunner.SetResponse([]byte("SOTA INBC Command was invoked.\nSOTA Command Response: 200-Success\n"), errors.New("signal: killed")) - - err := r.Download(context.Background(), "", source) - assert.NoError(t, err, "Download() should treat explicit success marker as successful even if wrapper exits non-zero") -} - -func TestRealDownloadExecutor_Download_FailsWithoutSuccessMarker(t *testing.T) { - logger := logrus.New() - logEntry := logrus.NewEntry(logger) - mockRunner := &MockCommandRunner{} - - r := &RealDownloadExecutor{ - log: logEntry, - commandRunner: mockRunner, - } - - url := "https://example.com/image" - sha := "1234567890abcdef" - - source := &pb.OSProfileUpdateSource{ - OsImageUrl: url, - OsImageSha: sha, - ProfileName: "test-profile", - } - - mockRunner.SetResponse([]byte("SOTA INBC Command was invoked.\n"), errors.New("signal: killed")) - - err := r.Download(context.Background(), "", source) - assert.Error(t, err, "Download() should fail when command exits non-zero without explicit success marker") -} From e3da8826533a924ea20a35c2f17b861c7af40676 Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Sat, 30 May 2026 15:02:06 -0700 Subject: [PATCH 6/8] Remove sota timeout blocking mutable OS updates --- .../configs/inbd_schema.json | 5 + .../configs/intel_manageability.conf | 3 +- .../internal/inbc/commands/constants.go | 1 - .../internal/inbc/commands/sota.go | 15 +- .../internal/inbc/commands/sota_test.go | 50 +++++++ .../internal/inbd/utils/config.go | 31 ++++- .../internal/inbd/utils/config_test.go | 44 ++++-- .../os_updater/ubuntu/app_source/add_test.go | 20 +-- .../internal/os_updater/ubuntu/update.go | 130 +++++++++++++++++- 9 files changed, 255 insertions(+), 44 deletions(-) diff --git a/in-band-manageability/configs/inbd_schema.json b/in-band-manageability/configs/inbd_schema.json index cdb1cccb..d39857e7 100644 --- a/in-band-manageability/configs/inbd_schema.json +++ b/in-band-manageability/configs/inbd_schema.json @@ -16,6 +16,11 @@ "proceedWithoutRollback": { "type": "boolean", "description": "Indicates whether to proceed with updates without rollback support." + }, + "installIdleTimeoutSeconds": { + "type": "integer", + "minimum": 0, + "description": "Maximum idle duration without install output/progress, in seconds." } }, "required": ["trustedRepositories", "proceedWithoutRollback"], diff --git a/in-band-manageability/configs/intel_manageability.conf b/in-band-manageability/configs/intel_manageability.conf index c5b4988c..a5b5ec62 100644 --- a/in-band-manageability/configs/intel_manageability.conf +++ b/in-band-manageability/configs/intel_manageability.conf @@ -1,6 +1,7 @@ { "os_updater": { "trustedRepositories": [], - "proceedWithoutRollback": true + "proceedWithoutRollback": true, + "installIdleTimeoutSeconds": 3600 } } \ No newline at end of file diff --git a/in-band-manageability/internal/inbc/commands/constants.go b/in-band-manageability/internal/inbc/commands/constants.go index 9646eacf..20d2da69 100644 --- a/in-band-manageability/internal/inbc/commands/constants.go +++ b/in-band-manageability/internal/inbc/commands/constants.go @@ -10,7 +10,6 @@ package commands const clientDialTimeoutInSeconds = 5 const sourceTimeoutInSeconds = 15 const emtSoftwareUpdateTimerInSeconds = 1200 -const defaultSoftwareUpdateTimerInSeconds = 3600 const configTimeoutInSeconds = 15 const firmwareUpdateTimerInSeconds = 90 const queryTimeoutInSeconds = 15 diff --git a/in-band-manageability/internal/inbc/commands/sota.go b/in-band-manageability/internal/inbc/commands/sota.go index 9211db8c..bbddf1af 100644 --- a/in-band-manageability/internal/inbc/commands/sota.go +++ b/in-band-manageability/internal/inbc/commands/sota.go @@ -117,20 +117,19 @@ func handleSOTA( } }() - os, err := detectOS() + osType, err := detectOS() if err != nil { return fmt.Errorf("error detected OS type: %v", err) } - var timeout time.Duration = defaultSoftwareUpdateTimerInSeconds - if os == "EMT" { - timeout = emtSoftwareUpdateTimerInSeconds + updateCtx := context.Background() + if osType == "EMT" { + var updateCancel context.CancelFunc + updateCtx, updateCancel = context.WithTimeout(context.Background(), emtSoftwareUpdateTimerInSeconds*time.Second) + defer updateCancel() } - ctx, cancel = context.WithTimeout(context.Background(), timeout*time.Second) - defer cancel() - - resp, err := client.UpdateSystemSoftware(ctx, request) + resp, err := client.UpdateSystemSoftware(updateCtx, request) if err != nil { return fmt.Errorf("error updating system software: %v", err) } diff --git a/in-band-manageability/internal/inbc/commands/sota_test.go b/in-band-manageability/internal/inbc/commands/sota_test.go index ac3a332c..f5c9eb1b 100644 --- a/in-band-manageability/internal/inbc/commands/sota_test.go +++ b/in-band-manageability/internal/inbc/commands/sota_test.go @@ -10,6 +10,7 @@ import ( "context" "errors" "testing" + "time" pb "github.com/open-edge-platform/edge-node-agents/in-band-manageability/pkg/api/inbd/v1" "github.com/spf13/cobra" @@ -180,4 +181,53 @@ func TestHandleSOTA(t *testing.T) { err := handleSOTA(&socket, &url, &releaseDate, &mode, &reboot, &packageList, &signature, detectOS, dialer)(cmd, args) assert.NoError(t, err, "handleSOTA should not return an error even if Close fails") }) + + t.Run("SOTA request context has no deadline", func(t *testing.T) { + mockClient := new(MockInbServiceClient) + mockClient.On("UpdateSystemSoftware", mock.Anything, mock.Anything, mock.Anything). + Run(func(args mock.Arguments) { + ctxArg, ok := args.Get(0).(context.Context) + assert.True(t, ok) + _, hasDeadline := ctxArg.Deadline() + assert.False(t, hasDeadline) + }). + Return(&pb.UpdateResponse{StatusCode: 200, Error: ""}, nil) + + dialer := func(ctx context.Context, socket string) (pb.InbServiceClient, grpc.ClientConnInterface, error) { + return MockDialer(ctx, socket, mockClient, false) + } + detectOS := func() (string, error) { + return "ubuntu", nil + } + + err := handleSOTA(&socket, &url, &releaseDate, &mode, &reboot, &packageList, &signature, detectOS, dialer)(cmd, args) + assert.NoError(t, err) + mockClient.AssertExpectations(t) + }) + + t.Run("SOTA request context has EMT deadline", func(t *testing.T) { + mockClient := new(MockInbServiceClient) + mockClient.On("UpdateSystemSoftware", mock.Anything, mock.Anything, mock.Anything). + Run(func(args mock.Arguments) { + ctxArg, ok := args.Get(0).(context.Context) + assert.True(t, ok) + deadline, hasDeadline := ctxArg.Deadline() + assert.True(t, hasDeadline) + remaining := time.Until(deadline) + assert.Greater(t, remaining, 0*time.Second) + assert.LessOrEqual(t, remaining, emtSoftwareUpdateTimerInSeconds*time.Second) + }). + Return(&pb.UpdateResponse{StatusCode: 200, Error: ""}, nil) + + dialer := func(ctx context.Context, socket string) (pb.InbServiceClient, grpc.ClientConnInterface, error) { + return MockDialer(ctx, socket, mockClient, false) + } + detectOS := func() (string, error) { + return "EMT", nil + } + + err := handleSOTA(&socket, &url, &releaseDate, &mode, &reboot, &packageList, &signature, detectOS, dialer)(cmd, args) + assert.NoError(t, err) + mockClient.AssertExpectations(t) + }) } diff --git a/in-band-manageability/internal/inbd/utils/config.go b/in-band-manageability/internal/inbd/utils/config.go index f2f454e3..6a0e24cf 100644 --- a/in-band-manageability/internal/inbd/utils/config.go +++ b/in-band-manageability/internal/inbd/utils/config.go @@ -11,18 +11,25 @@ import ( "fmt" "log" "strings" + "time" "github.com/spf13/afero" ) -// Configurations represents the structure of the XML configuration file +// OSUpdaterConfig represents the OS updater section of the config file. +type OSUpdaterConfig struct { + TrustedRepositories []string `json:"trustedRepositories"` + ProceedWithoutRollback bool `json:"proceedWithoutRollback"` + InstallIdleTimeoutSeconds int `json:"installIdleTimeoutSeconds"` +} + +// Configurations represents the structure of the JSON configuration file. type Configurations struct { - OSUpdater struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - } `json:"os_updater"` + OSUpdater OSUpdaterConfig `json:"os_updater"` } +const defaultInstallIdleTimeout = 60 * time.Minute + // LoadConfig loads the XML configuration file func LoadConfig(fs afero.Fs, filePath string) (*Configurations, error) { var content []byte @@ -66,3 +73,17 @@ func IsProceedWithoutRollback(config *Configurations) bool { log.Printf("Checking if rollback is allowed") return config.OSUpdater.ProceedWithoutRollback } + +// GetInstallIdleTimeout converts os_updater.installIdleTimeoutSeconds into a duration. +// Empty value falls back to the default idle watchdog timeout. +func GetInstallIdleTimeout(config *Configurations) (time.Duration, error) { + if config == nil || config.OSUpdater.InstallIdleTimeoutSeconds == 0 { + return defaultInstallIdleTimeout, nil + } + + if config.OSUpdater.InstallIdleTimeoutSeconds < 0 { + return 0, fmt.Errorf("invalid os_updater.installIdleTimeoutSeconds: must be >= 0") + } + + return time.Duration(config.OSUpdater.InstallIdleTimeoutSeconds) * time.Second, nil +} diff --git a/in-band-manageability/internal/inbd/utils/config_test.go b/in-band-manageability/internal/inbd/utils/config_test.go index f17c8b13..ffd9112b 100644 --- a/in-band-manageability/internal/inbd/utils/config_test.go +++ b/in-band-manageability/internal/inbd/utils/config_test.go @@ -8,6 +8,7 @@ package utils import ( "testing" + "time" "github.com/spf13/afero" "github.com/stretchr/testify/assert" @@ -61,10 +62,7 @@ func TestLoadConfig(t *testing.T) { func TestIsTrustedRepository(t *testing.T) { config := &Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: OSUpdaterConfig{ TrustedRepositories: []string{ "https://example.com/repo1", "https://example.com/repo2"}, @@ -93,10 +91,7 @@ func TestIsTrustedRepository(t *testing.T) { func TestIsProceedWithoutRollback_True(t *testing.T) { // Create a configuration where ProceedWithoutRollback is true config := &Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: OSUpdaterConfig{ ProceedWithoutRollback: true, }, } @@ -111,10 +106,7 @@ func TestIsProceedWithoutRollback_True(t *testing.T) { func TestIsProceedWithoutRollback_False(t *testing.T) { // Create a configuration where ProceedWithoutRollback is false config := &Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: OSUpdaterConfig{ ProceedWithoutRollback: false, }, } @@ -125,3 +117,31 @@ func TestIsProceedWithoutRollback_False(t *testing.T) { // Assertions assert.False(t, result, "Expected rollback to be disallowed") } + +func TestGetInstallIdleTimeout(t *testing.T) { + t.Run("empty value disabled", func(t *testing.T) { + cfg := &Configurations{OSUpdater: OSUpdaterConfig{}} + idleTimeout, err := GetInstallIdleTimeout(cfg) + assert.NoError(t, err) + assert.Equal(t, 60*time.Minute, idleTimeout) + }) + + t.Run("valid duration", func(t *testing.T) { + cfg := &Configurations{OSUpdater: OSUpdaterConfig{InstallIdleTimeoutSeconds: 2700}} + idleTimeout, err := GetInstallIdleTimeout(cfg) + assert.NoError(t, err) + assert.Equal(t, 45*time.Minute, idleTimeout) + }) + + t.Run("invalid duration", func(t *testing.T) { + cfg := &Configurations{OSUpdater: OSUpdaterConfig{InstallIdleTimeoutSeconds: -1}} + _, err := GetInstallIdleTimeout(cfg) + assert.ErrorContains(t, err, "invalid os_updater.installIdleTimeoutSeconds") + }) + + t.Run("negative duration", func(t *testing.T) { + cfg := &Configurations{OSUpdater: OSUpdaterConfig{InstallIdleTimeoutSeconds: -1}} + _, err := GetInstallIdleTimeout(cfg) + assert.ErrorContains(t, err, "must be >= 0") + }) +} diff --git a/in-band-manageability/internal/os_updater/ubuntu/app_source/add_test.go b/in-band-manageability/internal/os_updater/ubuntu/app_source/add_test.go index 9e1ab8fe..51c1169a 100644 --- a/in-band-manageability/internal/os_updater/ubuntu/app_source/add_test.go +++ b/in-band-manageability/internal/os_updater/ubuntu/app_source/add_test.go @@ -37,10 +37,7 @@ func TestAdd(t *testing.T) { }, loadConfigFunc: func(afero.Fs, string) (*utils.Configurations, error) { return &utils.Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: utils.OSUpdaterConfig{ TrustedRepositories: []string{ "https://example.com/repo1", "https://example.com/repo2", @@ -86,10 +83,7 @@ func TestAdd(t *testing.T) { adder: &Adder{ loadConfigFunc: func(afero.Fs, string) (*utils.Configurations, error) { return &utils.Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: utils.OSUpdaterConfig{ TrustedRepositories: []string{ "https://example.com/repo1", "https://example.com/repo2", @@ -115,10 +109,7 @@ func TestAdd(t *testing.T) { adder: &Adder{ loadConfigFunc: func(afero.Fs, string) (*utils.Configurations, error) { return &utils.Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: utils.OSUpdaterConfig{ TrustedRepositories: []string{ "https://example.com/repo1", "https://example.com/repo2", @@ -150,10 +141,7 @@ func TestAdd(t *testing.T) { }, loadConfigFunc: func(fs afero.Fs, path string) (*utils.Configurations, error) { return &utils.Configurations{ - OSUpdater: struct { - TrustedRepositories []string `json:"trustedRepositories"` - ProceedWithoutRollback bool `json:"proceedWithoutRollback"` - }{ + OSUpdater: utils.OSUpdaterConfig{ TrustedRepositories: []string{ "https://example.com/repo1", "https://example.com/repo2", diff --git a/in-band-manageability/internal/os_updater/ubuntu/update.go b/in-band-manageability/internal/os_updater/ubuntu/update.go index 4887a43b..67f38a3c 100644 --- a/in-band-manageability/internal/os_updater/ubuntu/update.go +++ b/in-band-manageability/internal/os_updater/ubuntu/update.go @@ -7,14 +7,20 @@ package ubuntu import ( + "bytes" "encoding/json" "fmt" + "io" "log" "os" + "os/exec" "path/filepath" "regexp" "strconv" "strings" + "sync" + "syscall" + "time" common "github.com/open-edge-platform/edge-node-agents/in-band-manageability/internal/common" utils "github.com/open-edge-platform/edge-node-agents/in-band-manageability/internal/inbd/utils" @@ -27,6 +33,103 @@ import ( const aptLockTimeoutOption = "Dpkg::Lock::Timeout=300" +func shouldApplyInstallIdleWatchdog(mode pb.UpdateSystemSoftwareRequest_DownloadMode) bool { + return mode == pb.UpdateSystemSoftwareRequest_DOWNLOAD_MODE_FULL || + mode == pb.UpdateSystemSoftwareRequest_DOWNLOAD_MODE_NO_DOWNLOAD +} + +type activityWriterFunc func() + +func (w activityWriterFunc) Write(p []byte) (int, error) { + if len(p) > 0 { + w() + } + return len(p), nil +} + +func executeCommandWithIdleWatchdog(command []string, idleTimeout time.Duration) ([]byte, []byte, error) { + if len(command) == 0 { + return nil, nil, fmt.Errorf("empty command") + } + + cmd := exec.Command(command[0], command[1:]...) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return nil, nil, fmt.Errorf("failed to get stdout pipe: %w", err) + } + stderrPipe, err := cmd.StderrPipe() + if err != nil { + return nil, nil, fmt.Errorf("failed to get stderr pipe: %w", err) + } + + var stdoutBuf bytes.Buffer + var stderrBuf bytes.Buffer + activityCh := make(chan struct{}, 1) + + signalActivity := func() { + select { + case activityCh <- struct{}{}: + default: + } + } + + stdoutWriter := io.MultiWriter(&stdoutBuf, activityWriterFunc(signalActivity)) + stderrWriter := io.MultiWriter(&stderrBuf, activityWriterFunc(signalActivity)) + + if err := cmd.Start(); err != nil { + return nil, nil, fmt.Errorf("failed to start command '%s': %w", strings.Join(command, " "), err) + } + + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + _, _ = io.Copy(stdoutWriter, stdoutPipe) + }() + go func() { + defer wg.Done() + _, _ = io.Copy(stderrWriter, stderrPipe) + }() + + errCh := make(chan error, 1) + go func() { + errCh <- cmd.Wait() + }() + + idleTimer := time.NewTimer(idleTimeout) + defer idleTimer.Stop() + + for { + select { + case err := <-errCh: + wg.Wait() + if err != nil { + return stdoutBuf.Bytes(), stderrBuf.Bytes(), fmt.Errorf("failed to run '%s' command - %w", strings.Join(command, " "), err) + } + return stdoutBuf.Bytes(), stderrBuf.Bytes(), nil + case <-activityCh: + if !idleTimer.Stop() { + select { + case <-idleTimer.C: + default: + } + } + idleTimer.Reset(idleTimeout) + case <-idleTimer.C: + if cmd.Process != nil { + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil && err != syscall.ESRCH { + _ = cmd.Process.Kill() + } + } + <-errCh + wg.Wait() + return stdoutBuf.Bytes(), stderrBuf.Bytes(), fmt.Errorf("command idle timeout exceeded after %s", idleTimeout) + } + } +} + // Updater is the concrete implementation of the Updater interface // for the Ubuntu OS. type Updater struct { @@ -165,8 +268,33 @@ func (u *Updater) Update() (bool, error) { } for _, cmd := range cmds { + log.Printf("Executing command: %s", cmd) - _, stderr, err := u.CommandExecutor.Execute(cmd) + + var stderr []byte + if shouldApplyInstallIdleWatchdog(u.Request.Mode) { + config, loadErr := utils.LoadConfig(fs, utils.ConfigFilePath) + if loadErr != nil { + log.Printf("Warning: failed to load install idle timeout config, continuing without watchdog: %v", loadErr) + _, stderr, err = u.CommandExecutor.Execute(cmd) + } else { + installIdleTimeout, idleErr := utils.GetInstallIdleTimeout(config) + if idleErr != nil { + emt.WriteUpdateStatus(fs, emt.FAIL, string(jsonString), idleErr.Error()) + emt.WriteGranularLogWithOSType(fs, emt.FAIL, emt.FAILURE_REASON_INBM, "ubuntu") + return false, fmt.Errorf("SOTA Aborted: failed to parse install idle timeout configuration: %v", idleErr) + } + + if installIdleTimeout > 0 { + _, stderr, err = executeCommandWithIdleWatchdog(cmd, installIdleTimeout) + } else { + _, stderr, err = u.CommandExecutor.Execute(cmd) + } + } + } else { + _, stderr, err = u.CommandExecutor.Execute(cmd) + } + if err != nil { emt.WriteUpdateStatus(fs, emt.FAIL, string(jsonString), err.Error()) emt.WriteGranularLogWithOSType(fs, emt.FAIL, emt.FAILURE_REASON_UPDATE_TOOL, "ubuntu") From 11782a13f37c791346ecdba44ed876b5d116b5e1 Mon Sep 17 00:00:00 2001 From: Joanna Kossakowska Date: Sat, 30 May 2026 15:07:34 -0700 Subject: [PATCH 7/8] Increate emt update timeout --- in-band-manageability/internal/inbc/commands/constants.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/in-band-manageability/internal/inbc/commands/constants.go b/in-band-manageability/internal/inbc/commands/constants.go index 20d2da69..140d54bd 100644 --- a/in-band-manageability/internal/inbc/commands/constants.go +++ b/in-band-manageability/internal/inbc/commands/constants.go @@ -9,7 +9,7 @@ package commands // Context Timeouts const clientDialTimeoutInSeconds = 5 const sourceTimeoutInSeconds = 15 -const emtSoftwareUpdateTimerInSeconds = 1200 +const emtSoftwareUpdateTimerInSeconds = 1800 const configTimeoutInSeconds = 15 const firmwareUpdateTimerInSeconds = 90 const queryTimeoutInSeconds = 15 From 37018abe710e1acac5a248cbb942c797dc799813 Mon Sep 17 00:00:00 2001 From: joanna kossakowska Date: Sun, 31 May 2026 22:27:24 +0100 Subject: [PATCH 8/8] Update sota.go --- in-band-manageability/internal/inbc/commands/sota.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/in-band-manageability/internal/inbc/commands/sota.go b/in-band-manageability/internal/inbc/commands/sota.go index bbddf1af..11e7a2df 100644 --- a/in-band-manageability/internal/inbc/commands/sota.go +++ b/in-band-manageability/internal/inbc/commands/sota.go @@ -117,13 +117,13 @@ func handleSOTA( } }() - osType, err := detectOS() + os, err := detectOS() if err != nil { return fmt.Errorf("error detected OS type: %v", err) } updateCtx := context.Background() - if osType == "EMT" { + if os == "EMT" { var updateCancel context.CancelFunc updateCtx, updateCancel = context.WithTimeout(context.Background(), emtSoftwareUpdateTimerInSeconds*time.Second) defer updateCancel()