From 4a33f36ca8a3c8bd0703b4d008e2dbad4ec9c257 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Wed, 8 Apr 2026 16:52:57 -0400 Subject: [PATCH] Retry instance directory deletion on ENOTEMPTY --- lib/instances/manager_test.go | 52 +++++++++++++++++++++++++++++++++++ lib/instances/storage.go | 31 ++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index d46b0173..169237b6 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -1289,6 +1289,58 @@ func TestStorageOperations(t *testing.T) { assert.ErrorIs(t, err, ErrNotFound) } +func TestRemoveAllWithRetryRetriesDirectoryNotEmpty(t *testing.T) { + t.Parallel() + + path := "/tmp/test-instance" + attempts := 0 + var slept []time.Duration + + err := removeAllWithRetry(path, func(got string) error { + attempts++ + require.Equal(t, path, got) + if attempts <= 3 { + return &os.PathError{Op: "unlinkat", Path: got, Err: syscall.ENOTEMPTY} + } + return nil + }, func(d time.Duration) { + slept = append(slept, d) + }) + + require.NoError(t, err) + assert.Equal(t, 4, attempts) + assert.Equal(t, []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 40 * time.Millisecond, + }, slept) +} + +func TestRemoveAllWithRetryStopsAfterMaxRetries(t *testing.T) { + t.Parallel() + + attempts := 0 + var slept []time.Duration + + err := removeAllWithRetry("/tmp/test-instance", func(string) error { + attempts++ + return &os.PathError{Op: "unlinkat", Path: "/tmp/test-instance", Err: syscall.ENOTEMPTY} + }, func(d time.Duration) { + slept = append(slept, d) + }) + + require.Error(t, err) + assert.ErrorIs(t, err, syscall.ENOTEMPTY) + assert.Equal(t, deleteInstanceDataMaxRetries+1, attempts) + assert.Equal(t, []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 40 * time.Millisecond, + 80 * time.Millisecond, + 100 * time.Millisecond, + }, slept) +} + func TestStandbyAndRestore(t *testing.T) { t.Parallel() // Require KVM access (don't skip, fail informatively) diff --git a/lib/instances/storage.go b/lib/instances/storage.go index cf176d49..f290298c 100644 --- a/lib/instances/storage.go +++ b/lib/instances/storage.go @@ -2,14 +2,23 @@ package instances import ( "encoding/json" + "errors" "fmt" "os" "path/filepath" + "syscall" + "time" "github.com/kernel/hypeman/lib/autostandby" "github.com/kernel/hypeman/lib/images" ) +const ( + deleteInstanceDataMaxRetries = 5 + deleteInstanceDataInitialBackoff = 10 * time.Millisecond + deleteInstanceDataMaxBackoff = 100 * time.Millisecond +) + // Filesystem structure: // {dataDir}/guests/{instance-id}/ // metadata.json # Instance metadata @@ -111,7 +120,7 @@ func (m *manager) createVolumeOverlayDisk(instanceID, volumeID string, sizeBytes func (m *manager) deleteInstanceData(id string) error { instDir := m.paths.InstanceDir(id) - if err := os.RemoveAll(instDir); err != nil { + if err := removeAllWithRetry(instDir, os.RemoveAll, time.Sleep); err != nil { return fmt.Errorf("remove instance directory: %w", err) } @@ -120,6 +129,26 @@ func (m *manager) deleteInstanceData(id string) error { return nil } +func removeAllWithRetry(path string, removeAll func(string) error, sleep func(time.Duration)) error { + backoff := deleteInstanceDataInitialBackoff + + for attempt := 0; ; attempt++ { + err := removeAll(path) + if err == nil { + return nil + } + if !errors.Is(err, syscall.ENOTEMPTY) || attempt >= deleteInstanceDataMaxRetries { + return err + } + + sleep(backoff) + backoff *= 2 + if backoff > deleteInstanceDataMaxBackoff { + backoff = deleteInstanceDataMaxBackoff + } + } +} + // listMetadataFiles returns paths to all instance metadata files func (m *manager) listMetadataFiles() ([]string, error) { guestsDir := m.paths.GuestsDir()