Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,16 @@ func (s *ApiService) StandbyInstance(ctx context.Context, request oapi.StandbyIn
}
standbyReq.Compression = compression
}
if request.Body != nil && request.Body.CompressionDelay != nil {
compressionDelay, err := parseOptionalDuration(*request.Body.CompressionDelay, "compression_delay")
if err != nil {
return oapi.StandbyInstance400JSONResponse{
Code: "invalid_compression_delay",
Message: err.Error(),
}, nil
}
standbyReq.CompressionDelay = compressionDelay
}

result, err := s.InstanceManager.StandbyInstance(ctx, inst.Id, standbyReq)
if err != nil {
Expand Down Expand Up @@ -1152,6 +1162,13 @@ func toInstanceSnapshotPolicy(policy oapi.SnapshotPolicy) (*instances.SnapshotPo
}
out.Compression = compression
}
if policy.StandbyCompressionDelay != nil {
delay, err := parseOptionalDuration(*policy.StandbyCompressionDelay, "standby_compression_delay")
if err != nil {
return nil, err
}
out.StandbyCompressionDelay = delay
}
return out, nil
}

Expand All @@ -1176,5 +1193,20 @@ func toOAPISnapshotPolicy(policy instances.SnapshotPolicy) oapi.SnapshotPolicy {
compression := toOAPISnapshotCompressionConfig(*policy.Compression)
out.Compression = &compression
}
if policy.StandbyCompressionDelay != nil {
delay := policy.StandbyCompressionDelay.String()
out.StandbyCompressionDelay = &delay
}
return out
}

// parseOptionalDuration parses value as a Go duration string and rejects
// negative results. The field name is interpolated into error messages so
// callers can report which request field was invalid. Returns a pointer so
// callers can assign it directly to optional request fields.
func parseOptionalDuration(value string, field string) (*time.Duration, error) {
	d, parseErr := time.ParseDuration(value)
	switch {
	case parseErr != nil:
		return nil, fmt.Errorf("%s must be a valid duration: %w", field, parseErr)
	case d < 0:
		return nil, fmt.Errorf("%s cannot be negative", field)
	}
	return &d, nil
}
152 changes: 151 additions & 1 deletion cmd/api/api/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,10 +480,85 @@ func TestCreateInstance_MapsNetworkEgressEnforcementMode(t *testing.T) {
assert.Equal(t, instances.EgressEnforcementModeHTTPHTTPSOnly, mockMgr.lastReq.NetworkEgress.EnforcementMode)
}

func TestCreateInstance_MapsAutoStandbyPolicy(t *testing.T) {
// TestCreateInstance_MapsStandbyCompressionDelayInSnapshotPolicy verifies that
// snapshot_policy.standby_compression_delay on the create request is parsed
// into a time.Duration on the manager-level request.
func TestCreateInstance_MapsStandbyCompressionDelayInSnapshotPolicy(t *testing.T) {
	t.Parallel()

	svc := newTestService(t)
	mockMgr := &captureCreateManager{Manager: svc.InstanceManager}
	svc.InstanceManager = mockMgr

	rawDelay := "2m30s"
	body := &oapi.CreateInstanceRequest{
		Name:  "test-standby-compression-delay",
		Image: "docker.io/library/alpine:latest",
		SnapshotPolicy: &oapi.SnapshotPolicy{
			StandbyCompressionDelay: &rawDelay,
		},
	}
	resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{Body: body})
	require.NoError(t, err)
	_, ok := resp.(oapi.CreateInstance201JSONResponse)
	require.True(t, ok, "expected 201 response")

	// The manager must receive the parsed duration, not the raw string.
	require.NotNil(t, mockMgr.lastReq)
	require.NotNil(t, mockMgr.lastReq.SnapshotPolicy)
	require.NotNil(t, mockMgr.lastReq.SnapshotPolicy.StandbyCompressionDelay)
	assert.Equal(t, 150*time.Second, *mockMgr.lastReq.SnapshotPolicy.StandbyCompressionDelay)
}

// TestCreateInstance_InvalidStandbyCompressionDelayInSnapshotPolicy verifies
// that a malformed standby_compression_delay produces a 400 response whose
// message names the offending field.
func TestCreateInstance_InvalidStandbyCompressionDelayInSnapshotPolicy(t *testing.T) {
	t.Parallel()

	svc := newTestService(t)

	badDelay := "not-a-duration"
	body := &oapi.CreateInstanceRequest{
		Name:  "test-invalid-standby-delay",
		Image: "docker.io/library/alpine:latest",
		SnapshotPolicy: &oapi.SnapshotPolicy{
			StandbyCompressionDelay: &badDelay,
		},
	}
	resp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{Body: body})
	require.NoError(t, err)

	badReq, ok := resp.(oapi.CreateInstance400JSONResponse)
	require.True(t, ok, "expected 400 response")
	assert.Equal(t, "invalid_snapshot_policy", badReq.Code)
	assert.Contains(t, badReq.Message, "standby_compression_delay")
}

// TestInstanceToOAPI_EmitsStandbyCompressionDelayInSnapshotPolicy verifies
// that a stored standby compression delay round-trips to its duration-string
// form in the OAPI snapshot policy.
func TestInstanceToOAPI_EmitsStandbyCompressionDelayInSnapshotPolicy(t *testing.T) {
	t.Parallel()

	storedDelay := 90 * time.Second
	inst := instances.Instance{
		StoredMetadata: instances.StoredMetadata{
			Id:             "inst-standby-delay",
			Name:           "inst-standby-delay",
			Image:          "docker.io/library/alpine:latest",
			CreatedAt:      time.Now(),
			HypervisorType: hypervisor.TypeCloudHypervisor,
			SnapshotPolicy: &instances.SnapshotPolicy{
				StandbyCompressionDelay: &storedDelay,
			},
		},
		State: instances.StateStandby,
	}

	got := instanceToOAPI(inst)
	require.NotNil(t, got.SnapshotPolicy)
	require.NotNil(t, got.SnapshotPolicy.StandbyCompressionDelay)
	// 90s renders as "1m30s" via time.Duration.String.
	assert.Equal(t, "1m30s", *got.SnapshotPolicy.StandbyCompressionDelay)
}

func TestCreateInstance_MapsAutoStandbyPolicy(t *testing.T) {
t.Parallel()

svc := newTestService(t)
origMgr := svc.InstanceManager
mockMgr := &captureCreateManager{Manager: origMgr}
svc.InstanceManager = mockMgr
Expand Down Expand Up @@ -907,6 +982,81 @@ func TestStandbyInstance_InvalidRequest(t *testing.T) {
assert.Contains(t, badReq.Message, "invalid snapshot compression level")
}

// TestStandbyInstance_MapsCompressionDelay verifies that the standby endpoint
// parses the request's compression_delay string into a time.Duration on the
// manager-level standby request.
func TestStandbyInstance_MapsCompressionDelay(t *testing.T) {
	t.Parallel()

	svc := newTestService(t)
	source := instances.Instance{
		StoredMetadata: instances.StoredMetadata{
			Id:             "standby-delay-src",
			Name:           "standby-delay-src",
			Image:          "docker.io/library/alpine:latest",
			CreatedAt:      time.Now(),
			HypervisorType: hypervisor.TypeCloudHypervisor,
		},
		State: instances.StateRunning,
	}

	mockMgr := &captureStandbyManager{Manager: svc.InstanceManager, result: &source}
	svc.InstanceManager = mockMgr

	rawDelay := "45s"
	reqCtx := mw.WithResolvedInstance(ctx(), source.Id, source)
	resp, err := svc.StandbyInstance(reqCtx, oapi.StandbyInstanceRequestObject{
		Id: source.Id,
		Body: &oapi.StandbyInstanceRequest{
			CompressionDelay: &rawDelay,
		},
	})
	require.NoError(t, err)
	_, ok := resp.(oapi.StandbyInstance200JSONResponse)
	require.True(t, ok, "expected 200 response")

	// The manager must receive the parsed duration, not the raw string.
	require.NotNil(t, mockMgr.lastReq)
	require.NotNil(t, mockMgr.lastReq.CompressionDelay)
	assert.Equal(t, 45*time.Second, *mockMgr.lastReq.CompressionDelay)
}

// TestStandbyInstance_InvalidCompressionDelay verifies that a negative
// compression_delay is rejected with a 400 response naming the field.
func TestStandbyInstance_InvalidCompressionDelay(t *testing.T) {
	t.Parallel()

	svc := newTestService(t)
	source := instances.Instance{
		StoredMetadata: instances.StoredMetadata{
			Id:             "standby-invalid-delay-src",
			Name:           "standby-invalid-delay-src",
			Image:          "docker.io/library/alpine:latest",
			CreatedAt:      time.Now(),
			HypervisorType: hypervisor.TypeCloudHypervisor,
		},
		State: instances.StateRunning,
	}

	badDelay := "-5s"
	reqCtx := mw.WithResolvedInstance(ctx(), source.Id, source)
	resp, err := svc.StandbyInstance(reqCtx, oapi.StandbyInstanceRequestObject{
		Id: source.Id,
		Body: &oapi.StandbyInstanceRequest{
			CompressionDelay: &badDelay,
		},
	})
	require.NoError(t, err)

	badReq, ok := resp.(oapi.StandbyInstance400JSONResponse)
	require.True(t, ok, "expected 400 response")
	assert.Equal(t, "invalid_compression_delay", badReq.Code)
	assert.Contains(t, badReq.Message, "compression_delay")
}

func TestForkInstance_FromRunningFlagForwarded(t *testing.T) {
t.Parallel()
svc := newTestService(t)
Expand Down
50 changes: 42 additions & 8 deletions lib/instances/compression_integration_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,30 +259,64 @@ func waitForRunningAndExecReady(t *testing.T, ctx context.Context, mgr *manager,
require.NoError(t, waitHypervisorUp(ctx, inst))
}
require.NoError(t, waitForExecAgent(ctx, mgr, instanceID, 30*time.Second))
waitForGuestExecReady(t, ctx, inst)
return inst
}

func writeGuestMarker(t *testing.T, ctx context.Context, inst *Instance, path string, value string) {
func waitForGuestExecReady(t *testing.T, ctx context.Context, inst *Instance) {
t.Helper()
execCtx, cancel := context.WithTimeout(ctx, integrationTestTimeout(compressionGuestExecTimeout))
defer cancel()

output, exitCode, err := execCommand(execCtx, inst, "sh", "-c", fmt.Sprintf("printf %q > %s && sync", value, path))
require.Eventually(t, func() bool {
execCtx, cancel := context.WithTimeout(ctx, integrationTestTimeout(5*time.Second))
defer cancel()

output, exitCode, err := execCommand(execCtx, inst, "true")
return err == nil && exitCode == 0 && output == ""
}, integrationTestTimeout(15*time.Second), 500*time.Millisecond, "guest exec should succeed after restore")
}

// writeGuestMarker writes value to path inside the guest and syncs so the
// marker survives a later snapshot/restore. Exec failures are retried via
// execCommandWithRetry.
func writeGuestMarker(t *testing.T, ctx context.Context, inst *Instance, path string, value string) {
	t.Helper()
	script := fmt.Sprintf("printf %q > %s && sync", value, path)
	output, exitCode, err := execCommandWithRetry(ctx, inst, compressionGuestExecTimeout, "sh", "-c", script)
	require.NoError(t, err)
	require.Equal(t, 0, exitCode, output)
}

func assertGuestMarker(t *testing.T, ctx context.Context, inst *Instance, path string, expected string) {
t.Helper()
execCtx, cancel := context.WithTimeout(ctx, integrationTestTimeout(compressionGuestExecTimeout))
defer cancel()

output, exitCode, err := execCommand(execCtx, inst, "cat", path)
output, exitCode, err := execCommandWithRetry(ctx, inst, compressionGuestExecTimeout, "cat", path)
require.NoError(t, err)
require.Equal(t, 0, exitCode, output)
assert.Equal(t, expected, output)
}

// execCommandWithRetry runs command inside the guest, retrying transport-level
// failures (err != nil) every 500ms until timeout elapses. A successful exec
// with a nonzero exit code is NOT retried — it is returned immediately, since
// the guest was reachable and the command itself failed. On timeout the last
// observed output, exit code, and error are returned.
//
// Fix over the original: the retry loop now honors ctx cancellation instead of
// sleeping blindly until the deadline, and the redundant last-value bookkeeping
// variables are removed (the loop-local results already carry them).
func execCommandWithRetry(ctx context.Context, inst *Instance, timeout time.Duration, command ...string) (string, int, error) {
	deadline := time.Now().Add(integrationTestTimeout(timeout))

	for {
		// Bound each individual exec attempt separately from the overall deadline.
		execCtx, cancel := context.WithTimeout(ctx, integrationTestTimeout(5*time.Second))
		output, exitCode, err := execCommand(execCtx, inst, command...)
		cancel()

		if err == nil {
			return output, exitCode, nil
		}
		if time.Now().After(deadline) {
			return output, exitCode, err
		}

		// Back off briefly, but return promptly if the caller cancels.
		select {
		case <-ctx.Done():
			return output, exitCode, ctx.Err()
		case <-time.After(500 * time.Millisecond):
		}
	}
}

func waitForCompressionJobStart(t *testing.T, mgr *manager, key string, timeout time.Duration) {
t.Helper()
deadline := time.Now().Add(timeout)
Expand Down
5 changes: 5 additions & 0 deletions lib/instances/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,11 @@ func validateCreateRequest(req *CreateInstanceRequest) error {
return err
}
}
if req.SnapshotPolicy != nil && req.SnapshotPolicy.StandbyCompressionDelay != nil {
if _, err := normalizeStandbyCompressionDelay(req.SnapshotPolicy.StandbyCompressionDelay); err != nil {
return err
}
}
normalizedAutoStandby, err := normalizeAutoStandbyPolicy(req.AutoStandby)
if err != nil {
return err
Expand Down
4 changes: 2 additions & 2 deletions lib/instances/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ func (m *manager) deleteInstance(
if err != nil {
return fmt.Errorf("wait for instance compression to stop: %w", err)
}
if target != nil {
m.recordSnapshotCompressionPreemption(ctx, snapshotCompressionPreemptionDeleteInstance, *target)
if target != nil && target.State == compressionJobStateRunning {
m.recordSnapshotCompressionPreemption(ctx, snapshotCompressionPreemptionDeleteInstance, target.Target)
}

// 2. Get network allocation BEFORE killing VMM (while we can still query it)
Expand Down
1 change: 1 addition & 0 deletions lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ func (m *manager) cleanupForkInstanceOnError(ctx context.Context, forkID string)

func cloneStoredMetadata(src StoredMetadata) StoredMetadata {
dst := src
dst.PendingStandbyCompression = nil

if src.Env != nil {
dst.Env = make(map[string]string, len(src.Env))
Expand Down
4 changes: 4 additions & 0 deletions lib/instances/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ type manager struct {
snapshotDefaults SnapshotPolicy
compressionMu sync.Mutex
compressionJobs map[string]*compressionJob
compressionTimerFactory func(time.Duration) compressionTimer
nativeCodecMu sync.Mutex
nativeCodecPaths map[string]string
imageUsageRecorder ImageUsageRecorder
Expand Down Expand Up @@ -212,6 +213,9 @@ func NewManagerWithConfig(p *paths.Paths, imageManager images.Manager, systemMan
m.lifecycleEvents.onDrop = func(ctx context.Context, consumer LifecycleEventConsumer) {
m.recordLifecycleEventDropped(ctx, consumer, lifecycleEventDropReasonBufferFull)
}
if err := m.recoverPendingStandbyCompressionJobs(context.Background()); err != nil {
logger.FromContext(context.Background()).WarnContext(context.Background(), "failed to recover pending standby compression jobs", "error", err)
}

return m
}
Expand Down
Loading
Loading