diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 771dd3f908b..948a7e838a9 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -12,7 +12,7 @@ "etag": [ETAG], "parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources", "published": true, - "serialized_dashboard": "{\"pages\":[{\"displayName\":\"Page One\",\"name\":\"02724bf2\"}]}", + "serialized_dashboard": "sha256:[DASHBOARD_ID][DASHBOARD_ID]", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 695d14602a7..0cb70b8172c 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -12,7 +12,7 @@ "etag": "[NUMID]", "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", + "serialized_dashboard": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", "warehouse_id": "123456" } } diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 6b55f64bd8d..3cdd9cc82f5 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "new": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n" + "old": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "new": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "remote": "sha256:aae99d4284b3390b1b35974f7bd4c03603cacd6a5a4a838ca563f652257ab6a7" } } } diff --git a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json index 7af15357c25..eaaafcb6c33 100644 --- a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json @@ -39,9 +39,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "new": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "remote": "{}\n" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index 59ab070e00e..aaade035814 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{ }\n", - "new": "{ }\n", - "remote": "{}\n" + "old": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "new": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "remote": "sha256:bb511d1e2723214ac1c3710d[NUMID]d57e31b0145d33d427d9628b209be38" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index 558a4ddcfd8..77174a8a0e9 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -46,9 +46,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "new": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}" + "old": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "new": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "remote": "sha256:df70601c54be8ac2a66ac4a01dbd5224f4fa20ca9e8d15a615beab794cfdca08" } } } diff --git a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json index 975688268b2..3569622b6d0 100644 --- a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json +++ b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json @@ -24,9 +24,9 @@ "serialized_space": { "action": "skip", "reason": "etag_based", - "old": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "new": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "remote": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":2}" + "old": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "new": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "remote": "sha256:8df899b73697f19d25ed51f3a97ddce65f97d61cd7fc3b402aef349eb2fe93e8" } } } diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt index e554b79825c..74443ccd812 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt +++ b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt @@ -12,8 +12,7 @@ Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged === serialized_space: local version 1, remote version 2, skipped (etag_based){ "action": "skip", "reason": "etag_based", - "local_version": 1, - "remote_version": 2 + "local_remote_differ": true } >>> [CLI] bundle destroy --auto-approve diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/script b/acceptance/bundle/resources/genie_spaces/version_migration/script index 141efd5bef6..116d81a72c4 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/script +++ b/acceptance/bundle/resources/genie_spaces/version_migration/script @@ -18,9 +18,12 @@ trace $CLI bundle plan # The mismatch the etag masks: the local config stays version 1 while the remote # (and the etag guarding it) is version 2. The serialized_space change is # recorded but skipped via the etag_based rule. +# +# serialized_space is stored in state as a content hash (it is never compared +# against the remote value), so the plan reports old/new/remote as hashes, not +# the full content. We assert that local and remote differ and that the change is +# skipped, rather than parsing version numbers out of the content. title "serialized_space: local version 1, remote version 2, skipped (etag_based)" $CLI bundle plan -o json \ | jq '.plan["resources.genie_spaces.foo"].changes.serialized_space - | {action, reason, - local_version: (.new | fromjson | .version), - remote_version: (.remote | fromjson | .version)}' + | {action, reason, local_remote_differ: (.new != .remote)}' diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a4a61f727f2..14d85ed6d2d 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -50,6 +50,16 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } } +// saveState compacts the state (replacing large equality-only fields with content +// hashes, see Adapter.CompactState) before persisting it. +func (d *DeploymentUnit) saveState(db *dstate.DeploymentState, id string, newState any) error { + compacted, err := d.Adapter.CompactState(newState) + if err != nil { + return fmt.Errorf("compacting state: %w", err) + } + return db.SaveState(d.ResourceKey, id, compacted, d.DependsOn) +} + func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { var newID string var remoteState any @@ -75,7 +85,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state after creating id=%s: %w", newID, err) } @@ -146,7 +156,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } @@ -190,7 +200,7 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", oldID, err) } @@ -252,7 +262,7 @@ func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("resizing id=%s: %w", id, err) } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index 9760ce95666..1b8e84591da 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -145,13 +145,22 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } } + adapter, err := b.getAdapterForKey(resourceKey) + if err != nil { + return nil, err + } + compacted, err := adapter.CompactState(sv.Value) + if err != nil { + return nil, fmt.Errorf("compacting state: %w", err) + } + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true)) if err != nil { os.Remove(tmpStatePath) return nil, err } - err = b.StateDB.SaveState(resourceKey, resourceID, sv.Value, dependsOn) + err = b.StateDB.SaveState(resourceKey, resourceID, compacted, dependsOn) if err != nil { os.Remove(tmpStatePath) return nil, err diff --git a/bundle/direct/bundle_apply.go b/bundle/direct/bundle_apply.go index a9981ee63d8..8cf4565f170 100644 --- a/bundle/direct/bundle_apply.go +++ b/bundle/direct/bundle_apply.go @@ -130,7 +130,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa logdiag.LogError(ctx, fmt.Errorf("state entry not found for %q", resourceKey)) return false } - err = b.StateDB.SaveState(resourceKey, id, sv.Value, entry.DependsOn) + err = d.saveState(&b.StateDB, id, sv.Value) } else { // TODO: redo calcDiff to downgrade planned action if possible (?) err = d.Deploy(ctx, &b.StateDB, sv.Value, action, entry) diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 552d2067cc3..187c26cbf19 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -197,6 +197,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } + // Compact every value that enters the diff so they share one comparison form. + // For saved state this also hashes legacy full-content entries on the fly; the + // on-disk entry is rewritten compactly on the next save (lazy migration). + savedState, err = adapter.CompactState(savedState) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err)) + return false + } + // Note, currently we're diffing static structs, not dynamic value. // This means for fields that contain references like ${resources.group.foo.id} we do one of the following: // for strings: comparing unresolved string like "${resoures.group.foo.id}" with actual object id. As long as IDs do not have ${...} format we're good. @@ -208,7 +217,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks logdiag.LogError(ctx, fmt.Errorf("%s: internal error: no state cache entry found for %q", errorPrefix, resourceKey)) return false } - localDiff, err := structdiff.GetStructDiff(savedState, sv.Value, adapter.KeyedSlices()) + + // Compact a copy for comparison only; sv.Value keeps the full contents, which + // the deploy sends to the API. + localState, err := adapter.CompactState(sv.Value) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting local state: %w", errorPrefix, err)) + return false + } + localDiff, err := structdiff.GetStructDiff(savedState, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing local state: %w", errorPrefix, err)) return false @@ -241,7 +258,13 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, sv.Value, adapter.KeyedSlices()) + remoteStateComparable, err = adapter.CompactState(remoteStateComparable) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) + return false + } + + remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err)) return false diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..d904deac2d5 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -47,6 +47,17 @@ If the API may return a slice's elements in a different order between calls (e.g The state struct is serialized to JSON and persisted between deploys. Backward incompatible changes will result in a drift, which depending on field behaviour might result in recreate. See dstate/migrate.go on how to handle state migration. +## CompactState: storing large fields as content hashes + +Implement the optional `CompactState(state *T) (*T, error)` method when a field holds large content that is only ever compared for equality and never read back from state. It returns a copy of the state with such fields replaced by a content hash (use `hashStateValue`), and the framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form. + +A field qualifies only if **all** of the following hold: + - it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection), + - it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and + - it can be large (a small field gains nothing — the hash placeholder is ~70 bytes). + +`dashboards.serialized_dashboard` and `genie_spaces.serialized_space` use this: both inline a file's contents into state, both detect drift via `etag`, and both always send the full contents to the API from the plan's `new_state`. As a result the plan reports such fields as hashes (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is `ignore_remote_changes` to guard the invariant. + ## OverrideChangeDesc Use `OverrideChangeDesc` only as a last resort when `resources.yml` settings cannot express the needed logic. Skipping an action with `change.Action = deployplan.Skip` in `OverrideChangeDesc` creates a silent no-op: the plan shows no change even if the user's config differs from remote. Document the skip reason clearly in both the comment and `change.Reason`. diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 6891632b626..7e00cd03034 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -51,6 +51,13 @@ type IResource interface { // [Optional] OverrideChangeDesc can implement custom logic to update a given ChangeDesc; it is run last after built-in classifiers and field triggers. OverrideChangeDesc(ctx context.Context, path *structpath.PathNode, changedesc *ChangeDesc, remoteState any) error + // [Optional] CompactState returns a copy of state with large, equality-only fields + // (e.g. dashboards' serialized_dashboard) replaced by content hashes before the state + // is persisted and diffed. Must be idempotent. Only valid for fields that are + // ignore_remote_changes and never read back from state. + // Example: func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) + CompactState(state any) (any, error) + // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) @@ -102,6 +109,7 @@ type Adapter struct { waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller + compactState *calladapt.BoundCaller doResize *calladapt.BoundCaller resourceConfig *ResourceLifecycleConfig @@ -135,6 +143,7 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, + compactState: nil, resourceConfig: GetResourceConfig(resourceType), generatedResourceConfig: GetGeneratedResourceConfig(resourceType), keyedSlices: nil, @@ -226,6 +235,11 @@ func (a *Adapter) initMethods(resource any) error { return err } + a.compactState, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "CompactState") + if err != nil { + return err + } + a.doResize, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "DoResize") if err != nil { return err @@ -339,6 +353,13 @@ func (a *Adapter) validate() error { validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) } + if a.compactState != nil { + validations = append(validations, + "CompactState input", a.compactState.InTypes[0], stateType, + "CompactState return", a.compactState.OutTypes[0], stateType, + ) + } + err = validateTypes(validations...) if err != nil { return err @@ -556,6 +577,22 @@ func (a *Adapter) KeyedSlices() map[string]any { return a.keyedSlices } +// CompactState returns a copy of state with large, equality-only fields replaced by +// content hashes for compact storage, or state unchanged if the resource does not +// implement CompactState. It is applied both before persisting state and to every +// value entering the state diff, so stored and compared values share one form. +func (a *Adapter) CompactState(state any) (any, error) { + if a.compactState == nil { + return state, nil + } + + outs, err := a.compactState.Call(state) + if err != nil { + return nil, err + } + return outs[0], nil +} + // prepareCallRequired prepares a call and ensures the method is found. func prepareCallRequired(resource any, methodName string) (*calladapt.BoundCaller, error) { caller, err := calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), methodName) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index dbf492a0bee..67fd8364a07 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -106,6 +106,22 @@ func (r *ResourceDashboard) RemapState(state *DashboardState) *DashboardState { } } +// CompactState replaces the inlined serialized_dashboard contents with a content +// hash before the state is persisted. The full contents are never needed back from +// state: remote drift is detected via etag (serialized_dashboard is ignore_remote_changes, +// see resources.yml), and a deploy always sends the contents from the plan's new_state, +// never from saved state. Hashing keeps resources.json small for dashboards whose +// serialized form can be several megabytes. +func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) { + hashed, err := hashStateValue(state.SerializedDashboard) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedDashboard = hashed + return &compacted, nil +} + func (r *ResourceDashboard) DoRead(ctx context.Context, id string) (*DashboardState, error) { var dashboard *dashboards.Dashboard var publishedDashboard *dashboards.PublishedDashboard diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 177e1236221..2b1ece93566 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -2,9 +2,11 @@ package dresources import ( "encoding/json" + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -23,3 +25,48 @@ func TestDashboardState_JSONSerialization_PublishedField(t *testing.T) { assert.Contains(t, string(data), `"published":true`) } + +func TestDashboardCompactState(t *testing.T) { + r := &ResourceDashboard{} + state := &DashboardState{ + DashboardConfig: resources.DashboardConfig{ + DisplayName: "test-dashboard", + Etag: "etag-123", + SerializedDashboard: `{"pages":[{"name":"p1"}]}`, + }, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + // serialized_dashboard is replaced by a content hash; other fields are preserved. + require.IsType(t, "", compacted.SerializedDashboard) + assert.True(t, strings.HasPrefix(compacted.SerializedDashboard.(string), stateHashPrefix)) + assert.Equal(t, "test-dashboard", compacted.DisplayName) + assert.Equal(t, "etag-123", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"pages":[{"name":"p1"}]}`, state.SerializedDashboard) + + // Compacting is idempotent. + again, err := r.CompactState(compacted) + require.NoError(t, err) + assert.Equal(t, compacted.SerializedDashboard, again.SerializedDashboard) +} + +// TestDashboardSerializedDashboardIsIgnoreRemoteChanges guards the SHA-only invariant: +// because serialized_dashboard is stored as a content hash, it must never be compared +// against the (full-content) remote value, i.e. it must be declared ignore_remote_changes. +func TestDashboardSerializedDashboardIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("dashboards") + path := structpath.NewStringKey(nil, "serialized_dashboard") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/genie_space.go b/bundle/direct/dresources/genie_space.go index 16aa0a57e8a..fcb4f8b5905 100644 --- a/bundle/direct/dresources/genie_space.go +++ b/bundle/direct/dresources/genie_space.go @@ -58,6 +58,21 @@ func (r *ResourceGenieSpace) RemapState(remote *resources.GenieSpaceConfig) *res } } +// CompactState replaces the inlined serialized_space contents with a content hash +// before the state is persisted. As with dashboards, the full contents are never +// needed back from state: drift is detected via etag (serialized_space is +// ignore_remote_changes, see resources.yml), and a deploy always sends the contents +// from the plan's new_state, never from saved state. +func (r *ResourceGenieSpace) CompactState(state *resources.GenieSpaceConfig) (*resources.GenieSpaceConfig, error) { + hashed, err := hashStateValue(state.SerializedSpace) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedSpace = hashed + return &compacted, nil +} + func (r *ResourceGenieSpace) DoRead(ctx context.Context, id string) (*resources.GenieSpaceConfig, error) { space, err := r.client.Genie.GetSpace(ctx, dashboards.GenieGetSpaceRequest{ SpaceId: id, diff --git a/bundle/direct/dresources/genie_space_test.go b/bundle/direct/dresources/genie_space_test.go index f77289083d3..c298d6f7ebd 100644 --- a/bundle/direct/dresources/genie_space_test.go +++ b/bundle/direct/dresources/genie_space_test.go @@ -1,6 +1,7 @@ package dresources import ( + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" @@ -199,3 +200,40 @@ func TestGenieSpaceOverrideChangeDescEtag(t *testing.T) { assert.Equal(t, deployplan.Update, change.Action) }) } + +func TestGenieSpaceCompactState(t *testing.T) { + r := &ResourceGenieSpace{} + state := &resources.GenieSpaceConfig{ + Title: "test-space", + Etag: "etag-7", + SerializedSpace: `{"datasets":[{"name":"d1"}]}`, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + require.IsType(t, "", compacted.SerializedSpace) + assert.True(t, strings.HasPrefix(compacted.SerializedSpace.(string), stateHashPrefix)) + assert.Equal(t, "test-space", compacted.Title) + assert.Equal(t, "etag-7", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"datasets":[{"name":"d1"}]}`, state.SerializedSpace) +} + +// TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges guards the SHA-only invariant: +// serialized_space is stored as a content hash, so it must never be compared against +// the remote value, i.e. it must be declared ignore_remote_changes. +func TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("genie_spaces") + path := structpath.NewStringKey(nil, "serialized_space") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_space must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/state_compaction.go b/bundle/direct/dresources/state_compaction.go new file mode 100644 index 00000000000..691255137c1 --- /dev/null +++ b/bundle/direct/dresources/state_compaction.go @@ -0,0 +1,40 @@ +package dresources + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "strings" +) + +// stateHashPrefix marks a state value that holds a content hash instead of the +// full content. It is part of the on-disk state format, so changing it is a +// backward-incompatible change. +const stateHashPrefix = "sha256:" + +// hashStateValue returns a content-hash placeholder ("sha256:") over the JSON +// encoding of v. It is used to store large, equality-only fields (e.g. a dashboard's +// serialized_dashboard) compactly in state instead of their full contents. +// +// It is idempotent and stable: nil, an empty string, and a value that is already a +// placeholder are returned unchanged, so re-compacting an already-compact state and +// comparing a fresh config against stored state both behave predictably. +func hashStateValue(v any) (any, error) { + if s, ok := v.(string); ok { + if s == "" || strings.HasPrefix(s, stateHashPrefix) { + return v, nil + } + } + if v == nil { + return v, nil + } + + // json.Marshal is deterministic (map keys are sorted), so equal content always + // produces an equal hash across runs and platforms. + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + sum := sha256.Sum256(data) + return stateHashPrefix + hex.EncodeToString(sum[:]), nil +} diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go new file mode 100644 index 00000000000..d2c5d8b6846 --- /dev/null +++ b/bundle/direct/dresources/state_compaction_test.go @@ -0,0 +1,53 @@ +package dresources + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHashStateValue(t *testing.T) { + stringHash, err := hashStateValue("hello") + require.NoError(t, err) + require.IsType(t, "", stringHash) + assert.True(t, strings.HasPrefix(stringHash.(string), stateHashPrefix)) + + // Same content always hashes to the same value. + again, err := hashStateValue("hello") + require.NoError(t, err) + assert.Equal(t, stringHash, again) + + // Different content hashes differently. + other, err := hashStateValue("world") + require.NoError(t, err) + assert.NotEqual(t, stringHash, other) + + // A map hashes deterministically regardless of literal key order. + mapHash, err := hashStateValue(map[string]any{"a": 1, "b": 2}) + require.NoError(t, err) + mapHash2, err := hashStateValue(map[string]any{"b": 2, "a": 1}) + require.NoError(t, err) + assert.Equal(t, mapHash, mapHash2) +} + +func TestHashStateValueIdempotent(t *testing.T) { + hashed, err := hashStateValue("some content") + require.NoError(t, err) + + // Re-hashing a placeholder returns it unchanged. + again, err := hashStateValue(hashed) + require.NoError(t, err) + assert.Equal(t, hashed, again) +} + +func TestHashStateValueEmptyAndNil(t *testing.T) { + empty, err := hashStateValue("") + require.NoError(t, err) + assert.Empty(t, empty) + + null, err := hashStateValue(nil) + require.NoError(t, err) + assert.Nil(t, null) +}