From 4066dd65f0bec721d206a934347c134a5a55d4dd Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 15 Jun 2026 14:24:24 +0000 Subject: [PATCH] direct: store serialized_dashboard/serialized_space in state as content hashes The direct deploy engine persists the full planned config per resource in resources.json. For dashboards and genie spaces, that includes the inlined serialized_dashboard / serialized_space contents, which routinely run into the hundreds of KB to several MB. These fields are never read back from state: drift is detected via etag (they are ignore_remote_changes), and a deploy always sends the contents from the plan's new_state, never from saved state. Add an optional CompactState adapter method that replaces such equality-only fields with a "sha256:" content hash. The framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form and unchanged content still produces an equal-hash skip. The plan's new_state (sent to the API) and the raw remote_state snapshot keep full content. No state version bump: legacy full-content state is hashed on read for the comparison and rewritten compactly on the next save. Co-authored-by: Isaac --- .../out.state_after_bind.direct.json | 2 +- .../migrate/dashboards/out.new_state.json | 2 +- .../dashboards/out.plan_after_migrate.json | 6 +-- .../detect-change/out.plan.direct.json | 6 +-- .../dashboards/simple/out.plan.direct.json | 6 +-- .../out.plan.direct.json | 6 +-- .../genie_spaces/inline/out.plan.json | 6 +-- .../genie_spaces/version_migration/output.txt | 3 +- .../genie_spaces/version_migration/script | 9 ++-- bundle/direct/apply.go | 18 +++++-- bundle/direct/bind.go | 11 +++- bundle/direct/bundle_apply.go | 2 +- bundle/direct/bundle_plan.go | 27 +++++++++- bundle/direct/dresources/README.md | 11 ++++ bundle/direct/dresources/adapter.go | 37 +++++++++++++ bundle/direct/dresources/dashboard.go | 16 ++++++ bundle/direct/dresources/dashboard_test.go | 47 ++++++++++++++++ bundle/direct/dresources/genie_space.go | 15 ++++++ bundle/direct/dresources/genie_space_test.go | 38 +++++++++++++ bundle/direct/dresources/state_compaction.go | 40 ++++++++++++++ .../dresources/state_compaction_test.go | 53 +++++++++++++++++++ 21 files changed, 331 insertions(+), 30 deletions(-) create mode 100644 bundle/direct/dresources/state_compaction.go create mode 100644 bundle/direct/dresources/state_compaction_test.go diff --git a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json index 771dd3f908b..948a7e838a9 100644 --- a/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json +++ b/acceptance/bundle/deployment/bind/dashboard/recreation/out.state_after_bind.direct.json @@ -12,7 +12,7 @@ "etag": [ETAG], "parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources", "published": true, - "serialized_dashboard": "{\"pages\":[{\"displayName\":\"Page One\",\"name\":\"02724bf2\"}]}", + "serialized_dashboard": "sha256:[DASHBOARD_ID][DASHBOARD_ID]", "warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]" } } diff --git a/acceptance/bundle/migrate/dashboards/out.new_state.json b/acceptance/bundle/migrate/dashboards/out.new_state.json index 695d14602a7..0cb70b8172c 100644 --- a/acceptance/bundle/migrate/dashboards/out.new_state.json +++ b/acceptance/bundle/migrate/dashboards/out.new_state.json @@ -12,7 +12,7 @@ "etag": "[NUMID]", "parent_path": "/Workspace/Users/[USERNAME]", "published": true, - "serialized_dashboard": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", + "serialized_dashboard": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", "warehouse_id": "123456" } } diff --git a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json index 6b55f64bd8d..3cdd9cc82f5 100644 --- a/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json +++ b/acceptance/bundle/migrate/dashboards/out.plan_after_migrate.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "new": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n", - "remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n" + "old": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "new": "sha256:9dc171d249749a592c717bb13f2d948d2f1fa5530b82ac308a3df0c48d347ef3", + "remote": "sha256:aae99d4284b3390b1b35974f7bd4c03603cacd6a5a4a838ca563f652257ab6a7" } } } diff --git a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json index 7af15357c25..eaaafcb6c33 100644 --- a/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/detect-change/out.plan.direct.json @@ -39,9 +39,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "new": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n", - "remote": "{}\n" + "old": "sha256:[ALPHANUMID]", + "new": "sha256:[ALPHANUMID]", + "remote": "sha256:[ALPHANUMID]" } } } diff --git a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json index 59ab070e00e..aaade035814 100644 --- a/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/simple/out.plan.direct.json @@ -30,9 +30,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{ }\n", - "new": "{ }\n", - "remote": "{}\n" + "old": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "new": "sha256:cf207804e[NUMID]becbb5703765c4978cd7a8466dbdd[NUMID]e571ad09", + "remote": "sha256:bb511d1e2723214ac1c3710d[NUMID]d57e31b0145d33d427d9628b209be38" } } } diff --git a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json index 558a4ddcfd8..77174a8a0e9 100644 --- a/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json +++ b/acceptance/bundle/resources/dashboards/unpublish-out-of-band/out.plan.direct.json @@ -46,9 +46,9 @@ "serialized_dashboard": { "action": "skip", "reason": "etag_based", - "old": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "new": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}", - "remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}" + "old": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "new": "sha256:a196fded8b5e0ebe5af2c6bed934d90fe055644de7773bbd84c851b577ff4f19", + "remote": "sha256:df70601c54be8ac2a66ac4a01dbd5224f4fa20ca9e8d15a615beab794cfdca08" } } } diff --git a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json index 975688268b2..3569622b6d0 100644 --- a/acceptance/bundle/resources/genie_spaces/inline/out.plan.json +++ b/acceptance/bundle/resources/genie_spaces/inline/out.plan.json @@ -24,9 +24,9 @@ "serialized_space": { "action": "skip", "reason": "etag_based", - "old": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "new": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":1}", - "remote": "{\"config\":{\"sample_questions\":[{\"id\":\"sq-001\",\"question\":[\"What is the total revenue?\"]}]},\"data_sources\":{\"tables\":[{\"column_configs\":[{\"column_name\":\"amount\",\"get_example_values\":true}],\"identifier\":\"main.sales.orders\"}]},\"version\":2}" + "old": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "new": "sha256:ea4b505f061a2b65db49891ea5cab6ac875350aa7a8afed4778860a50d8f4db5", + "remote": "sha256:8df899b73697f19d25ed51f3a97ddce65f97d61cd7fc3b402aef349eb2fe93e8" } } } diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt index e554b79825c..74443ccd812 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/output.txt +++ b/acceptance/bundle/resources/genie_spaces/version_migration/output.txt @@ -12,8 +12,7 @@ Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged === serialized_space: local version 1, remote version 2, skipped (etag_based){ "action": "skip", "reason": "etag_based", - "local_version": 1, - "remote_version": 2 + "local_remote_differ": true } >>> [CLI] bundle destroy --auto-approve diff --git a/acceptance/bundle/resources/genie_spaces/version_migration/script b/acceptance/bundle/resources/genie_spaces/version_migration/script index 141efd5bef6..116d81a72c4 100644 --- a/acceptance/bundle/resources/genie_spaces/version_migration/script +++ b/acceptance/bundle/resources/genie_spaces/version_migration/script @@ -18,9 +18,12 @@ trace $CLI bundle plan # The mismatch the etag masks: the local config stays version 1 while the remote # (and the etag guarding it) is version 2. The serialized_space change is # recorded but skipped via the etag_based rule. +# +# serialized_space is stored in state as a content hash (it is never compared +# against the remote value), so the plan reports old/new/remote as hashes, not +# the full content. We assert that local and remote differ and that the change is +# skipped, rather than parsing version numbers out of the content. title "serialized_space: local version 1, remote version 2, skipped (etag_based)" $CLI bundle plan -o json \ | jq '.plan["resources.genie_spaces.foo"].changes.serialized_space - | {action, reason, - local_version: (.new | fromjson | .version), - remote_version: (.remote | fromjson | .version)}' + | {action, reason, local_remote_differ: (.new != .remote)}' diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index a4a61f727f2..14d85ed6d2d 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -50,6 +50,16 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState, } } +// saveState compacts the state (replacing large equality-only fields with content +// hashes, see Adapter.CompactState) before persisting it. +func (d *DeploymentUnit) saveState(db *dstate.DeploymentState, id string, newState any) error { + compacted, err := d.Adapter.CompactState(newState) + if err != nil { + return fmt.Errorf("compacting state: %w", err) + } + return db.SaveState(d.ResourceKey, id, compacted, d.DependsOn) +} + func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error { var newID string var remoteState any @@ -75,7 +85,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state after creating id=%s: %w", newID, err) } @@ -146,7 +156,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState, return err } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } @@ -190,7 +200,7 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment return err } - err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn) + err = d.saveState(db, newID, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", oldID, err) } @@ -252,7 +262,7 @@ func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState, return fmt.Errorf("resizing id=%s: %w", id, err) } - err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn) + err = d.saveState(db, id, newState) if err != nil { return fmt.Errorf("saving state id=%s: %w", id, err) } diff --git a/bundle/direct/bind.go b/bundle/direct/bind.go index 9760ce95666..1b8e84591da 100644 --- a/bundle/direct/bind.go +++ b/bundle/direct/bind.go @@ -145,13 +145,22 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac } } + adapter, err := b.getAdapterForKey(resourceKey) + if err != nil { + return nil, err + } + compacted, err := adapter.CompactState(sv.Value) + if err != nil { + return nil, fmt.Errorf("compacting state: %w", err) + } + err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true)) if err != nil { os.Remove(tmpStatePath) return nil, err } - err = b.StateDB.SaveState(resourceKey, resourceID, sv.Value, dependsOn) + err = b.StateDB.SaveState(resourceKey, resourceID, compacted, dependsOn) if err != nil { os.Remove(tmpStatePath) return nil, err diff --git a/bundle/direct/bundle_apply.go b/bundle/direct/bundle_apply.go index a9981ee63d8..8cf4565f170 100644 --- a/bundle/direct/bundle_apply.go +++ b/bundle/direct/bundle_apply.go @@ -130,7 +130,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa logdiag.LogError(ctx, fmt.Errorf("state entry not found for %q", resourceKey)) return false } - err = b.StateDB.SaveState(resourceKey, id, sv.Value, entry.DependsOn) + err = d.saveState(&b.StateDB, id, sv.Value) } else { // TODO: redo calcDiff to downgrade planned action if possible (?) err = d.Deploy(ctx, &b.StateDB, sv.Value, action, entry) diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 552d2067cc3..187c26cbf19 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -197,6 +197,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } + // Compact every value that enters the diff so they share one comparison form. + // For saved state this also hashes legacy full-content entries on the fly; the + // on-disk entry is rewritten compactly on the next save (lazy migration). + savedState, err = adapter.CompactState(savedState) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err)) + return false + } + // Note, currently we're diffing static structs, not dynamic value. // This means for fields that contain references like ${resources.group.foo.id} we do one of the following: // for strings: comparing unresolved string like "${resoures.group.foo.id}" with actual object id. As long as IDs do not have ${...} format we're good. @@ -208,7 +217,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks logdiag.LogError(ctx, fmt.Errorf("%s: internal error: no state cache entry found for %q", errorPrefix, resourceKey)) return false } - localDiff, err := structdiff.GetStructDiff(savedState, sv.Value, adapter.KeyedSlices()) + + // Compact a copy for comparison only; sv.Value keeps the full contents, which + // the deploy sends to the API. + localState, err := adapter.CompactState(sv.Value) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting local state: %w", errorPrefix, err)) + return false + } + localDiff, err := structdiff.GetStructDiff(savedState, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing local state: %w", errorPrefix, err)) return false @@ -241,7 +258,13 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks return false } - remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, sv.Value, adapter.KeyedSlices()) + remoteStateComparable, err = adapter.CompactState(remoteStateComparable) + if err != nil { + logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err)) + return false + } + + remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices()) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err)) return false diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..d904deac2d5 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -47,6 +47,17 @@ If the API may return a slice's elements in a different order between calls (e.g The state struct is serialized to JSON and persisted between deploys. Backward incompatible changes will result in a drift, which depending on field behaviour might result in recreate. See dstate/migrate.go on how to handle state migration. +## CompactState: storing large fields as content hashes + +Implement the optional `CompactState(state *T) (*T, error)` method when a field holds large content that is only ever compared for equality and never read back from state. It returns a copy of the state with such fields replaced by a content hash (use `hashStateValue`), and the framework applies it both before persisting state and to every value entering the diff, so stored and compared values share one form. + +A field qualifies only if **all** of the following hold: + - it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection), + - it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and + - it can be large (a small field gains nothing — the hash placeholder is ~70 bytes). + +`dashboards.serialized_dashboard` and `genie_spaces.serialized_space` use this: both inline a file's contents into state, both detect drift via `etag`, and both always send the full contents to the API from the plan's `new_state`. As a result the plan reports such fields as hashes (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is `ignore_remote_changes` to guard the invariant. + ## OverrideChangeDesc Use `OverrideChangeDesc` only as a last resort when `resources.yml` settings cannot express the needed logic. Skipping an action with `change.Action = deployplan.Skip` in `OverrideChangeDesc` creates a silent no-op: the plan shows no change even if the user's config differs from remote. Document the skip reason clearly in both the comment and `change.Reason`. diff --git a/bundle/direct/dresources/adapter.go b/bundle/direct/dresources/adapter.go index 6891632b626..7e00cd03034 100644 --- a/bundle/direct/dresources/adapter.go +++ b/bundle/direct/dresources/adapter.go @@ -51,6 +51,13 @@ type IResource interface { // [Optional] OverrideChangeDesc can implement custom logic to update a given ChangeDesc; it is run last after built-in classifiers and field triggers. OverrideChangeDesc(ctx context.Context, path *structpath.PathNode, changedesc *ChangeDesc, remoteState any) error + // [Optional] CompactState returns a copy of state with large, equality-only fields + // (e.g. dashboards' serialized_dashboard) replaced by content hashes before the state + // is persisted and diffed. Must be idempotent. Only valid for fields that are + // ignore_remote_changes and never read back from state. + // Example: func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) + CompactState(state any) (any, error) + // DoCreate creates a new resource from the newState. Returns id of the resource and optionally remote state. // If remote state is available as part of the operation, return it; otherwise return nil. // Example: func (r *ResourceVolume) DoCreate(ctx context.Context, newState *catalog.CreateVolumeRequestContent) (string, *catalog.VolumeInfo, error) @@ -102,6 +109,7 @@ type Adapter struct { waitAfterUpdate *calladapt.BoundCaller waitAfterDelete *calladapt.BoundCaller overrideChangeDesc *calladapt.BoundCaller + compactState *calladapt.BoundCaller doResize *calladapt.BoundCaller resourceConfig *ResourceLifecycleConfig @@ -135,6 +143,7 @@ func NewAdapter(typedNil any, resourceType string, client *databricks.WorkspaceC waitAfterUpdate: nil, waitAfterDelete: nil, overrideChangeDesc: nil, + compactState: nil, resourceConfig: GetResourceConfig(resourceType), generatedResourceConfig: GetGeneratedResourceConfig(resourceType), keyedSlices: nil, @@ -226,6 +235,11 @@ func (a *Adapter) initMethods(resource any) error { return err } + a.compactState, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "CompactState") + if err != nil { + return err + } + a.doResize, err = calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), "DoResize") if err != nil { return err @@ -339,6 +353,13 @@ func (a *Adapter) validate() error { validations = append(validations, "WaitAfterUpdate remoteState return", a.waitAfterUpdate.OutTypes[0], remoteType) } + if a.compactState != nil { + validations = append(validations, + "CompactState input", a.compactState.InTypes[0], stateType, + "CompactState return", a.compactState.OutTypes[0], stateType, + ) + } + err = validateTypes(validations...) if err != nil { return err @@ -556,6 +577,22 @@ func (a *Adapter) KeyedSlices() map[string]any { return a.keyedSlices } +// CompactState returns a copy of state with large, equality-only fields replaced by +// content hashes for compact storage, or state unchanged if the resource does not +// implement CompactState. It is applied both before persisting state and to every +// value entering the state diff, so stored and compared values share one form. +func (a *Adapter) CompactState(state any) (any, error) { + if a.compactState == nil { + return state, nil + } + + outs, err := a.compactState.Call(state) + if err != nil { + return nil, err + } + return outs[0], nil +} + // prepareCallRequired prepares a call and ensures the method is found. func prepareCallRequired(resource any, methodName string) (*calladapt.BoundCaller, error) { caller, err := calladapt.PrepareCall(resource, reflect.TypeFor[IResource](), methodName) diff --git a/bundle/direct/dresources/dashboard.go b/bundle/direct/dresources/dashboard.go index dbf492a0bee..67fd8364a07 100644 --- a/bundle/direct/dresources/dashboard.go +++ b/bundle/direct/dresources/dashboard.go @@ -106,6 +106,22 @@ func (r *ResourceDashboard) RemapState(state *DashboardState) *DashboardState { } } +// CompactState replaces the inlined serialized_dashboard contents with a content +// hash before the state is persisted. The full contents are never needed back from +// state: remote drift is detected via etag (serialized_dashboard is ignore_remote_changes, +// see resources.yml), and a deploy always sends the contents from the plan's new_state, +// never from saved state. Hashing keeps resources.json small for dashboards whose +// serialized form can be several megabytes. +func (r *ResourceDashboard) CompactState(state *DashboardState) (*DashboardState, error) { + hashed, err := hashStateValue(state.SerializedDashboard) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedDashboard = hashed + return &compacted, nil +} + func (r *ResourceDashboard) DoRead(ctx context.Context, id string) (*DashboardState, error) { var dashboard *dashboards.Dashboard var publishedDashboard *dashboards.PublishedDashboard diff --git a/bundle/direct/dresources/dashboard_test.go b/bundle/direct/dresources/dashboard_test.go index 177e1236221..2b1ece93566 100644 --- a/bundle/direct/dresources/dashboard_test.go +++ b/bundle/direct/dresources/dashboard_test.go @@ -2,9 +2,11 @@ package dresources import ( "encoding/json" + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -23,3 +25,48 @@ func TestDashboardState_JSONSerialization_PublishedField(t *testing.T) { assert.Contains(t, string(data), `"published":true`) } + +func TestDashboardCompactState(t *testing.T) { + r := &ResourceDashboard{} + state := &DashboardState{ + DashboardConfig: resources.DashboardConfig{ + DisplayName: "test-dashboard", + Etag: "etag-123", + SerializedDashboard: `{"pages":[{"name":"p1"}]}`, + }, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + // serialized_dashboard is replaced by a content hash; other fields are preserved. + require.IsType(t, "", compacted.SerializedDashboard) + assert.True(t, strings.HasPrefix(compacted.SerializedDashboard.(string), stateHashPrefix)) + assert.Equal(t, "test-dashboard", compacted.DisplayName) + assert.Equal(t, "etag-123", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"pages":[{"name":"p1"}]}`, state.SerializedDashboard) + + // Compacting is idempotent. + again, err := r.CompactState(compacted) + require.NoError(t, err) + assert.Equal(t, compacted.SerializedDashboard, again.SerializedDashboard) +} + +// TestDashboardSerializedDashboardIsIgnoreRemoteChanges guards the SHA-only invariant: +// because serialized_dashboard is stored as a content hash, it must never be compared +// against the (full-content) remote value, i.e. it must be declared ignore_remote_changes. +func TestDashboardSerializedDashboardIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("dashboards") + path := structpath.NewStringKey(nil, "serialized_dashboard") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/genie_space.go b/bundle/direct/dresources/genie_space.go index 16aa0a57e8a..fcb4f8b5905 100644 --- a/bundle/direct/dresources/genie_space.go +++ b/bundle/direct/dresources/genie_space.go @@ -58,6 +58,21 @@ func (r *ResourceGenieSpace) RemapState(remote *resources.GenieSpaceConfig) *res } } +// CompactState replaces the inlined serialized_space contents with a content hash +// before the state is persisted. As with dashboards, the full contents are never +// needed back from state: drift is detected via etag (serialized_space is +// ignore_remote_changes, see resources.yml), and a deploy always sends the contents +// from the plan's new_state, never from saved state. +func (r *ResourceGenieSpace) CompactState(state *resources.GenieSpaceConfig) (*resources.GenieSpaceConfig, error) { + hashed, err := hashStateValue(state.SerializedSpace) + if err != nil { + return nil, err + } + compacted := *state + compacted.SerializedSpace = hashed + return &compacted, nil +} + func (r *ResourceGenieSpace) DoRead(ctx context.Context, id string) (*resources.GenieSpaceConfig, error) { space, err := r.client.Genie.GetSpace(ctx, dashboards.GenieGetSpaceRequest{ SpaceId: id, diff --git a/bundle/direct/dresources/genie_space_test.go b/bundle/direct/dresources/genie_space_test.go index f77289083d3..c298d6f7ebd 100644 --- a/bundle/direct/dresources/genie_space_test.go +++ b/bundle/direct/dresources/genie_space_test.go @@ -1,6 +1,7 @@ package dresources import ( + "strings" "testing" "github.com/databricks/cli/bundle/config/resources" @@ -199,3 +200,40 @@ func TestGenieSpaceOverrideChangeDescEtag(t *testing.T) { assert.Equal(t, deployplan.Update, change.Action) }) } + +func TestGenieSpaceCompactState(t *testing.T) { + r := &ResourceGenieSpace{} + state := &resources.GenieSpaceConfig{ + Title: "test-space", + Etag: "etag-7", + SerializedSpace: `{"datasets":[{"name":"d1"}]}`, + } + + compacted, err := r.CompactState(state) + require.NoError(t, err) + + require.IsType(t, "", compacted.SerializedSpace) + assert.True(t, strings.HasPrefix(compacted.SerializedSpace.(string), stateHashPrefix)) + assert.Equal(t, "test-space", compacted.Title) + assert.Equal(t, "etag-7", compacted.Etag) + + // The original state is not mutated. + assert.Equal(t, `{"datasets":[{"name":"d1"}]}`, state.SerializedSpace) +} + +// TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges guards the SHA-only invariant: +// serialized_space is stored as a content hash, so it must never be compared against +// the remote value, i.e. it must be declared ignore_remote_changes. +func TestGenieSpaceSerializedSpaceIsIgnoreRemoteChanges(t *testing.T) { + cfg := GetResourceConfig("genie_spaces") + path := structpath.NewStringKey(nil, "serialized_space") + + found := false + for _, rule := range cfg.IgnoreRemoteChanges { + if path.HasPatternPrefix(rule.Field) { + found = true + break + } + } + assert.True(t, found, "serialized_space must be ignore_remote_changes for SHA-only state to be correct") +} diff --git a/bundle/direct/dresources/state_compaction.go b/bundle/direct/dresources/state_compaction.go new file mode 100644 index 00000000000..691255137c1 --- /dev/null +++ b/bundle/direct/dresources/state_compaction.go @@ -0,0 +1,40 @@ +package dresources + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "strings" +) + +// stateHashPrefix marks a state value that holds a content hash instead of the +// full content. It is part of the on-disk state format, so changing it is a +// backward-incompatible change. +const stateHashPrefix = "sha256:" + +// hashStateValue returns a content-hash placeholder ("sha256:") over the JSON +// encoding of v. It is used to store large, equality-only fields (e.g. a dashboard's +// serialized_dashboard) compactly in state instead of their full contents. +// +// It is idempotent and stable: nil, an empty string, and a value that is already a +// placeholder are returned unchanged, so re-compacting an already-compact state and +// comparing a fresh config against stored state both behave predictably. +func hashStateValue(v any) (any, error) { + if s, ok := v.(string); ok { + if s == "" || strings.HasPrefix(s, stateHashPrefix) { + return v, nil + } + } + if v == nil { + return v, nil + } + + // json.Marshal is deterministic (map keys are sorted), so equal content always + // produces an equal hash across runs and platforms. + data, err := json.Marshal(v) + if err != nil { + return nil, err + } + sum := sha256.Sum256(data) + return stateHashPrefix + hex.EncodeToString(sum[:]), nil +} diff --git a/bundle/direct/dresources/state_compaction_test.go b/bundle/direct/dresources/state_compaction_test.go new file mode 100644 index 00000000000..d2c5d8b6846 --- /dev/null +++ b/bundle/direct/dresources/state_compaction_test.go @@ -0,0 +1,53 @@ +package dresources + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHashStateValue(t *testing.T) { + stringHash, err := hashStateValue("hello") + require.NoError(t, err) + require.IsType(t, "", stringHash) + assert.True(t, strings.HasPrefix(stringHash.(string), stateHashPrefix)) + + // Same content always hashes to the same value. + again, err := hashStateValue("hello") + require.NoError(t, err) + assert.Equal(t, stringHash, again) + + // Different content hashes differently. + other, err := hashStateValue("world") + require.NoError(t, err) + assert.NotEqual(t, stringHash, other) + + // A map hashes deterministically regardless of literal key order. + mapHash, err := hashStateValue(map[string]any{"a": 1, "b": 2}) + require.NoError(t, err) + mapHash2, err := hashStateValue(map[string]any{"b": 2, "a": 1}) + require.NoError(t, err) + assert.Equal(t, mapHash, mapHash2) +} + +func TestHashStateValueIdempotent(t *testing.T) { + hashed, err := hashStateValue("some content") + require.NoError(t, err) + + // Re-hashing a placeholder returns it unchanged. + again, err := hashStateValue(hashed) + require.NoError(t, err) + assert.Equal(t, hashed, again) +} + +func TestHashStateValueEmptyAndNil(t *testing.T) { + empty, err := hashStateValue("") + require.NoError(t, err) + assert.Empty(t, empty) + + null, err := hashStateValue(nil) + require.NoError(t, err) + assert.Nil(t, null) +}