diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/dashboard.lvdash.json b/acceptance/bundle/config-remote-sync/dashboard_etag/dashboard.lvdash.json new file mode 100644 index 00000000000..397a9a1259c --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/dashboard.lvdash.json @@ -0,0 +1,34 @@ +{ + "pages": [ + { + "displayName": "New Page", + "layout": [ + { + "position": { + "height": 2, + "width": 6, + "x": 0, + "y": 0 + }, + "widget": { + "name": "82eb9107", + "textbox_spec": "# I'm a title" + } + }, + { + "position": { + "height": 2, + "width": 6, + "x": 0, + "y": 2 + }, + "widget": { + "name": "ffa6de4f", + "textbox_spec": "Text" + } + } + ], + "name": "fdd21a3c" + } + ] +} diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/databricks.yml.tmpl b/acceptance/bundle/config-remote-sync/dashboard_etag/databricks.yml.tmpl new file mode 100644 index 00000000000..26b1e7b7771 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/databricks.yml.tmpl @@ -0,0 +1,9 @@ +bundle: + name: test-bundle-$UNIQUE_NAME + +resources: + dashboards: + my_dashboard: + display_name: test-dashboard-$UNIQUE_NAME + file_path: ./dashboard.lvdash.json + warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/out.test.toml b/acceptance/bundle/config-remote-sync/dashboard_etag/out.test.toml new file mode 100644 index 00000000000..4c2be3166c4 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = true +RequiresWarehouse = true +GOOS.windows = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"] diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/output.txt b/acceptance/bundle/config-remote-sync/dashboard_etag/output.txt new file mode 100644 index 00000000000..a0f902fb002 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/output.txt @@ -0,0 +1,32 @@ +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +=== Modify the dashboard out of band to bump its remote etag +{ + "lifecycle_state": "ACTIVE" +} + +=== Detect changes: etag drift must not be reported + +>>> [CLI] bundle config-remote-sync +No changes detected. + + +=== Save changes: config must not be modified + +>>> [CLI] bundle config-remote-sync --save +No changes detected. + + +>>> diff.py databricks.yml.backup databricks.yml + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.dashboards.my_dashboard + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/script b/acceptance/bundle/config-remote-sync/dashboard_etag/script new file mode 100644 index 00000000000..990988c5906 --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/script @@ -0,0 +1,28 @@ +#!/bin/bash + +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +$CLI bundle deploy +dashboard_id=$(read_id.py my_dashboard) + +title "Modify the dashboard out of band to bump its remote etag" +echo +# Keep only lifecycle_state: the full response carries a nondeterministic etag +# and update_time that would churn the snapshot. +$CLI lakeview update "$dashboard_id" --json '{"serialized_dashboard": "{}", "warehouse_id": "'$TEST_DEFAULT_WAREHOUSE_ID'"}' | jq '{lifecycle_state}' + +title "Detect changes: etag drift must not be reported" +echo +trace $CLI bundle config-remote-sync + +title "Save changes: config must not be modified" +echo +cp databricks.yml databricks.yml.backup +trace $CLI bundle config-remote-sync --save +trace diff.py databricks.yml.backup databricks.yml +rm databricks.yml.backup diff --git a/acceptance/bundle/config-remote-sync/dashboard_etag/test.toml b/acceptance/bundle/config-remote-sync/dashboard_etag/test.toml new file mode 100644 index 00000000000..b0adfc8e33b --- /dev/null +++ b/acceptance/bundle/config-remote-sync/dashboard_etag/test.toml @@ -0,0 +1,12 @@ +Cloud = true +RequiresWarehouse = true + +RecordRequests = false + +Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"] + +[Env] +DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true" + +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"] diff --git a/bundle/configsync/defaults.go b/bundle/configsync/defaults.go index 480f90f99db..79d18162537 100644 --- a/bundle/configsync/defaults.go +++ b/bundle/configsync/defaults.go @@ -1,202 +1,122 @@ package configsync import ( - "reflect" + "slices" "strings" -) - -type ( - skipAlways struct{} - skipIfZeroOrNil struct{} - skipIfEmptyOrDefault struct { - defaults map[string]any - } -) -var ( - alwaysSkip = skipAlways{} - zeroOrNil = skipIfZeroOrNil{} - emptyEmailNotifications = skipIfEmptyOrDefault{defaults: map[string]any{"no_alert_for_skipped_runs": false}} + "github.com/databricks/cli/bundle/direct/dresources" + "github.com/databricks/cli/libs/structs/structpath" ) -// serverSideDefaults contains all hardcoded server-side defaults. -// This is a temporary solution until the bundle plan issue is resolved. -// Fields mapped to alwaysSkip are always considered defaults regardless of value. -// Other fields are compared using reflect.DeepEqual. -var serverSideDefaults = map[string]any{ - // Job-level fields - "resources.jobs.*.timeout_seconds": zeroOrNil, - "resources.jobs.*.email_notifications": emptyEmailNotifications, - "resources.jobs.*.webhook_notifications": map[string]any{}, - "resources.jobs.*.edit_mode": alwaysSkip, // set by CLI - "resources.jobs.*.performance_target": "PERFORMANCE_OPTIMIZED", - - // Task-level fields - "resources.jobs.*.tasks[*].run_if": "ALL_SUCCESS", - "resources.jobs.*.tasks[*].disabled": false, - "resources.jobs.*.tasks[*].timeout_seconds": zeroOrNil, - "resources.jobs.*.tasks[*].notebook_task.source": "WORKSPACE", - "resources.jobs.*.tasks[*].email_notifications": emptyEmailNotifications, - "resources.jobs.*.tasks[*].webhook_notifications": map[string]any{}, - "resources.jobs.*.tasks[*].pipeline_task.full_refresh": false, - - "resources.jobs.*.tasks[*].for_each_task.task.run_if": "ALL_SUCCESS", - "resources.jobs.*.tasks[*].for_each_task.task.disabled": false, - "resources.jobs.*.tasks[*].for_each_task.task.timeout_seconds": zeroOrNil, - "resources.jobs.*.tasks[*].for_each_task.task.notebook_task.source": "WORKSPACE", - "resources.jobs.*.tasks[*].for_each_task.task.email_notifications": emptyEmailNotifications, - "resources.jobs.*.tasks[*].for_each_task.task.webhook_notifications": map[string]any{}, - - // Cluster fields (tasks) - "resources.jobs.*.tasks[*].new_cluster.aws_attributes": alwaysSkip, - "resources.jobs.*.tasks[*].new_cluster.azure_attributes": alwaysSkip, - "resources.jobs.*.tasks[*].new_cluster.gcp_attributes": alwaysSkip, - "resources.jobs.*.tasks[*].new_cluster.data_security_mode": "SINGLE_USER", // TODO this field is computed on some workspaces in integration tests, check why and if we can skip it - "resources.jobs.*.tasks[*].new_cluster.enable_elastic_disk": alwaysSkip, // deprecated field - "resources.jobs.*.tasks[*].new_cluster.single_user_name": alwaysSkip, - - // Cluster fields (job_clusters) - "resources.jobs.*.job_clusters[*].new_cluster.aws_attributes": alwaysSkip, - "resources.jobs.*.job_clusters[*].new_cluster.azure_attributes": alwaysSkip, - "resources.jobs.*.job_clusters[*].new_cluster.gcp_attributes": alwaysSkip, - "resources.jobs.*.job_clusters[*].new_cluster.data_security_mode": "SINGLE_USER", // TODO this field is computed on some workspaces in integration tests, check why and if we can skip it - "resources.jobs.*.job_clusters[*].new_cluster.enable_elastic_disk": alwaysSkip, // deprecated field - "resources.jobs.*.job_clusters[*].new_cluster.single_user_name": alwaysSkip, - - // Standalone cluster fields - "resources.clusters.*.aws_attributes": alwaysSkip, - "resources.clusters.*.azure_attributes": alwaysSkip, - "resources.clusters.*.gcp_attributes": alwaysSkip, - "resources.clusters.*.data_security_mode": "SINGLE_USER", - "resources.clusters.*.driver_node_type_id": alwaysSkip, - "resources.clusters.*.enable_elastic_disk": alwaysSkip, - "resources.clusters.*.single_user_name": alwaysSkip, - - // Experiment fields - "resources.experiments.*.artifact_location": alwaysSkip, - - // Registered model fields - "resources.registered_models.*.full_name": alwaysSkip, - "resources.registered_models.*.metastore_id": alwaysSkip, - "resources.registered_models.*.owner": alwaysSkip, - "resources.registered_models.*.storage_location": alwaysSkip, - - // Volume fields - "resources.volumes.*.storage_location": alwaysSkip, - - // SQL warehouse fields - "resources.sql_warehouses.*.creator_name": alwaysSkip, - "resources.sql_warehouses.*.min_num_clusters": int64(1), - "resources.sql_warehouses.*.warehouse_type": "CLASSIC", - - // Terraform defaults - "resources.jobs.*.run_as": alwaysSkip, - - // Pipeline fields - "resources.pipelines.*.storage": alwaysSkip, - "resources.pipelines.*.continuous": false, -} - -// shouldSkipField checks if a field should be skipped in change detection. -// When hasConfigValue is true (field is set in config or saved state), only -// "always skip" fields are skipped. Backend defaults are only skipped when the -// field is not in config/state, matching the behavior of shouldSkipBackendDefault -// in the direct deployment engine. -func shouldSkipField(path string, value any, hasConfigValue bool) bool { - for pattern, expected := range serverSideDefaults { - if matchPattern(pattern, path) { - if _, ok := expected.(skipAlways); ok { - return true - } - if hasConfigValue { - return false - } - if _, ok := expected.(skipIfZeroOrNil); ok { - return value == nil || value == int64(0) - } - if marker, ok := expected.(skipIfEmptyOrDefault); ok { - m, ok := value.(map[string]any) - if !ok { - return false - } - if len(m) == 0 { - return true - } - return reflect.DeepEqual(m, marker.defaults) - } - return reflect.DeepEqual(value, expected) - } +// shouldSkipForSync reports whether a remote change at path should be excluded +// from config sync, based on the resource lifecycle metadata (resources.yml and +// resources.generated.yml in bundle/direct/dresources). +// +// The metadata is matched directly instead of relying on the plan's per-field +// actions: the plan answers "what will deploy do", which is not the same as +// "does this field belong in configuration". For example, the plan re-promotes +// dashboard etag drift to Update via ResourceDashboard.OverrideChangeDesc (for +// modified-remotely detection), yet etag must never be written to YAML. +// +// Fields under ignore_remote_changes never belong in configuration (output-only, +// etag-based, input-only), so they are skipped regardless of value. Fields under +// backend_defaults are skipped only when absent from the config (hasConfigValue +// is false) and the remote value matches the rule. +func shouldSkipForSync(resourceType string, path *structpath.PathNode, value any, hasConfigValue bool) bool { + cfg := dresources.GetResourceConfig(resourceType) + generatedCfg := dresources.GetGeneratedResourceConfig(resourceType) + + if _, ok := dresources.FindMatchingRule(path, cfg.IgnoreRemoteChanges); ok { + return true } - return false -} - -func matchPattern(pattern, path string) bool { - patternParts := strings.Split(pattern, ".") - pathParts := strings.Split(path, ".") - return matchParts(patternParts, pathParts) -} - -func matchParts(patternParts, pathParts []string) bool { - if len(patternParts) == 0 && len(pathParts) == 0 { + if _, ok := dresources.FindMatchingRule(path, generatedCfg.IgnoreRemoteChanges); ok { return true } - if len(patternParts) == 0 || len(pathParts) == 0 { + + if hasConfigValue { return false } - patternPart := patternParts[0] - pathPart := pathParts[0] + return cfg.MatchesBackendDefault(path, value) || generatedCfg.MatchesBackendDefault(path, value) +} - if patternPart == "*" { - return matchParts(patternParts[1:], pathParts[1:]) - } +// fieldsIgnoredForSync defines fields set by the CLI that cannot be specified +// in databricks.yml and should be excluded from config-remote-sync results. +// These are sync-specific rules: the deploy plan has different drift semantics +// for them, so they do not belong in resources.yml. +var fieldsIgnoredForSync = map[string][]*structpath.PatternNode{ + "jobs": { + structpath.MustParsePattern("edit_mode"), + }, +} - if strings.Contains(patternPart, "[*]") { - prefix := strings.Split(patternPart, "[*]")[0] +// fieldsKeptForSync lists fields that nested entity filtering (filterEntityDefaults) +// must keep even though they match a backend_defaults rule. The rules are asymmetric +// between plan and sync: for the plan, node_type_id is computed — policy-backed +// clusters materialize the policy-resolved value remotely while config legitimately +// omits it, so leaf-level drift must be skipped. For sync, a remotely added cluster's +// explicit node_type_id is required configuration: stripping it would make the synced +// YAML undeployable (cluster creation fails with 400 "Unknown node type id" unless a +// pool or policy provides the node type). +// +// Keeping it unconditionally cannot produce a node_type_id/instance_pool_id conflict: +// the Jobs API rejects requests carrying both ("The field 'node_type_id' cannot be +// supplied when an instance pool ID is provided") and jobs/get does not materialize +// node_type_id for pool-backed clusters, so a remote cluster never has both fields. +var fieldsKeptForSync = map[string][]*structpath.PatternNode{ + "jobs": { + structpath.MustParsePattern("tasks[*].new_cluster.node_type_id"), + structpath.MustParsePattern("job_clusters[*].new_cluster.node_type_id"), + }, +} - if strings.HasPrefix(pathPart, prefix) && strings.Contains(pathPart, "[") { - return matchParts(patternParts[1:], pathParts[1:]) - } - return false - } +// isKeptForSync reports whether a field of a remotely added entity must survive +// filterEntityDefaults even though it matches the lifecycle metadata. +func isKeptForSync(resourceType string, path *structpath.PathNode) bool { + return slices.ContainsFunc(fieldsKeptForSync[resourceType], path.HasPatternPrefix) +} - if patternPart == pathPart { - return matchParts(patternParts[1:], pathParts[1:]) - } +func isIgnoredForSync(resourceType string, path *structpath.PathNode) bool { + return slices.ContainsFunc(fieldsIgnoredForSync[resourceType], path.HasPatternPrefix) +} - return false +type resetRule struct { + field *structpath.PatternNode + value any } // resetValues contains all values that should be used to reset CLI-defaulted fields. // If CLI-defaulted field is changed on remote and should be disabled (e.g. queueing disabled -> remote field is nil) // we can't define it in the config as "null" because CLI default will be applied again. -var resetValues = map[string]any{ - "resources.jobs.*.queue": map[string]any{ - "enabled": false, +var resetValues = map[string][]resetRule{ + "jobs": { + {field: structpath.MustParsePattern("queue"), value: map[string]any{"enabled": false}}, }, } -func resetValueIfNeeded(path string, value any) any { - for pattern, expected := range resetValues { - if matchPattern(pattern, path) { - return expected +func resetValueIfNeeded(resourceType string, path *structpath.PathNode, value any) any { + for _, rule := range resetValues[resourceType] { + // Exact match, not prefix: a change at a subfield (e.g. queue.enabled) + // must keep its own value; only a change of the whole field is reset. + if path.Len() == rule.field.Len() && path.HasPatternPrefix(rule.field) { + return rule.value } } return value } -// prefixedNameFields lists resource name field patterns where the name prefix +// prefixedNameFields lists resource name fields where the name prefix // (e.g. "[dev user] ") is applied during deployment and should be stripped // when syncing remote changes back to config. -var prefixedNameFields = []string{ - "resources.jobs.*.name", - "resources.pipelines.*.name", - "resources.dashboards.*.display_name", +var prefixedNameFields = map[string][]*structpath.PatternNode{ + "jobs": {structpath.MustParsePattern("name")}, + "pipelines": {structpath.MustParsePattern("name")}, + "dashboards": {structpath.MustParsePattern("display_name")}, } // stripNamePrefix strips the configured name prefix from name field values // so that the raw (unprefixed) name is written back to the config YAML. -func stripNamePrefix(path string, value any, prefix string) any { +func stripNamePrefix(resourceType string, path *structpath.PathNode, value any, prefix string) any { if prefix == "" { return value } @@ -206,10 +126,8 @@ func stripNamePrefix(path string, value any, prefix string) any { return value } - for _, pattern := range prefixedNameFields { - if matchPattern(pattern, path) { - return strings.TrimPrefix(s, prefix) - } + if slices.ContainsFunc(prefixedNameFields[resourceType], path.HasPatternPrefix) { + return strings.TrimPrefix(s, prefix) } return value diff --git a/bundle/configsync/diff.go b/bundle/configsync/diff.go index 4a0b4f01ca3..1e9aa67721a 100644 --- a/bundle/configsync/diff.go +++ b/bundle/configsync/diff.go @@ -10,6 +10,7 @@ import ( "path/filepath" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deployplan" @@ -18,6 +19,7 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/structs/structpath" ) type OperationType string @@ -48,44 +50,68 @@ func normalizeValue(v any) (any, error) { return dynValue.AsAny(), nil } -func filterEntityDefaults(basePath string, value any) any { - if value == nil { - return nil - } - - if arr, ok := value.([]any); ok { - result := make([]any, 0, len(arr)) - for i, elem := range arr { - elementPath := fmt.Sprintf("%s[%d]", basePath, i) - result = append(result, filterEntityDefaults(elementPath, elem)) +// filterEntityDefaults removes server-side default fields from an entity that +// is added remotely as a whole (e.g. a new task object). Each nested field is +// matched against the same lifecycle metadata as top-level changes. Maps that +// become empty after filtering are pruned to nil at the top level and in map +// fields, but kept as {} inside arrays: pruning an element would change the +// array arity. Element identity keys like task_key normally prevent full +// pruning there anyway. +func filterEntityDefaults(resourceType string, basePath *structpath.PathNode, value any) any { + switch v := value.(type) { + case map[string]any: + filtered := filterEntityMap(resourceType, basePath, v) + if len(filtered) == 0 { + return nil + } + return filtered + case []any: + result := make([]any, 0, len(v)) + for i, elem := range v { + elementPath := structpath.NewIndex(basePath, i) + if m, ok := elem.(map[string]any); ok { + result = append(result, filterEntityMap(resourceType, elementPath, m)) + } else { + result = append(result, filterEntityDefaults(resourceType, elementPath, elem)) + } } return result - } - - m, ok := value.(map[string]any) - if !ok { + default: return value } +} +// filterEntityMap drops map fields matched by the lifecycle metadata and prunes +// nested map fields that become empty after filtering. The result may itself be +// empty; pruning it is the caller's decision (see filterEntityDefaults). +func filterEntityMap(resourceType string, basePath *structpath.PathNode, m map[string]any) map[string]any { result := make(map[string]any) for key, val := range m { - fieldPath := basePath + "." + key + fieldPath := structpath.NewStringKey(basePath, key) - if shouldSkipField(fieldPath, val, false) { + if shouldSkipForSync(resourceType, fieldPath, val, false) && !isKeptForSync(resourceType, fieldPath) { continue } if nestedMap, ok := val.(map[string]any); ok { - result[key] = filterEntityDefaults(fieldPath, nestedMap) - } else { - result[key] = val + filtered := filterEntityMap(resourceType, fieldPath, nestedMap) + if len(filtered) > 0 { + result[key] = filtered + } + continue } + result[key] = val } - return result } -func convertChangeDesc(path string, cd *deployplan.ChangeDesc) (*ConfigChangeDesc, error) { +func convertChangeDesc(resourceType string, path *structpath.PathNode, cd *deployplan.ChangeDesc) (*ConfigChangeDesc, error) { + if isIgnoredForSync(resourceType, path) { + return &ConfigChangeDesc{ + Operation: OperationSkip, + }, nil + } + // Use cd.New (current config) to decide whether the field exists "on the config side". // cd.Old (saved state) must not be considered: when the user has already synced a rename // locally (cd.New == nil for the old key) but state still holds the prior key, including @@ -97,14 +123,14 @@ func convertChangeDesc(path string, cd *deployplan.ChangeDesc) (*ConfigChangeDes return nil, fmt.Errorf("failed to normalize remote value: %w", err) } - if shouldSkipField(path, normalizedValue, hasConfigValue) { + if shouldSkipForSync(resourceType, path, normalizedValue, hasConfigValue) { return &ConfigChangeDesc{ Operation: OperationSkip, }, nil } - normalizedValue = filterEntityDefaults(path, normalizedValue) - normalizedValue = resetValueIfNeeded(path, normalizedValue) + normalizedValue = filterEntityDefaults(resourceType, path, normalizedValue) + normalizedValue = resetValueIfNeeded(resourceType, path, normalizedValue) var op OperationType if normalizedValue == nil && hasConfigValue { @@ -152,6 +178,7 @@ func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineTy for resourceKey, entry := range plan.Plan { resourceChanges := make(ResourceChanges) + resourceType := config.GetResourceTypeFromKey(resourceKey) if entry.Changes != nil { for path, changeDesc := range entry.Changes { @@ -159,15 +186,19 @@ func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineTy continue } - fullPath := resourceKey + "." + path - change, err := convertChangeDesc(fullPath, changeDesc) + fieldPath, err := structpath.ParsePath(path) + if err != nil { + return nil, fmt.Errorf("failed to parse path %s: %w", path, err) + } + + change, err := convertChangeDesc(resourceType, fieldPath, changeDesc) if err != nil { return nil, fmt.Errorf("failed to compute config change for path %s: %w", path, err) } if change.Operation == OperationSkip { continue } - change.Value = stripNamePrefix(fullPath, change.Value, b.Config.Presets.NamePrefix) + change.Value = stripNamePrefix(resourceType, fieldPath, change.Value, b.Config.Presets.NamePrefix) resourceChanges[path] = change } } diff --git a/bundle/configsync/diff_test.go b/bundle/configsync/diff_test.go index 5402fa5ab01..86cf34b47a8 100644 --- a/bundle/configsync/diff_test.go +++ b/bundle/configsync/diff_test.go @@ -4,45 +4,51 @@ import ( "testing" "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/cli/libs/structs/structpath" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestConvertChangeDesc(t *testing.T) { tests := []struct { - name string - path string - cd *deployplan.ChangeDesc - wantOp OperationType - wantVal any + name string + resourceType string + path string + cd *deployplan.ChangeDesc + wantOp OperationType + wantVal any }{ { - name: "add: new in remote only", - path: "resources.jobs.my_job.description", - cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "remote-desc"}, - wantOp: OperationAdd, - wantVal: "remote-desc", + name: "add: new in remote only", + resourceType: "jobs", + path: "description", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "remote-desc"}, + wantOp: OperationAdd, + wantVal: "remote-desc", }, { - name: "remove: in config, missing from remote", - path: "resources.jobs.my_job.description", - cd: &deployplan.ChangeDesc{Old: "state-desc", New: "config-desc", Remote: nil}, - wantOp: OperationRemove, - wantVal: nil, + name: "remove: in config, missing from remote", + resourceType: "jobs", + path: "description", + cd: &deployplan.ChangeDesc{Old: "state-desc", New: "config-desc", Remote: nil}, + wantOp: OperationRemove, + wantVal: nil, }, { - name: "replace: differs between config and remote", - path: "resources.jobs.my_job.description", - cd: &deployplan.ChangeDesc{Old: "state-desc", New: "config-desc", Remote: "remote-desc"}, - wantOp: OperationReplace, - wantVal: "remote-desc", + name: "replace: differs between config and remote", + resourceType: "jobs", + path: "description", + cd: &deployplan.ChangeDesc{Old: "state-desc", New: "config-desc", Remote: "remote-desc"}, + wantOp: OperationReplace, + wantVal: "remote-desc", }, { - name: "skip: absent everywhere", - path: "resources.jobs.my_job.description", - cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: nil}, - wantOp: OperationSkip, - wantVal: nil, + name: "skip: absent everywhere", + resourceType: "jobs", + path: "description", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: nil}, + wantOp: OperationSkip, + wantVal: nil, }, // Regression: rename-back-and-forth. State holds the old key (user did not // redeploy after the first sync), config holds the intermediate key, and @@ -50,8 +56,9 @@ func TestConvertChangeDesc(t *testing.T) { // config, so the change must be Add — Replace would error in resolveSelectors // because the keyed element no longer exists in the YAML. { - name: "add: rename-back path, state has it but config does not", - path: "resources.jobs.my_job.tasks[task_key='new_task']", + name: "add: rename-back path, state has it but config does not", + resourceType: "jobs", + path: "tasks[task_key='new_task']", cd: &deployplan.ChangeDesc{ Old: map[string]any{"task_key": "new_task"}, New: nil, @@ -61,8 +68,9 @@ func TestConvertChangeDesc(t *testing.T) { wantVal: map[string]any{"task_key": "new_task"}, }, { - name: "skip: state has it, config and remote do not", - path: "resources.jobs.my_job.tasks[task_key='gone']", + name: "skip: state has it, config and remote do not", + resourceType: "jobs", + path: "tasks[task_key='gone']", cd: &deployplan.ChangeDesc{ Old: map[string]any{"task_key": "gone"}, New: nil, @@ -71,11 +79,147 @@ func TestConvertChangeDesc(t *testing.T) { wantOp: OperationSkip, wantVal: nil, }, + // The plan re-promotes etag drift to Update via + // ResourceDashboard.OverrideChangeDesc, so sync must skip it based on + // the ignore_remote_changes metadata, not the plan action. + { + name: "skip: dashboard etag is output-only", + resourceType: "dashboards", + path: "etag", + cd: &deployplan.ChangeDesc{Old: "etag-1", New: nil, Remote: "etag-2"}, + wantOp: OperationSkip, + wantVal: nil, + }, + // create_time comes from resources.generated.yml (spec:output_only). + { + name: "skip: dashboard create_time is output-only (generated rule)", + resourceType: "dashboards", + path: "create_time", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "2025-01-01T00:00:00Z"}, + wantOp: OperationSkip, + wantVal: nil, + }, + { + name: "skip: backend default not in config", + resourceType: "jobs", + path: "performance_target", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "PERFORMANCE_OPTIMIZED"}, + wantOp: OperationSkip, + wantVal: nil, + }, + { + name: "replace: backend default value but field is set in config", + resourceType: "jobs", + path: "performance_target", + cd: &deployplan.ChangeDesc{Old: "STANDARD", New: "STANDARD", Remote: "PERFORMANCE_OPTIMIZED"}, + wantOp: OperationReplace, + wantVal: "PERFORMANCE_OPTIMIZED", + }, + { + name: "add: non-default value of backend default field", + resourceType: "jobs", + path: "performance_target", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "STANDARD"}, + wantOp: OperationAdd, + wantVal: "STANDARD", + }, + { + name: "skip: edit_mode is CLI-managed", + resourceType: "jobs", + path: "edit_mode", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "UI_LOCKED"}, + wantOp: OperationSkip, + wantVal: nil, + }, + // node_type_id drift on a cluster that omits it in config comes from a + // cluster policy (jobs/get returns the policy-resolved value in stored + // settings) or, for standalone clusters, from clusters/get reporting the + // pool's node type; it must not be written to YAML. + { + name: "skip: node_type_id materialized remotely for cluster omitting it", + resourceType: "jobs", + path: "tasks[task_key='t1'].new_cluster.node_type_id", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "i3.xlarge"}, + wantOp: OperationSkip, + wantVal: nil, + }, + { + name: "skip: standalone cluster node_type_id materialized for pool-backed cluster", + resourceType: "clusters", + path: "node_type_id", + cd: &deployplan.ChangeDesc{Old: nil, New: nil, Remote: "c5d.xlarge"}, + wantOp: OperationSkip, + wantVal: nil, + }, + { + name: "replace: queue removed remotely resets to disabled", + resourceType: "jobs", + path: "queue", + cd: &deployplan.ChangeDesc{Old: map[string]any{"enabled": true}, New: map[string]any{"enabled": true}, Remote: nil}, + wantOp: OperationReplace, + wantVal: map[string]any{"enabled": false}, + }, + // A task added remotely as a whole: backend defaults (run_if, + // timeout_seconds, data_security_mode, the deprecated + // no_alert_for_skipped_runs the backend populates in + // email_notifications) and ignored fields (aws_attributes) are + // stripped; maps that become empty are pruned (email_notifications, + // webhook_notifications); node_type_id must be kept or the synced + // config would be undeployable. + { + name: "add: remotely added task is filtered by metadata", + resourceType: "jobs", + path: "tasks[task_key='new_task']", + cd: &deployplan.ChangeDesc{ + Old: nil, + New: nil, + Remote: map[string]any{ + "task_key": "new_task", + "run_if": "ALL_SUCCESS", + "timeout_seconds": 0, + "email_notifications": map[string]any{"no_alert_for_skipped_runs": false}, + "webhook_notifications": map[string]any{}, + "new_cluster": map[string]any{ + "spark_version": "13.3.x-scala2.12", + "node_type_id": "i3.xlarge", + "num_workers": 1, + "data_security_mode": "SINGLE_USER", + "aws_attributes": map[string]any{"availability": "SPOT_WITH_FALLBACK"}, + }, + }, + }, + wantOp: OperationAdd, + wantVal: map[string]any{ + "task_key": "new_task", + "new_cluster": map[string]any{ + "spark_version": "13.3.x-scala2.12", + "node_type_id": "i3.xlarge", + "num_workers": int64(1), + }, + }, + }, + // When a field set in config filters entirely to backend defaults on + // the remote side, the operation is Remove (drop the field from YAML), + // not Replace-with-{}. + { + name: "remove: config field whose remote value filters to empty", + resourceType: "jobs", + path: "email_notifications", + cd: &deployplan.ChangeDesc{ + Old: map[string]any{"on_failure": []any{"someone@example.com"}}, + New: map[string]any{"on_failure": []any{"someone@example.com"}}, + Remote: map[string]any{"no_alert_for_skipped_runs": false}, + }, + wantOp: OperationRemove, + wantVal: nil, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := convertChangeDesc(tt.path, tt.cd) + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + got, err := convertChangeDesc(tt.resourceType, path, tt.cd) require.NoError(t, err) assert.Equal(t, tt.wantOp, got.Operation) assert.Equal(t, tt.wantVal, got.Value) @@ -83,159 +227,325 @@ func TestConvertChangeDesc(t *testing.T) { } } -func TestStripNamePrefix(t *testing.T) { +func TestFilterEntityDefaults(t *testing.T) { tests := []struct { - name string - path string - value any - prefix string - want any + name string + basePath string + value any + want any }{ { - name: "job name with normal prefix", - path: "resources.jobs.my_job.name", - value: "[dev user] my_job", - prefix: "[dev user] ", - want: "my_job", + name: "array element filtering to empty is kept to preserve arity", + basePath: "tasks", + value: []any{ + map[string]any{"task_key": "t1", "run_if": "ALL_SUCCESS"}, + map[string]any{"run_if": "ALL_SUCCESS"}, + }, + want: []any{ + map[string]any{"task_key": "t1"}, + map[string]any{}, + }, }, { - name: "pipeline name with normal prefix", - path: "resources.pipelines.my_pipeline.name", - value: "[dev user] my_pipeline", - prefix: "[dev user] ", - want: "my_pipeline", + name: "map field filtering to empty is pruned", + basePath: "tasks[task_key='t1']", + value: map[string]any{ + "task_key": "t1", + "email_notifications": map[string]any{"no_alert_for_skipped_runs": false}, + }, + want: map[string]any{"task_key": "t1"}, }, { - name: "dashboard display_name with prefix", - path: "resources.dashboards.my_dash.display_name", - value: "[dev user] my_dash", - prefix: "[dev user] ", - want: "my_dash", + name: "whole value filtering to empty becomes nil", + basePath: "email_notifications", + value: map[string]any{"no_alert_for_skipped_runs": false}, + want: nil, }, { - name: "name does not start with prefix", - path: "resources.jobs.my_job.name", - value: "my_job", - prefix: "[dev user] ", - want: "my_job", + name: "scalar is returned unchanged", + basePath: "description", + value: "desc", + want: "desc", }, + // node_type_id matches backend_defaults (policy-backed clusters + // materialize it remotely) but is in fieldsKeptForSync: a remotely + // added cluster carrying it explicitly, even alongside policy_id, + // must keep it or the synced config would be undeployable. { - name: "empty prefix is noop", - path: "resources.jobs.my_job.name", - value: "[dev user] my_job", - prefix: "", - want: "[dev user] my_job", + name: "remotely added policy-backed cluster keeps node_type_id", + basePath: "job_clusters[0].new_cluster", + value: map[string]any{ + "policy_id": "ABC123", + "node_type_id": "i3.xlarge", + "spark_version": "13.3.x-scala2.12", + "num_workers": 1, + "data_security_mode": "SINGLE_USER", + }, + want: map[string]any{ + "policy_id": "ABC123", + "node_type_id": "i3.xlarge", + "spark_version": "13.3.x-scala2.12", + "num_workers": 1, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + basePath, err := structpath.ParsePath(tt.basePath) + require.NoError(t, err) + got := filterEntityDefaults("jobs", basePath, tt.value) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestShouldSkipForSync(t *testing.T) { + tests := []struct { + name string + resourceType string + path string + value any + hasConfigValue bool + want bool + }{ + { + name: "ignore_remote_changes skips regardless of value", + resourceType: "dashboards", + path: "etag", + value: "etag-2", + want: true, + }, + { + name: "ignore_remote_changes skips even when field is in config", + resourceType: "dashboards", + path: "serialized_dashboard", + value: "{}", + hasConfigValue: true, + want: true, + }, + { + name: "generated ignore_remote_changes rule", + resourceType: "dashboards", + path: "lifecycle_state", + value: "ACTIVE", + want: true, + }, + { + name: "ignore_remote_changes matches nested path by prefix", + resourceType: "jobs", + path: "tasks[task_key='t1'].new_cluster.aws_attributes.availability", + value: "SPOT", + want: true, + }, + { + name: "backend default with matching value", + resourceType: "jobs", + path: "performance_target", + value: "PERFORMANCE_OPTIMIZED", + want: true, + }, + { + name: "backend default with non-matching value", + resourceType: "jobs", + path: "performance_target", + value: "STANDARD", + want: false, + }, + { + name: "backend default not applied when field is in config", + resourceType: "jobs", + path: "performance_target", + value: "PERFORMANCE_OPTIMIZED", + hasConfigValue: true, + want: false, + }, + { + name: "backend default without values matches any value", + resourceType: "jobs", + path: "run_as", + value: map[string]any{"user_name": "someone@example.com"}, + want: true, }, { - name: "non-name field is not stripped", - path: "resources.jobs.my_job.description", - value: "[dev user] some description", - prefix: "[dev user] ", - want: "[dev user] some description", + name: "backend default with wildcard pattern", + resourceType: "jobs", + path: "tasks[task_key='t1'].run_if", + value: "ALL_SUCCESS", + want: true, }, + // node_type_id matches backend_defaults; nested filtering overrides + // this via fieldsKeptForSync (see TestFilterEntityDefaults). { - name: "non-string value is unchanged", - path: "resources.jobs.my_job.name", - value: 42, - prefix: "[dev user] ", - want: 42, + name: "backend default node_type_id", + resourceType: "jobs", + path: "tasks[task_key='t1'].new_cluster.node_type_id", + value: "i3.xlarge", + want: true, }, { - name: "nil value is unchanged", - path: "resources.jobs.my_job.name", - value: nil, - prefix: "[dev user] ", - want: nil, + name: "regular field is not skipped", + resourceType: "jobs", + path: "description", + value: "some description", + want: false, + }, + { + name: "unknown resource type is not skipped", + resourceType: "unknown", + path: "anything", + value: "value", + want: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := stripNamePrefix(tt.path, tt.value, tt.prefix) + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + got := shouldSkipForSync(tt.resourceType, path, tt.value, tt.hasConfigValue) assert.Equal(t, tt.want, got) }) } } -func TestMatchPattern(t *testing.T) { +func TestResetValueIfNeeded(t *testing.T) { tests := []struct { - name string - pattern string - path string - want bool + name string + resourceType string + path string + value any + want any }{ { - name: "exact match", - pattern: "timeout_seconds", - path: "timeout_seconds", - want: true, + name: "whole queue field is reset", + resourceType: "jobs", + path: "queue", + value: nil, + want: map[string]any{"enabled": false}, }, { - name: "exact match no match", - pattern: "timeout_seconds", - path: "other_field", - want: false, + name: "queue subfield keeps its own value", + resourceType: "jobs", + path: "queue.enabled", + value: false, + want: false, }, { - name: "array wildcard match", - pattern: "tasks[*].run_if", - path: "tasks[task_key='my_task'].run_if", - want: true, + name: "unrelated field is unchanged", + resourceType: "jobs", + path: "description", + value: "desc", + want: "desc", + }, + { + name: "other resource type is unchanged", + resourceType: "pipelines", + path: "queue", + value: nil, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + got := resetValueIfNeeded(tt.resourceType, path, tt.value) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestStripNamePrefix(t *testing.T) { + tests := []struct { + name string + resourceType string + path string + value any + prefix string + want any + }{ + { + name: "job name with normal prefix", + resourceType: "jobs", + path: "name", + value: "[dev user] my_job", + prefix: "[dev user] ", + want: "my_job", }, { - name: "array wildcard no match", - pattern: "tasks[*].run_if", - path: "tasks[task_key='my_task'].disabled", - want: false, + name: "pipeline name with normal prefix", + resourceType: "pipelines", + path: "name", + value: "[dev user] my_pipeline", + prefix: "[dev user] ", + want: "my_pipeline", }, { - name: "nested array wildcard match", - pattern: "tasks[*].new_cluster.azure_attributes.availability", - path: "tasks[task_key='task1'].new_cluster.azure_attributes.availability", - want: true, + name: "dashboard display_name with prefix", + resourceType: "dashboards", + path: "display_name", + value: "[dev user] my_dash", + prefix: "[dev user] ", + want: "my_dash", }, { - name: "job_clusters array wildcard match", - pattern: "job_clusters[*].new_cluster.aws_attributes.availability", - path: "job_clusters[job_cluster_key='cluster1'].new_cluster.aws_attributes.availability", - want: true, + name: "name does not start with prefix", + resourceType: "jobs", + path: "name", + value: "my_job", + prefix: "[dev user] ", + want: "my_job", }, { - name: "wildcard segment match", - pattern: "*.timeout_seconds", - path: "timeout_seconds", - want: false, + name: "empty prefix is noop", + resourceType: "jobs", + path: "name", + value: "[dev user] my_job", + prefix: "", + want: "[dev user] my_job", }, { - name: "different array prefix no match", - pattern: "tasks[*].run_if", - path: "jobs[task_key='my_task'].run_if", - want: false, + name: "non-name field is not stripped", + resourceType: "jobs", + path: "description", + value: "[dev user] some description", + prefix: "[dev user] ", + want: "[dev user] some description", }, { - name: "nested path match", - pattern: "tasks[*].notebook_task.source", - path: "tasks[task_key='notebook'].notebook_task.source", - want: true, + name: "name of resource type without prefix support", + resourceType: "experiments", + path: "name", + value: "[dev user] my_experiment", + prefix: "[dev user] ", + want: "[dev user] my_experiment", }, { - name: "path shorter than pattern", - pattern: "tasks[*].new_cluster.azure_attributes", - path: "tasks[task_key='task1'].new_cluster", - want: false, + name: "non-string value is unchanged", + resourceType: "jobs", + path: "name", + value: 42, + prefix: "[dev user] ", + want: 42, }, { - name: "path longer than pattern", - pattern: "tasks[*].new_cluster", - path: "tasks[task_key='task1'].new_cluster.azure_attributes", - want: false, + name: "nil value is unchanged", + resourceType: "jobs", + path: "name", + value: nil, + prefix: "[dev user] ", + want: nil, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := matchPattern(tt.pattern, tt.path) - assert.Equal(t, tt.want, got, "matchPattern(%q, %q)", tt.pattern, tt.path) + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + got := stripNamePrefix(tt.resourceType, path, tt.value, tt.prefix) + assert.Equal(t, tt.want, got) }) } } diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index fea24b79808..fa560957720 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -2,7 +2,6 @@ package direct import ( "context" - "encoding/json" "errors" "fmt" "maps" @@ -418,23 +417,14 @@ func addPerFieldActions(ctx context.Context, adapter *dresources.Adapter, change return nil } -func findMatchingRule(path *structpath.PathNode, rules []dresources.FieldRule) (string, bool) { - for _, r := range rules { - if path.HasPatternPrefix(r.Field) { - return r.Reason, true - } - } - return "", false -} - func shouldSkip(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode, ch *deployplan.ChangeDesc) (string, bool) { if cfg == nil { return "", false } - if reason, ok := findMatchingRule(path, cfg.IgnoreLocalChanges); ok && !structdiff.IsEqual(ch.Old, ch.New) { + if reason, ok := dresources.FindMatchingRule(path, cfg.IgnoreLocalChanges); ok && !structdiff.IsEqual(ch.Old, ch.New) { return reason, true } - if reason, ok := findMatchingRule(path, cfg.IgnoreRemoteChanges); ok && structdiff.IsEqual(ch.Old, ch.New) { + if reason, ok := dresources.FindMatchingRule(path, cfg.IgnoreRemoteChanges); ok && structdiff.IsEqual(ch.Old, ch.New) { return reason, true } return "", false @@ -454,10 +444,10 @@ func shouldSkipNormalized(cfg *dresources.ResourceLifecycleConfig, path *structp if !newOk || !remoteOk { return "", false } - if reason, ok := findMatchingRule(path, cfg.NormalizeCase); ok && strings.EqualFold(newStr, remoteStr) { + if reason, ok := dresources.FindMatchingRule(path, cfg.NormalizeCase); ok && strings.EqualFold(newStr, remoteStr) { return reason, true } - if reason, ok := findMatchingRule(path, cfg.NormalizeSlash); ok && strings.TrimRight(newStr, "/") == strings.TrimRight(remoteStr, "/") { + if reason, ok := dresources.FindMatchingRule(path, cfg.NormalizeSlash); ok && strings.TrimRight(newStr, "/") == strings.TrimRight(remoteStr, "/") { return reason, true } return "", false @@ -467,10 +457,10 @@ func shouldUpdateOrRecreate(cfg *dresources.ResourceLifecycleConfig, path *struc if cfg == nil { return deployplan.Undefined, "" } - if reason, ok := findMatchingRule(path, cfg.RecreateOnChanges); ok { + if reason, ok := dresources.FindMatchingRule(path, cfg.RecreateOnChanges); ok { return deployplan.Recreate, reason } - if reason, ok := findMatchingRule(path, cfg.UpdateIDOnChanges); ok { + if reason, ok := dresources.FindMatchingRule(path, cfg.UpdateIDOnChanges); ok { return deployplan.UpdateWithID, reason } return deployplan.Undefined, "" @@ -480,39 +470,15 @@ func shouldUpdateOrRecreate(cfg *dresources.ResourceLifecycleConfig, path *struc // is a known backend default. Applies when old and new are nil but remote is set. // If the rule has allowed values, the remote value must match one of them. func shouldSkipBackendDefault(cfg *dresources.ResourceLifecycleConfig, path *structpath.PathNode, ch *deployplan.ChangeDesc) (string, bool) { - if cfg == nil || ch.Old != nil || ch.New != nil || ch.Remote == nil { + if ch.Old != nil || ch.New != nil { return "", false } - for _, rule := range cfg.BackendDefaults { - if !path.HasPatternPrefix(rule.Field) { - continue - } - if len(rule.Values) == 0 { - return deployplan.ReasonBackendDefault, true - } - if matchesAllowedValue(ch.Remote, rule.Values) { - return deployplan.ReasonBackendDefault, true - } + if cfg.MatchesBackendDefault(path, ch.Remote) { + return deployplan.ReasonBackendDefault, true } return "", false } -// matchesAllowedValue checks if the remote value matches one of the allowed JSON values. -// Each json.RawMessage is unmarshaled into the same type as remote for comparison. -func matchesAllowedValue(remote any, values []json.RawMessage) bool { - remoteType := reflect.TypeOf(remote) - for _, raw := range values { - candidate := reflect.New(remoteType).Interface() - if err := json.Unmarshal(raw, candidate); err != nil { - continue - } - if structdiff.IsEqual(remote, reflect.ValueOf(candidate).Elem().Interface()) { - return true - } - } - return false -} - func allEmpty(values ...any) bool { for _, v := range values { if v == nil { diff --git a/bundle/direct/dresources/README.md b/bundle/direct/dresources/README.md index a20b68c4ebd..be0923d4dde 100644 --- a/bundle/direct/dresources/README.md +++ b/bundle/direct/dresources/README.md @@ -17,7 +17,7 @@ Each field with special plan/deploy behavior must be declared in `resources.yml`. Choose the right category: - - **`backend_defaults`**: The backend may fill in a value when the user doesn't specify one. Suppresses the diff when the user's config is nil/empty but remote has a value. Optionally restrict to specific allowed remote values via `values:`. Use for fields the API fills in as defaults (e.g., `format`, `run_if`, `node_type_id`). Link to TF provider suppression comment in the same format as existing entries. + - **`backend_defaults`**: The backend may fill in a value when the user doesn't specify one. Suppresses the diff when the user's config is nil/empty but remote has a value. Optionally restrict to specific allowed remote values via `values:`. Use for fields the API fills in as defaults (e.g., `format`, `run_if`, `driver_node_type_id`). Link to TF provider suppression comment in the same format as existing entries. - **`ignore_remote_changes`**: Ignore changes the remote makes to this field. Use for fields the backend manages (e.g., cloud-provider attributes like `aws_attributes`, `gcp_attributes`) or fields not returned by the update endpoint. Reason codes: - `output_only` — the field is computed by the backend; the user never sets it - `input_only` — accepted on create/update but not returned by GET (e.g., write-only tokens, flags) diff --git a/bundle/direct/dresources/match.go b/bundle/direct/dresources/match.go new file mode 100644 index 00000000000..febe379dae3 --- /dev/null +++ b/bundle/direct/dresources/match.go @@ -0,0 +1,57 @@ +package dresources + +import ( + "encoding/json" + "reflect" + + "github.com/databricks/cli/libs/structs/structdiff" + "github.com/databricks/cli/libs/structs/structpath" +) + +// FindMatchingRule returns the reason of the first rule whose field pattern +// is a prefix of the given path. +func FindMatchingRule(path *structpath.PathNode, rules []FieldRule) (string, bool) { + for _, r := range rules { + if path.HasPatternPrefix(r.Field) { + return r.Reason, true + } + } + return "", false +} + +// MatchesBackendDefault reports whether the remote value at path matches one of +// the backend_defaults rules. If a rule has allowed values, the remote value +// must match one of them. +func (cfg *ResourceLifecycleConfig) MatchesBackendDefault(path *structpath.PathNode, remote any) bool { + if cfg == nil || remote == nil { + return false + } + for _, rule := range cfg.BackendDefaults { + if !path.HasPatternPrefix(rule.Field) { + continue + } + if len(rule.Values) == 0 { + return true + } + if MatchesAllowedValue(remote, rule.Values) { + return true + } + } + return false +} + +// MatchesAllowedValue checks if the remote value matches one of the allowed JSON values. +// Each json.RawMessage is unmarshaled into the same type as remote for comparison. +func MatchesAllowedValue(remote any, values []json.RawMessage) bool { + remoteType := reflect.TypeOf(remote) + for _, raw := range values { + candidate := reflect.New(remoteType).Interface() + if err := json.Unmarshal(raw, candidate); err != nil { + continue + } + if structdiff.IsEqual(remote, reflect.ValueOf(candidate).Elem().Interface()) { + return true + } + } + return false +} diff --git a/bundle/direct/dresources/match_test.go b/bundle/direct/dresources/match_test.go new file mode 100644 index 00000000000..7cd4e7ca224 --- /dev/null +++ b/bundle/direct/dresources/match_test.go @@ -0,0 +1,177 @@ +package dresources + +import ( + "encoding/json" + "testing" + + "github.com/databricks/cli/libs/structs/structpath" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFindMatchingRule(t *testing.T) { + rules := []FieldRule{ + {Field: structpath.MustParsePattern("tasks[*].run_if"), Reason: "first"}, + {Field: structpath.MustParsePattern("aws_attributes"), Reason: "second"}, + } + + tests := []struct { + name string + path string + wantReason string + wantOk bool + }{ + { + name: "wildcard match", + path: "tasks[task_key='t1'].run_if", + wantReason: "first", + wantOk: true, + }, + { + name: "prefix match on nested path", + path: "aws_attributes.availability", + wantReason: "second", + wantOk: true, + }, + { + name: "no match", + path: "description", + wantOk: false, + }, + { + name: "path shorter than pattern", + path: "tasks[task_key='t1']", + wantOk: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + reason, ok := FindMatchingRule(path, rules) + assert.Equal(t, tt.wantOk, ok) + assert.Equal(t, tt.wantReason, reason) + }) + } +} + +func TestMatchesAllowedValue(t *testing.T) { + tests := []struct { + name string + remote any + values []json.RawMessage + want bool + }{ + { + name: "string match", + remote: "ALL_SUCCESS", + values: []json.RawMessage{json.RawMessage(`"ALL_SUCCESS"`)}, + want: true, + }, + { + name: "string no match", + remote: "ALL_DONE", + values: []json.RawMessage{json.RawMessage(`"ALL_SUCCESS"`)}, + want: false, + }, + { + name: "one of several values", + remote: "SINGLE_TASK", + values: []json.RawMessage{json.RawMessage(`"MULTI_TASK"`), json.RawMessage(`"SINGLE_TASK"`)}, + want: true, + }, + { + name: "int match", + remote: int64(0), + values: []json.RawMessage{json.RawMessage(`0`)}, + want: true, + }, + { + name: "bool match", + remote: false, + values: []json.RawMessage{json.RawMessage(`false`)}, + want: true, + }, + { + name: "type mismatch", + remote: int64(1), + values: []json.RawMessage{json.RawMessage(`"1"`)}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, MatchesAllowedValue(tt.remote, tt.values)) + }) + } +} + +func TestMatchesBackendDefault(t *testing.T) { + cfg := &ResourceLifecycleConfig{ + BackendDefaults: []BackendDefaultRule{ + {Field: structpath.MustParsePattern("run_as")}, + {Field: structpath.MustParsePattern("tasks[*].run_if"), Values: []json.RawMessage{json.RawMessage(`"ALL_SUCCESS"`)}}, + }, + } + + tests := []struct { + name string + cfg *ResourceLifecycleConfig + path string + remote any + want bool + }{ + { + name: "rule without values matches any remote value", + cfg: cfg, + path: "run_as", + remote: map[string]any{"user_name": "someone@example.com"}, + want: true, + }, + { + name: "rule with values matches allowed value", + cfg: cfg, + path: "tasks[task_key='t1'].run_if", + remote: "ALL_SUCCESS", + want: true, + }, + { + name: "rule with values rejects other value", + cfg: cfg, + path: "tasks[task_key='t1'].run_if", + remote: "ALL_DONE", + want: false, + }, + { + name: "no rule for path", + cfg: cfg, + path: "description", + remote: "x", + want: false, + }, + { + name: "nil remote never matches", + cfg: cfg, + path: "run_as", + remote: nil, + want: false, + }, + { + name: "nil config never matches", + cfg: nil, + path: "run_as", + remote: "x", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path, err := structpath.ParsePath(tt.path) + require.NoError(t, err) + assert.Equal(t, tt.want, tt.cfg.MatchesBackendDefault(path, tt.remote)) + }) + } +} diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml index 5a017a4b24d..4462de0fb54 100644 --- a/bundle/direct/dresources/resources.yml +++ b/bundle/direct/dresources/resources.yml @@ -47,6 +47,11 @@ resources: # Same as clusters.node_type_id — see clusters/resource_cluster.go#L333 # s.SchemaPath("node_type_id").SetComputed() + # Config legitimately omits node_type_id when a cluster policy fixes it: jobs/get + # then returns the policy-resolved value in the stored settings (with or without + # apply_policy_default_values), so the drift must be skipped. + # config-remote-sync keeps these fields for remotely added clusters via + # fieldsKeptForSync in bundle/configsync/defaults.go. - field: tasks[*].new_cluster.node_type_id - field: job_clusters[*].new_cluster.node_type_id @@ -79,6 +84,41 @@ resources: # s.SchemaPath("run_as").SetComputed() - field: run_as + # The backend returns PERFORMANCE_OPTIMIZED for all jobs when the field + # is not set, not only for serverless jobs. + - field: performance_target + values: ["PERFORMANCE_OPTIMIZED"] + + # The backend returns timeout_seconds: 0 for tasks when not set. The plan + # already skips this via the all-empty check; the rule is needed so that + # config-remote-sync does not write timeout_seconds: 0 into YAML for + # tasks added remotely. + - field: tasks[*].timeout_seconds + values: [0] + - field: tasks[*].for_each_task.task.timeout_seconds + values: [0] + + # The backend returns disabled: false for tasks when not set. Same as + # timeout_seconds above: inert for the plan, needed by config-remote-sync. + - field: tasks[*].disabled + values: [false] + - field: tasks[*].for_each_task.task.disabled + values: [false] + + # When notifications are not configured, the backend returns + # email_notifications populated with the deprecated + # no_alert_for_skipped_runs: false field. Inert for the plan: the + # all-empty check in addPerFieldActions runs before backend defaults + # and already skips zero values. Needed so config-remote-sync drops the + # leaf (and then prunes the empty email_notifications map) instead of + # writing it into YAML. + - field: email_notifications.no_alert_for_skipped_runs + values: [false] + - field: tasks[*].email_notifications.no_alert_for_skipped_runs + values: [false] + - field: tasks[*].for_each_task.task.email_notifications.no_alert_for_skipped_runs + values: [false] + # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/jobs/resource_job.go#L521-L524 # s.SchemaPath("task", "notebook_task", "source").SetSuppressDiff() # s.SchemaPath("task", "spark_python_task", "source").SetSuppressDiff() @@ -97,6 +137,12 @@ resources: - field: tasks[*].new_cluster.data_security_mode - field: job_clusters[*].new_cluster.data_security_mode + # Same as clusters.single_user_name: backend sets it when the effective + # data security mode is single-user. + # See https://github.com/databricks/cli/issues/4418 + - field: tasks[*].new_cluster.single_user_name + - field: job_clusters[*].new_cluster.single_user_name + pipelines: recreate_on_changes: - field: storage @@ -138,10 +184,6 @@ resources: # Backend generates storage path like dbfs:/pipelines/ when not set by user. - field: storage - # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/pipelines/resource_pipeline.go#L218 - # s.SchemaPath("cluster", "node_type_id").SetComputed() - - field: clusters[*].node_type_id - # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/pipelines/resource_pipeline.go#L219 # s.SchemaPath("cluster", "driver_node_type_id").SetComputed() - field: clusters[*].driver_node_type_id @@ -375,6 +417,14 @@ resources: - field: serialized_dashboard reason: etag_based + # "etag" is output-only and never present in the config. This rule does not + # change deploy behavior: ResourceDashboard.OverrideChangeDesc runs after the + # metadata chain and re-promotes etag drift to Update so that out-of-band + # modifications are detected during deploy. config-remote-sync matches this + # rule directly (not the plan action) to keep etag out of YAML. + - field: etag + reason: output_only + # "dataset_catalog" and "dataset_schema" are write-only fields that are not returned by the server. # They will always differ between local config (which has values) and remote state (which has empty strings). - field: dataset_catalog @@ -464,6 +514,9 @@ resources: # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/clusters/resource_cluster.go#L333 # s.SchemaPath("node_type_id").SetComputed() + # Config legitimately omits node_type_id when instance_pool_id is set (the API + # rejects specifying both) or a cluster policy fixes it; clusters/get reports the + # actual node type at the top level, so the drift must be skipped. - field: node_type_id # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/clusters/resource_cluster.go#L334 @@ -478,6 +531,12 @@ resources: # See https://github.com/databricks/cli/issues/4418 - field: single_user_name + # Some backends compute alternate node types for standalone clusters when + # flexibility is not specified in the config (observed on AWS staging + # workspaces). + - field: driver_node_type_flexibility + - field: worker_node_type_flexibility + # We have custom handler for this in cluster.go # https://github.com/databricks/terraform-provider-databricks/blob/4eba541abe1a9f50993ea7b9dd83874207e224a1/clusters/resource_cluster.go#L109-L118 # DataSecurityModeDiffSuppressFunc: suppress when old != "" && new == "" diff --git a/libs/structs/structpath/path.go b/libs/structs/structpath/path.go index 5ae81019ee4..90d1fdb6c60 100644 --- a/libs/structs/structpath/path.go +++ b/libs/structs/structpath/path.go @@ -637,6 +637,15 @@ func MustParsePath(s string) *PathNode { return path } +// MustParsePattern parses a pattern string and panics on error. Wildcards are allowed. +func MustParsePattern(s string) *PatternNode { + pattern, err := ParsePattern(s) + if err != nil { + panic(err) + } + return pattern +} + // isReservedFieldChar checks if character is reserved and cannot be used in field names func isReservedFieldChar(ch byte) bool { switch ch { diff --git a/libs/structs/structpath/path_test.go b/libs/structs/structpath/path_test.go index 2f8fe4bf6be..1d449843768 100644 --- a/libs/structs/structpath/path_test.go +++ b/libs/structs/structpath/path_test.go @@ -567,6 +567,15 @@ func TestParseErrors(t *testing.T) { } } +func TestMustParsePattern(t *testing.T) { + pattern := MustParsePattern("tasks[*].run_if") + assert.Equal(t, "tasks[*].run_if", pattern.String()) + + assert.Panics(t, func() { + MustParsePattern("tasks[") + }) +} + func TestNewIndexPanic(t *testing.T) { defer func() { if r := recover(); r != nil {