Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
bundle:
  name: test-bundle-$UNIQUE_NAME

resources:
  jobs:
    job_one:
      max_concurrent_runs: 1
      tasks:
        - task_key: main
          notebook_task:
            notebook_path: /Users/{{workspace_user_name}}/job1
          new_cluster:
            spark_version: $DEFAULT_SPARK_VERSION
            node_type_id: $NODE_TYPE_ID
            num_workers: 1

    job_two:
      max_concurrent_runs: 2
      tasks:
        - task_key: main
          notebook_task:
            notebook_path: /Users/{{workspace_user_name}}/job2
          new_cluster:
            spark_version: $DEFAULT_SPARK_VERSION
            node_type_id: $NODE_TYPE_ID
            num_workers: 1

targets:
  default:
    mode: development

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions acceptance/bundle/config-remote-sync/select_basic/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Modify both jobs remotely
=== Sync only job_one, selected by its type and deployed resource id
Detected changes in 1 resource(s):

Resource: resources.jobs.job_one
max_concurrent_runs: replace



=== Only job_one is updated; job_two is left untouched

>>> diff.py databricks.yml.backup databricks.yml
--- databricks.yml.backup
+++ databricks.yml
@@ -5,5 +5,5 @@
jobs:
job_one:
- max_concurrent_runs: 1
+ max_concurrent_runs: 5
tasks:
- task_key: main

=== Selecting job_one again is idempotent
No changes detected.


=== Unfiltered sync still detects the job_two drift (no lost updates)
Detected changes in 1 resource(s):

Resource: resources.jobs.job_two
max_concurrent_runs: replace



=== An unknown resource id is rejected
>>> [CLI] bundle config-remote-sync --select jobs:no-such-id-123
Error: no deployed jobs resource with id no-such-id-123

Exit code: 1

=== A selector without a type is rejected
>>> [CLI] bundle config-remote-sync --select no-such-id-123
Error: invalid --select value "no-such-id-123", expected <type>:<id> (e.g. jobs:[NUMID])

Exit code: 1

=== An id that exists under a different type is rejected (no cross-type collision)
>>> [CLI] bundle config-remote-sync --select pipelines:[JOB_ONE_ID]
Error: no deployed pipelines resource with id [JOB_ONE_ID]

Exit code: 1

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.jobs.job_one
delete resources.jobs.job_two

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
48 changes: 48 additions & 0 deletions acceptance/bundle/config-remote-sync/select_basic/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Acceptance test for `bundle config-remote-sync --select <type>:<id>`:
# a selector limits the sync to one deployed resource, and malformed or
# unknown selectors are rejected with a non-zero exit code.
# title, trace, errcode, read_id.py, edit_resource.py and diff.py are not
# defined in this script; presumably they are provided by the acceptance
# test harness.

# Render the bundle config from its template, expanding environment variables.
envsubst < databricks.yml.tmpl > databricks.yml

# Destroy the deployed bundle whenever the script exits, so a failing step
# does not leave resources behind.
cleanup() {
trace $CLI bundle destroy --auto-approve
}
trap cleanup EXIT

$CLI bundle deploy
# Ids of the two deployed jobs; used both to edit them remotely and to build
# the --select values below.
job_one_id="$(read_id.py job_one)"
job_two_id="$(read_id.py job_two)"

# Introduce drift on BOTH jobs so the test can show that only the selected
# one is synced. The snippet on stdin sets the new max_concurrent_runs value
# (presumably applied to the remote job by edit_resource.py - confirm).
title "Modify both jobs remotely"
edit_resource.py jobs $job_one_id <<EOF
r["max_concurrent_runs"] = 5
EOF

edit_resource.py jobs $job_two_id <<EOF
r["max_concurrent_runs"] = 10
EOF

title "Sync only job_one, selected by its type and deployed resource id"
echo
# Keep a copy of the config so the next step can diff what --save changed.
cp databricks.yml databricks.yml.backup
$CLI bundle config-remote-sync --select "jobs:$job_one_id" --save

title "Only job_one is updated; job_two is left untouched"
echo
trace diff.py databricks.yml.backup databricks.yml
rm databricks.yml.backup

# Run without --save: after the save above, the selected job should report
# no remaining changes.
title "Selecting job_one again is idempotent"
echo
$CLI bundle config-remote-sync --select "jobs:$job_one_id"

# Without a selector the job_two drift must still be reported, i.e. the
# filtered save above did not hide it.
title "Unfiltered sync still detects the job_two drift (no lost updates)"
echo
$CLI bundle config-remote-sync

# Negative cases. The commands are expected to fail; errcode appears to
# report the exit code in the output rather than abort the script.
title "An unknown resource id is rejected"
errcode trace $CLI bundle config-remote-sync --select jobs:no-such-id-123

title "A selector without a type is rejected"
errcode trace $CLI bundle config-remote-sync --select no-such-id-123

# A real job id paired with the wrong resource type must not match.
title "An id that exists under a different type is rejected (no cross-type collision)"
errcode trace $CLI bundle config-remote-sync --select "pipelines:$job_one_id"
10 changes: 10 additions & 0 deletions acceptance/bundle/config-remote-sync/select_basic/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Cloud = true

RecordRequests = false
Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"]

[Env]
DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true"

[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
bundle:
  name: test-bundle-$UNIQUE_NAME

resources:
  jobs:
    job_a:
      max_concurrent_runs: 1
      tasks:
        - task_key: main
          notebook_task:
            notebook_path: /Users/{{workspace_user_name}}/job_a
          new_cluster:
            spark_version: $DEFAULT_SPARK_VERSION
            node_type_id: $NODE_TYPE_ID
            num_workers: 1

    job_b:
      max_concurrent_runs: 1
      tasks:
        - task_key: main
          notebook_task:
            notebook_path: /Users/{{workspace_user_name}}/job_b
          new_cluster:
            spark_version: $DEFAULT_SPARK_VERSION
            node_type_id: $NODE_TYPE_ID
            num_workers: 1

    job_c:
      max_concurrent_runs: 1
      tasks:
        - task_key: main
          notebook_task:
            notebook_path: /Users/{{workspace_user_name}}/job_c
          new_cluster:
            spark_version: $DEFAULT_SPARK_VERSION
            node_type_id: $NODE_TYPE_ID
            num_workers: 1

targets:
  default:
    mode: development

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 74 additions & 0 deletions acceptance/bundle/config-remote-sync/select_multiple/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Modify all three jobs remotely
=== Comma-separated selectors preview two of three resources
Detected changes in 2 resource(s):

Resource: resources.jobs.job_a
max_concurrent_runs: replace

Resource: resources.jobs.job_b
max_concurrent_runs: replace



=== Repeating the same selector dedupes silently
Detected changes in 1 resource(s):

Resource: resources.jobs.job_c
max_concurrent_runs: replace



=== Save with repeated --select flags
Detected changes in 2 resource(s):

Resource: resources.jobs.job_a
max_concurrent_runs: replace

Resource: resources.jobs.job_b
max_concurrent_runs: replace



=== job_a and job_b are updated, job_c is untouched

>>> diff.py databricks.yml.backup databricks.yml
--- databricks.yml.backup
+++ databricks.yml
@@ -5,5 +5,5 @@
jobs:
job_a:
- max_concurrent_runs: 1
+ max_concurrent_runs: 5
tasks:
- task_key: main
@@ -16,5 +16,5 @@

job_b:
- max_concurrent_runs: 1
+ max_concurrent_runs: 5
tasks:
- task_key: main

=== Unfiltered sync still detects the job_c drift
Detected changes in 1 resource(s):

Resource: resources.jobs.job_c
max_concurrent_runs: replace



>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.jobs.job_a
delete resources.jobs.job_b
delete resources.jobs.job_c

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
42 changes: 42 additions & 0 deletions acceptance/bundle/config-remote-sync/select_multiple/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Acceptance test for passing several selectors to
# `bundle config-remote-sync --select`: comma-separated values, duplicate
# values, and repeated --select flags.
# title, trace, read_id.py, edit_resource.py and diff.py are not defined in
# this script; presumably they are provided by the acceptance test harness.

# Render the bundle config from its template, expanding environment variables.
envsubst < databricks.yml.tmpl > databricks.yml

# Destroy the deployed bundle whenever the script exits, so a failing step
# does not leave resources behind.
cleanup() {
trace $CLI bundle destroy --auto-approve
}
trap cleanup EXIT

$CLI bundle deploy
# Ids of the three deployed jobs; used to edit them remotely and to build the
# --select values below.
job_a_id="$(read_id.py job_a)"
job_b_id="$(read_id.py job_b)"
job_c_id="$(read_id.py job_c)"

# Introduce the same drift on every job; the config declares
# max_concurrent_runs: 1 for each, the snippet on stdin sets 5
# (presumably applied to the remote job by edit_resource.py - confirm).
title "Modify all three jobs remotely"
for id in $job_a_id $job_b_id $job_c_id; do
edit_resource.py jobs $id <<EOF
r["max_concurrent_runs"] = 5
EOF
done

# Preview only (no --save): one flag carrying two comma-separated selectors.
title "Comma-separated selectors preview two of three resources"
echo
$CLI bundle config-remote-sync --select "jobs:$job_a_id,jobs:$job_b_id"

# The same selector given twice should yield a single reported resource.
title "Repeating the same selector dedupes silently"
echo
$CLI bundle config-remote-sync --select "jobs:$job_c_id,jobs:$job_c_id"

title "Save with repeated --select flags"
echo
# Keep a copy of the config so the next step can diff what --save changed.
cp databricks.yml databricks.yml.backup
$CLI bundle config-remote-sync --select "jobs:$job_a_id" --select "jobs:$job_b_id" --save

title "job_a and job_b are updated, job_c is untouched"
echo
trace diff.py databricks.yml.backup databricks.yml
rm databricks.yml.backup

# Without a selector the remaining job_c drift must still be reported.
title "Unfiltered sync still detects the job_c drift"
echo
$CLI bundle config-remote-sync
10 changes: 10 additions & 0 deletions acceptance/bundle/config-remote-sync/select_multiple/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Cloud = true

RecordRequests = false
Ignore = [".databricks", "databricks.yml", "databricks.yml.backup"]

[Env]
DATABRICKS_BUNDLE_ENABLE_EXPERIMENTAL_YAML_SYNC = "true"

[EnvMatrix]
DATABRICKS_BUNDLE_ENGINE = ["direct", "terraform"]
39 changes: 23 additions & 16 deletions bundle/configsync/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,28 +123,35 @@ func convertChangeDesc(path string, cd *deployplan.ChangeDesc) (*ConfigChangeDes
}, nil
}

// DetectChanges compares current remote state with the last deployed state
// and returns a map of resource changes.
func DetectChanges(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) (Changes, error) {
changes := make(Changes)

err := ensureSnapshotAvailable(ctx, b, engine)
if err != nil {
// OpenDeploymentState returns the deployment bundle whose StateDB is open for
// reading. For the direct engine the caller (process.go) has already opened
// b.DeploymentBundle; for the terraform engine the config snapshot is opened
// here. Both yield read-mode state, so GetResourceID and Data.State are usable.
// Open the state once per command and pass it to DetectChanges and
// ResolveResourceSelectors so the terraform snapshot is read only once.
func OpenDeploymentState(ctx context.Context, b *bundle.Bundle, engine engine.EngineType) (*direct.DeploymentBundle, error) {
if err := ensureSnapshotAvailable(ctx, b, engine); err != nil {
return nil, fmt.Errorf("state snapshot not available: %w", err)
}

var deployBundle *direct.DeploymentBundle
if engine.IsDirect() {
// For direct engine, state is already opened by the caller (process.go).
deployBundle = &b.DeploymentBundle
} else {
deployBundle = &direct.DeploymentBundle{}
_, statePath := b.StateFilenameConfigSnapshot(ctx)
if err := deployBundle.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false)); err != nil {
return nil, fmt.Errorf("failed to open state: %w", err)
}
return &b.DeploymentBundle, nil
}

deployBundle := &direct.DeploymentBundle{}
_, statePath := b.StateFilenameConfigSnapshot(ctx)
if err := deployBundle.StateDB.Open(ctx, statePath, dstate.WithRecovery(true), dstate.WithWrite(false)); err != nil {
return nil, fmt.Errorf("failed to open state: %w", err)
}
return deployBundle, nil
}

// DetectChanges compares current remote state with the last deployed state
// and returns a map of resource changes. deployBundle must already be open
// (see OpenDeploymentState).
func DetectChanges(ctx context.Context, b *bundle.Bundle, deployBundle *direct.DeploymentBundle) (Changes, error) {
changes := make(Changes)

plan, err := deployBundle.CalculatePlan(ctx, b.WorkspaceClient(ctx), &b.Config)
if err != nil {
return nil, fmt.Errorf("failed to calculate plan: %w", err)
Expand Down
Loading
Loading