From fc72eb7810a92d9ee0e6a3d86c2fa60ecb2907fb Mon Sep 17 00:00:00 2001 From: Steven Tan Date: Tue, 10 Mar 2026 10:41:23 +0800 Subject: [PATCH] Fix install.sh skill dir append bug and add MCP tool references to jobs and UC skills Consolidates PRs #272, #261, and #259 into a single PR: - install.sh: Fix Claude skill directory using = instead of +=, which overwrites directories from previously processed tools - databricks-jobs: Add MCP Tool Integration section with manage_jobs and manage_job_runs usage examples - databricks-unity-catalog: Expand MCP Tool Integration with 8 governance tool references (manage_uc_objects, grants, tags, storage, connections, security_policies, monitors, sharing) - Fix pre-existing ruff format issues in auth.py and test_sql.py --- databricks-skills/databricks-jobs/SKILL.md | 37 +++++++++++++ .../databricks-unity-catalog/SKILL.md | 43 +++++++++++++-- .../databricks_tools_core/auth.py | 8 +-- databricks-tools-core/tests/unit/test_sql.py | 54 ++++++++----------- install.sh | 2 +- 5 files changed, 101 insertions(+), 43 deletions(-) diff --git a/databricks-skills/databricks-jobs/SKILL.md b/databricks-skills/databricks-jobs/SKILL.md index 2f0f8c73..edfeba09 100644 --- a/databricks-skills/databricks-jobs/SKILL.md +++ b/databricks-skills/databricks-jobs/SKILL.md @@ -218,6 +218,43 @@ tasks: custom_param: "value" ``` +## MCP Tool Integration + +Use these MCP tools for job management: + +```python +# Create or update a job +manage_jobs(action="create", job_config={ + "name": "my_etl_job", + "tasks": [{"task_key": "extract", "notebook_task": {"notebook_path": "/src/extract"}}] +}) + +# List all jobs +manage_jobs(action="list") + +# Get job details +manage_jobs(action="get", job_id=12345) + +# Delete a job +manage_jobs(action="delete", job_id=12345) + +# Run a job immediately +manage_job_runs(action="run_now", job_id=12345) + +# Run with parameters +manage_job_runs(action="run_now", job_id=12345, + job_parameters={"env": "prod", "date": "2024-01-15"}) + +# Check run status +manage_job_runs(action="get_run", run_id=67890) + +# Cancel a run +manage_job_runs(action="cancel", run_id=67890) + +# List recent runs for a job +manage_job_runs(action="list_runs", job_id=12345) +``` + ## Common Operations ### Python SDK Operations diff --git a/databricks-skills/databricks-unity-catalog/SKILL.md b/databricks-skills/databricks-unity-catalog/SKILL.md index 30f34e3d..b3c4ccfb 100644 --- a/databricks-skills/databricks-unity-catalog/SKILL.md +++ b/databricks-skills/databricks-unity-catalog/SKILL.md @@ -85,11 +85,48 @@ GROUP BY workspace_id, sku_name; ## MCP Tool Integration -Use `mcp__databricks__execute_sql` for system table queries: +### Governance Tools + +Use these MCP tools for Unity Catalog governance operations: + +```python +# Manage catalogs, schemas, tables, volumes, functions +manage_uc_objects(action="list", object_type="catalogs") +manage_uc_objects(action="create", object_type="schema", catalog="main", schema="my_schema") +manage_uc_objects(action="describe", object_type="table", full_name="main.schema.table") + +# Manage grants and permissions +manage_uc_grants(action="list", securable_type="table", full_name="main.schema.table") +manage_uc_grants(action="grant", securable_type="schema", full_name="main.my_schema", + principal="data-engineers", privileges=["USE_SCHEMA", "SELECT"]) + +# Manage tags for classification +manage_uc_tags(action="set", securable_type="table", full_name="main.schema.table", + tags={"pii": "true", "team": "analytics"}) + +# Manage storage credentials and external locations +manage_uc_storage(action="list", storage_type="credentials") +manage_uc_storage(action="list", storage_type="external_locations") + +# Manage Lakehouse Federation connections +manage_uc_connections(action="list") + +# Manage row filters and column masks +manage_uc_security_policies(action="list", securable_type="table", full_name="main.schema.table") + +# Manage data quality monitors +manage_uc_monitors(action="list", table_name="main.schema.table") + +# Manage Delta Sharing +manage_uc_sharing(action="list", sharing_type="shares") +``` + +### SQL Queries + +Use `execute_sql` for system table queries: ```python -# Query lineage -mcp__databricks__execute_sql( +execute_sql( sql_query=""" SELECT source_table_full_name, target_table_full_name FROM system.access.table_lineage diff --git a/databricks-tools-core/databricks_tools_core/auth.py b/databricks-tools-core/databricks_tools_core/auth.py index 21913983..c3db9fb4 100644 --- a/databricks-tools-core/databricks_tools_core/auth.py +++ b/databricks-tools-core/databricks_tools_core/auth.py @@ -160,9 +160,7 @@ def get_workspace_client() -> WorkspaceClient: # Cross-workspace: explicit token overrides env OAuth so tool operations # target the caller-specified workspace instead of the app's own workspace if force and host and token: - return tag_client( - WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs) - ) + return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)) # In Databricks Apps (OAuth credentials in env), explicitly use OAuth M2M. # Setting auth_type="oauth-m2m" prevents the SDK from also reading @@ -185,9 +183,7 @@ def get_workspace_client() -> WorkspaceClient: # Development mode: use explicit token if provided if host and token: - return tag_client( - WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs) - ) + return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)) if host: return tag_client(WorkspaceClient(host=host, **product_kwargs)) diff --git a/databricks-tools-core/tests/unit/test_sql.py b/databricks-tools-core/tests/unit/test_sql.py index d1b661c6..42137ba5 100644 --- a/databricks-tools-core/tests/unit/test_sql.py +++ b/databricks-tools-core/tests/unit/test_sql.py @@ -121,8 +121,7 @@ def test_executor_without_query_tags_omits_from_api(self, mock_get_client): assert "query_tags" not in call_kwargs -def _make_warehouse(id, name, state, creator_name="other@example.com", - enable_serverless_compute=False): +def _make_warehouse(id, name, state, creator_name="other@example.com", enable_serverless_compute=False): """Helper to create a mock warehouse object.""" w = mock.Mock() w.id = id @@ -141,33 +140,29 @@ class TestSortWithinTier: def test_serverless_first(self): """Serverless warehouses should come before classic ones.""" classic = _make_warehouse("c1", "Classic WH", State.RUNNING) - serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, enable_serverless_compute=True) result = _sort_within_tier([classic, serverless], current_user=None) assert result[0].id == "s1" assert result[1].id == "c1" def test_serverless_before_user_owned(self): """Serverless should be preferred over user-owned classic.""" - classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, - creator_name="me@example.com") - serverless_other = _make_warehouse("s1", "Other WH", State.RUNNING, - creator_name="other@example.com", - enable_serverless_compute=True) - result = _sort_within_tier([classic_owned, serverless_other], - current_user="me@example.com") + classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, creator_name="me@example.com") + serverless_other = _make_warehouse( + "s1", "Other WH", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True + ) + result = _sort_within_tier([classic_owned, serverless_other], current_user="me@example.com") assert result[0].id == "s1" def test_serverless_user_owned_first(self): """Among serverless, user-owned should come first.""" - serverless_other = _make_warehouse("s1", "Other Serverless", State.RUNNING, - creator_name="other@example.com", - enable_serverless_compute=True) - serverless_owned = _make_warehouse("s2", "My Serverless", State.RUNNING, - creator_name="me@example.com", - enable_serverless_compute=True) - result = _sort_within_tier([serverless_other, serverless_owned], - current_user="me@example.com") + serverless_other = _make_warehouse( + "s1", "Other Serverless", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True + ) + serverless_owned = _make_warehouse( + "s2", "My Serverless", State.RUNNING, creator_name="me@example.com", enable_serverless_compute=True + ) + result = _sort_within_tier([serverless_other, serverless_owned], current_user="me@example.com") assert result[0].id == "s2" assert result[1].id == "s1" @@ -177,8 +172,7 @@ def test_empty_list(self): def test_no_current_user(self): """Without a current user, only serverless preference applies.""" classic = _make_warehouse("c1", "Classic", State.RUNNING) - serverless = _make_warehouse("s1", "Serverless", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Serverless", State.RUNNING, enable_serverless_compute=True) result = _sort_within_tier([classic, serverless], current_user=None) assert result[0].id == "s1" @@ -186,14 +180,12 @@ def test_no_current_user(self): class TestGetBestWarehouseServerless: """Tests for serverless preference in get_best_warehouse.""" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_user): """Among running shared warehouses, serverless should be picked.""" classic_shared = _make_warehouse("c1", "Shared WH", State.RUNNING) - serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, - enable_serverless_compute=True) + serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [classic_shared, serverless_shared] mock_client_fn.return_value = mock_client @@ -201,14 +193,12 @@ def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_use result = get_best_warehouse() assert result == "s1" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user): """Among running non-shared warehouses, serverless should be picked.""" classic = _make_warehouse("c1", "My WH", State.RUNNING) - serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [classic, serverless] mock_client_fn.return_value = mock_client @@ -216,14 +206,12 @@ def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user result = get_best_warehouse() assert result == "s1" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_tier_order_preserved_over_serverless(self, mock_client_fn, mock_user): """A running shared classic should still beat a stopped serverless.""" running_shared_classic = _make_warehouse("c1", "Shared WH", State.RUNNING) - stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, - enable_serverless_compute=True) + stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [stopped_serverless, running_shared_classic] mock_client_fn.return_value = mock_client diff --git a/install.sh b/install.sh index 868f2adb..10ff0f9a 100755 --- a/install.sh +++ b/install.sh @@ -690,7 +690,7 @@ install_skills() { # Determine target directories (array so paths with spaces work) for tool in $TOOLS; do case $tool in - claude) dirs=("$base_dir/.claude/skills") ;; + claude) dirs+=("$base_dir/.claude/skills") ;; cursor) echo "$TOOLS" | grep -q claude || dirs+=("$base_dir/.cursor/skills") ;; copilot) dirs+=("$base_dir/.github/skills") ;; codex) dirs+=("$base_dir/.agents/skills") ;;