From fc72eb7810a92d9ee0e6a3d86c2fa60ecb2907fb Mon Sep 17 00:00:00 2001
From: Steven Tan <cheeyutcy@gmail.com>
Date: Tue, 10 Mar 2026 10:41:23 +0800
Subject: [PATCH] Fix install.sh skill dir append bug and add MCP tool
 references to jobs and UC skills

Consolidates PRs #272, #261, and #259 into a single PR:

- install.sh: Fix Claude skill directory using = instead of +=, which
  overwrites directories from previously processed tools
- databricks-jobs: Add MCP Tool Integration section with manage_jobs
  and manage_job_runs usage examples
- databricks-unity-catalog: Expand MCP Tool Integration with 8 governance
  tool references (manage_uc_objects, grants, tags, storage, connections,
  security_policies, monitors, sharing)
- Fix pre-existing ruff format issues in auth.py and test_sql.py
---
 databricks-skills/databricks-jobs/SKILL.md    | 37 +++++++++++++
 .../databricks-unity-catalog/SKILL.md         | 43 +++++++++++++--
 .../databricks_tools_core/auth.py             |  8 +--
 databricks-tools-core/tests/unit/test_sql.py  | 54 ++++++++-----------
 install.sh                                    |  2 +-
 5 files changed, 101 insertions(+), 43 deletions(-)

diff --git a/databricks-skills/databricks-jobs/SKILL.md b/databricks-skills/databricks-jobs/SKILL.md
index 2f0f8c73..edfeba09 100644
--- a/databricks-skills/databricks-jobs/SKILL.md
+++ b/databricks-skills/databricks-jobs/SKILL.md
@@ -218,6 +218,43 @@ tasks:
         custom_param: "value"
 ```
 
+## MCP Tool Integration
+
+Use these MCP tools for job management:
+
+```python
+# Create or update a job
+manage_jobs(action="create", job_config={
+    "name": "my_etl_job",
+    "tasks": [{"task_key": "extract", "notebook_task": {"notebook_path": "/src/extract"}}]
+})
+
+# List all jobs
+manage_jobs(action="list")
+
+# Get job details
+manage_jobs(action="get", job_id=12345)
+
+# Delete a job
+manage_jobs(action="delete", job_id=12345)
+
+# Run a job immediately
+manage_job_runs(action="run_now", job_id=12345)
+
+# Run with parameters
+manage_job_runs(action="run_now", job_id=12345,
+                job_parameters={"env": "prod", "date": "2024-01-15"})
+
+# Check run status
+manage_job_runs(action="get_run", run_id=67890)
+
+# Cancel a run
+manage_job_runs(action="cancel", run_id=67890)
+
+# List recent runs for a job
+manage_job_runs(action="list_runs", job_id=12345)
+```
+
 ## Common Operations
 
 ### Python SDK Operations
diff --git a/databricks-skills/databricks-unity-catalog/SKILL.md b/databricks-skills/databricks-unity-catalog/SKILL.md
index 30f34e3d..b3c4ccfb 100644
--- a/databricks-skills/databricks-unity-catalog/SKILL.md
+++ b/databricks-skills/databricks-unity-catalog/SKILL.md
@@ -85,11 +85,48 @@ GROUP BY workspace_id, sku_name;
 
 ## MCP Tool Integration
 
-Use `mcp__databricks__execute_sql` for system table queries:
+### Governance Tools
+
+Use these MCP tools for Unity Catalog governance operations:
+
+```python
+# Manage catalogs, schemas, tables, volumes, functions
+manage_uc_objects(action="list", object_type="catalogs")
+manage_uc_objects(action="create", object_type="schema", catalog="main", schema="my_schema")
+manage_uc_objects(action="describe", object_type="table", full_name="main.schema.table")
+
+# Manage grants and permissions
+manage_uc_grants(action="list", securable_type="table", full_name="main.schema.table")
+manage_uc_grants(action="grant", securable_type="schema", full_name="main.my_schema",
+                 principal="data-engineers", privileges=["USE_SCHEMA", "SELECT"])
+
+# Manage tags for classification
+manage_uc_tags(action="set", securable_type="table", full_name="main.schema.table",
+               tags={"pii": "true", "team": "analytics"})
+
+# Manage storage credentials and external locations
+manage_uc_storage(action="list", storage_type="credentials")
+manage_uc_storage(action="list", storage_type="external_locations")
+
+# Manage Lakehouse Federation connections
+manage_uc_connections(action="list")
+
+# Manage row filters and column masks
+manage_uc_security_policies(action="list", securable_type="table", full_name="main.schema.table")
+
+# Manage data quality monitors
+manage_uc_monitors(action="list", table_name="main.schema.table")
+
+# Manage Delta Sharing
+manage_uc_sharing(action="list", sharing_type="shares")
+```
+
+### SQL Queries
+
+Use `execute_sql` for system table queries:
 
 ```python
-# Query lineage
-mcp__databricks__execute_sql(
+execute_sql(
     sql_query="""
         SELECT source_table_full_name, target_table_full_name
         FROM system.access.table_lineage
diff --git a/databricks-tools-core/databricks_tools_core/auth.py b/databricks-tools-core/databricks_tools_core/auth.py
index 21913983..c3db9fb4 100644
--- a/databricks-tools-core/databricks_tools_core/auth.py
+++ b/databricks-tools-core/databricks_tools_core/auth.py
@@ -160,9 +160,7 @@ def get_workspace_client() -> WorkspaceClient:
     # Cross-workspace: explicit token overrides env OAuth so tool operations
     # target the caller-specified workspace instead of the app's own workspace
     if force and host and token:
-        return tag_client(
-            WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)
-        )
+        return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs))
 
     # In Databricks Apps (OAuth credentials in env), explicitly use OAuth M2M.
     # Setting auth_type="oauth-m2m" prevents the SDK from also reading
@@ -185,9 +183,7 @@ def get_workspace_client() -> WorkspaceClient:
 
     # Development mode: use explicit token if provided
     if host and token:
-        return tag_client(
-            WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)
-        )
+        return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs))
 
     if host:
         return tag_client(WorkspaceClient(host=host, **product_kwargs))
diff --git a/databricks-tools-core/tests/unit/test_sql.py b/databricks-tools-core/tests/unit/test_sql.py
index d1b661c6..42137ba5 100644
--- a/databricks-tools-core/tests/unit/test_sql.py
+++ b/databricks-tools-core/tests/unit/test_sql.py
@@ -121,8 +121,7 @@ def test_executor_without_query_tags_omits_from_api(self, mock_get_client):
         assert "query_tags" not in call_kwargs
 
 
-def _make_warehouse(id, name, state, creator_name="other@example.com",
-                    enable_serverless_compute=False):
+def _make_warehouse(id, name, state, creator_name="other@example.com", enable_serverless_compute=False):
     """Helper to create a mock warehouse object."""
     w = mock.Mock()
     w.id = id
@@ -141,33 +140,29 @@ class TestSortWithinTier:
     def test_serverless_first(self):
         """Serverless warehouses should come before classic ones."""
         classic = _make_warehouse("c1", "Classic WH", State.RUNNING)
-        serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING,
-                                     enable_serverless_compute=True)
+        serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, enable_serverless_compute=True)
         result = _sort_within_tier([classic, serverless], current_user=None)
         assert result[0].id == "s1"
         assert result[1].id == "c1"
 
     def test_serverless_before_user_owned(self):
         """Serverless should be preferred over user-owned classic."""
-        classic_owned = _make_warehouse("c1", "My WH", State.RUNNING,
-                                        creator_name="me@example.com")
-        serverless_other = _make_warehouse("s1", "Other WH", State.RUNNING,
-                                           creator_name="other@example.com",
-                                           enable_serverless_compute=True)
-        result = _sort_within_tier([classic_owned, serverless_other],
-                                   current_user="me@example.com")
+        classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, creator_name="me@example.com")
+        serverless_other = _make_warehouse(
+            "s1", "Other WH", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True
+        )
+        result = _sort_within_tier([classic_owned, serverless_other], current_user="me@example.com")
         assert result[0].id == "s1"
 
     def test_serverless_user_owned_first(self):
         """Among serverless, user-owned should come first."""
-        serverless_other = _make_warehouse("s1", "Other Serverless", State.RUNNING,
-                                           creator_name="other@example.com",
-                                           enable_serverless_compute=True)
-        serverless_owned = _make_warehouse("s2", "My Serverless", State.RUNNING,
-                                           creator_name="me@example.com",
-                                           enable_serverless_compute=True)
-        result = _sort_within_tier([serverless_other, serverless_owned],
-                                   current_user="me@example.com")
+        serverless_other = _make_warehouse(
+            "s1", "Other Serverless", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True
+        )
+        serverless_owned = _make_warehouse(
+            "s2", "My Serverless", State.RUNNING, creator_name="me@example.com", enable_serverless_compute=True
+        )
+        result = _sort_within_tier([serverless_other, serverless_owned], current_user="me@example.com")
         assert result[0].id == "s2"
         assert result[1].id == "s1"
 
@@ -177,8 +172,7 @@ def test_empty_list(self):
     def test_no_current_user(self):
         """Without a current user, only serverless preference applies."""
         classic = _make_warehouse("c1", "Classic", State.RUNNING)
-        serverless = _make_warehouse("s1", "Serverless", State.RUNNING,
-                                     enable_serverless_compute=True)
+        serverless = _make_warehouse("s1", "Serverless", State.RUNNING, enable_serverless_compute=True)
         result = _sort_within_tier([classic, serverless], current_user=None)
         assert result[0].id == "s1"
 
@@ -186,14 +180,12 @@ def test_no_current_user(self):
 class TestGetBestWarehouseServerless:
     """Tests for serverless preference in get_best_warehouse."""
 
-    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
-                return_value="me@example.com")
+    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
     @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
     def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_user):
         """Among running shared warehouses, serverless should be picked."""
         classic_shared = _make_warehouse("c1", "Shared WH", State.RUNNING)
-        serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING,
-                                            enable_serverless_compute=True)
+        serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, enable_serverless_compute=True)
         mock_client = mock.Mock()
         mock_client.warehouses.list.return_value = [classic_shared, serverless_shared]
         mock_client_fn.return_value = mock_client
@@ -201,14 +193,12 @@ def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_use
         result = get_best_warehouse()
         assert result == "s1"
 
-    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
-                return_value="me@example.com")
+    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
     @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
     def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user):
         """Among running non-shared warehouses, serverless should be picked."""
         classic = _make_warehouse("c1", "My WH", State.RUNNING)
-        serverless = _make_warehouse("s1", "Fast WH", State.RUNNING,
-                                     enable_serverless_compute=True)
+        serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, enable_serverless_compute=True)
         mock_client = mock.Mock()
         mock_client.warehouses.list.return_value = [classic, serverless]
         mock_client_fn.return_value = mock_client
@@ -216,14 +206,12 @@ def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user
         result = get_best_warehouse()
         assert result == "s1"
 
-    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
-                return_value="me@example.com")
+    @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
     @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
     def test_tier_order_preserved_over_serverless(self, mock_client_fn, mock_user):
         """A running shared classic should still beat a stopped serverless."""
         running_shared_classic = _make_warehouse("c1", "Shared WH", State.RUNNING)
-        stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED,
-                                             enable_serverless_compute=True)
+        stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, enable_serverless_compute=True)
         mock_client = mock.Mock()
         mock_client.warehouses.list.return_value = [stopped_serverless, running_shared_classic]
         mock_client_fn.return_value = mock_client
diff --git a/install.sh b/install.sh
index 868f2adb..10ff0f9a 100755
--- a/install.sh
+++ b/install.sh
@@ -690,7 +690,7 @@ install_skills() {
     # Determine target directories (array so paths with spaces work)
     for tool in $TOOLS; do
         case $tool in
-            claude) dirs=("$base_dir/.claude/skills") ;;
+            claude) dirs+=("$base_dir/.claude/skills") ;;
             cursor) echo "$TOOLS" | grep -q claude || dirs+=("$base_dir/.cursor/skills") ;;
             copilot) dirs+=("$base_dir/.github/skills") ;;
             codex) dirs+=("$base_dir/.agents/skills") ;;