Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions databricks-skills/databricks-jobs/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,43 @@ tasks:
custom_param: "value"
```

## MCP Tool Integration

Use the `manage_jobs` and `manage_job_runs` MCP tools for job management:

```python
# Create or update a job
manage_jobs(action="create", job_config={
"name": "my_etl_job",
"tasks": [{"task_key": "extract", "notebook_task": {"notebook_path": "/src/extract"}}]
})

# List all jobs
manage_jobs(action="list")

# Get job details
manage_jobs(action="get", job_id=12345)

# Delete a job
manage_jobs(action="delete", job_id=12345)

# Run a job immediately
manage_job_runs(action="run_now", job_id=12345)

# Run with parameters
manage_job_runs(action="run_now", job_id=12345,
job_parameters={"env": "prod", "date": "2024-01-15"})

# Check run status
manage_job_runs(action="get_run", run_id=67890)

# Cancel a run
manage_job_runs(action="cancel", run_id=67890)

# List recent runs for a job
manage_job_runs(action="list_runs", job_id=12345)
```

## Common Operations

### Python SDK Operations
Expand Down
43 changes: 40 additions & 3 deletions databricks-skills/databricks-unity-catalog/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,48 @@ GROUP BY workspace_id, sku_name;

## MCP Tool Integration

Use `mcp__databricks__execute_sql` for system table queries:
### Governance Tools

Use these MCP tools for Unity Catalog governance operations:

```python
# Manage catalogs, schemas, tables, volumes, functions
manage_uc_objects(action="list", object_type="catalogs")
manage_uc_objects(action="create", object_type="schema", catalog="main", schema="my_schema")
manage_uc_objects(action="describe", object_type="table", full_name="main.schema.table")

# Manage grants and permissions
manage_uc_grants(action="list", securable_type="table", full_name="main.schema.table")
manage_uc_grants(action="grant", securable_type="schema", full_name="main.my_schema",
principal="data-engineers", privileges=["USE_SCHEMA", "SELECT"])

# Manage tags for classification
manage_uc_tags(action="set", securable_type="table", full_name="main.schema.table",
tags={"pii": "true", "team": "analytics"})

# Manage storage credentials and external locations
manage_uc_storage(action="list", storage_type="credentials")
manage_uc_storage(action="list", storage_type="external_locations")

# Manage Lakehouse Federation connections
manage_uc_connections(action="list")

# Manage row filters and column masks
manage_uc_security_policies(action="list", securable_type="table", full_name="main.schema.table")

# Manage data quality monitors
manage_uc_monitors(action="list", table_name="main.schema.table")

# Manage Delta Sharing
manage_uc_sharing(action="list", sharing_type="shares")
```

### SQL Queries

Use `execute_sql` for system table queries:

```python
# Query lineage
mcp__databricks__execute_sql(
execute_sql(
sql_query="""
SELECT source_table_full_name, target_table_full_name
FROM system.access.table_lineage
Expand Down
8 changes: 2 additions & 6 deletions databricks-tools-core/databricks_tools_core/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,7 @@ def get_workspace_client() -> WorkspaceClient:
# Cross-workspace: explicit token overrides env OAuth so tool operations
# target the caller-specified workspace instead of the app's own workspace
if force and host and token:
return tag_client(
WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)
)
return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs))

# In Databricks Apps (OAuth credentials in env), explicitly use OAuth M2M.
# Setting auth_type="oauth-m2m" prevents the SDK from also reading
Expand All @@ -185,9 +183,7 @@ def get_workspace_client() -> WorkspaceClient:

# Development mode: use explicit token if provided
if host and token:
return tag_client(
WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)
)
return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs))

if host:
return tag_client(WorkspaceClient(host=host, **product_kwargs))
Expand Down
54 changes: 21 additions & 33 deletions databricks-tools-core/tests/unit/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ def test_executor_without_query_tags_omits_from_api(self, mock_get_client):
assert "query_tags" not in call_kwargs


def _make_warehouse(id, name, state, creator_name="other@example.com",
enable_serverless_compute=False):
def _make_warehouse(id, name, state, creator_name="other@example.com", enable_serverless_compute=False):
"""Helper to create a mock warehouse object."""
w = mock.Mock()
w.id = id
Expand All @@ -141,33 +140,29 @@ class TestSortWithinTier:
def test_serverless_first(self):
"""Serverless warehouses should come before classic ones."""
classic = _make_warehouse("c1", "Classic WH", State.RUNNING)
serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING,
enable_serverless_compute=True)
serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, enable_serverless_compute=True)
result = _sort_within_tier([classic, serverless], current_user=None)
assert result[0].id == "s1"
assert result[1].id == "c1"

def test_serverless_before_user_owned(self):
"""Serverless should be preferred over user-owned classic."""
classic_owned = _make_warehouse("c1", "My WH", State.RUNNING,
creator_name="me@example.com")
serverless_other = _make_warehouse("s1", "Other WH", State.RUNNING,
creator_name="other@example.com",
enable_serverless_compute=True)
result = _sort_within_tier([classic_owned, serverless_other],
current_user="me@example.com")
classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, creator_name="me@example.com")
serverless_other = _make_warehouse(
"s1", "Other WH", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True
)
result = _sort_within_tier([classic_owned, serverless_other], current_user="me@example.com")
assert result[0].id == "s1"

def test_serverless_user_owned_first(self):
"""Among serverless, user-owned should come first."""
serverless_other = _make_warehouse("s1", "Other Serverless", State.RUNNING,
creator_name="other@example.com",
enable_serverless_compute=True)
serverless_owned = _make_warehouse("s2", "My Serverless", State.RUNNING,
creator_name="me@example.com",
enable_serverless_compute=True)
result = _sort_within_tier([serverless_other, serverless_owned],
current_user="me@example.com")
serverless_other = _make_warehouse(
"s1", "Other Serverless", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True
)
serverless_owned = _make_warehouse(
"s2", "My Serverless", State.RUNNING, creator_name="me@example.com", enable_serverless_compute=True
)
result = _sort_within_tier([serverless_other, serverless_owned], current_user="me@example.com")
assert result[0].id == "s2"
assert result[1].id == "s1"

Expand All @@ -177,53 +172,46 @@ def test_empty_list(self):
def test_no_current_user(self):
"""Without a current user, only serverless preference applies."""
classic = _make_warehouse("c1", "Classic", State.RUNNING)
serverless = _make_warehouse("s1", "Serverless", State.RUNNING,
enable_serverless_compute=True)
serverless = _make_warehouse("s1", "Serverless", State.RUNNING, enable_serverless_compute=True)
result = _sort_within_tier([classic, serverless], current_user=None)
assert result[0].id == "s1"


class TestGetBestWarehouseServerless:
"""Tests for serverless preference in get_best_warehouse."""

@mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_user):
"""Among running shared warehouses, serverless should be picked."""
classic_shared = _make_warehouse("c1", "Shared WH", State.RUNNING)
serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING,
enable_serverless_compute=True)
serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, enable_serverless_compute=True)
mock_client = mock.Mock()
mock_client.warehouses.list.return_value = [classic_shared, serverless_shared]
mock_client_fn.return_value = mock_client

result = get_best_warehouse()
assert result == "s1"

@mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user):
"""Among running non-shared warehouses, serverless should be picked."""
classic = _make_warehouse("c1", "My WH", State.RUNNING)
serverless = _make_warehouse("s1", "Fast WH", State.RUNNING,
enable_serverless_compute=True)
serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, enable_serverless_compute=True)
mock_client = mock.Mock()
mock_client.warehouses.list.return_value = [classic, serverless]
mock_client_fn.return_value = mock_client

result = get_best_warehouse()
assert result == "s1"

@mock.patch("databricks_tools_core.sql.warehouse.get_current_username",
return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com")
@mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client")
def test_tier_order_preserved_over_serverless(self, mock_client_fn, mock_user):
"""A running shared classic should still beat a stopped serverless."""
running_shared_classic = _make_warehouse("c1", "Shared WH", State.RUNNING)
stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED,
enable_serverless_compute=True)
stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, enable_serverless_compute=True)
mock_client = mock.Mock()
mock_client.warehouses.list.return_value = [stopped_serverless, running_shared_classic]
mock_client_fn.return_value = mock_client
Expand Down
2 changes: 1 addition & 1 deletion install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ install_skills() {
# Determine target directories (array so paths with spaces work)
for tool in $TOOLS; do
case $tool in
claude) dirs=("$base_dir/.claude/skills") ;;
claude) dirs+=("$base_dir/.claude/skills") ;;
cursor) echo "$TOOLS" | grep -q claude || dirs+=("$base_dir/.cursor/skills") ;;
copilot) dirs+=("$base_dir/.github/skills") ;;
codex) dirs+=("$base_dir/.agents/skills") ;;
Expand Down