diff --git a/databricks-skills/databricks-config/SKILL.md b/databricks-skills/databricks-config/SKILL.md index 88382c33..34a561a2 100644 --- a/databricks-skills/databricks-config/SKILL.md +++ b/databricks-skills/databricks-config/SKILL.md @@ -20,3 +20,37 @@ Use the `manage_workspace` MCP tool for all workspace operations. Do NOT edit `~ 4. Present the result. For `status`/`switch`/`login`: show host, profile, username. For `list`: formatted table with the active profile marked. > **Note:** The switch is session-scoped — it resets on MCP server restart. For permanent profile setup, use `databricks auth login -p <profile-name>` and update `~/.databrickscfg` with `cluster_id` or `serverless_compute_id = auto`. + +## Secrets Management + +Use Databricks Secrets to store API keys, tokens, and credentials securely. Secrets are never exposed in plaintext via the API — only metadata is returned. + +### Quick Reference + +```python +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() + +# Create scope → store secret → retrieve in notebook +w.secrets.create_scope(scope="my-scope") +w.secrets.put_secret(scope="my-scope", key="api-key", string_value="sk-...") + +# In notebooks: dbutils.secrets.get(scope="my-scope", key="api-key") +``` + +### Secret ACLs + +```python +# Grant READ to a group +w.secrets.put_acl(scope="my-scope", principal="data-team", permission="READ") +# Permissions: READ (get values), WRITE (put/delete secrets), MANAGE (full control + ACLs) +``` + +### CLI Commands + +```bash +databricks secrets create-scope my-scope +databricks secrets put-secret my-scope api-key --string-value "sk-..." 
+databricks secrets list-secrets my-scope +databricks secrets delete-secret my-scope api-key +``` diff --git a/databricks-skills/databricks-model-serving/SKILL.md b/databricks-skills/databricks-model-serving/SKILL.md index 9c248aa9..ab6935e6 100644 --- a/databricks-skills/databricks-model-serving/SKILL.md +++ b/databricks-skills/databricks-model-serving/SKILL.md @@ -244,6 +244,50 @@ query_serving_endpoint( --- +## Agent Deployment Patterns + +### UC Volume Access in Serving + +Model Serving endpoints can access UC Volumes at runtime, but `mlflow.models.predict` (pre-deployment validation) **cannot**. Bundle files with a fallback: + +```python +# When logging the model — bundle files from volumes +mlflow.pyfunc.log_model( + python_model="agent.py", + code_paths=["/path/to/local/copy"], # copied from volume +) +``` + +In the agent, resolve paths with fallback: + +```python +import os +from pathlib import Path + +def _resolve_path() -> str: + volume_path = os.environ.get("MY_PATH", "/Volumes/catalog/schema/volume") + if Path(volume_path).exists(): + return volume_path + for candidate in [Path(__file__).parent / "bundle", Path(__file__).parent / "code" / "bundle"]: + if candidate.exists(): + return str(candidate) + return volume_path +``` + +### Notebook Deployment Pattern (%%writefile) + +1. `%pip install ...` + `dbutils.library.restartPython()` +2. Set widgets + config + `os.environ[...] = ...` +3. `%%writefile agent.py` — agent code with env var fallbacks +4. `dbutils.library.restartPython()` — **critical** to pick up new file +5. `from agent import AGENT` — test locally +6. `mlflow.pyfunc.log_model(python_model="agent.py", ...)` +7. `agents.deploy(...)` + wait loop + +> The `restartPython()` between writing and importing is critical — without it, Python caches a stale module. 
+ +--- + ## Common Issues | Issue | Solution | @@ -254,6 +297,9 @@ query_serving_endpoint( | **Tool timeout** | Use job-based deployment, not synchronous calls | | **Auth error on endpoint** | Ensure `resources` specified in `log_model` for auto passthrough | | **Model not found** | Check Unity Catalog path: `catalog.schema.model_name` | +| **UC Volume not found during `mlflow.models.predict`** | Bundle files via `code_paths` + add fallback path resolution (see Deployment Patterns above) | +| **Stale module after `%%writefile`** | Add `dbutils.library.restartPython()` between write and import | +| **`UPDATE_FAILED` with `DEPLOYMENT_ABORTED`** | Infrastructure timeout — retry deployment usually works | ### Critical: ResponsesAgent Output Format diff --git a/databricks-skills/databricks-unity-catalog/SKILL.md b/databricks-skills/databricks-unity-catalog/SKILL.md index 30f34e3d..cd0e3a19 100644 --- a/databricks-skills/databricks-unity-catalog/SKILL.md +++ b/databricks-skills/databricks-unity-catalog/SKILL.md @@ -99,6 +99,30 @@ mcp__databricks__execute_sql( ) ``` +## Cost & Billing Analysis + +Use `system.billing.usage` joined with `system.billing.list_prices` for cost analysis: + +```sql +-- Top 10 cost drivers (last 30 days) +SELECT + u.billing_origin_product, + ROUND(SUM(u.usage_quantity), 2) AS total_dbus, + ROUND(SUM(u.usage_quantity * COALESCE(p.pricing.effective_list.default, p.pricing.default)), 2) AS est_cost_usd +FROM system.billing.usage u +LEFT JOIN system.billing.list_prices p + ON u.sku_name = p.sku_name AND u.cloud = p.cloud + AND u.usage_unit = p.usage_unit AND p.price_end_time IS NULL +WHERE u.usage_date >= CURRENT_DATE() - INTERVAL 30 DAYS +GROUP BY u.billing_origin_product +ORDER BY est_cost_usd DESC +LIMIT 10 +``` + +> **CRITICAL:** `usage_metadata` is a STRUCT — use dot notation (`usage_metadata.job_id`), NOT colon notation (`usage_metadata:job_id`). 
+ +Key `usage_metadata` fields: `.job_id`, `.job_name`, `.warehouse_id`, `.endpoint_name`, `.cluster_id`, `.notebook_path`, `.dlt_pipeline_id`, `.app_name`. + ## Best Practices 1. **Filter by date** - System tables can be large; always use date filters diff --git a/databricks-tools-core/databricks_tools_core/auth.py b/databricks-tools-core/databricks_tools_core/auth.py index 21913983..c3db9fb4 100644 --- a/databricks-tools-core/databricks_tools_core/auth.py +++ b/databricks-tools-core/databricks_tools_core/auth.py @@ -160,9 +160,7 @@ def get_workspace_client() -> WorkspaceClient: # Cross-workspace: explicit token overrides env OAuth so tool operations # target the caller-specified workspace instead of the app's own workspace if force and host and token: - return tag_client( - WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs) - ) + return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)) # In Databricks Apps (OAuth credentials in env), explicitly use OAuth M2M. 
# Setting auth_type="oauth-m2m" prevents the SDK from also reading @@ -185,9 +183,7 @@ def get_workspace_client() -> WorkspaceClient: # Development mode: use explicit token if provided if host and token: - return tag_client( - WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs) - ) + return tag_client(WorkspaceClient(host=host, token=token, auth_type="pat", **product_kwargs)) if host: return tag_client(WorkspaceClient(host=host, **product_kwargs)) diff --git a/databricks-tools-core/tests/unit/test_sql.py b/databricks-tools-core/tests/unit/test_sql.py index d1b661c6..42137ba5 100644 --- a/databricks-tools-core/tests/unit/test_sql.py +++ b/databricks-tools-core/tests/unit/test_sql.py @@ -121,8 +121,7 @@ def test_executor_without_query_tags_omits_from_api(self, mock_get_client): assert "query_tags" not in call_kwargs -def _make_warehouse(id, name, state, creator_name="other@example.com", - enable_serverless_compute=False): +def _make_warehouse(id, name, state, creator_name="other@example.com", enable_serverless_compute=False): """Helper to create a mock warehouse object.""" w = mock.Mock() w.id = id @@ -141,33 +140,29 @@ class TestSortWithinTier: def test_serverless_first(self): """Serverless warehouses should come before classic ones.""" classic = _make_warehouse("c1", "Classic WH", State.RUNNING) - serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Serverless WH", State.RUNNING, enable_serverless_compute=True) result = _sort_within_tier([classic, serverless], current_user=None) assert result[0].id == "s1" assert result[1].id == "c1" def test_serverless_before_user_owned(self): """Serverless should be preferred over user-owned classic.""" - classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, - creator_name="me@example.com") - serverless_other = _make_warehouse("s1", "Other WH", State.RUNNING, - creator_name="other@example.com", - 
enable_serverless_compute=True) - result = _sort_within_tier([classic_owned, serverless_other], - current_user="me@example.com") + classic_owned = _make_warehouse("c1", "My WH", State.RUNNING, creator_name="me@example.com") + serverless_other = _make_warehouse( + "s1", "Other WH", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True + ) + result = _sort_within_tier([classic_owned, serverless_other], current_user="me@example.com") assert result[0].id == "s1" def test_serverless_user_owned_first(self): """Among serverless, user-owned should come first.""" - serverless_other = _make_warehouse("s1", "Other Serverless", State.RUNNING, - creator_name="other@example.com", - enable_serverless_compute=True) - serverless_owned = _make_warehouse("s2", "My Serverless", State.RUNNING, - creator_name="me@example.com", - enable_serverless_compute=True) - result = _sort_within_tier([serverless_other, serverless_owned], - current_user="me@example.com") + serverless_other = _make_warehouse( + "s1", "Other Serverless", State.RUNNING, creator_name="other@example.com", enable_serverless_compute=True + ) + serverless_owned = _make_warehouse( + "s2", "My Serverless", State.RUNNING, creator_name="me@example.com", enable_serverless_compute=True + ) + result = _sort_within_tier([serverless_other, serverless_owned], current_user="me@example.com") assert result[0].id == "s2" assert result[1].id == "s1" @@ -177,8 +172,7 @@ def test_empty_list(self): def test_no_current_user(self): """Without a current user, only serverless preference applies.""" classic = _make_warehouse("c1", "Classic", State.RUNNING) - serverless = _make_warehouse("s1", "Serverless", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Serverless", State.RUNNING, enable_serverless_compute=True) result = _sort_within_tier([classic, serverless], current_user=None) assert result[0].id == "s1" @@ -186,14 +180,12 @@ def test_no_current_user(self): class 
TestGetBestWarehouseServerless: """Tests for serverless preference in get_best_warehouse.""" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_user): """Among running shared warehouses, serverless should be picked.""" classic_shared = _make_warehouse("c1", "Shared WH", State.RUNNING) - serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, - enable_serverless_compute=True) + serverless_shared = _make_warehouse("s1", "Shared Serverless", State.RUNNING, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [classic_shared, serverless_shared] mock_client_fn.return_value = mock_client @@ -201,14 +193,12 @@ def test_prefers_serverless_within_running_shared(self, mock_client_fn, mock_use result = get_best_warehouse() assert result == "s1" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user): """Among running non-shared warehouses, serverless should be picked.""" classic = _make_warehouse("c1", "My WH", State.RUNNING) - serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, - enable_serverless_compute=True) + serverless = _make_warehouse("s1", "Fast WH", State.RUNNING, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [classic, serverless] mock_client_fn.return_value = mock_client @@ -216,14 +206,12 @@ def 
test_prefers_serverless_within_running_other(self, mock_client_fn, mock_user result = get_best_warehouse() assert result == "s1" - @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", - return_value="me@example.com") + @mock.patch("databricks_tools_core.sql.warehouse.get_current_username", return_value="me@example.com") @mock.patch("databricks_tools_core.sql.warehouse.get_workspace_client") def test_tier_order_preserved_over_serverless(self, mock_client_fn, mock_user): """A running shared classic should still beat a stopped serverless.""" running_shared_classic = _make_warehouse("c1", "Shared WH", State.RUNNING) - stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, - enable_serverless_compute=True) + stopped_serverless = _make_warehouse("s1", "Fast WH", State.STOPPED, enable_serverless_compute=True) mock_client = mock.Mock() mock_client.warehouses.list.return_value = [stopped_serverless, running_shared_classic] mock_client_fn.return_value = mock_client