From c2a31f1c6ef4cd5fbb863e2c03dd970885566fb5 Mon Sep 17 00:00:00 2001 From: Arseny Kravchenko Date: Thu, 12 Mar 2026 14:27:52 +0100 Subject: [PATCH 1/4] add more tips on non-appkit --- manifest.json | 15 +- skills/databricks-apps/SKILL.md | 193 ++++++++++++- .../references/platform-guide.md | 269 ++++++++++++++++++ 3 files changed, 461 insertions(+), 16 deletions(-) create mode 100644 skills/databricks-apps/references/platform-guide.md diff --git a/manifest.json b/manifest.json index 02a7801..750917a 100644 --- a/manifest.json +++ b/manifest.json @@ -1,10 +1,10 @@ { "version": "1", - "updated_at": "2026-03-10T11:33:11Z", + "updated_at": "2026-03-12T13:25:42Z", "skills": { "databricks": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md", "asset-bundles.md", @@ -14,8 +14,8 @@ ] }, "databricks-apps": { - "version": "0.1.0", - "updated_at": "2026-03-10T11:32:58Z", + "version": "0.1.1", + "updated_at": "2026-03-12T13:25:31Z", "files": [ "SKILL.md", "references/appkit/appkit-sdk.md", @@ -24,26 +24,27 @@ "references/appkit/overview.md", "references/appkit/sql-queries.md", "references/appkit/trpc.md", + "references/platform-guide.md", "references/testing.md" ] }, "databricks-jobs": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md" ] }, "databricks-lakebase": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:58Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md" ] }, "databricks-pipelines": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md", "references/auto-cdc-python.md", diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index b4e25f1..955669c 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -3,7 +3,7 @@ name: databricks-apps description: Build apps on Databricks Apps 
platform. Use when asked to create dashboards, data apps, analytics tools, or visualizations. Invoke BEFORE starting implementation. compatibility: Requires databricks CLI (>= v0.292.0) metadata: - version: "0.1.0" + version: "0.1.1" parent: databricks --- @@ -23,16 +23,189 @@ Build apps that deploy to Databricks Apps platform. | Using `useAnalyticsQuery` | [AppKit SDK](references/appkit/appkit-sdk.md) | | Adding API endpoints | [tRPC Guide](references/appkit/trpc.md) | | Using Lakebase (OLTP database) | [Lakebase Guide](references/appkit/lakebase.md) | +| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js,etc.) | [Platform Guide](references/platform-guide.md) | ## Generic Guidelines -These apply regardless of framework: +- **App name**: ≤26 characters, lowercase letters/numbers/hyphens only (no underscores). dev- prefix adds 4 chars, max 30 total. +- **Validation**: `databricks apps validate --profile ` before deploying. +- **Smoke tests** (AppKit only): ALWAYS update `tests/smoke.spec.ts` selectors BEFORE running validation. Default template checks for "Minimal Databricks App" heading and "hello world" text — these WILL fail in your custom app. See [testing guide](references/testing.md). +- **Authentication**: covered by parent `databricks` skill. -- **Deployment**: `databricks apps deploy --profile ` (⚠️ USER CONSENT REQUIRED) -- **Validation**: `databricks apps validate --profile ` before deploying -- **App name**: Must be ≤26 characters, lowercase letters/numbers/hyphens only (no underscores). dev- prefix adds 4 chars, max 30 total. -- **Smoke tests**: ALWAYS update `tests/smoke.spec.ts` selectors BEFORE running validation. Default template checks for "Minimal Databricks App" heading and "hello world" text — these WILL fail in your custom app. See [testing guide](references/testing.md). 
-- **Authentication**: covered by parent `databricks` skill +## Platform Rules + +These rules apply to ALL Databricks Apps regardless of framework (AppKit, Streamlit, FastAPI, etc.). + +### Service Principal Permissions + +**The #1 cause of runtime crashes after deployment.** + +When your app uses a Databricks resource (SQL warehouse, model serving endpoint, vector search index, volume, secret scope), the app's **service principal** must have explicit permissions on that resource. + +#### ⚠️ CRITICAL AGENT BEHAVIOR +After adding ANY resource to the app, IMMEDIATELY grant the SP access. Do NOT wait for a permission error — do it proactively every time. + +#### How to Grant Permissions + +```bash +# 1. find the app's service principal +databricks apps get --profile +# look for service_principal_id in the output + +# 2. grant permissions per resource type: + +# SQL Warehouse +databricks warehouses set-permissions \ + --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' \ + --profile + +# Model Serving Endpoint +databricks serving-endpoints set-permissions \ + --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_QUERY"}]}' \ + --profile + +# Secret Scope — deploying user needs MANAGE permission +databricks secrets put-acl READ --profile + +# Unity Catalog resources (tables, volumes, vector search indexes) +# use SQL GRANT statements via a SQL warehouse: +# GRANT SELECT ON TABLE catalog.schema.table TO `` +# GRANT READ_VOLUME ON VOLUME catalog.schema.volume TO `` +``` + +#### Permission Matrix + +| Resource Type | Permission Level | Notes | +|---------------|-----------------|-------| +| SQL Warehouse | CAN_USE | Minimum for query execution | +| Model Serving Endpoint | CAN_QUERY | For inference calls | +| Vector Search Index | SELECT on underlying table | VS index is a UC securable of type TABLE | +| Volume | READ_VOLUME or WRITE_VOLUME | Via UC GRANT | +| Secret Scope | READ | Deploying 
user needs MANAGE | +| Feature Table | SELECT | Via UC GRANT | + +### Resource Types & Injection + +**NEVER hardcode workspace-specific IDs in source code.** Always inject via environment variables with `valueFrom`. + +| Resource Type | Default Key | Use Case | +|---------------|-------------|----------| +| SQL Warehouse | `sql-warehouse` | Query compute | +| Model Serving Endpoint | `serving-endpoint` | Model inference | +| Vector Search Index | `vector-search-index` | Semantic search | +| Lakebase Database | `database` | OLTP storage | +| Secret | `secret` | Sensitive values | +| UC Table | `table` | Structured data | +| UC Connection | `connection` | External data sources | +| Genie Space | `genie-space` | AI analytics | +| MLflow Experiment | `experiment` | ML tracking | +| Lakeflow Job | `job` | Data workflows | +| UDF | `function` | SQL/Python functions | +| Databricks App | `app` | App-to-app communication | + +```python +# ✅ GOOD +warehouse_id = os.environ["DATABRICKS_WAREHOUSE_ID"] +``` + +```yaml +# app.yaml / databricks.yml env section +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: SERVING_ENDPOINT + valueFrom: serving-endpoint +``` + +### Authentication: OBO vs Service Principal + +| Context | When Used | Token Source | Cached Per | +|---------|-----------|--------------|------------| +| **Service Principal (SP)** | Default; background tasks, shared data | Auto-injected `DATABRICKS_CLIENT_ID` + `DATABRICKS_CLIENT_SECRET` | All users (shared) | +| **On-Behalf-Of (OBO)** | User-specific data, user-scoped access | `x-forwarded-access-token` header | Per user | + +**SP auth** is auto-configured — `WorkspaceClient()` picks up injected env vars. 
+ +**OBO** requires extracting the token from request headers and declaring scopes: + +| Scope | Purpose | +|-------|---------| +| `sql` | Query SQL warehouses | +| `dashboards.genie` | Manage Genie spaces | +| `files.files` | Manage files/directories | +| `iam.access-control:read` | Read permissions (default) | +| `iam.current-user:read` | Read current user info (default) | + +⚠️ Databricks blocks access outside approved scopes even if the user has permission. + +### Deployment Workflow + +```bash +# 1. validate +databricks apps validate --profile + +# 2. deploy code +databricks bundle deploy -t --profile + +# 3. apply config and start/restart the app +databricks bundle run -t --profile +``` + +❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. + +#### ⚠️ Destructive Updates Warning + +`databricks apps update` (and `bundle run`) performs a **full replacement**, not a merge: +- Adding a new resource can silently **wipe** existing `user_api_scopes` +- OBO permissions may be stripped on every deployment + +**Workaround:** After each deployment, verify OBO scopes are intact. 
+ +### Runtime Environment + +| Constraint | Value | +|------------|-------| +| Max file size | 10 MB per file | +| Available port | Only `DATABRICKS_APP_PORT` | +| Auto-injected env vars | `DATABRICKS_HOST`, `DATABRICKS_APP_PORT`, `DATABRICKS_APP_NAME`, `DATABRICKS_WORKSPACE_ID`, `DATABRICKS_CLIENT_ID`, `DATABRICKS_CLIENT_SECRET` | +| No root access | Cannot use `apt-get`, `yum`, or `apk` — use PyPI/npm packages only | +| Graceful shutdown | SIGTERM → 15 seconds to shut down → SIGKILL | +| Logging | Only stdout/stderr are captured — file-based logs are lost on container recycle | +| Filesystem | Ephemeral — no persistent local storage; use UC Volumes/tables | + +### Compute & Limits + +| Size | RAM | vCPU | DBU/hour | Notes | +|------|-----|------|----------|-------| +| Medium | 6 GB | Up to 2 | 0.5 | Default | +| Large | 12 GB | Up to 4 | 1.0 | Select during app creation or edit | + +- No GPU access. Use model serving endpoints for inference. +- Apps must start within **10 minutes** (including dependency installation). +- Max apps per workspace: **100**. + +### HTTP Proxy & Streaming + +The Databricks Apps reverse proxy enforces a **120-second per-request timeout** (NOT configurable). + +| Behavior | Detail | +|----------|--------| +| 504 in app logs? | **No** — the error is generated at the proxy. App logs show nothing. | +| SSE streaming | Responses may be **buffered** and delivered in chunks, not token-by-token | +| WebSockets | Bypass the 120s limit — working but undocumented | + +For long-running agent interactions, use **WebSockets** instead of SSE. 
+ +### Common Errors + +| Error | Cause | Fix | +|-------|-------|-----| +| `PERMISSION_DENIED` after deploy | SP missing permissions | Grant SP access to all declared resources | +| App deploys but config doesn't change | Only ran `bundle deploy` | Also run `bundle run ` | +| `File is larger than 10485760 bytes` | Bundled dependencies | Use requirements.txt / package.json | +| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | +| `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not DABs variables | +| 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations | ## Project Structure (after `databricks apps init --features analytics`) - `client/src/App.tsx` — main React component (start here) @@ -143,6 +316,8 @@ databricks apps init --name my-app-name --features analytics --set "..." --profi `databricks apps init` creates directories in kebab-case matching the app name. App names must be lowercase with hyphens only (≤26 chars). -### Other Frameworks +### Other Frameworks (Streamlit, FastAPI, Flask, Gradio, Dash, Next.js, etc.) + +Databricks Apps supports any framework that runs as an HTTP server. LLMs already know these frameworks — the challenge is Databricks platform integration. -Databricks Apps supports any framework that can run as a web server (Flask, FastAPI, Streamlit, Gradio, etc.). Use standard framework documentation - this skill focuses on AppKit. +**READ [Platform Guide](references/platform-guide.md) BEFORE building any non-AppKit app.** It covers port/host configuration, `app.yaml` and `databricks.yml` setup, dependency management, networking, and framework-specific gotchas. For universal platform rules (permissions, deployment, timeouts), see the Platform Rules section above. 
diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md new file mode 100644 index 0000000..45a6c19 --- /dev/null +++ b/skills/databricks-apps/references/platform-guide.md @@ -0,0 +1,269 @@ +# Databricks Apps Platform Guide (Non-AppKit Frameworks) + +This guide covers framework-specific setup for non-AppKit apps (Streamlit, FastAPI, Flask, Gradio, Dash, Django, Next.js, React, etc.). + +For universal platform rules (permissions, deployment, timeouts, resource injection), see the main skill. + +## 1. Port & Host Configuration + +**The #1 cause of 502 Bad Gateway errors.** + +| Setting | Required Value | Common Mistake | +|---------|---------------|----------------| +| Port | `DATABRICKS_APP_PORT` env var | Hardcoding 8080, 3000, or 3001 | +| Host | `0.0.0.0` | Binding to `localhost` or `127.0.0.1` | + +The platform dynamically assigns a port via `DATABRICKS_APP_PORT`. Use `8000` as a local dev fallback only. + +### Framework-Specific Port Configuration + +#### Streamlit +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +``` + +#### FastAPI / Uvicorn +```python +if __name__ == "__main__": + import uvicorn + port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) + uvicorn.run(app, host="0.0.0.0", port=port) +``` + +#### Flask +```python +port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) +app.run(host="0.0.0.0", port=port) +``` + +#### Gradio +```python +demo.launch(server_name="0.0.0.0", + server_port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Dash +```python +app.run(host="0.0.0.0", + port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Next.js +```json +// package.json +"scripts": { + "start": "next start -p ${DATABRICKS_APP_PORT:-8000} -H 0.0.0.0" +} +``` + +⚠️ **Only ONE service can bind to `DATABRICKS_APP_PORT`.** If you need multiple services (e.g., frontend 
+ backend), use a reverse proxy or serve everything from one process. + +## 2. app.yaml vs databricks.yml + +These two files serve different purposes. Getting them wrong causes silent deployment failures. + +### app.yaml — Runtime Configuration +- Defines the **start command** and **environment variables** for the running app +- Used by the Databricks Apps runtime directly +- `valueFrom:` injects resource IDs from workspace configuration + +```yaml +# app.yaml +command: + - python + - app.py +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: MY_CUSTOM_VAR + value: "some-value" +``` + +### databricks.yml — Bundle/Deployment Configuration +- Defines the **app resource** for DABs (Databricks Asset Bundles) +- `config:` section only takes effect after `bundle run`, NOT just `bundle deploy` + +```yaml +# databricks.yml +bundle: + name: my-app-bundle + +resources: + apps: + my-app: + name: my-app + source_code_path: . + config: + command: ['python', 'app.py'] + env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + permissions: + - service_principal_name: ${bundle.target}.my-app + level: CAN_MANAGE + +targets: + dev: + default: true +``` + +### Critical Rules + +| Rule | Why | +|------|-----| +| Always provide BOTH `app.yaml` AND `databricks.yml` config | UI deployments use app.yaml; DABs uses databricks.yml | +| Always run `bundle deploy` THEN `bundle run ` | `deploy` uploads code; `run` applies config and starts the app | +| Never use `${var.xxx}` in config env values | Variables are NOT resolved in config — values appear literally | + +## 3. 
Using OBO in Non-AppKit Apps + +```python +# FastAPI example +from fastapi import Request +from databricks.sdk import WorkspaceClient + +@app.get("/user-data") +def get_user_data(request: Request): + token = request.headers.get("x-forwarded-access-token") + + # create user-scoped client + w = WorkspaceClient(token=token, host=os.environ["DATABRICKS_HOST"]) + # use w for user-scoped operations +``` + +```python +# SP auth is auto-configured — just use the SDK +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() # picks up auto-injected env vars +``` + +## 4. Framework-Specific Timeout Gotchas + +| Framework | Default Timeout | Fix | +|-----------|----------------|-----| +| Gradio | 30 seconds (internal) | Set `fn` timeout explicitly or use `gradio.queue()` | +| Gunicorn | 30 seconds (worker timeout) | Set `--timeout 120` in gunicorn command | +| Uvicorn | None (no default timeout) | Already fine | + +## 5. Common Errors (Non-AppKit Specific) + +| Error | Cause | Fix | +|-------|-------|-----| +| 502 Bad Gateway | Wrong port or host | Bind to `0.0.0.0:${DATABRICKS_APP_PORT:-8000}` | +| App works locally but 502 in prod | Binding to localhost | Change to `0.0.0.0` | +| `ModuleNotFoundError` at runtime | Dependency not in requirements.txt or version conflict | Pin exact versions; validate locally first | +| Wrong script runs on deploy | No `command` in app.yaml, platform picked wrong .py file | Always specify `command` explicitly in app.yaml | +| `apt-get: command not found` | No root access in container | Use pure-Python wheels from PyPI; no system packages | + +## 6. Dependency Management + +### Python + +Only `requirements.txt` is natively supported. No native support for `pyproject.toml`, `uv.lock`, or Poetry. + +**Workaround for `uv`:** +``` +# requirements.txt +uv +``` +```yaml +# app.yaml +command: + - uv + - run + - app.py +``` +Define actual dependencies in `pyproject.toml`. 
Note: This moves dependency installation from build to run step, slowing startup. + +**Custom package repositories:** +- Set `PIP_INDEX_URL` as a secret in the app configuration +- Deploying user needs **MANAGE** permission on the secret scope (not just USE/READ) + +### Node.js + +- `package.json` is supported — `npm install` runs at startup +- Do NOT include `node_modules/` in source code (10 MB file limit) +- Large npm installs may exceed the 10-minute startup window +- In egress-restricted workspaces, add `registry.npmjs.org` to egress policy AND restart the app (egress changes require restart) + +## 7. Networking & CORS + +### CORS + +- CORS headers are **not customizable** on the Databricks Apps reverse proxy +- Workspace origin (`*.databricks.com`) differs from app origin (`*.databricksapps.com`) +- Cross-app API calls return **302 redirect to login page** instead of the expected response + +**Workaround:** Keep frontend and backend in a single app to avoid CORS entirely. + +### Private Link / Hardened Environments + +- Azure apps use `*.azure.databricksapps.com` — NOT `*.azuredatabricks.net` +- Existing Private Link DNS zones don't cover the apps domain +- Fix: Create a separate Private DNS Zone for `azure.databricksapps.com` with conditional DNS forwarding + +### Egress Restrictions + +- Egress policy changes require **app restart** to take effect +- For npm: allowlist `registry.npmjs.org` +- For pip: allowlist `pypi.org` and `files.pythonhosted.org` +- For custom registries: use `PIP_INDEX_URL` secret (see Dependency Management) + +## 8. 
Streamlit-Specific Gotchas + +### Required Environment Variables + +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +env: + - name: STREAMLIT_SERVER_ENABLE_CORS + value: "false" + - name: STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION + value: "false" +``` + +⚠️ **Both CORS and XSRF must be disabled** for Streamlit on Databricks Apps. The reverse proxy origin (`*.databricksapps.com`) differs from the workspace origin, triggering Streamlit's CORS/XSRF protection. + +### OBO Token Staleness + +Streamlit caches initial HTTP request headers, then switches to WebSocket. The OBO token from `x-forwarded-access-token` **never refreshes** — it goes stale. + +**Workaround:** Periodically trigger a full page refresh. No clean in-Streamlit solution exists. + +### Connection Exhaustion (Hangs After Initial Queries) + +Streamlit re-runs the entire script on every user interaction. If `sql.connect()` is called during each render cycle, the rapid succession of TCP handshakes and OAuth negotiations exhausts the connection pool, causing 2-3 minute freezes. + +**Fix:** Use `@st.cache_resource` to maintain persistent connections: +```python +@st.cache_resource +def get_connection(): + from databricks import sql + from databricks.sdk.core import Config + cfg = Config() + return sql.connect( + server_hostname=cfg.host, + http_path=os.environ["DATABRICKS_HTTP_PATH"], + credentials_provider=lambda: cfg.authenticate, + ) +``` + +### Transient 502s During Startup + +Streamlit apps commonly show brief 502 errors during startup. This is expected and does not indicate a problem. 
From 873a69eb634311457cff62c7659d6ea6f9d38f50 Mon Sep 17 00:00:00 2001 From: Arseny Kravchenko Date: Thu, 12 Mar 2026 14:34:06 +0100 Subject: [PATCH 2/4] restructure --- manifest.json | 5 +- skills/databricks-apps/SKILL.md | 180 +-------- .../references/other-frameworks.md | 269 ++++++++++++++ .../references/platform-guide.md | 349 +++++++----------- 4 files changed, 403 insertions(+), 400 deletions(-) create mode 100644 skills/databricks-apps/references/other-frameworks.md diff --git a/manifest.json b/manifest.json index 750917a..c7d5709 100644 --- a/manifest.json +++ b/manifest.json @@ -1,6 +1,6 @@ { "version": "1", - "updated_at": "2026-03-12T13:25:42Z", + "updated_at": "2026-03-12T13:32:45Z", "skills": { "databricks": { "version": "0.1.0", @@ -15,7 +15,7 @@ }, "databricks-apps": { "version": "0.1.1", - "updated_at": "2026-03-12T13:25:31Z", + "updated_at": "2026-03-12T13:32:37Z", "files": [ "SKILL.md", "references/appkit/appkit-sdk.md", @@ -24,6 +24,7 @@ "references/appkit/overview.md", "references/appkit/sql-queries.md", "references/appkit/trpc.md", + "references/other-frameworks.md", "references/platform-guide.md", "references/testing.md" ] diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index 955669c..9478c4f 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -23,7 +23,8 @@ Build apps that deploy to Databricks Apps platform. | Using `useAnalyticsQuery` | [AppKit SDK](references/appkit/appkit-sdk.md) | | Adding API endpoints | [tRPC Guide](references/appkit/trpc.md) | | Using Lakebase (OLTP database) | [Lakebase Guide](references/appkit/lakebase.md) | -| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js,etc.) | [Platform Guide](references/platform-guide.md) | +| Platform rules (permissions, deployment, limits) | [Platform Guide](references/platform-guide.md) — READ for ALL apps including AppKit | +| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js, etc.) 
| [Other Frameworks](references/other-frameworks.md) | ## Generic Guidelines @@ -32,181 +33,6 @@ Build apps that deploy to Databricks Apps platform. - **Smoke tests** (AppKit only): ALWAYS update `tests/smoke.spec.ts` selectors BEFORE running validation. Default template checks for "Minimal Databricks App" heading and "hello world" text — these WILL fail in your custom app. See [testing guide](references/testing.md). - **Authentication**: covered by parent `databricks` skill. -## Platform Rules - -These rules apply to ALL Databricks Apps regardless of framework (AppKit, Streamlit, FastAPI, etc.). - -### Service Principal Permissions - -**The #1 cause of runtime crashes after deployment.** - -When your app uses a Databricks resource (SQL warehouse, model serving endpoint, vector search index, volume, secret scope), the app's **service principal** must have explicit permissions on that resource. - -#### ⚠️ CRITICAL AGENT BEHAVIOR -After adding ANY resource to the app, IMMEDIATELY grant the SP access. Do NOT wait for a permission error — do it proactively every time. - -#### How to Grant Permissions - -```bash -# 1. find the app's service principal -databricks apps get --profile -# look for service_principal_id in the output - -# 2. 
grant permissions per resource type: - -# SQL Warehouse -databricks warehouses set-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' \ - --profile - -# Model Serving Endpoint -databricks serving-endpoints set-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_QUERY"}]}' \ - --profile - -# Secret Scope — deploying user needs MANAGE permission -databricks secrets put-acl READ --profile - -# Unity Catalog resources (tables, volumes, vector search indexes) -# use SQL GRANT statements via a SQL warehouse: -# GRANT SELECT ON TABLE catalog.schema.table TO `` -# GRANT READ_VOLUME ON VOLUME catalog.schema.volume TO `` -``` - -#### Permission Matrix - -| Resource Type | Permission Level | Notes | -|---------------|-----------------|-------| -| SQL Warehouse | CAN_USE | Minimum for query execution | -| Model Serving Endpoint | CAN_QUERY | For inference calls | -| Vector Search Index | SELECT on underlying table | VS index is a UC securable of type TABLE | -| Volume | READ_VOLUME or WRITE_VOLUME | Via UC GRANT | -| Secret Scope | READ | Deploying user needs MANAGE | -| Feature Table | SELECT | Via UC GRANT | - -### Resource Types & Injection - -**NEVER hardcode workspace-specific IDs in source code.** Always inject via environment variables with `valueFrom`. 
- -| Resource Type | Default Key | Use Case | -|---------------|-------------|----------| -| SQL Warehouse | `sql-warehouse` | Query compute | -| Model Serving Endpoint | `serving-endpoint` | Model inference | -| Vector Search Index | `vector-search-index` | Semantic search | -| Lakebase Database | `database` | OLTP storage | -| Secret | `secret` | Sensitive values | -| UC Table | `table` | Structured data | -| UC Connection | `connection` | External data sources | -| Genie Space | `genie-space` | AI analytics | -| MLflow Experiment | `experiment` | ML tracking | -| Lakeflow Job | `job` | Data workflows | -| UDF | `function` | SQL/Python functions | -| Databricks App | `app` | App-to-app communication | - -```python -# ✅ GOOD -warehouse_id = os.environ["DATABRICKS_WAREHOUSE_ID"] -``` - -```yaml -# app.yaml / databricks.yml env section -env: - - name: DATABRICKS_WAREHOUSE_ID - valueFrom: sql-warehouse - - name: SERVING_ENDPOINT - valueFrom: serving-endpoint -``` - -### Authentication: OBO vs Service Principal - -| Context | When Used | Token Source | Cached Per | -|---------|-----------|--------------|------------| -| **Service Principal (SP)** | Default; background tasks, shared data | Auto-injected `DATABRICKS_CLIENT_ID` + `DATABRICKS_CLIENT_SECRET` | All users (shared) | -| **On-Behalf-Of (OBO)** | User-specific data, user-scoped access | `x-forwarded-access-token` header | Per user | - -**SP auth** is auto-configured — `WorkspaceClient()` picks up injected env vars. - -**OBO** requires extracting the token from request headers and declaring scopes: - -| Scope | Purpose | -|-------|---------| -| `sql` | Query SQL warehouses | -| `dashboards.genie` | Manage Genie spaces | -| `files.files` | Manage files/directories | -| `iam.access-control:read` | Read permissions (default) | -| `iam.current-user:read` | Read current user info (default) | - -⚠️ Databricks blocks access outside approved scopes even if the user has permission. 
- -### Deployment Workflow - -```bash -# 1. validate -databricks apps validate --profile - -# 2. deploy code -databricks bundle deploy -t --profile - -# 3. apply config and start/restart the app -databricks bundle run -t --profile -``` - -❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. - -#### ⚠️ Destructive Updates Warning - -`databricks apps update` (and `bundle run`) performs a **full replacement**, not a merge: -- Adding a new resource can silently **wipe** existing `user_api_scopes` -- OBO permissions may be stripped on every deployment - -**Workaround:** After each deployment, verify OBO scopes are intact. - -### Runtime Environment - -| Constraint | Value | -|------------|-------| -| Max file size | 10 MB per file | -| Available port | Only `DATABRICKS_APP_PORT` | -| Auto-injected env vars | `DATABRICKS_HOST`, `DATABRICKS_APP_PORT`, `DATABRICKS_APP_NAME`, `DATABRICKS_WORKSPACE_ID`, `DATABRICKS_CLIENT_ID`, `DATABRICKS_CLIENT_SECRET` | -| No root access | Cannot use `apt-get`, `yum`, or `apk` — use PyPI/npm packages only | -| Graceful shutdown | SIGTERM → 15 seconds to shut down → SIGKILL | -| Logging | Only stdout/stderr are captured — file-based logs are lost on container recycle | -| Filesystem | Ephemeral — no persistent local storage; use UC Volumes/tables | - -### Compute & Limits - -| Size | RAM | vCPU | DBU/hour | Notes | -|------|-----|------|----------|-------| -| Medium | 6 GB | Up to 2 | 0.5 | Default | -| Large | 12 GB | Up to 4 | 1.0 | Select during app creation or edit | - -- No GPU access. Use model serving endpoints for inference. -- Apps must start within **10 minutes** (including dependency installation). -- Max apps per workspace: **100**. - -### HTTP Proxy & Streaming - -The Databricks Apps reverse proxy enforces a **120-second per-request timeout** (NOT configurable). 
- -| Behavior | Detail | -|----------|--------| -| 504 in app logs? | **No** — the error is generated at the proxy. App logs show nothing. | -| SSE streaming | Responses may be **buffered** and delivered in chunks, not token-by-token | -| WebSockets | Bypass the 120s limit — working but undocumented | - -For long-running agent interactions, use **WebSockets** instead of SSE. - -### Common Errors - -| Error | Cause | Fix | -|-------|-------|-----| -| `PERMISSION_DENIED` after deploy | SP missing permissions | Grant SP access to all declared resources | -| App deploys but config doesn't change | Only ran `bundle deploy` | Also run `bundle run ` | -| `File is larger than 10485760 bytes` | Bundled dependencies | Use requirements.txt / package.json | -| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | -| `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not DABs variables | -| 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations | - ## Project Structure (after `databricks apps init --features analytics`) - `client/src/App.tsx` — main React component (start here) - `config/queries/*.sql` — SQL query files (queryKey = filename without .sql) @@ -320,4 +146,4 @@ App names must be lowercase with hyphens only (≤26 chars). Databricks Apps supports any framework that runs as an HTTP server. LLMs already know these frameworks — the challenge is Databricks platform integration. -**READ [Platform Guide](references/platform-guide.md) BEFORE building any non-AppKit app.** It covers port/host configuration, `app.yaml` and `databricks.yml` setup, dependency management, networking, and framework-specific gotchas. For universal platform rules (permissions, deployment, timeouts), see the Platform Rules section above. 
+**READ [Other Frameworks Guide](references/other-frameworks.md) BEFORE building any non-AppKit app.** It covers port/host configuration, `app.yaml` and `databricks.yml` setup, dependency management, networking, and framework-specific gotchas. diff --git a/skills/databricks-apps/references/other-frameworks.md b/skills/databricks-apps/references/other-frameworks.md new file mode 100644 index 0000000..c658f49 --- /dev/null +++ b/skills/databricks-apps/references/other-frameworks.md @@ -0,0 +1,269 @@ +# Databricks Apps — Other Frameworks (Non-AppKit) + +Setup guide for non-AppKit apps: Streamlit, FastAPI, Flask, Gradio, Dash, Django, Next.js, React, etc. + +For universal platform rules (permissions, deployment, timeouts, resource injection), see [Platform Guide](platform-guide.md). + +## 1. Port & Host Configuration + +**The #1 cause of 502 Bad Gateway errors.** + +| Setting | Required Value | Common Mistake | +|---------|---------------|----------------| +| Port | `DATABRICKS_APP_PORT` env var | Hardcoding 8080, 3000, or 3001 | +| Host | `0.0.0.0` | Binding to `localhost` or `127.0.0.1` | + +The platform dynamically assigns a port via `DATABRICKS_APP_PORT`. Use `8000` as a local dev fallback only. 
+ +### Framework-Specific Port Configuration + +#### Streamlit +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +``` + +#### FastAPI / Uvicorn +```python +if __name__ == "__main__": + import uvicorn + port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) + uvicorn.run(app, host="0.0.0.0", port=port) +``` + +#### Flask +```python +port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) +app.run(host="0.0.0.0", port=port) +``` + +#### Gradio +```python +demo.launch(server_name="0.0.0.0", + server_port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Dash +```python +app.run(host="0.0.0.0", + port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Next.js +```json +// package.json +"scripts": { + "start": "next start -p ${DATABRICKS_APP_PORT:-8000} -H 0.0.0.0" +} +``` + +⚠️ **Only ONE service can bind to `DATABRICKS_APP_PORT`.** If you need multiple services (e.g., frontend + backend), use a reverse proxy or serve everything from one process. + +## 2. app.yaml vs databricks.yml + +These two files serve different purposes. Getting them wrong causes silent deployment failures. + +### app.yaml — Runtime Configuration +- Defines the **start command** and **environment variables** for the running app +- Used by the Databricks Apps runtime directly +- `valueFrom:` injects resource IDs from workspace configuration + +```yaml +# app.yaml +command: + - python + - app.py +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: MY_CUSTOM_VAR + value: "some-value" +``` + +### databricks.yml — Bundle/Deployment Configuration +- Defines the **app resource** for DABs (Databricks Asset Bundles) +- `config:` section only takes effect after `bundle run`, NOT just `bundle deploy` + +```yaml +# databricks.yml +bundle: + name: my-app-bundle + +resources: + apps: + my-app: + name: my-app + source_code_path: . 
+ config: + command: ['python', 'app.py'] + env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + permissions: + - service_principal_name: ${bundle.target}.my-app + level: CAN_MANAGE + +targets: + dev: + default: true +``` + +### Critical Rules + +| Rule | Why | +|------|-----| +| Always provide BOTH `app.yaml` AND `databricks.yml` config | UI deployments use app.yaml; DABs uses databricks.yml | +| Always run `bundle deploy` THEN `bundle run ` | `deploy` uploads code; `run` applies config and starts the app | +| Never use `${var.xxx}` in config env values | Variables are NOT resolved in config — values appear literally | + +## 3. Using OBO in Non-AppKit Apps + +```python +# FastAPI example +from fastapi import Request +from databricks.sdk import WorkspaceClient + +@app.get("/user-data") +def get_user_data(request: Request): + token = request.headers.get("x-forwarded-access-token") + + # create user-scoped client + w = WorkspaceClient(token=token, host=os.environ["DATABRICKS_HOST"]) + # use w for user-scoped operations +``` + +```python +# SP auth is auto-configured — just use the SDK +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() # picks up auto-injected env vars +``` + +## 4. Framework-Specific Timeout Gotchas + +| Framework | Default Timeout | Fix | +|-----------|----------------|-----| +| Gradio | 30 seconds (internal) | Set `fn` timeout explicitly or use `gradio.queue()` | +| Gunicorn | 30 seconds (worker timeout) | Set `--timeout 120` in gunicorn command | +| Uvicorn | None (no default timeout) | Already fine | + +## 5. 
Common Errors (Non-AppKit Specific) + +| Error | Cause | Fix | +|-------|-------|-----| +| 502 Bad Gateway | Wrong port or host | Bind to `0.0.0.0:${DATABRICKS_APP_PORT:-8000}` | +| App works locally but 502 in prod | Binding to localhost | Change to `0.0.0.0` | +| `ModuleNotFoundError` at runtime | Dependency not in requirements.txt or version conflict | Pin exact versions; validate locally first | +| Wrong script runs on deploy | No `command` in app.yaml, platform picked wrong .py file | Always specify `command` explicitly in app.yaml | +| `apt-get: command not found` | No root access in container | Use pure-Python wheels from PyPI; no system packages | + +## 6. Dependency Management + +### Python + +Only `requirements.txt` is natively supported. No native support for `pyproject.toml`, `uv.lock`, or Poetry. + +**Workaround for `uv`:** +``` +# requirements.txt +uv +``` +```yaml +# app.yaml +command: + - uv + - run + - app.py +``` +Define actual dependencies in `pyproject.toml`. Note: This moves dependency installation from build to run step, slowing startup. + +**Custom package repositories:** +- Set `PIP_INDEX_URL` as a secret in the app configuration +- Deploying user needs **MANAGE** permission on the secret scope (not just USE/READ) + +### Node.js + +- `package.json` is supported — `npm install` runs at startup +- Do NOT include `node_modules/` in source code (10 MB file limit) +- Large npm installs may exceed the 10-minute startup window +- In egress-restricted workspaces, add `registry.npmjs.org` to egress policy AND restart the app (egress changes require restart) + +## 7. Networking & CORS + +### CORS + +- CORS headers are **not customizable** on the Databricks Apps reverse proxy +- Workspace origin (`*.databricks.com`) differs from app origin (`*.databricksapps.com`) +- Cross-app API calls return **302 redirect to login page** instead of the expected response + +**Workaround:** Keep frontend and backend in a single app to avoid CORS entirely. 
+ +### Private Link / Hardened Environments + +- Azure apps use `*.azure.databricksapps.com` — NOT `*.azuredatabricks.net` +- Existing Private Link DNS zones don't cover the apps domain +- Fix: Create a separate Private DNS Zone for `azure.databricksapps.com` with conditional DNS forwarding + +### Egress Restrictions + +- Egress policy changes require **app restart** to take effect +- For npm: allowlist `registry.npmjs.org` +- For pip: allowlist `pypi.org` and `files.pythonhosted.org` +- For custom registries: use `PIP_INDEX_URL` secret (see Dependency Management) + +## 8. Streamlit-Specific Gotchas + +### Required Environment Variables + +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +env: + - name: STREAMLIT_SERVER_ENABLE_CORS + value: "false" + - name: STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION + value: "false" +``` + +⚠️ **Both CORS and XSRF must be disabled** for Streamlit on Databricks Apps. The reverse proxy origin (`*.databricksapps.com`) differs from the workspace origin, triggering Streamlit's CORS/XSRF protection. + +### OBO Token Staleness + +Streamlit caches initial HTTP request headers, then switches to WebSocket. The OBO token from `x-forwarded-access-token` **never refreshes** — it goes stale. + +**Workaround:** Periodically trigger a full page refresh. No clean in-Streamlit solution exists. + +### Connection Exhaustion (Hangs After Initial Queries) + +Streamlit re-runs the entire script on every user interaction. If `sql.connect()` is called during each render cycle, the rapid succession of TCP handshakes and OAuth negotiations exhausts the connection pool, causing 2-3 minute freezes. 
+ +**Fix:** Use `@st.cache_resource` to maintain persistent connections: +```python +@st.cache_resource +def get_connection(): + from databricks import sql + from databricks.sdk.core import Config + cfg = Config() + return sql.connect( + server_hostname=cfg.host, + http_path=os.environ["DATABRICKS_HTTP_PATH"], + credentials_provider=lambda: cfg.authenticate, + ) +``` + +### Transient 502s During Startup + +Streamlit apps commonly show brief 502 errors during startup. This is expected and does not indicate a problem. diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index 45a6c19..576807b 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -1,269 +1,176 @@ -# Databricks Apps Platform Guide (Non-AppKit Frameworks) +# Databricks Apps Platform Guide -This guide covers framework-specific setup for non-AppKit apps (Streamlit, FastAPI, Flask, Gradio, Dash, Django, Next.js, React, etc.). +Universal platform rules that apply to ALL Databricks Apps regardless of framework (AppKit, Streamlit, FastAPI, etc.). -For universal platform rules (permissions, deployment, timeouts, resource injection), see the main skill. +For non-AppKit framework-specific setup (port config, app.yaml, Streamlit gotchas), see [Other Frameworks](other-frameworks.md). -## 1. Port & Host Configuration +## Service Principal Permissions -**The #1 cause of 502 Bad Gateway errors.** +**The #1 cause of runtime crashes after deployment.** -| Setting | Required Value | Common Mistake | -|---------|---------------|----------------| -| Port | `DATABRICKS_APP_PORT` env var | Hardcoding 8080, 3000, or 3001 | -| Host | `0.0.0.0` | Binding to `localhost` or `127.0.0.1` | +When your app uses a Databricks resource (SQL warehouse, model serving endpoint, vector search index, volume, secret scope), the app's **service principal** must have explicit permissions on that resource. 
-The platform dynamically assigns a port via `DATABRICKS_APP_PORT`. Use `8000` as a local dev fallback only. +### ⚠️ CRITICAL AGENT BEHAVIOR +After adding ANY resource to the app, IMMEDIATELY grant the SP access. Do NOT wait for a permission error — do it proactively every time. -### Framework-Specific Port Configuration +### How to Grant Permissions -#### Streamlit -```yaml -# app.yaml -command: - - streamlit - - run - - app.py - - --server.port - - "${DATABRICKS_APP_PORT:-8000}" - - --server.address - - "0.0.0.0" -``` +```bash +# 1. find the app's service principal +databricks apps get --profile +# look for service_principal_id in the output -#### FastAPI / Uvicorn -```python -if __name__ == "__main__": - import uvicorn - port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) - uvicorn.run(app, host="0.0.0.0", port=port) -``` +# 2. grant permissions per resource type: -#### Flask -```python -port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) -app.run(host="0.0.0.0", port=port) -``` +# SQL Warehouse +databricks warehouses set-permissions \ + --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' \ + --profile -#### Gradio -```python -demo.launch(server_name="0.0.0.0", - server_port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) -``` +# Model Serving Endpoint +databricks serving-endpoints set-permissions \ + --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_QUERY"}]}' \ + --profile -#### Dash -```python -app.run(host="0.0.0.0", - port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) -``` +# Secret Scope — deploying user needs MANAGE permission +databricks secrets put-acl READ --profile -#### Next.js -```json -// package.json -"scripts": { - "start": "next start -p ${DATABRICKS_APP_PORT:-8000} -H 0.0.0.0" -} +# Unity Catalog resources (tables, volumes, vector search indexes) +# use SQL GRANT statements via a SQL warehouse: +# GRANT SELECT ON TABLE catalog.schema.table TO `` 
+# GRANT READ_VOLUME ON VOLUME catalog.schema.volume TO `` ``` -⚠️ **Only ONE service can bind to `DATABRICKS_APP_PORT`.** If you need multiple services (e.g., frontend + backend), use a reverse proxy or serve everything from one process. - -## 2. app.yaml vs databricks.yml - -These two files serve different purposes. Getting them wrong causes silent deployment failures. +### Permission Matrix + +| Resource Type | Permission Level | Notes | +|---------------|-----------------|-------| +| SQL Warehouse | CAN_USE | Minimum for query execution | +| Model Serving Endpoint | CAN_QUERY | For inference calls | +| Vector Search Index | SELECT on underlying table | VS index is a UC securable of type TABLE | +| Volume | READ_VOLUME or WRITE_VOLUME | Via UC GRANT | +| Secret Scope | READ | Deploying user needs MANAGE | +| Feature Table | SELECT | Via UC GRANT | + +## Resource Types & Injection + +**NEVER hardcode workspace-specific IDs in source code.** Always inject via environment variables with `valueFrom`. 
+ +| Resource Type | Default Key | Use Case | +|---------------|-------------|----------| +| SQL Warehouse | `sql-warehouse` | Query compute | +| Model Serving Endpoint | `serving-endpoint` | Model inference | +| Vector Search Index | `vector-search-index` | Semantic search | +| Lakebase Database | `database` | OLTP storage | +| Secret | `secret` | Sensitive values | +| UC Table | `table` | Structured data | +| UC Connection | `connection` | External data sources | +| Genie Space | `genie-space` | AI analytics | +| MLflow Experiment | `experiment` | ML tracking | +| Lakeflow Job | `job` | Data workflows | +| UDF | `function` | SQL/Python functions | +| Databricks App | `app` | App-to-app communication | -### app.yaml — Runtime Configuration -- Defines the **start command** and **environment variables** for the running app -- Used by the Databricks Apps runtime directly -- `valueFrom:` injects resource IDs from workspace configuration +```python +# ✅ GOOD +warehouse_id = os.environ["DATABRICKS_WAREHOUSE_ID"] +``` ```yaml -# app.yaml -command: - - python - - app.py +# app.yaml / databricks.yml env section env: - name: DATABRICKS_WAREHOUSE_ID valueFrom: sql-warehouse - - name: MY_CUSTOM_VAR - value: "some-value" -``` - -### databricks.yml — Bundle/Deployment Configuration -- Defines the **app resource** for DABs (Databricks Asset Bundles) -- `config:` section only takes effect after `bundle run`, NOT just `bundle deploy` - -```yaml -# databricks.yml -bundle: - name: my-app-bundle - -resources: - apps: - my-app: - name: my-app - source_code_path: . 
- config: - command: ['python', 'app.py'] - env: - - name: DATABRICKS_WAREHOUSE_ID - valueFrom: sql-warehouse - permissions: - - service_principal_name: ${bundle.target}.my-app - level: CAN_MANAGE - -targets: - dev: - default: true + - name: SERVING_ENDPOINT + valueFrom: serving-endpoint ``` -### Critical Rules +## Authentication: OBO vs Service Principal -| Rule | Why | -|------|-----| -| Always provide BOTH `app.yaml` AND `databricks.yml` config | UI deployments use app.yaml; DABs uses databricks.yml | -| Always run `bundle deploy` THEN `bundle run ` | `deploy` uploads code; `run` applies config and starts the app | -| Never use `${var.xxx}` in config env values | Variables are NOT resolved in config — values appear literally | +| Context | When Used | Token Source | Cached Per | +|---------|-----------|--------------|------------| +| **Service Principal (SP)** | Default; background tasks, shared data | Auto-injected `DATABRICKS_CLIENT_ID` + `DATABRICKS_CLIENT_SECRET` | All users (shared) | +| **On-Behalf-Of (OBO)** | User-specific data, user-scoped access | `x-forwarded-access-token` header | Per user | -## 3. Using OBO in Non-AppKit Apps +**SP auth** is auto-configured — `WorkspaceClient()` picks up injected env vars. 
-```python -# FastAPI example -from fastapi import Request -from databricks.sdk import WorkspaceClient - -@app.get("/user-data") -def get_user_data(request: Request): - token = request.headers.get("x-forwarded-access-token") +**OBO** requires extracting the token from request headers and declaring scopes: - # create user-scoped client - w = WorkspaceClient(token=token, host=os.environ["DATABRICKS_HOST"]) - # use w for user-scoped operations -``` +| Scope | Purpose | +|-------|---------| +| `sql` | Query SQL warehouses | +| `dashboards.genie` | Manage Genie spaces | +| `files.files` | Manage files/directories | +| `iam.access-control:read` | Read permissions (default) | +| `iam.current-user:read` | Read current user info (default) | -```python -# SP auth is auto-configured — just use the SDK -from databricks.sdk import WorkspaceClient -w = WorkspaceClient() # picks up auto-injected env vars -``` +⚠️ Databricks blocks access outside approved scopes even if the user has permission. -## 4. Framework-Specific Timeout Gotchas +## Deployment Workflow -| Framework | Default Timeout | Fix | -|-----------|----------------|-----| -| Gradio | 30 seconds (internal) | Set `fn` timeout explicitly or use `gradio.queue()` | -| Gunicorn | 30 seconds (worker timeout) | Set `--timeout 120` in gunicorn command | -| Uvicorn | None (no default timeout) | Already fine | +```bash +# 1. validate +databricks apps validate --profile -## 5. Common Errors (Non-AppKit Specific) +# 2. 
deploy code +databricks bundle deploy -t --profile -| Error | Cause | Fix | -|-------|-------|-----| -| 502 Bad Gateway | Wrong port or host | Bind to `0.0.0.0:${DATABRICKS_APP_PORT:-8000}` | -| App works locally but 502 in prod | Binding to localhost | Change to `0.0.0.0` | -| `ModuleNotFoundError` at runtime | Dependency not in requirements.txt or version conflict | Pin exact versions; validate locally first | -| Wrong script runs on deploy | No `command` in app.yaml, platform picked wrong .py file | Always specify `command` explicitly in app.yaml | -| `apt-get: command not found` | No root access in container | Use pure-Python wheels from PyPI; no system packages | - -## 6. Dependency Management - -### Python - -Only `requirements.txt` is natively supported. No native support for `pyproject.toml`, `uv.lock`, or Poetry. - -**Workaround for `uv`:** -``` -# requirements.txt -uv +# 3. apply config and start/restart the app +databricks bundle run -t --profile ``` -```yaml -# app.yaml -command: - - uv - - run - - app.py -``` -Define actual dependencies in `pyproject.toml`. Note: This moves dependency installation from build to run step, slowing startup. - -**Custom package repositories:** -- Set `PIP_INDEX_URL` as a secret in the app configuration -- Deploying user needs **MANAGE** permission on the secret scope (not just USE/READ) -### Node.js +❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. -- `package.json` is supported — `npm install` runs at startup -- Do NOT include `node_modules/` in source code (10 MB file limit) -- Large npm installs may exceed the 10-minute startup window -- In egress-restricted workspaces, add `registry.npmjs.org` to egress policy AND restart the app (egress changes require restart) +### ⚠️ Destructive Updates Warning -## 7. 
Networking & CORS +`databricks apps update` (and `bundle run`) performs a **full replacement**, not a merge: +- Adding a new resource can silently **wipe** existing `user_api_scopes` +- OBO permissions may be stripped on every deployment -### CORS +**Workaround:** After each deployment, verify OBO scopes are intact. -- CORS headers are **not customizable** on the Databricks Apps reverse proxy -- Workspace origin (`*.databricks.com`) differs from app origin (`*.databricksapps.com`) -- Cross-app API calls return **302 redirect to login page** instead of the expected response +## Runtime Environment -**Workaround:** Keep frontend and backend in a single app to avoid CORS entirely. +| Constraint | Value | +|------------|-------| +| Max file size | 10 MB per file | +| Available port | Only `DATABRICKS_APP_PORT` | +| Auto-injected env vars | `DATABRICKS_HOST`, `DATABRICKS_APP_PORT`, `DATABRICKS_APP_NAME`, `DATABRICKS_WORKSPACE_ID`, `DATABRICKS_CLIENT_ID`, `DATABRICKS_CLIENT_SECRET` | +| No root access | Cannot use `apt-get`, `yum`, or `apk` — use PyPI/npm packages only | +| Graceful shutdown | SIGTERM → 15 seconds to shut down → SIGKILL | +| Logging | Only stdout/stderr are captured — file-based logs are lost on container recycle | +| Filesystem | Ephemeral — no persistent local storage; use UC Volumes/tables | -### Private Link / Hardened Environments +## Compute & Limits -- Azure apps use `*.azure.databricksapps.com` — NOT `*.azuredatabricks.net` -- Existing Private Link DNS zones don't cover the apps domain -- Fix: Create a separate Private DNS Zone for `azure.databricksapps.com` with conditional DNS forwarding +| Size | RAM | vCPU | DBU/hour | Notes | +|------|-----|------|----------|-------| +| Medium | 6 GB | Up to 2 | 0.5 | Default | +| Large | 12 GB | Up to 4 | 1.0 | Select during app creation or edit | -### Egress Restrictions +- No GPU access. Use model serving endpoints for inference. 
+- Apps must start within **10 minutes** (including dependency installation). +- Max apps per workspace: **100**. -- Egress policy changes require **app restart** to take effect -- For npm: allowlist `registry.npmjs.org` -- For pip: allowlist `pypi.org` and `files.pythonhosted.org` -- For custom registries: use `PIP_INDEX_URL` secret (see Dependency Management) +## HTTP Proxy & Streaming -## 8. Streamlit-Specific Gotchas +The Databricks Apps reverse proxy enforces a **120-second per-request timeout** (NOT configurable). -### Required Environment Variables +| Behavior | Detail | +|----------|--------| +| 504 in app logs? | **No** — the error is generated at the proxy. App logs show nothing. | +| SSE streaming | Responses may be **buffered** and delivered in chunks, not token-by-token | +| WebSockets | Bypass the 120s limit — working but undocumented | -```yaml -# app.yaml -command: - - streamlit - - run - - app.py - - --server.port - - "${DATABRICKS_APP_PORT:-8000}" - - --server.address - - "0.0.0.0" -env: - - name: STREAMLIT_SERVER_ENABLE_CORS - value: "false" - - name: STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION - value: "false" -``` +For long-running agent interactions, use **WebSockets** instead of SSE. -⚠️ **Both CORS and XSRF must be disabled** for Streamlit on Databricks Apps. The reverse proxy origin (`*.databricksapps.com`) differs from the workspace origin, triggering Streamlit's CORS/XSRF protection. +## Common Errors -### OBO Token Staleness - -Streamlit caches initial HTTP request headers, then switches to WebSocket. The OBO token from `x-forwarded-access-token` **never refreshes** — it goes stale. - -**Workaround:** Periodically trigger a full page refresh. No clean in-Streamlit solution exists. - -### Connection Exhaustion (Hangs After Initial Queries) - -Streamlit re-runs the entire script on every user interaction. 
If `sql.connect()` is called during each render cycle, the rapid succession of TCP handshakes and OAuth negotiations exhausts the connection pool, causing 2-3 minute freezes. - -**Fix:** Use `@st.cache_resource` to maintain persistent connections: -```python -@st.cache_resource -def get_connection(): - from databricks import sql - from databricks.sdk.core import Config - cfg = Config() - return sql.connect( - server_hostname=cfg.host, - http_path=os.environ["DATABRICKS_HTTP_PATH"], - credentials_provider=lambda: cfg.authenticate, - ) -``` - -### Transient 502s During Startup - -Streamlit apps commonly show brief 502 errors during startup. This is expected and does not indicate a problem. +| Error | Cause | Fix | +|-------|-------|-----| +| `PERMISSION_DENIED` after deploy | SP missing permissions | Grant SP access to all declared resources | +| App deploys but config doesn't change | Only ran `bundle deploy` | Also run `bundle run ` | +| `File is larger than 10485760 bytes` | Bundled dependencies | Use requirements.txt / package.json | +| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | +| `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not DABs variables | +| 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations | From e5ac823bea01f1f25867eb4fda76a975641e3093 Mon Sep 17 00:00:00 2001 From: Arseny Kravchenko Date: Thu, 12 Mar 2026 16:24:03 +0100 Subject: [PATCH 3/4] fix review findings in databricks-apps skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fix SQL GRANT syntax: READ_VOLUME → READ VOLUME - fix Streamlit snippet: use DATABRICKS_WAREHOUSE_ID instead of nonexistent DATABRICKS_HTTP_PATH - restore deployment safety warning (USER CONSENT REQUIRED) - use jsonc tag for Next.js snippet with comments --- skills/databricks-apps/references/appkit/overview.md | 2 +- 
skills/databricks-apps/references/other-frameworks.md | 4 ++-- skills/databricks-apps/references/platform-guide.md | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/skills/databricks-apps/references/appkit/overview.md b/skills/databricks-apps/references/appkit/overview.md index 19ac011..5f96cae 100644 --- a/skills/databricks-apps/references/appkit/overview.md +++ b/skills/databricks-apps/references/appkit/overview.md @@ -19,7 +19,7 @@ See [Lakebase Guide](lakebase.md) for full Lakebase scaffolding and app-code pat 1. **Scaffold**: Run `databricks apps manifest`, then `databricks apps init` with `--features` and `--set` as in parent SKILL.md (App Manifest and Scaffolding) 2. **Develop**: `cd && npm install && npm run dev` 3. **Validate**: `databricks apps validate` -4. **Deploy**: `databricks apps deploy --profile ` +4. **Deploy**: `databricks apps deploy --profile ` (⚠️ USER CONSENT REQUIRED) ## Data Discovery (Before Writing SQL) diff --git a/skills/databricks-apps/references/other-frameworks.md b/skills/databricks-apps/references/other-frameworks.md index c658f49..ff8b2a2 100644 --- a/skills/databricks-apps/references/other-frameworks.md +++ b/skills/databricks-apps/references/other-frameworks.md @@ -57,7 +57,7 @@ app.run(host="0.0.0.0", ``` #### Next.js -```json +```jsonc // package.json "scripts": { "start": "next start -p ${DATABRICKS_APP_PORT:-8000} -H 0.0.0.0" @@ -259,7 +259,7 @@ def get_connection(): cfg = Config() return sql.connect( server_hostname=cfg.host, - http_path=os.environ["DATABRICKS_HTTP_PATH"], + http_path=f"/sql/1.0/warehouses/{os.environ['DATABRICKS_WAREHOUSE_ID']}", credentials_provider=lambda: cfg.authenticate, ) ``` diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index 576807b..49aff1c 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -38,7 +38,7 @@ databricks secrets 
put-acl READ --profile # Unity Catalog resources (tables, volumes, vector search indexes) # use SQL GRANT statements via a SQL warehouse: # GRANT SELECT ON TABLE catalog.schema.table TO `` -# GRANT READ_VOLUME ON VOLUME catalog.schema.volume TO `` +# GRANT READ VOLUME ON VOLUME catalog.schema.volume TO `` ``` ### Permission Matrix @@ -48,7 +48,7 @@ databricks secrets put-acl READ --profile | SQL Warehouse | CAN_USE | Minimum for query execution | | Model Serving Endpoint | CAN_QUERY | For inference calls | | Vector Search Index | SELECT on underlying table | VS index is a UC securable of type TABLE | -| Volume | READ_VOLUME or WRITE_VOLUME | Via UC GRANT | +| Volume | READ VOLUME or WRITE VOLUME | Via UC GRANT | | Secret Scope | READ | Deploying user needs MANAGE | | Feature Table | SELECT | Via UC GRANT | @@ -108,6 +108,8 @@ env: ## Deployment Workflow +⚠️ **USER CONSENT REQUIRED** — always confirm with the user before deploying. + ```bash # 1. validate databricks apps validate --profile From 7f63b9348f32769849b701c64bf4aee8d1de7ab8 Mon Sep 17 00:00:00 2001 From: Arseny Kravchenko Date: Fri, 13 Mar 2026 12:56:50 +0100 Subject: [PATCH 4/4] address review: auto-grant SP permissions, apps deploy Co-Authored-By: Claude Opus 4.6 --- .../references/platform-guide.md | 74 +++++++++---------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index 49aff1c..26fd486 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -10,47 +10,42 @@ For non-AppKit framework-specific setup (port config, app.yaml, Streamlit gotcha When your app uses a Databricks resource (SQL warehouse, model serving endpoint, vector search index, volume, secret scope), the app's **service principal** must have explicit permissions on that resource. 
-### ⚠️ CRITICAL AGENT BEHAVIOR -After adding ANY resource to the app, IMMEDIATELY grant the SP access. Do NOT wait for a permission error — do it proactively every time. - -### How to Grant Permissions - -```bash -# 1. find the app's service principal -databricks apps get --profile -# look for service_principal_id in the output - -# 2. grant permissions per resource type: - -# SQL Warehouse -databricks warehouses set-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' \ - --profile - -# Model Serving Endpoint -databricks serving-endpoints set-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_QUERY"}]}' \ - --profile +### How Permissions Work -# Secret Scope — deploying user needs MANAGE permission -databricks secrets put-acl READ --profile +When you declare a resource in `app.yaml` / `databricks.yml` with a `permission` field, the platform **automatically grants** that permission to the app's SP on deployment. You do NOT need to run manual `set-permissions` commands for declared resources. 
-# Unity Catalog resources (tables, volumes, vector search indexes) -# use SQL GRANT statements via a SQL warehouse: -# GRANT SELECT ON TABLE catalog.schema.table TO `` -# GRANT READ VOLUME ON VOLUME catalog.schema.volume TO `` +```yaml +# databricks.yml — declaring resources with permissions +resources: + apps: + my_app: + resources: + - name: my-warehouse + sql_warehouse: + id: ${var.warehouse_id} + permission: CAN_USE # auto-granted to SP on deploy + - name: my-endpoint + serving_endpoint: + name: ${var.endpoint_name} + permission: CAN_QUERY # auto-granted to SP on deploy ``` -### Permission Matrix +### Default Permissions by Resource Type -| Resource Type | Permission Level | Notes | -|---------------|-----------------|-------| +| Resource Type | Default Permission | Notes | +|---------------|-------------------|-------| | SQL Warehouse | CAN_USE | Minimum for query execution | | Model Serving Endpoint | CAN_QUERY | For inference calls | -| Vector Search Index | SELECT on underlying table | VS index is a UC securable of type TABLE | -| Volume | READ VOLUME or WRITE VOLUME | Via UC GRANT | -| Secret Scope | READ | Deploying user needs MANAGE | -| Feature Table | SELECT | Via UC GRANT | +| Vector Search Index (UC) | SELECT | UC securable of type TABLE | +| Volume (UC) | READ_VOLUME | Via UC securable | +| Secret Scope | READ | Deploying user needs MANAGE on the scope | +| Job | CAN_MANAGE_RUN | | +| Lakebase Database | CAN_CONNECT_AND_CREATE | | +| Genie Space | CAN_VIEW | | + +### ⚠️ CRITICAL AGENT BEHAVIOR + +Always declare resources in `databricks.yml` with the correct `permission` field — do NOT skip this. The platform handles granting automatically on deploy. ## Resource Types & Injection @@ -111,17 +106,16 @@ env: ⚠️ **USER CONSENT REQUIRED** — always confirm with the user before deploying. ```bash -# 1. 
validate
-databricks apps validate --profile 
+# Option A: direct deploy (recommended) — uploads code and restarts the app
+databricks apps deploy <app-name> --source-code-path <workspace-path> --profile <profile>

-# 2. deploy code
+# Option B: step by step (bundle-managed apps)
+databricks apps validate --profile <profile>
 databricks bundle deploy -t --profile 
-
-# 3. apply config and start/restart the app
 databricks bundle run -t --profile 
 ```
 
-❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app.
+❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. Use `databricks apps deploy` or add `bundle run` after `bundle deploy`.
 
 ### ⚠️ Destructive Updates Warning