diff --git a/manifest.json b/manifest.json index 02a7801..c7d5709 100644 --- a/manifest.json +++ b/manifest.json @@ -1,10 +1,10 @@ { "version": "1", - "updated_at": "2026-03-10T11:33:11Z", + "updated_at": "2026-03-12T13:32:45Z", "skills": { "databricks": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md", "asset-bundles.md", @@ -14,8 +14,8 @@ ] }, "databricks-apps": { - "version": "0.1.0", - "updated_at": "2026-03-10T11:32:58Z", + "version": "0.1.1", + "updated_at": "2026-03-12T13:32:37Z", "files": [ "SKILL.md", "references/appkit/appkit-sdk.md", @@ -24,26 +24,28 @@ "references/appkit/overview.md", "references/appkit/sql-queries.md", "references/appkit/trpc.md", + "references/other-frameworks.md", + "references/platform-guide.md", "references/testing.md" ] }, "databricks-jobs": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md" ] }, "databricks-lakebase": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:58Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md" ] }, "databricks-pipelines": { "version": "0.1.0", - "updated_at": "2026-03-10T11:32:46Z", + "updated_at": "2026-03-11T17:08:20Z", "files": [ "SKILL.md", "references/auto-cdc-python.md", diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index b4e25f1..9478c4f 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -3,7 +3,7 @@ name: databricks-apps description: Build apps on Databricks Apps platform. Use when asked to create dashboards, data apps, analytics tools, or visualizations. Invoke BEFORE starting implementation. compatibility: Requires databricks CLI (>= v0.292.0) metadata: - version: "0.1.0" + version: "0.1.1" parent: databricks --- @@ -23,16 +23,15 @@ Build apps that deploy to Databricks Apps platform. 
| Using `useAnalyticsQuery` | [AppKit SDK](references/appkit/appkit-sdk.md) | | Adding API endpoints | [tRPC Guide](references/appkit/trpc.md) | | Using Lakebase (OLTP database) | [Lakebase Guide](references/appkit/lakebase.md) | +| Platform rules (permissions, deployment, limits) | [Platform Guide](references/platform-guide.md) — READ for ALL apps including AppKit | +| Non-AppKit app (Streamlit, FastAPI, Flask, Gradio, Next.js, etc.) | [Other Frameworks](references/other-frameworks.md) | ## Generic Guidelines -These apply regardless of framework: - -- **Deployment**: `databricks apps deploy --profile ` (⚠️ USER CONSENT REQUIRED) -- **Validation**: `databricks apps validate --profile ` before deploying -- **App name**: Must be ≤26 characters, lowercase letters/numbers/hyphens only (no underscores). dev- prefix adds 4 chars, max 30 total. -- **Smoke tests**: ALWAYS update `tests/smoke.spec.ts` selectors BEFORE running validation. Default template checks for "Minimal Databricks App" heading and "hello world" text — these WILL fail in your custom app. See [testing guide](references/testing.md). -- **Authentication**: covered by parent `databricks` skill +- **App name**: ≤26 characters, lowercase letters/numbers/hyphens only (no underscores). dev- prefix adds 4 chars, max 30 total. +- **Validation**: `databricks apps validate --profile ` before deploying. +- **Smoke tests** (AppKit only): ALWAYS update `tests/smoke.spec.ts` selectors BEFORE running validation. Default template checks for "Minimal Databricks App" heading and "hello world" text — these WILL fail in your custom app. See [testing guide](references/testing.md). +- **Authentication**: covered by parent `databricks` skill. ## Project Structure (after `databricks apps init --features analytics`) - `client/src/App.tsx` — main React component (start here) @@ -143,6 +142,8 @@ databricks apps init --name my-app-name --features analytics --set "..." 
--profi `databricks apps init` creates directories in kebab-case matching the app name. App names must be lowercase with hyphens only (≤26 chars). -### Other Frameworks +### Other Frameworks (Streamlit, FastAPI, Flask, Gradio, Dash, Next.js, etc.) + +Databricks Apps supports any framework that runs as an HTTP server. LLMs already know these frameworks — the challenge is Databricks platform integration. -Databricks Apps supports any framework that can run as a web server (Flask, FastAPI, Streamlit, Gradio, etc.). Use standard framework documentation - this skill focuses on AppKit. +**READ [Other Frameworks Guide](references/other-frameworks.md) BEFORE building any non-AppKit app.** It covers port/host configuration, `app.yaml` and `databricks.yml` setup, dependency management, networking, and framework-specific gotchas. diff --git a/skills/databricks-apps/references/appkit/overview.md b/skills/databricks-apps/references/appkit/overview.md index 19ac011..5f96cae 100644 --- a/skills/databricks-apps/references/appkit/overview.md +++ b/skills/databricks-apps/references/appkit/overview.md @@ -19,7 +19,7 @@ See [Lakebase Guide](lakebase.md) for full Lakebase scaffolding and app-code pat 1. **Scaffold**: Run `databricks apps manifest`, then `databricks apps init` with `--features` and `--set` as in parent SKILL.md (App Manifest and Scaffolding) 2. **Develop**: `cd && npm install && npm run dev` 3. **Validate**: `databricks apps validate` -4. **Deploy**: `databricks apps deploy --profile ` +4. 
**Deploy**: `databricks apps deploy --profile ` (⚠️ USER CONSENT REQUIRED) ## Data Discovery (Before Writing SQL) diff --git a/skills/databricks-apps/references/other-frameworks.md b/skills/databricks-apps/references/other-frameworks.md new file mode 100644 index 0000000..ff8b2a2 --- /dev/null +++ b/skills/databricks-apps/references/other-frameworks.md @@ -0,0 +1,269 @@ +# Databricks Apps — Other Frameworks (Non-AppKit) + +Setup guide for non-AppKit apps: Streamlit, FastAPI, Flask, Gradio, Dash, Django, Next.js, React, etc. + +For universal platform rules (permissions, deployment, timeouts, resource injection), see [Platform Guide](platform-guide.md). + +## 1. Port & Host Configuration + +**The #1 cause of 502 Bad Gateway errors.** + +| Setting | Required Value | Common Mistake | +|---------|---------------|----------------| +| Port | `DATABRICKS_APP_PORT` env var | Hardcoding 8080, 3000, or 3001 | +| Host | `0.0.0.0` | Binding to `localhost` or `127.0.0.1` | + +The platform dynamically assigns a port via `DATABRICKS_APP_PORT`. Use `8000` as a local dev fallback only. 
+ +### Framework-Specific Port Configuration + +#### Streamlit +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +``` + +#### FastAPI / Uvicorn +```python +if __name__ == "__main__": + import uvicorn + port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) + uvicorn.run(app, host="0.0.0.0", port=port) +``` + +#### Flask +```python +port = int(os.environ.get("DATABRICKS_APP_PORT", 8000)) +app.run(host="0.0.0.0", port=port) +``` + +#### Gradio +```python +demo.launch(server_name="0.0.0.0", + server_port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Dash +```python +app.run(host="0.0.0.0", + port=int(os.environ.get("DATABRICKS_APP_PORT", 8000))) +``` + +#### Next.js +```jsonc +// package.json +"scripts": { + "start": "next start -p ${DATABRICKS_APP_PORT:-8000} -H 0.0.0.0" +} +``` + +⚠️ **Only ONE service can bind to `DATABRICKS_APP_PORT`.** If you need multiple services (e.g., frontend + backend), use a reverse proxy or serve everything from one process. + +## 2. app.yaml vs databricks.yml + +These two files serve different purposes. Getting them wrong causes silent deployment failures. + +### app.yaml — Runtime Configuration +- Defines the **start command** and **environment variables** for the running app +- Used by the Databricks Apps runtime directly +- `valueFrom:` injects resource IDs from workspace configuration + +```yaml +# app.yaml +command: + - python + - app.py +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: MY_CUSTOM_VAR + value: "some-value" +``` + +### databricks.yml — Bundle/Deployment Configuration +- Defines the **app resource** for DABs (Databricks Asset Bundles) +- `config:` section only takes effect after `bundle run`, NOT just `bundle deploy` + +```yaml +# databricks.yml +bundle: + name: my-app-bundle + +resources: + apps: + my-app: + name: my-app + source_code_path: . 
+ config: + command: ['python', 'app.py'] + env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + permissions: + - service_principal_name: ${bundle.target}.my-app + level: CAN_MANAGE + +targets: + dev: + default: true +``` + +### Critical Rules + +| Rule | Why | +|------|-----| +| Always provide BOTH `app.yaml` AND `databricks.yml` config | UI deployments use app.yaml; DABs uses databricks.yml | +| Always run `bundle deploy` THEN `bundle run <APP_NAME>` | `deploy` uploads code; `run` applies config and starts the app | +| Never use `${var.xxx}` in config env values | Variables are NOT resolved in config — values appear literally | + +## 3. Using OBO in Non-AppKit Apps + +```python +# FastAPI example +from fastapi import Request +from databricks.sdk import WorkspaceClient + +@app.get("/user-data") +def get_user_data(request: Request): + token = request.headers.get("x-forwarded-access-token") + + # create user-scoped client + w = WorkspaceClient(token=token, host=os.environ["DATABRICKS_HOST"]) + # use w for user-scoped operations +``` + +```python +# SP auth is auto-configured — just use the SDK +from databricks.sdk import WorkspaceClient +w = WorkspaceClient() # picks up auto-injected env vars +``` + +## 4. Framework-Specific Timeout Gotchas + +| Framework | Default Timeout | Fix | +|-----------|----------------|-----| +| Gradio | 30 seconds (internal) | Set `fn` timeout explicitly or use `gradio.queue()` | +| Gunicorn | 30 seconds (worker timeout) | Set `--timeout 120` in gunicorn command | +| Uvicorn | None (no default timeout) | Already fine | + +## 5. 
Common Errors (Non-AppKit Specific) + +| Error | Cause | Fix | +|-------|-------|-----| +| 502 Bad Gateway | Wrong port or host | Bind to `0.0.0.0:${DATABRICKS_APP_PORT:-8000}` | +| App works locally but 502 in prod | Binding to localhost | Change to `0.0.0.0` | +| `ModuleNotFoundError` at runtime | Dependency not in requirements.txt or version conflict | Pin exact versions; validate locally first | +| Wrong script runs on deploy | No `command` in app.yaml, platform picked wrong .py file | Always specify `command` explicitly in app.yaml | +| `apt-get: command not found` | No root access in container | Use pure-Python wheels from PyPI; no system packages | + +## 6. Dependency Management + +### Python + +Only `requirements.txt` is natively supported. No native support for `pyproject.toml`, `uv.lock`, or Poetry. + +**Workaround for `uv`:** +``` +# requirements.txt +uv +``` +```yaml +# app.yaml +command: + - uv + - run + - app.py +``` +Define actual dependencies in `pyproject.toml`. Note: This moves dependency installation from build to run step, slowing startup. + +**Custom package repositories:** +- Set `PIP_INDEX_URL` as a secret in the app configuration +- Deploying user needs **MANAGE** permission on the secret scope (not just USE/READ) + +### Node.js + +- `package.json` is supported — `npm install` runs at startup +- Do NOT include `node_modules/` in source code (10 MB file limit) +- Large npm installs may exceed the 10-minute startup window +- In egress-restricted workspaces, add `registry.npmjs.org` to egress policy AND restart the app (egress changes require restart) + +## 7. Networking & CORS + +### CORS + +- CORS headers are **not customizable** on the Databricks Apps reverse proxy +- Workspace origin (`*.databricks.com`) differs from app origin (`*.databricksapps.com`) +- Cross-app API calls return **302 redirect to login page** instead of the expected response + +**Workaround:** Keep frontend and backend in a single app to avoid CORS entirely. 
+ +### Private Link / Hardened Environments + +- Azure apps use `*.azure.databricksapps.com` — NOT `*.azuredatabricks.net` +- Existing Private Link DNS zones don't cover the apps domain +- Fix: Create a separate Private DNS Zone for `azure.databricksapps.com` with conditional DNS forwarding + +### Egress Restrictions + +- Egress policy changes require **app restart** to take effect +- For npm: allowlist `registry.npmjs.org` +- For pip: allowlist `pypi.org` and `files.pythonhosted.org` +- For custom registries: use `PIP_INDEX_URL` secret (see Dependency Management) + +## 8. Streamlit-Specific Gotchas + +### Required Environment Variables + +```yaml +# app.yaml +command: + - streamlit + - run + - app.py + - --server.port + - "${DATABRICKS_APP_PORT:-8000}" + - --server.address + - "0.0.0.0" +env: + - name: STREAMLIT_SERVER_ENABLE_CORS + value: "false" + - name: STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION + value: "false" +``` + +⚠️ **Both CORS and XSRF must be disabled** for Streamlit on Databricks Apps. The reverse proxy origin (`*.databricksapps.com`) differs from the workspace origin, triggering Streamlit's CORS/XSRF protection. + +### OBO Token Staleness + +Streamlit caches initial HTTP request headers, then switches to WebSocket. The OBO token from `x-forwarded-access-token` **never refreshes** — it goes stale. + +**Workaround:** Periodically trigger a full page refresh. No clean in-Streamlit solution exists. + +### Connection Exhaustion (Hangs After Initial Queries) + +Streamlit re-runs the entire script on every user interaction. If `sql.connect()` is called during each render cycle, the rapid succession of TCP handshakes and OAuth negotiations exhausts the connection pool, causing 2-3 minute freezes. 
+ +**Fix:** Use `@st.cache_resource` to maintain persistent connections: +```python +@st.cache_resource +def get_connection(): + from databricks import sql + from databricks.sdk.core import Config + cfg = Config() + return sql.connect( + server_hostname=cfg.host, + http_path=f"/sql/1.0/warehouses/{os.environ['DATABRICKS_WAREHOUSE_ID']}", + credentials_provider=lambda: cfg.authenticate, + ) +``` + +### Transient 502s During Startup + +Streamlit apps commonly show brief 502 errors during startup. This is expected and does not indicate a problem. diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md new file mode 100644 index 0000000..26fd486 --- /dev/null +++ b/skills/databricks-apps/references/platform-guide.md @@ -0,0 +1,172 @@ +# Databricks Apps Platform Guide + +Universal platform rules that apply to ALL Databricks Apps regardless of framework (AppKit, Streamlit, FastAPI, etc.). + +For non-AppKit framework-specific setup (port config, app.yaml, Streamlit gotchas), see [Other Frameworks](other-frameworks.md). + +## Service Principal Permissions + +**The #1 cause of runtime crashes after deployment.** + +When your app uses a Databricks resource (SQL warehouse, model serving endpoint, vector search index, volume, secret scope), the app's **service principal** must have explicit permissions on that resource. + +### How Permissions Work + +When you declare a resource in `app.yaml` / `databricks.yml` with a `permission` field, the platform **automatically grants** that permission to the app's SP on deployment. You do NOT need to run manual `set-permissions` commands for declared resources. 
+ +```yaml +# databricks.yml — declaring resources with permissions +resources: + apps: + my_app: + resources: + - name: my-warehouse + sql_warehouse: + id: ${var.warehouse_id} + permission: CAN_USE # auto-granted to SP on deploy + - name: my-endpoint + serving_endpoint: + name: ${var.endpoint_name} + permission: CAN_QUERY # auto-granted to SP on deploy +``` + +### Default Permissions by Resource Type + +| Resource Type | Default Permission | Notes | +|---------------|-------------------|-------| +| SQL Warehouse | CAN_USE | Minimum for query execution | +| Model Serving Endpoint | CAN_QUERY | For inference calls | +| Vector Search Index (UC) | SELECT | UC securable of type TABLE | +| Volume (UC) | READ_VOLUME | Via UC securable | +| Secret Scope | READ | Deploying user needs MANAGE on the scope | +| Job | CAN_MANAGE_RUN | | +| Lakebase Database | CAN_CONNECT_AND_CREATE | | +| Genie Space | CAN_VIEW | | + +### ⚠️ CRITICAL AGENT BEHAVIOR + +Always declare resources in `databricks.yml` with the correct `permission` field — do NOT skip this. The platform handles granting automatically on deploy. + +## Resource Types & Injection + +**NEVER hardcode workspace-specific IDs in source code.** Always inject via environment variables with `valueFrom`. 
+ +| Resource Type | Default Key | Use Case | +|---------------|-------------|----------| +| SQL Warehouse | `sql-warehouse` | Query compute | +| Model Serving Endpoint | `serving-endpoint` | Model inference | +| Vector Search Index | `vector-search-index` | Semantic search | +| Lakebase Database | `database` | OLTP storage | +| Secret | `secret` | Sensitive values | +| UC Table | `table` | Structured data | +| UC Connection | `connection` | External data sources | +| Genie Space | `genie-space` | AI analytics | +| MLflow Experiment | `experiment` | ML tracking | +| Lakeflow Job | `job` | Data workflows | +| UDF | `function` | SQL/Python functions | +| Databricks App | `app` | App-to-app communication | + +```python +# ✅ GOOD +warehouse_id = os.environ["DATABRICKS_WAREHOUSE_ID"] +``` + +```yaml +# app.yaml / databricks.yml env section +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: SERVING_ENDPOINT + valueFrom: serving-endpoint +``` + +## Authentication: OBO vs Service Principal + +| Context | When Used | Token Source | Cached Per | +|---------|-----------|--------------|------------| +| **Service Principal (SP)** | Default; background tasks, shared data | Auto-injected `DATABRICKS_CLIENT_ID` + `DATABRICKS_CLIENT_SECRET` | All users (shared) | +| **On-Behalf-Of (OBO)** | User-specific data, user-scoped access | `x-forwarded-access-token` header | Per user | + +**SP auth** is auto-configured — `WorkspaceClient()` picks up injected env vars. + +**OBO** requires extracting the token from request headers and declaring scopes: + +| Scope | Purpose | +|-------|---------| +| `sql` | Query SQL warehouses | +| `dashboards.genie` | Manage Genie spaces | +| `files.files` | Manage files/directories | +| `iam.access-control:read` | Read permissions (default) | +| `iam.current-user:read` | Read current user info (default) | + +⚠️ Databricks blocks access outside approved scopes even if the user has permission. 
+ +## Deployment Workflow + +⚠️ **USER CONSENT REQUIRED** — always confirm with the user before deploying. + +```bash +# Option A: single command (recommended) — validates, deploys, and runs +databricks apps deploy -t <TARGET> --profile <PROFILE> + +# Option B: step by step +databricks apps validate --profile <PROFILE> +databricks bundle deploy -t <TARGET> --profile <PROFILE> +databricks bundle run <APP_NAME> -t <TARGET> --profile <PROFILE> +``` + +❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. Use `databricks apps deploy` or add `bundle run` after `bundle deploy`. + +### ⚠️ Destructive Updates Warning + +`databricks apps update` (and `bundle run`) perform a **full replacement**, not a merge: +- Adding a new resource can silently **wipe** existing `user_api_scopes` +- OBO permissions may be stripped on every deployment + +**Workaround:** After each deployment, verify OBO scopes are intact. + +## Runtime Environment + +| Constraint | Value | +|------------|-------| +| Max file size | 10 MB per file | +| Available port | Only `DATABRICKS_APP_PORT` | +| Auto-injected env vars | `DATABRICKS_HOST`, `DATABRICKS_APP_PORT`, `DATABRICKS_APP_NAME`, `DATABRICKS_WORKSPACE_ID`, `DATABRICKS_CLIENT_ID`, `DATABRICKS_CLIENT_SECRET` | +| No root access | Cannot use `apt-get`, `yum`, or `apk` — use PyPI/npm packages only | +| Graceful shutdown | SIGTERM → 15 seconds to shut down → SIGKILL | +| Logging | Only stdout/stderr are captured — file-based logs are lost on container recycle | +| Filesystem | Ephemeral — no persistent local storage; use UC Volumes/tables | + +## Compute & Limits + +| Size | RAM | vCPU | DBU/hour | Notes | +|------|-----|------|----------|-------| +| Medium | 6 GB | Up to 2 | 0.5 | Default | +| Large | 12 GB | Up to 4 | 1.0 | Select during app creation or edit | + +- No GPU access. Use model serving endpoints for inference. +- Apps must start within **10 minutes** (including dependency installation). 
+- Max apps per workspace: **100**. + +## HTTP Proxy & Streaming + +The Databricks Apps reverse proxy enforces a **120-second per-request timeout** (NOT configurable). + +| Behavior | Detail | +|----------|--------| +| 504 in app logs? | **No** — the error is generated at the proxy. App logs show nothing. | +| SSE streaming | Responses may be **buffered** and delivered in chunks, not token-by-token | +| WebSockets | Bypass the 120s limit — working but undocumented | + +For long-running agent interactions, use **WebSockets** instead of SSE. + +## Common Errors + +| Error | Cause | Fix | +|-------|-------|-----| +| `PERMISSION_DENIED` after deploy | SP missing permissions | Declare each resource with a `permission` field in `databricks.yml` (auto-granted on deploy); grant manually for any undeclared resource | +| App deploys but config doesn't change | Only ran `bundle deploy` | Also run `bundle run <APP_NAME>` | +| `File is larger than 10485760 bytes` | Bundled dependencies | Use requirements.txt / package.json | +| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | +| `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not DABs variables | +| 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations |