Skip to content

Commit 2d3dd0d

Browse files
Refactor create_or_update_genie function for improved handling of serialized_space
- Streamline logic for updating or creating Genie spaces based on the presence of serialized_space and space_id.
- Enhance error handling for non-existent spaces and ensure proper updates are made when serialized_space is provided.
- Update documentation to clarify the workflow for creating and updating Genie spaces, including handling sample questions.
1 parent d1d16a0 commit 2d3dd0d

2 files changed

Lines changed: 162 additions & 94 deletions

File tree

  • databricks-mcp-server/databricks_mcp_server/tools
  • databricks-skills/databricks-genie

databricks-mcp-server/databricks_mcp_server/tools/genie.py

Lines changed: 86 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -107,104 +107,106 @@ def create_or_update_genie(
107107

108108
operation = "created"
109109

110-
# When serialized_space is provided, use the public genie/spaces API
111-
if serialized_space:
112-
if space_id:
113-
# Update existing space with serialized config
114-
manager.genie_update_with_serialized_space(
115-
space_id=space_id,
116-
serialized_space=serialized_space,
117-
title=display_name,
118-
description=description,
119-
warehouse_id=warehouse_id,
120-
)
121-
operation = "updated"
122-
else:
123-
# Check if exists by name, then create or update
124-
existing = manager.genie_find_by_name(display_name)
125-
if existing:
126-
operation = "updated"
127-
space_id = existing.space_id
110+
# When serialized_space is provided
111+
if serialized_space:
112+
if space_id:
113+
# Update existing space with serialized config
128114
manager.genie_update_with_serialized_space(
129115
space_id=space_id,
130116
serialized_space=serialized_space,
131117
title=display_name,
132118
description=description,
133119
warehouse_id=warehouse_id,
134120
)
135-
else:
136-
result = manager.genie_import(
137-
warehouse_id=warehouse_id,
138-
serialized_space=serialized_space,
139-
title=display_name,
140-
description=description,
141-
)
142-
space_id = result.get("space_id", "")
143-
else:
144-
if space_id:
145-
# Update existing space by ID
146-
existing = manager.genie_get(space_id)
147-
if existing:
148121
operation = "updated"
149-
manager.genie_update(
150-
space_id=space_id,
151-
display_name=display_name,
152-
description=description,
153-
warehouse_id=warehouse_id,
154-
table_identifiers=table_identifiers,
155-
sample_questions=sample_questions,
156-
)
157122
else:
158-
return {"error": f"Genie space {space_id} not found"}
123+
# Check if exists by name, then create or update
124+
existing = manager.genie_find_by_name(display_name)
125+
if existing:
126+
operation = "updated"
127+
space_id = existing.space_id
128+
manager.genie_update_with_serialized_space(
129+
space_id=space_id,
130+
serialized_space=serialized_space,
131+
title=display_name,
132+
description=description,
133+
warehouse_id=warehouse_id,
134+
)
135+
else:
136+
result = manager.genie_import(
137+
warehouse_id=warehouse_id,
138+
serialized_space=serialized_space,
139+
title=display_name,
140+
description=description,
141+
)
142+
space_id = result.get("space_id", "")
143+
144+
# When serialized_space is not provided
159145
else:
160-
# Check if exists by name first
161-
existing = manager.genie_find_by_name(display_name)
162-
if existing:
163-
operation = "updated"
164-
manager.genie_update(
165-
space_id=existing.space_id,
166-
display_name=display_name,
167-
description=description,
168-
warehouse_id=warehouse_id,
169-
table_identifiers=table_identifiers,
170-
sample_questions=sample_questions,
171-
)
172-
space_id = existing.space_id
146+
if space_id:
147+
# Update existing space by ID
148+
existing = manager.genie_get(space_id)
149+
if existing:
150+
operation = "updated"
151+
manager.genie_update(
152+
space_id=space_id,
153+
display_name=display_name,
154+
description=description,
155+
warehouse_id=warehouse_id,
156+
table_identifiers=table_identifiers,
157+
sample_questions=sample_questions,
158+
)
159+
else:
160+
return {"error": f"Genie space {space_id} not found"}
173161
else:
174-
# Create new
175-
result = manager.genie_create(
176-
display_name=display_name,
177-
warehouse_id=warehouse_id,
178-
table_identifiers=table_identifiers,
179-
description=description,
180-
)
181-
space_id = result.get("space_id", "")
162+
# Check if exists by name first
163+
existing = manager.genie_find_by_name(display_name)
164+
if existing:
165+
operation = "updated"
166+
manager.genie_update(
167+
space_id=existing.space_id,
168+
display_name=display_name,
169+
description=description,
170+
warehouse_id=warehouse_id,
171+
table_identifiers=table_identifiers,
172+
sample_questions=sample_questions,
173+
)
174+
space_id = existing.space_id
175+
else:
176+
# Create new
177+
result = manager.genie_create(
178+
display_name=display_name,
179+
warehouse_id=warehouse_id,
180+
table_identifiers=table_identifiers,
181+
description=description,
182+
)
183+
space_id = result.get("space_id", "")
184+
185+
# Add sample questions if provided
186+
if sample_questions and space_id:
187+
manager.genie_add_sample_questions_batch(space_id, sample_questions)
182188

183-
# Add sample questions if provided
184-
if sample_questions and space_id:
185-
manager.genie_add_sample_questions_batch(space_id, sample_questions)
186-
187-
response = {
188-
"space_id": space_id,
189-
"display_name": display_name,
190-
"operation": operation,
191-
"warehouse_id": warehouse_id,
192-
"table_count": len(table_identifiers),
193-
}
194-
195-
try:
196-
if space_id:
197-
from ..manifest import track_resource
189+
response = {
190+
"space_id": space_id,
191+
"display_name": display_name,
192+
"operation": operation,
193+
"warehouse_id": warehouse_id,
194+
"table_count": len(table_identifiers),
195+
}
198196

199-
track_resource(
200-
resource_type="genie_space",
201-
name=display_name,
202-
resource_id=space_id,
203-
)
204-
except Exception:
205-
pass
197+
try:
198+
if space_id:
199+
from ..manifest import track_resource
200+
201+
track_resource(
202+
resource_type="genie_space",
203+
name=display_name,
204+
resource_id=space_id,
205+
)
206+
except Exception:
207+
pass
206208

207-
return response
209+
return response
208210

209211
except Exception as e:
210212
return {"error": f"Failed to create/update Genie space '{display_name}': {e}"}

databricks-skills/databricks-genie/SKILL.md

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,28 +113,92 @@ import_genie(
113113

114114
#### Example: Migrating Genie Spaces from Prod to Dev
115115

116-
When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space.
116+
When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space.
117117

118118
**Note:** Genie Space migration assumes that the underlying data assets (schemas and tables) remain structurally identical across environments. The migration of the actual catalogs, schemas, or tables themselves is outside the scope of Genie Space migration skills.
119119

120-
For instance, if your production tables reside in the `healthverity_claims_sample_patient_dataset` catalog, but your development tables are in `healthverity_claims_sample_patient_dataset_dev`, you can perform a string replacement on the exported configuration before importing it into the target workspace:
120+
##### The Challenge: MCP Servers Are Workspace-Scoped
121+
122+
Each Databricks MCP server instance connects to exactly one workspace (set via `DATABRICKS_CONFIG_PROFILE` at startup). This means a single MCP server cannot export from PROD and import into DEV in the same session — you need two server instances.
123+
124+
##### Recommended Setup: Dual MCP Server Profiles
125+
126+
Configure two Databricks MCP server entries in your IDE's MCP config (e.g. `~/.cursor/mcp.json`), one per workspace:
127+
128+
```json
129+
"databricks-prod": {
130+
"command": "/path/to/.venv/bin/python",
131+
"args": ["/path/to/databricks-mcp-server/run_server.py"],
132+
"env": { "DATABRICKS_CONFIG_PROFILE": "prod" }
133+
},
134+
"databricks-dev": {
135+
"command": "/path/to/.venv/bin/python",
136+
"args": ["/path/to/databricks-mcp-server/run_server.py"],
137+
"env": { "DATABRICKS_CONFIG_PROFILE": "dev" }
138+
}
139+
```
140+
141+
Both servers run simultaneously after one IDE reload. This lets you call `export_genie` against `databricks-prod` and `import_genie` against `databricks-dev` within the same conversation — no further reloads needed.
142+
143+
> **Tip:** The Databricks CLI profiles (`prod`, `dev`) referenced above must be defined in `~/.databrickscfg`. Both token-based and OAuth (`auth_type = databricks-cli`) profiles are supported.
144+
145+
##### Full Migration Workflow
146+
147+
**Step 1 — Export from PROD** using the `databricks-prod` MCP server:
121148

122149
```python
123-
# 1. Export the Genie Space from the production workspace
150+
# Call export_genie via the prod-scoped MCP server
124151
exported = export_genie(space_id="<prod_space_id>")
152+
# exported["serialized_space"] contains the full config
153+
# exported["warehouse_id"] is the PROD warehouse — do NOT reuse it for DEV
154+
```
155+
156+
**Step 2 — Find the DEV warehouse ID:**
125157

126-
# 2. Remap the catalog name for the development environment
158+
```python
159+
# Call list_warehouses via the dev-scoped MCP server
160+
list_warehouses() # note the warehouse_id for the DEV workspace
161+
```
162+
163+
**Step 3 — Remap the catalog and import into DEV** using the `databricks-dev` MCP server:
164+
165+
```python
166+
# Catalog name differs between environments — replace ALL occurrences.
167+
# serialized_space embeds the catalog in table identifiers, SQL FROM clauses,
168+
# join specs, and filter snippets, so a single string replace covers everything.
127169
dev_serialized_space = exported["serialized_space"].replace(
128-
"healthverity_claims_sample_patient_dataset",
129-
"healthverity_claims_sample_patient_dataset_dev"
170+
"my_prod_catalog",
171+
"my_dev_catalog"
130172
)
131173

132-
# 3. Import the modified space into the dev workspace
133-
import_genie(
174+
# Call import_genie via the dev-scoped MCP server
175+
result = import_genie(
134176
warehouse_id="<dev_warehouse_id>",
135177
serialized_space=dev_serialized_space,
136-
title="HealthVerity Claims (Dev)"
178+
title="My Space"
137179
)
180+
# result["space_id"] is the new DEV space ID
181+
```
182+
183+
**Step 4 — Update `databricks.yml`** with the new DEV space IDs so they are tracked in the bundle:
184+
185+
```yaml
186+
targets:
187+
dev:
188+
variables:
189+
genie_space_ids: "<new_dev_space_id_1>,<new_dev_space_id_2>,<new_dev_space_id_3>"
190+
```
191+
192+
**Step 5 — Save exports locally** for version control and future re-migrations:
193+
194+
```json
195+
// genie_exports/MySpace.json
196+
{
197+
"space_id": "<prod_space_id>",
198+
"title": "MySpace",
199+
"warehouse_id": "<prod_warehouse_id>",
200+
"serialized_space": "{ ... }"
201+
}
138202
```
139203

140204
## Workflow
@@ -176,7 +240,9 @@ Use these skills in sequence:
176240
| **`import_genie` fails with permission error** | Ensure you have CREATE privileges in the target workspace folder |
177241
| **Tables not found after migration** | Catalog name was not remapped — replace the source catalog name in `serialized_space` before calling `import_genie` |
178242
| **Catalog name appears in SQL queries too** | `serialized_space` embeds catalog in table identifiers, SQL FROM clauses, join specs, and filters — a single `.replace(src, tgt)` on the whole string covers all occurrences |
179-
243+
| **`export_genie` / `import_genie` land in the wrong workspace** | Each MCP server is workspace-scoped. Set up two named MCP server entries (one per profile) in your IDE's MCP config instead of switching a single server's profile mid-session |
244+
| **MCP server doesn't pick up profile change** | The MCP process reads `DATABRICKS_CONFIG_PROFILE` once at startup — editing the config file requires an IDE reload to take effect |
245+
| **`import_genie` fails with JSON parse error** | The `serialized_space` string may contain multi-line SQL arrays with `\n` escape sequences; flatten each SQL array into a single-line string before passing `serialized_space` to `import_genie` to avoid double-escaping issues |
180246
## Related Skills
181247

182248
- **[databricks-agent-bricks](../databricks-agent-bricks/SKILL.md)** - Use Genie Spaces as agents inside Supervisor Agents

0 commit comments

Comments
 (0)