From c88ec27b5b38e9f79892a7500d8836f4fee14062 Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Sun, 8 Mar 2026 14:34:34 -0700 Subject: [PATCH 1/6] Add serialized space support in Genie API methods - Enhance the `create_or_update_genie` function to accept a `serialized_space` parameter for creating or updating Genie spaces using a full serialized configuration. Introduce new methods in `AgentBricksManager` for exporting and importing Genie spaces with serialized payloads, allowing for full configuration management. Update documentation to reflect these changes and provide usage examples. - new skills: `export_genie` and `import_genie` to support the new functionality including migration skills. - update documentation to reflect these changes and provide usage examples. --- .../databricks_mcp_server/tools/genie.py | 213 +++++++++++++++++- .../agent_bricks/manager.py | 106 +++++++++ 2 files changed, 316 insertions(+), 3 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/genie.py b/databricks-mcp-server/databricks_mcp_server/tools/genie.py index 5a606ecf..d9a829ec 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/genie.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/genie.py @@ -42,6 +42,7 @@ def create_or_update_genie( description: Optional[str] = None, sample_questions: Optional[List[str]] = None, space_id: Optional[str] = None, + serialized_space: Optional[str] = None, ) -> Dict[str, Any]: """ Create or update a Genie Space for SQL-based data exploration. @@ -49,6 +50,11 @@ def create_or_update_genie( A Genie Space allows users to ask natural language questions about data and get SQL-generated answers. It connects to tables in Unity Catalog. + When serialized_space is provided, the space is created/updated using the + full serialized configuration via the public /api/2.0/genie/spaces API. + This preserves all instructions, SQL examples, and settings from the source. 
+ Obtain a serialized_space string via export_genie(). + Args: display_name: Display name for the Genie space table_identifiers: List of tables to include @@ -58,6 +64,10 @@ def create_or_update_genie( description: Optional description of what the Genie space does sample_questions: Optional list of sample questions to help users space_id: Optional existing space_id to update instead of create + serialized_space: Optional full serialized space config JSON string + (from export_genie). When provided, tables/instructions/SQL examples + from the serialized config are used and the public genie/spaces API + is called instead of data-rooms. Returns: Dictionary with: @@ -75,6 +85,15 @@ def create_or_update_genie( ... sample_questions=["What were total sales last month?"] ... ) {"space_id": "abc123...", "display_name": "Sales Analytics", "operation": "created", ...} + + >>> # Update with serialized config (preserves all instructions and SQL examples) + >>> exported = export_genie("abc123...") + >>> create_or_update_genie( + ... display_name="Sales Analytics", + ... table_identifiers=[], + ... space_id="abc123...", + ... serialized_space=exported["serialized_space"] + ... 
) """ try: description = with_description_footer(description) @@ -88,7 +107,42 @@ def create_or_update_genie( operation = "created" + # When serialized_space is provided, use the public genie/spaces API + if serialized_space: if space_id: + # Update existing space with serialized config + manager.genie_update_with_serialized_space( + space_id=space_id, + serialized_space=serialized_space, + title=display_name, + description=description, + warehouse_id=warehouse_id, + ) + operation = "updated" + else: + # Check if exists by name, then create or update + existing = manager.genie_find_by_name(display_name) + if existing: + operation = "updated" + space_id = existing.space_id + manager.genie_update_with_serialized_space( + space_id=space_id, + serialized_space=serialized_space, + title=display_name, + description=description, + warehouse_id=warehouse_id, + ) + else: + result = manager.genie_import( + warehouse_id=warehouse_id, + serialized_space=serialized_space, + title=display_name, + description=description, + ) + space_id = result.get("space_id", "") + else: + if space_id: + # Update existing space by ID existing = manager.genie_get(space_id) if existing: operation = "updated" @@ -103,6 +157,7 @@ def create_or_update_genie( else: return {"error": f"Genie space {space_id} not found"} else: + # Check if exists by name first existing = manager.genie_find_by_name(display_name) if existing: operation = "updated" @@ -116,6 +171,7 @@ def create_or_update_genie( ) space_id = existing.space_id else: + # Create new result = manager.genie_create( display_name=display_name, warehouse_id=warehouse_id, @@ -124,6 +180,7 @@ def create_or_update_genie( ) space_id = result.get("space_id", "") + # Add sample questions if provided if sample_questions and space_id: manager.genie_add_sample_questions_batch(space_id, sample_questions) @@ -154,7 +211,7 @@ def create_or_update_genie( @mcp.tool -def get_genie(space_id: Optional[str] = None) -> Dict[str, Any]: +def get_genie(space_id: 
Optional[str] = None, include_serialized_space: bool = False) -> Dict[str, Any]: """ Get details of a Genie Space, or list all spaces. @@ -163,14 +220,28 @@ def get_genie(space_id: Optional[str] = None) -> Dict[str, Any]: Args: space_id: The Genie space ID. If omitted, lists all spaces. + include_serialized_space: If True, include the full serialized space configuration + in the response (requires at least CAN EDIT permission). Useful when you + want to inspect or export the space config. Default: False. Returns: - Single space dict (if space_id provided) or {"spaces": [...]}. + Single space dictionary with Genie space details including: + - space_id: The space ID + - display_name: The display name + - description: The description + - warehouse_id: The SQL warehouse ID + - table_identifiers: List of configured tables + - sample_questions: List of sample questions + - serialized_space: Full space config JSON string (only when include_serialized_space=True) + Multiple spaces: List of space dictionaries (only when space_id is omitted) Example: >>> get_genie("abc123...") {"space_id": "abc123...", "display_name": "Sales Analytics", ...} + >>> get_genie("abc123...", include_serialized_space=True) + {"space_id": "abc123...", ..., "serialized_space": "{\"version\":1,...}"} + >>> get_genie() {"spaces": [{"space_id": "abc123...", "title": "Sales Analytics", ...}, ...]} """ @@ -185,7 +256,7 @@ def get_genie(space_id: Optional[str] = None) -> Dict[str, Any]: questions_response = manager.genie_list_questions(space_id, question_type="SAMPLE_QUESTION") sample_questions = [q.get("question_text", "") for q in questions_response.get("curated_questions", [])] - return { + response = { "space_id": result.get("space_id", space_id), "display_name": result.get("display_name", ""), "description": result.get("description", ""), @@ -193,6 +264,13 @@ def get_genie(space_id: Optional[str] = None) -> Dict[str, Any]: "table_identifiers": result.get("table_identifiers", []), "sample_questions": 
sample_questions, } + + if include_serialized_space: + exported = manager.genie_export(space_id) + response["serialized_space"] = exported.get("serialized_space", "") + + return response + except Exception as e: return {"error": f"Failed to get Genie space {space_id}: {e}"} @@ -215,6 +293,7 @@ def get_genie(space_id: Optional[str] = None) -> Dict[str, Any]: return {"error": str(e)} + @mcp.tool def delete_genie(space_id: str) -> Dict[str, Any]: """ @@ -246,6 +325,134 @@ def delete_genie(space_id: str) -> Dict[str, Any]: return {"success": False, "space_id": space_id, "error": str(e)} +@mcp.tool +def export_genie(space_id: str) -> Dict[str, Any]: + """ + Export a Genie Space with its full serialized configuration. + + Retrieves the complete Genie Space definition including tables, instructions, + SQL queries, and layout as a serialized JSON string. Use this to clone or + migrate a Genie Space to another workspace or location. + + Requires at least CAN EDIT permission on the space. + + Args: + space_id: The Genie space ID to export + + Returns: + Dictionary with: + - space_id: The space ID + - title: The space title + - description: The description (if set) + - warehouse_id: The SQL warehouse ID + - serialized_space: JSON string with the full space configuration. + Pass this value directly to import_genie() to clone or migrate. + + Example: + >>> result = export_genie("abc123...") + >>> print(result["title"]) + "Sales Analytics" + >>> # Then clone it: + >>> import_genie( + ... warehouse_id=result["warehouse_id"], + ... serialized_space=result["serialized_space"], + ... title="Sales Analytics (Clone)" + ... 
) + """ + manager = _get_manager() + try: + result = manager.genie_export(space_id) + return { + "space_id": result.get("space_id", space_id), + "title": result.get("title", ""), + "description": result.get("description", ""), + "warehouse_id": result.get("warehouse_id", ""), + "serialized_space": result.get("serialized_space", ""), + } + except Exception as e: + return {"error": str(e), "space_id": space_id} + + +@mcp.tool +def import_genie( + warehouse_id: str, + serialized_space: str, + title: Optional[str] = None, + description: Optional[str] = None, + parent_path: Optional[str] = None, +) -> Dict[str, Any]: + """ + Create a new Genie Space from a serialized payload (import/clone/migrate). + + Use this to clone an existing Genie Space or migrate it to a new workspace. + The serialized_space string is obtained from export_genie(). + + Workflow: + 1. Export: result = export_genie(source_space_id) + 2. Import: import_genie(warehouse_id, result["serialized_space"], title="New Name") + + Args: + warehouse_id: SQL warehouse ID to associate with the new space. + Use list_warehouses() or get_best_warehouse() to find one. + serialized_space: The JSON string from export_genie() containing the full + space configuration (tables, instructions, SQL queries, layout). + Can also be constructed manually: + '{"version":1,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' + title: Optional title override (defaults to the exported space's title) + description: Optional description override + parent_path: Optional workspace folder path where the space will be registered + (e.g., "/Workspace/Users/you@company.com/Genie Spaces") + + Returns: + Dictionary with: + - space_id: The newly created Genie space ID + - title: The space title + - description: The description + - operation: 'imported' + + Example: + >>> # Clone a space within the same workspace + >>> exported = export_genie("abc123...") + >>> import_genie( + ... warehouse_id=exported["warehouse_id"], + ... 
serialized_space=exported["serialized_space"], + ... title="Sales Analytics (Dev Copy)" + ... ) + {"space_id": "def456...", "title": "Sales Analytics (Dev Copy)", "operation": "imported"} + """ + manager = _get_manager() + try: + result = manager.genie_import( + warehouse_id=warehouse_id, + serialized_space=serialized_space, + title=title, + description=description, + parent_path=parent_path, + ) + space_id = result.get("space_id", "") + + # Track resource + if space_id: + try: + from ..manifest import track_resource + track_resource( + resource_type="genie_space", + name=title or result.get("title", space_id), + resource_id=space_id, + ) + except Exception: + pass + + return { + "space_id": space_id, + "title": result.get("title", title or ""), + "description": result.get("description", description or ""), + "operation": "imported", + } + except Exception as e: + return {"error": str(e)} + + # ============================================================================ # Genie Conversation API Tools # ============================================================================ diff --git a/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py b/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py index 7b829bbc..206f9adf 100644 --- a/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py +++ b/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py @@ -74,6 +74,9 @@ class AgentBricksManager: - genie_get(): Get Genie space - genie_update(): Update Genie space - genie_delete(): Delete Genie space + - genie_export(): Export space with full serialized config + - genie_import(): Create new space from serialized payload + - genie_update_with_serialized_space(): Full update via serialized payload - genie_add_sample_questions_batch(): Add sample questions - genie_add_sql_instructions_batch(): Add SQL examples - genie_add_benchmarks_batch(): Add benchmarks @@ -924,6 +927,100 @@ def genie_delete(self, space_id: str) -> 
None: """Delete a Genie space.""" self._delete(f"/api/2.0/data-rooms/{space_id}") + def genie_export(self, space_id: str) -> Dict[str, Any]: + """Export a Genie space with its full serialized configuration. + + Uses the public /api/2.0/genie/spaces endpoint with include_serialized_space=true. + Requires at least CAN EDIT permission on the space. + + Args: + space_id: The Genie space ID to export + + Returns: + Dictionary with space metadata including: + - space_id: The space ID + - title: The space title + - description: The description + - warehouse_id: The SQL warehouse ID + - serialized_space: JSON string with full space config (tables, instructions, + SQL queries, layout). Pass this to genie_import() to clone/migrate the space. + """ + return self._get( + f"/api/2.0/genie/spaces/{space_id}", + params={"include_serialized_space": "true"}, + ) + + def genie_import( + self, + warehouse_id: str, + serialized_space: str, + title: Optional[str] = None, + description: Optional[str] = None, + parent_path: Optional[str] = None, + ) -> Dict[str, Any]: + """Create a new Genie space from a serialized payload (import/clone). + + Uses the public /api/2.0/genie/spaces endpoint with serialized_space in the body. + The serialized_space string is obtained from genie_export(). 
+ + Args: + warehouse_id: SQL warehouse ID to associate with the new space + serialized_space: The JSON string from genie_export() containing the full + space configuration (tables, instructions, SQL queries, layout) + title: Optional title override (defaults to the exported space's title) + description: Optional description override + parent_path: Optional workspace folder path where the space will be registered + (e.g., "/Workspace/Users/you@company.com/Genie Spaces") + + Returns: + Dictionary with the newly created space details including space_id + """ + payload: Dict[str, Any] = { + "warehouse_id": warehouse_id, + "serialized_space": serialized_space, + } + if title: + payload["title"] = title + if description: + payload["description"] = description + if parent_path: + payload["parent_path"] = parent_path + return self._post("/api/2.0/genie/spaces", payload) + + def genie_update_with_serialized_space( + self, + space_id: str, + serialized_space: str, + title: Optional[str] = None, + description: Optional[str] = None, + warehouse_id: Optional[str] = None, + ) -> Dict[str, Any]: + """Update a Genie space using a serialized payload (full replacement). + + Uses the public /api/2.0/genie/spaces/{space_id} endpoint (PUT) with + serialized_space in the body. This replaces the entire space configuration. + + Args: + space_id: The Genie space ID to update + serialized_space: The JSON string containing the new space configuration. 
+ Obtain from genie_export() or construct manually: + '{"version":1,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' + title: Optional title override + description: Optional description override + warehouse_id: Optional warehouse override + + Returns: + Dictionary with the updated space details + """ + payload: Dict[str, Any] = {"serialized_space": serialized_space} + if title: + payload["title"] = title + if description: + payload["description"] = description + if warehouse_id: + payload["warehouse_id"] = warehouse_id + return self._put(f"/api/2.0/genie/spaces/{space_id}", payload) + def genie_list_questions( self, space_id: str, question_type: str = "SAMPLE_QUESTION" ) -> GenieListQuestionsResponseDict: @@ -1142,6 +1239,15 @@ def _patch(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: self._handle_response_error(response, "PATCH", path) return response.json() + def _put(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: + headers = self.w.config.authenticate() + headers["Content-Type"] = "application/json" + url = f"{self.w.config.host}{path}" + response = requests.put(url, headers=headers, json=body, timeout=20) + if response.status_code >= 400: + self._handle_response_error(response, "PUT", path) + return response.json() + def _delete(self, path: str) -> Dict[str, Any]: headers = self.w.config.authenticate() url = f"{self.w.config.host}{path}" From d1d16a080f6c02762d806b125f1198142b3e911e Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Sun, 8 Mar 2026 14:36:50 -0700 Subject: [PATCH 2/6] Enhance documentation for Genie Space export and import features including migration skills Add detailed instructions for exporting and importing Genie Spaces using `export_genie` and `import_genie` methods. Include information on serialized space structure, migration across workspaces with catalog remapping, and permissions required for operations. Update examples to illustrate cloning and migrating Genie Spaces effectively. 
--- databricks-skills/databricks-genie/SKILL.md | 63 ++++++++- databricks-skills/databricks-genie/spaces.md | 134 +++++++++++++++++++ 2 files changed, 195 insertions(+), 2 deletions(-) diff --git a/databricks-skills/databricks-genie/SKILL.md b/databricks-skills/databricks-genie/SKILL.md index e5b32b6e..185675d7 100644 --- a/databricks-skills/databricks-genie/SKILL.md +++ b/databricks-skills/databricks-genie/SKILL.md @@ -18,6 +18,12 @@ Use this skill when: - Adding sample questions to guide users - Connecting Unity Catalog tables to a conversational interface - Asking questions to a Genie Space programmatically (Conversation API) +- Exporting a Genie Space configuration (serialized_space) for backup or migration +- Importing / cloning a Genie Space from a serialized payload +- Migrating a Genie Space between workspaces or environments (dev → staging → prod) + - Only supports catalog remapping where catalog names differ across environments + - Not supported for schema and/or table names that differ across environments + - Not including migration of tables between environments (only migration of Genie Spaces) ## MCP Tools @@ -25,9 +31,12 @@ Use this skill when: | Tool | Purpose | |------|---------| -| `create_or_update_genie` | Create or update a Genie Space | -| `get_genie` | Get space details (by ID) or list all spaces (no ID) | +| `list_genie` | List all Genie Spaces accessible to you | +| `create_or_update_genie` | Create or update a Genie Space (supports `serialized_space`) | +| `get_genie` | Get space details (by ID and support `include_serialized_space` parameter) or list all spaces (no ID) | | `delete_genie` | Delete a Genie Space | +| `export_genie` | Export a Genie Space with full serialized configuration | +| `import_genie` | Import / clone a Genie Space from a serialized payload | ### Conversation API @@ -83,6 +92,51 @@ ask_genie( # Returns: SQL, columns, data, row_count ``` +### 4. 
Export & Import (Clone / Migrate) + +Export a space (preserves all tables, instructions, SQL examples, and layout): + +```python +exported = export_genie(space_id="your_space_id") +# exported["serialized_space"] contains the full config +``` + +Clone to a new space (same catalog): + +```python +import_genie( + warehouse_id=exported["warehouse_id"], + serialized_space=exported["serialized_space"], + title="Sales Analytics (Dev Copy)" +) +``` + +#### Example: Migrating Genie Spaces from Prod to Dev + +When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space. + +**Note:** Genie Space migration assumes that the underlying data assets (schemas and tables) remain structurally identical across environments. The migration of the actual catalogs, schemas, or tables themselves is outside the scope of Genie Space migration skills. + +For instance, if your production tables reside in the `healthverity_claims_sample_patient_dataset` catalog, but your development tables are in `healthverity_claims_sample_patient_dataset_dev`, you can perform a string replacement on the exported configuration before importing it into the target workspace: + +```python +# 1. Export the Genie Space from the production workspace +exported = export_genie(space_id="") + +# 2. Remap the catalog name for the development environment +dev_serialized_space = exported["serialized_space"].replace( + "healthverity_claims_sample_patient_dataset", + "healthverity_claims_sample_patient_dataset_dev" +) + +# 3. Import the modified space into the dev workspace +import_genie( + warehouse_id="", + serialized_space=dev_serialized_space, + title="HealthVerity Claims (Dev)" +) +``` + ## Workflow ``` @@ -90,6 +144,7 @@ ask_genie( 2. Create space → create_or_update_genie 3. Query space → ask_genie (or test in Databricks UI) 4. 
Curate (optional) → Use Databricks UI to add instructions +5. Export/migrate → export_genie → import_genie ``` ## Reference Files @@ -117,6 +172,10 @@ Use these skills in sequence: | **No warehouse available** | Create a SQL warehouse or provide `warehouse_id` explicitly | | **Poor query generation** | Add instructions and sample questions that reference actual column names | | **Slow queries** | Ensure warehouse is running; use OPTIMIZE on tables | +| **`export_genie` returns empty `serialized_space`** | Requires at least CAN EDIT permission on the space | +| **`import_genie` fails with permission error** | Ensure you have CREATE privileges in the target workspace folder | +| **Tables not found after migration** | Catalog name was not remapped — replace the source catalog name in `serialized_space` before calling `import_genie` | +| **Catalog name appears in SQL queries too** | `serialized_space` embeds catalog in table identifiers, SQL FROM clauses, join specs, and filters — a single `.replace(src, tgt)` on the whole string covers all occurrences | ## Related Skills diff --git a/databricks-skills/databricks-genie/spaces.md b/databricks-skills/databricks-genie/spaces.md index 225efe0e..92b1f7bf 100644 --- a/databricks-skills/databricks-genie/spaces.md +++ b/databricks-skills/databricks-genie/spaces.md @@ -161,6 +161,140 @@ To update an existing space: The tool finds the existing space by name and updates it. +## Export, Import & Migration + +Genie Spaces can be exported as a `serialized_space` JSON string that captures the full configuration: tables, instructions, certified SQL queries, sample questions, and layout. This enables cloning, backup, and cross-workspace migration. + +Use the `export_genie` and `import_genie` MCP tools for all export/import operations — no direct REST calls needed. + +### What is `serialized_space`? + +The `serialized_space` field is a JSON-encoded string returned by the Genie API. 
It contains: +- Data sources (Unity Catalog table identifiers — fully qualified as `catalog.schema.table`) +- Curated instructions and business logic +- Certified SQL queries (including inline catalog references) +- Join specifications and SQL filters +- Sample questions and benchmarks +- Space layout and version metadata + +Minimum structure: +```json +{"version": 1, "data_sources": {"tables": [{"identifier": "catalog.schema.table"}]}} +``` + +### Exporting a Space + +Use `export_genie` to export the full configuration (requires CAN EDIT permission): + +```python +exported = export_genie(space_id="01abc123...") +# Returns: +# { +# "space_id": "01abc123...", +# "title": "Sales Analytics", +# "warehouse_id": "abc123def456", +# "serialized_space": "{\"version\":2,\"data_sources\":{...},\"instructions\":{...}}" +# } +``` + +You can also get `serialized_space` inline via `get_genie`: + +```python +details = get_genie(space_id="01abc123...", include_serialized_space=True) +serialized = details["serialized_space"] +``` + +### Cloning a Space (Same Workspace) + +```python +# Step 1: Export the source space +source = export_genie(space_id="01abc123...") + +# Step 2: Import as a new space +import_genie( + warehouse_id=source["warehouse_id"], + serialized_space=source["serialized_space"], + title="Sales Analytics (Dev Copy)" +) +# Returns: {"space_id": "01def456...", "title": "Sales Analytics (Dev Copy)", "operation": "imported"} +``` + +### Migrating Across Workspaces with Catalog Remapping + +When migrating between environments (e.g. prod → dev), Unity Catalog names are often different. The `serialized_space` string contains the source catalog name **everywhere** — in table identifiers, SQL queries, join specs, and filter snippets. You must remap it before importing. 
+ +**Agent workflow (3 steps):** + +**Step 1 — Export from source workspace:** +```python +exported = export_genie(space_id="01f106e1239d14b28d6ab46f9c15e540") +# exported["serialized_space"] contains all references to source catalog +``` + +**Step 2 — Remap catalog name in `serialized_space`:** + +The agent does this as an inline string substitution between the two MCP calls: +```python +modified_serialized = exported["serialized_space"].replace( + "source_catalog_name", # e.g. "healthverity_claims_sample_patient_dataset" + "target_catalog_name" # e.g. "healthverity_claims_sample_patient_dataset_dev" +) +``` +This replaces all occurrences — table identifiers, SQL FROM clauses, join specs, and filter snippets. + +**Step 3 — Import to target workspace using `import_genie`:** +```python +import_genie( + warehouse_id="<target_warehouse_id>", # from list_warehouses() on target + serialized_space=modified_serialized, + title=exported["title"], + description=exported["description"] +) +``` + +### Batch Migration of Multiple Spaces + +To migrate several spaces at once, loop through space IDs. The agent calls `export_genie`, remaps the catalog, then calls `import_genie` for each: + +``` +For each space_id in [id1, id2, id3]: + 1. exported = export_genie(space_id) + 2. modified = exported["serialized_space"].replace(src_catalog, tgt_catalog) + 3. result = import_genie(warehouse_id, modified, title=exported["title"]) + 4. record result["space_id"] for updating databricks.yml +``` + +After migration, update `databricks.yml` with the new dev `space_id` values under the `dev` target's `genie_space_ids` variable. + +### Updating an Existing Space with New Config + +Use `create_or_update_genie` with `serialized_space` to push a config to an already-existing space without creating a new one: + +```python +# 1. Export from dev +dev_space = export_genie(space_id=DEV_SPACE_ID) + +# 2.
Remap catalog if environments use different catalog names +remapped = dev_space["serialized_space"].replace("dev_catalog", "prod_catalog") + +# 3. Push to prod (updates in place) +create_or_update_genie( + display_name="Sales Analytics", + table_identifiers=[], # ignored when serialized_space is provided + space_id=PROD_SPACE_ID, + warehouse_id=PROD_WAREHOUSE_ID, + serialized_space=remapped +) +``` + +### Permissions Required + +| Operation | Required Permission | +|-----------|-------------------| +| `export_genie` / `get_genie(include_serialized_space=True)` | CAN EDIT on source space | +| `import_genie` | Can create items in target workspace folder | +| `create_or_update_genie` with `serialized_space` (update) | CAN EDIT on target space | + ## Example End-to-End Workflow 1. **Generate synthetic data** using `databricks-synthetic-data-gen` skill: From 2d3dd0d5929a3731ab8e25bdf99e0d2706519c91 Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Sun, 8 Mar 2026 17:24:56 -0700 Subject: [PATCH 3/6] Refactor create_or_update_genie function for improved handling of serialized_space - Streamline logic for updating or creating Genie spaces based on the presence of serialized_space and space_id. - Enhance error handling for non-existent spaces and ensure proper updates are made when serialized_space is provided. - Update documentation to clarify the workflow for creating and updating Genie spaces, including handling sample questions. 
--- .../databricks_mcp_server/tools/genie.py | 170 +++++++++--------- databricks-skills/databricks-genie/SKILL.md | 86 +++++++-- 2 files changed, 162 insertions(+), 94 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/genie.py b/databricks-mcp-server/databricks_mcp_server/tools/genie.py index d9a829ec..521216bf 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/genie.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/genie.py @@ -107,24 +107,10 @@ def create_or_update_genie( operation = "created" - # When serialized_space is provided, use the public genie/spaces API - if serialized_space: - if space_id: - # Update existing space with serialized config - manager.genie_update_with_serialized_space( - space_id=space_id, - serialized_space=serialized_space, - title=display_name, - description=description, - warehouse_id=warehouse_id, - ) - operation = "updated" - else: - # Check if exists by name, then create or update - existing = manager.genie_find_by_name(display_name) - if existing: - operation = "updated" - space_id = existing.space_id + # When serialized_space is provided + if serialized_space: + if space_id: + # Update existing space with serialized config manager.genie_update_with_serialized_space( space_id=space_id, serialized_space=serialized_space, @@ -132,79 +118,95 @@ def create_or_update_genie( description=description, warehouse_id=warehouse_id, ) - else: - result = manager.genie_import( - warehouse_id=warehouse_id, - serialized_space=serialized_space, - title=display_name, - description=description, - ) - space_id = result.get("space_id", "") - else: - if space_id: - # Update existing space by ID - existing = manager.genie_get(space_id) - if existing: operation = "updated" - manager.genie_update( - space_id=space_id, - display_name=display_name, - description=description, - warehouse_id=warehouse_id, - table_identifiers=table_identifiers, - sample_questions=sample_questions, - ) else: - return {"error": 
f"Genie space {space_id} not found"} + # Check if exists by name, then create or update + existing = manager.genie_find_by_name(display_name) + if existing: + operation = "updated" + space_id = existing.space_id + manager.genie_update_with_serialized_space( + space_id=space_id, + serialized_space=serialized_space, + title=display_name, + description=description, + warehouse_id=warehouse_id, + ) + else: + result = manager.genie_import( + warehouse_id=warehouse_id, + serialized_space=serialized_space, + title=display_name, + description=description, + ) + space_id = result.get("space_id", "") + + # When serialized_space is not provided else: - # Check if exists by name first - existing = manager.genie_find_by_name(display_name) - if existing: - operation = "updated" - manager.genie_update( - space_id=existing.space_id, - display_name=display_name, - description=description, - warehouse_id=warehouse_id, - table_identifiers=table_identifiers, - sample_questions=sample_questions, - ) - space_id = existing.space_id + if space_id: + # Update existing space by ID + existing = manager.genie_get(space_id) + if existing: + operation = "updated" + manager.genie_update( + space_id=space_id, + display_name=display_name, + description=description, + warehouse_id=warehouse_id, + table_identifiers=table_identifiers, + sample_questions=sample_questions, + ) + else: + return {"error": f"Genie space {space_id} not found"} else: - # Create new - result = manager.genie_create( - display_name=display_name, - warehouse_id=warehouse_id, - table_identifiers=table_identifiers, - description=description, - ) - space_id = result.get("space_id", "") + # Check if exists by name first + existing = manager.genie_find_by_name(display_name) + if existing: + operation = "updated" + manager.genie_update( + space_id=existing.space_id, + display_name=display_name, + description=description, + warehouse_id=warehouse_id, + table_identifiers=table_identifiers, + sample_questions=sample_questions, + ) + 
space_id = existing.space_id + else: + # Create new + result = manager.genie_create( + display_name=display_name, + warehouse_id=warehouse_id, + table_identifiers=table_identifiers, + description=description, + ) + space_id = result.get("space_id", "") + + # Add sample questions if provided + if sample_questions and space_id: + manager.genie_add_sample_questions_batch(space_id, sample_questions) - # Add sample questions if provided - if sample_questions and space_id: - manager.genie_add_sample_questions_batch(space_id, sample_questions) - - response = { - "space_id": space_id, - "display_name": display_name, - "operation": operation, - "warehouse_id": warehouse_id, - "table_count": len(table_identifiers), - } - - try: - if space_id: - from ..manifest import track_resource + response = { + "space_id": space_id, + "display_name": display_name, + "operation": operation, + "warehouse_id": warehouse_id, + "table_count": len(table_identifiers), + } - track_resource( - resource_type="genie_space", - name=display_name, - resource_id=space_id, - ) - except Exception: - pass + try: + if space_id: + from ..manifest import track_resource + + track_resource( + resource_type="genie_space", + name=display_name, + resource_id=space_id, + ) + except Exception: + pass - return response + return response except Exception as e: return {"error": f"Failed to create/update Genie space '{display_name}': {e}"} diff --git a/databricks-skills/databricks-genie/SKILL.md b/databricks-skills/databricks-genie/SKILL.md index 185675d7..e1f4017a 100644 --- a/databricks-skills/databricks-genie/SKILL.md +++ b/databricks-skills/databricks-genie/SKILL.md @@ -113,28 +113,92 @@ import_genie( #### Example: Migrating Genie Spaces from Prod to Dev -When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space. 
+When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space. **Note:** Genie Space migration assumes that the underlying data assets (schemas and tables) remain structurally identical across environments. The migration of the actual catalogs, schemas, or tables themselves is outside the scope of Genie Space migration skills. -For instance, if your production tables reside in the `healthverity_claims_sample_patient_dataset` catalog, but your development tables are in `healthverity_claims_sample_patient_dataset_dev`, you can perform a string replacement on the exported configuration before importing it into the target workspace: +##### The Challenge: MCP Servers Are Workspace-Scoped + +Each Databricks MCP server instance connects to exactly one workspace (set via `DATABRICKS_CONFIG_PROFILE` at startup). This means a single MCP server cannot export from PROD and import into DEV in the same session — you need two server instances. + +##### Recommended Setup: Dual MCP Server Profiles + +Configure two Databricks MCP server entries in your IDE's MCP config (e.g. `~/.cursor/mcp.json`), one per workspace: + +```json +"databricks-prod": { + "command": "/path/to/.venv/bin/python", + "args": ["/path/to/databricks-mcp-server/run_server.py"], + "env": { "DATABRICKS_CONFIG_PROFILE": "prod" } +}, +"databricks-dev": { + "command": "/path/to/.venv/bin/python", + "args": ["/path/to/databricks-mcp-server/run_server.py"], + "env": { "DATABRICKS_CONFIG_PROFILE": "dev" } +} +``` + +Both servers run simultaneously after one IDE reload. This lets you call `export_genie` against `databricks-prod` and `import_genie` against `databricks-dev` within the same conversation — no further reloads needed. + +> **Tip:** The Databricks CLI profiles (`prod`, `dev`) referenced above must be defined in `~/.databrickscfg`. 
Both token-based and OAuth (`auth_type = databricks-cli`) profiles are supported. + +##### Full Migration Workflow + +**Step 1 — Export from PROD** using the `databricks-prod` MCP server: ```python -# 1. Export the Genie Space from the production workspace +# Call export_genie via the prod-scoped MCP server exported = export_genie(space_id="") +# exported["serialized_space"] contains the full config +# exported["warehouse_id"] is the PROD warehouse — do NOT reuse it for DEV +``` + +**Step 2 — Find the DEV warehouse ID:** -# 2. Remap the catalog name for the development environment +```python +# Call list_warehouses via the dev-scoped MCP server +list_warehouses() # note the warehouse_id for the DEV workspace +``` + +**Step 3 — Remap the catalog and import into DEV** using the `databricks-dev` MCP server: + +```python +# Catalog name differs between environments — replace ALL occurrences. +# serialized_space embeds the catalog in table identifiers, SQL FROM clauses, +# join specs, and filter snippets, so a single string replace covers everything. dev_serialized_space = exported["serialized_space"].replace( - "healthverity_claims_sample_patient_dataset", - "healthverity_claims_sample_patient_dataset_dev" + "my_prod_catalog", + "my_dev_catalog" ) -# 3. Import the modified space into the dev workspace -import_genie( +# Call import_genie via the dev-scoped MCP server +result = import_genie( warehouse_id="", serialized_space=dev_serialized_space, - title="HealthVerity Claims (Dev)" + title="My Space" ) +# result["space_id"] is the new DEV space ID +``` + +**Step 4 — Update `databricks.yml`** with the new DEV space IDs so they are tracked in the bundle: + +```yaml +targets: + dev: + variables: + genie_space_ids: ",," +``` + +**Step 5 — Save exports locally** for version control and future re-migrations: + +```json +// genie_exports/MySpace.json +{ + "space_id": "", + "title": "MySpace", + "warehouse_id": "", + "serialized_space": "{ ... 
}" +} ``` ## Workflow @@ -176,7 +240,9 @@ Use these skills in sequence: | **`import_genie` fails with permission error** | Ensure you have CREATE privileges in the target workspace folder | | **Tables not found after migration** | Catalog name was not remapped — replace the source catalog name in `serialized_space` before calling `import_genie` | | **Catalog name appears in SQL queries too** | `serialized_space` embeds catalog in table identifiers, SQL FROM clauses, join specs, and filters — a single `.replace(src, tgt)` on the whole string covers all occurrences | - +| **`export_genie` / `import_genie` land in the wrong workspace** | Each MCP server is workspace-scoped. Set up two named MCP server entries (one per profile) in your IDE's MCP config instead of switching a single server's profile mid-session | +| **MCP server doesn't pick up profile change** | The MCP process reads `DATABRICKS_CONFIG_PROFILE` once at startup — editing the config file requires an IDE reload to take effect | +| **`import_genie` fails with JSON parse error** | The `serialized_space` string may contain multi-line SQL arrays with `\n` escape sequences; flatten SQL arrays to single-line strings before passing to avoid double-escaping issues | ## Related Skills - **[databricks-agent-bricks](../databricks-agent-bricks/SKILL.md)** - Use Genie Spaces as agents inside Supervisor Agents From 85defafca2cab97923d0177387da63b0204bb6f3 Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Sun, 8 Mar 2026 17:34:18 -0700 Subject: [PATCH 4/6] Fix formatting issues and clean up whitespace in genie.py - Remove unnecessary blank lines to improve code readability. - Ensure consistent formatting across the file for better maintainability. 
--- databricks-mcp-server/databricks_mcp_server/tools/genie.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/genie.py b/databricks-mcp-server/databricks_mcp_server/tools/genie.py index 521216bf..a068bee3 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/genie.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/genie.py @@ -140,7 +140,7 @@ def create_or_update_genie( description=description, ) space_id = result.get("space_id", "") - + # When serialized_space is not provided else: if space_id: @@ -295,7 +295,6 @@ def get_genie(space_id: Optional[str] = None, include_serialized_space: bool = F return {"error": str(e)} - @mcp.tool def delete_genie(space_id: str) -> Dict[str, Any]: """ @@ -437,6 +436,7 @@ def import_genie( if space_id: try: from ..manifest import track_resource + track_resource( resource_type="genie_space", name=title or result.get("title", space_id), From 5a6064d443c8916310d5a4df5c4bc98da96a7b49 Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Sun, 8 Mar 2026 20:51:19 -0700 Subject: [PATCH 5/6] Enhance skills documentation and functionality for Genie Spaces - Updated the description in SKILL.md to include additional functionalities related to exporting, importing, and migrating Genie Spaces. - Improved the management and querying capabilities of Genie Spaces in the documentation. - Clarified the usage of `create_or_update_genie` for updating existing spaces, including new examples for metadata updates and full configuration updates. - Added detailed information on the structure of the exported data, including keys for title and description, to facilitate better understanding and usage of the export/import processes. 
--- databricks-skills/databricks-genie/SKILL.md | 24 +++--- databricks-skills/databricks-genie/spaces.md | 85 ++++++++++++++++---- 2 files changed, 77 insertions(+), 32 deletions(-) diff --git a/databricks-skills/databricks-genie/SKILL.md b/databricks-skills/databricks-genie/SKILL.md index e1f4017a..f0f1d0c9 100644 --- a/databricks-skills/databricks-genie/SKILL.md +++ b/databricks-skills/databricks-genie/SKILL.md @@ -1,11 +1,11 @@ --- name: databricks-genie -description: "Create and query Databricks Genie Spaces for natural language SQL exploration. Use when building Genie Spaces or asking questions via the Genie Conversation API." +description: "Create and query Databricks Genie Spaces for natural language SQL exploration. Use when building Genie Spaces, exporting and importing Genie Spaces, migrating Genie Spaces between workspaces or environments, or asking questions via the Genie Conversation API." --- # Databricks Genie -Create and query Databricks Genie Spaces - natural language interfaces for SQL-based data exploration. +Create, manage, and query Databricks Genie Spaces - natural language interfaces for SQL-based data exploration. ## Overview @@ -107,7 +107,8 @@ Clone to a new space (same catalog): import_genie( warehouse_id=exported["warehouse_id"], serialized_space=exported["serialized_space"], - title="Sales Analytics (Dev Copy)" + title=exported["title"],, # override title; omit to keep original + description=exported["description"], ) ``` @@ -149,7 +150,7 @@ Both servers run simultaneously after one IDE reload. 
This lets you call `export ```python # Call export_genie via the prod-scoped MCP server exported = export_genie(space_id="") -# exported["serialized_space"] contains the full config +# exported keys: warehouse_id, title, description, serialized_space # exported["warehouse_id"] is the PROD warehouse — do NOT reuse it for DEV ``` @@ -175,27 +176,20 @@ dev_serialized_space = exported["serialized_space"].replace( result = import_genie( warehouse_id="", serialized_space=dev_serialized_space, - title="My Space" + title=exported["title"], + description=exported["description"], ) # result["space_id"] is the new DEV space ID ``` -**Step 4 — Update `databricks.yml`** with the new DEV space IDs so they are tracked in the bundle: - -```yaml -targets: - dev: - variables: - genie_space_ids: ",," -``` - -**Step 5 — Save exports locally** for version control and future re-migrations: +**Step 4 — Save exports locally** for version control and future re-migrations: ```json // genie_exports/MySpace.json { "space_id": "", "title": "MySpace", + "description": "", "warehouse_id": "", "serialized_space": "{ ... }" } diff --git a/databricks-skills/databricks-genie/spaces.md b/databricks-skills/databricks-genie/spaces.md index 92b1f7bf..cd7c6947 100644 --- a/databricks-skills/databricks-genie/spaces.md +++ b/databricks-skills/databricks-genie/spaces.md @@ -153,33 +153,81 @@ Write sample questions that: ## Updating a Genie Space -To update an existing space: +`create_or_update_genie` handles both create and update automatically. There are two ways it locates an existing space to update: -1. **Add/remove tables**: Call `create_or_update_genie` with updated `table_identifiers` -2. **Update questions**: Include new `sample_questions` -3. **Change warehouse**: Provide a different `warehouse_id` +- **By `space_id`** (explicit, preferred): pass `space_id=` to target a specific space. 
+- **By `display_name`** (implicit fallback): if `space_id` is omitted, the tool searches for a space with a matching name and updates it if found; otherwise it creates a new one. -The tool finds the existing space by name and updates it. +### Simple field updates (tables, questions, warehouse) + +To update metadata without a serialized config: + +```python +create_or_update_genie( + display_name="Sales Analytics", + space_id="01abc123...", # omit to match by name instead + table_identifiers=[ # updated table list + "my_catalog.sales.customers", + "my_catalog.sales.orders", + "my_catalog.sales.products", + ], + sample_questions=[ # updated sample questions + "What were total sales last month?", + "Who are our top 10 customers by revenue?", + ], + warehouse_id="abc123def456", # omit to keep current / auto-detect + description="Updated description.", +) +``` + +### Full config update via `serialized_space` + +To push a complete serialized configuration to an existing space (the JSON string carries the full table metadata and also preserves all instructions, SQL examples, join specs, etc.): + +```python +create_or_update_genie( + display_name="Sales Analytics", # overrides title embedded in serialized_space + table_identifiers=[], # ignored when serialized_space is provided + space_id="01abc123...", # target space to overwrite + warehouse_id="abc123def456", # overrides warehouse embedded in serialized_space + description="Updated description.", # overrides description embedded in serialized_space; omit to keep the one in the payload + serialized_space=remapped_config, # JSON string from export_genie (after catalog remap if needed) +) +``` + +> **Note:** When `serialized_space` is provided, `table_identifiers` and `sample_questions` are ignored — the full config comes from the serialized payload. However, `display_name`, `warehouse_id`, and `description` are still applied as top-level overrides on top of the serialized payload. 
Omit any of them to keep the values embedded in `serialized_space`. ## Export, Import & Migration -Genie Spaces can be exported as a `serialized_space` JSON string that captures the full configuration: tables, instructions, certified SQL queries, sample questions, and layout. This enables cloning, backup, and cross-workspace migration. +`export_genie` returns a dictionary with five top-level keys: + +| Key | Description | +|-----|-------------| +| `space_id` | ID of the exported space | +| `title` | Display name of the space | +| `description` | Description of the space | +| `warehouse_id` | SQL warehouse associated with the space (workspace-specific — do **not** reuse across workspaces) | +| `serialized_space` | JSON-encoded string with the full space configuration (see below) | -Use the `export_genie` and `import_genie` MCP tools for all export/import operations — no direct REST calls needed. +This envelope enables cloning, backup, and cross-workspace migration. Use the `export_genie` and `import_genie` MCP tools for all export/import operations — no direct REST calls needed. ### What is `serialized_space`? -The `serialized_space` field is a JSON-encoded string returned by the Genie API. It contains: -- Data sources (Unity Catalog table identifiers — fully qualified as `catalog.schema.table`) -- Curated instructions and business logic -- Certified SQL queries (including inline catalog references) -- Join specifications and SQL filters -- Sample questions and benchmarks -- Space layout and version metadata +`serialized_space` is a JSON string (version 2) embedded inside the export envelope. 
Its top-level keys are: + +| Key | Contents | +|-----|----------| +| `version` | Schema version (currently `2`) | +| `config` | Space-level config: `sample_questions` shown in the UI | +| `data_sources` | `tables` array — each entry has a fully-qualified `identifier` (`catalog.schema.table`) and optional `column_configs` (format assistance, entity matching per column) | +| `instructions` | `example_question_sqls` (certified Q&A pairs), `join_specs` (join relationships between tables), `sql_snippets` (`filters` and `measures` with display names and usage instructions) | +| `benchmarks` | Evaluation Q&A pairs used to measure space quality | + +Catalog names appear **everywhere** inside `serialized_space` — in `data_sources.tables[].identifier`, SQL strings in `example_question_sqls`, `join_specs`, and `sql_snippets`. A single `.replace(src_catalog, tgt_catalog)` on the whole string is sufficient for catalog remapping. Minimum structure: ```json -{"version": 1, "data_sources": {"tables": [{"identifier": "catalog.schema.table"}]}} +{"version": 2, "data_sources": {"tables": [{"identifier": "catalog.schema.table"}]}} ``` ### Exporting a Space @@ -192,6 +240,7 @@ exported = export_genie(space_id="01abc123...") # { # "space_id": "01abc123...", # "title": "Sales Analytics", +# "description": "Explore sales data...", # "warehouse_id": "abc123def456", # "serialized_space": "{\"version\":2,\"data_sources\":{...},\"instructions\":{...}}" # } @@ -214,7 +263,8 @@ source = export_genie(space_id="01abc123...") import_genie( warehouse_id=source["warehouse_id"], serialized_space=source["serialized_space"], - title="Sales Analytics (Dev Copy)" + title=source["title"], # override title; omit to keep original + description=source["description"], ) # Returns: {"space_id": "01def456...", "title": "Sales Analytics (Dev Copy)", "operation": "imported"} ``` @@ -228,6 +278,7 @@ When migrating between environments (e.g. 
prod → dev), Unity Catalog names are **Step 1 — Export from source workspace:** ```python exported = export_genie(space_id="01f106e1239d14b28d6ab46f9c15e540") +# exported keys: space_id, title, description, warehouse_id, serialized_space # exported["serialized_space"] contains all references to source catalog ``` @@ -260,7 +311,7 @@ To migrate several spaces at once, loop through space IDs. The agent calls `expo For each space_id in [id1, id2, id3]: 1. exported = export_genie(space_id) 2. modified = exported["serialized_space"].replace(src_catalog, tgt_catalog) - 3. result = import_genie(warehouse_id, modified, title=exported["title"]) + 3. result = import_genie(warehouse_id, modified, title=exported["title"], description=exported["description"]) 4. record result["space_id"] for updating databricks.yml ``` From a2ff094c2926dbdd8bc710e67d68bce041051cd4 Mon Sep 17 00:00:00 2001 From: "yang.yang" Date: Mon, 9 Mar 2026 19:28:03 -0700 Subject: [PATCH 6/6] Tackling PR review feedback: Refactor Genie API methods and update documentation for serialized space handling - Streamlined the `create_or_update_genie` function to improve response structure and error handling. - Updated the `import_genie` method to reflect changes in serialized space versioning. - Enhanced documentation in SKILL.md and spaces.md to clarify usage of serialized space and migration processes. - Fixed minor formatting issues in the documentation for better readability. 
--- .../databricks_mcp_server/tools/genie.py | 38 +++--- databricks-skills/databricks-genie/SKILL.md | 108 +----------------- databricks-skills/databricks-genie/spaces.md | 50 ++++---- .../agent_bricks/manager.py | 2 +- 4 files changed, 49 insertions(+), 149 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/tools/genie.py b/databricks-mcp-server/databricks_mcp_server/tools/genie.py index a068bee3..ac401c6e 100644 --- a/databricks-mcp-server/databricks_mcp_server/tools/genie.py +++ b/databricks-mcp-server/databricks_mcp_server/tools/genie.py @@ -186,27 +186,27 @@ def create_or_update_genie( if sample_questions and space_id: manager.genie_add_sample_questions_batch(space_id, sample_questions) - response = { - "space_id": space_id, - "display_name": display_name, - "operation": operation, - "warehouse_id": warehouse_id, - "table_count": len(table_identifiers), - } + response = { + "space_id": space_id, + "display_name": display_name, + "operation": operation, + "warehouse_id": warehouse_id, + "table_count": len(table_identifiers), + } - try: - if space_id: - from ..manifest import track_resource + try: + if space_id: + from ..manifest import track_resource - track_resource( - resource_type="genie_space", - name=display_name, - resource_id=space_id, - ) - except Exception: - pass + track_resource( + resource_type="genie_space", + name=display_name, + resource_id=space_id, + ) + except Exception: + pass - return response + return response except Exception as e: return {"error": f"Failed to create/update Genie space '{display_name}': {e}"} @@ -398,7 +398,7 @@ def import_genie( serialized_space: The JSON string from export_genie() containing the full space configuration (tables, instructions, SQL queries, layout). 
Can also be constructed manually: - '{"version":1,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' + '{"version":2,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' title: Optional title override (defaults to the exported space's title) description: Optional description override parent_path: Optional workspace folder path where the space will be registered diff --git a/databricks-skills/databricks-genie/SKILL.md b/databricks-skills/databricks-genie/SKILL.md index f0f1d0c9..a5c000f7 100644 --- a/databricks-skills/databricks-genie/SKILL.md +++ b/databricks-skills/databricks-genie/SKILL.md @@ -107,103 +107,12 @@ Clone to a new space (same catalog): import_genie( warehouse_id=exported["warehouse_id"], serialized_space=exported["serialized_space"], - title=exported["title"],, # override title; omit to keep original + title=exported["title"], # override title; omit to keep original description=exported["description"], ) ``` -#### Example: Migrating Genie Spaces from Prod to Dev - -When migrating Genie Spaces between environments (e.g., from a `prod` target to a `dev` target defined in your `databricks.yml`), you must update the catalog references within the serialized space. - -**Note:** Genie Space migration assumes that the underlying data assets (schemas and tables) remain structurally identical across environments. The migration of the actual catalogs, schemas, or tables themselves is outside the scope of Genie Space migration skills. - -##### The Challenge: MCP Servers Are Workspace-Scoped - -Each Databricks MCP server instance connects to exactly one workspace (set via `DATABRICKS_CONFIG_PROFILE` at startup). This means a single MCP server cannot export from PROD and import into DEV in the same session — you need two server instances. - -##### Recommended Setup: Dual MCP Server Profiles - -Configure two Databricks MCP server entries in your IDE's MCP config (e.g. 
`~/.cursor/mcp.json`), one per workspace: - -```json -"databricks-prod": { - "command": "/path/to/.venv/bin/python", - "args": ["/path/to/databricks-mcp-server/run_server.py"], - "env": { "DATABRICKS_CONFIG_PROFILE": "prod" } -}, -"databricks-dev": { - "command": "/path/to/.venv/bin/python", - "args": ["/path/to/databricks-mcp-server/run_server.py"], - "env": { "DATABRICKS_CONFIG_PROFILE": "dev" } -} -``` - -Both servers run simultaneously after one IDE reload. This lets you call `export_genie` against `databricks-prod` and `import_genie` against `databricks-dev` within the same conversation — no further reloads needed. - -> **Tip:** The Databricks CLI profiles (`prod`, `dev`) referenced above must be defined in `~/.databrickscfg`. Both token-based and OAuth (`auth_type = databricks-cli`) profiles are supported. - -##### Full Migration Workflow - -**Step 1 — Export from PROD** using the `databricks-prod` MCP server: - -```python -# Call export_genie via the prod-scoped MCP server -exported = export_genie(space_id="") -# exported keys: warehouse_id, title, description, serialized_space -# exported["warehouse_id"] is the PROD warehouse — do NOT reuse it for DEV -``` - -**Step 2 — Find the DEV warehouse ID:** - -```python -# Call list_warehouses via the dev-scoped MCP server -list_warehouses() # note the warehouse_id for the DEV workspace -``` - -**Step 3 — Remap the catalog and import into DEV** using the `databricks-dev` MCP server: - -```python -# Catalog name differs between environments — replace ALL occurrences. -# serialized_space embeds the catalog in table identifiers, SQL FROM clauses, -# join specs, and filter snippets, so a single string replace covers everything. 
-dev_serialized_space = exported["serialized_space"].replace( - "my_prod_catalog", - "my_dev_catalog" -) - -# Call import_genie via the dev-scoped MCP server -result = import_genie( - warehouse_id="", - serialized_space=dev_serialized_space, - title=exported["title"], - description=exported["description"], -) -# result["space_id"] is the new DEV space ID -``` - -**Step 4 — Save exports locally** for version control and future re-migrations: - -```json -// genie_exports/MySpace.json -{ - "space_id": "", - "title": "MySpace", - "description": "", - "warehouse_id": "", - "serialized_space": "{ ... }" -} -``` - -## Workflow - -``` -1. Inspect tables → get_table_details -2. Create space → create_or_update_genie -3. Query space → ask_genie (or test in Databricks UI) -4. Curate (optional) → Use Databricks UI to add instructions -5. Export/migrate → export_genie → import_genie -``` +> **Cross-workspace migration:** Each MCP server is workspace-scoped. Configure one server entry per workspace profile in your IDE's MCP config, then export from the source server and import via the target server. See [spaces.md §Migration](spaces.md#migrating-across-workspaces-with-catalog-remapping) for the full workflow. 
## Reference Files @@ -225,18 +134,7 @@ Use these skills in sequence: ## Common Issues -| Issue | Solution | -|-------|----------| -| **No warehouse available** | Create a SQL warehouse or provide `warehouse_id` explicitly | -| **Poor query generation** | Add instructions and sample questions that reference actual column names | -| **Slow queries** | Ensure warehouse is running; use OPTIMIZE on tables | -| **`export_genie` returns empty `serialized_space`** | Requires at least CAN EDIT permission on the space | -| **`import_genie` fails with permission error** | Ensure you have CREATE privileges in the target workspace folder | -| **Tables not found after migration** | Catalog name was not remapped — replace the source catalog name in `serialized_space` before calling `import_genie` | -| **Catalog name appears in SQL queries too** | `serialized_space` embeds catalog in table identifiers, SQL FROM clauses, join specs, and filters — a single `.replace(src, tgt)` on the whole string covers all occurrences | -| **`export_genie` / `import_genie` land in the wrong workspace** | Each MCP server is workspace-scoped. Set up two named MCP server entries (one per profile) in your IDE's MCP config instead of switching a single server's profile mid-session | -| **MCP server doesn't pick up profile change** | The MCP process reads `DATABRICKS_CONFIG_PROFILE` once at startup — editing the config file requires an IDE reload to take effect | -| **`import_genie` fails with JSON parse error** | The `serialized_space` string may contain multi-line SQL arrays with `\n` escape sequences; flatten SQL arrays to single-line strings before passing to avoid double-escaping issues | +See [spaces.md §Troubleshooting](spaces.md#troubleshooting) for a full list of issues and solutions. 
## Related Skills - **[databricks-agent-bricks](../databricks-agent-bricks/SKILL.md)** - Use Genie Spaces as agents inside Supervisor Agents diff --git a/databricks-skills/databricks-genie/spaces.md b/databricks-skills/databricks-genie/spaces.md index cd7c6947..c902fd11 100644 --- a/databricks-skills/databricks-genie/spaces.md +++ b/databricks-skills/databricks-genie/spaces.md @@ -4,12 +4,7 @@ This guide covers creating and managing Genie Spaces for SQL-based data explorat ## What is a Genie Space? -A Genie Space connects to Unity Catalog tables and translates natural language questions into SQL queries. The system: - -1. **Understands** the table schemas and relationships -2. **Generates** SQL queries from natural language -3. **Executes** queries on a SQL warehouse -4. **Presents** results in a conversational format +A Genie Space connects to Unity Catalog tables and translates natural language questions into SQL — understanding schemas, generating queries, executing them on a SQL warehouse, and presenting results conversationally. ## Creation Workflow @@ -319,24 +314,7 @@ After migration, update `databricks.yml` with the new dev `space_id` values unde ### Updating an Existing Space with New Config -Use `create_or_update_genie` with `serialized_space` to push a config to an already-existing space without creating a new one: - -```python -# 1. Export from dev -dev_space = export_genie(space_id=DEV_SPACE_ID) - -# 2. Remap catalog if environments use different catalog names -remapped = dev_space["serialized_space"].replace("dev_catalog", "prod_catalog") - -# 3. 
Push to prod (updates in place) -create_or_update_genie( - display_name="Sales Analytics", - table_identifiers=[], # ignored when serialized_space is provided - space_id=PROD_SPACE_ID, - warehouse_id=PROD_WAREHOUSE_ID, - serialized_space=remapped -) -``` +To push a serialized config to an already-existing space (rather than creating a new one), use `create_or_update_genie` with `space_id=` and `serialized_space=`. The export → remap → push pattern is identical to the migration steps above; just replace `import_genie` with `create_or_update_genie(space_id=TARGET_SPACE_ID, ...)` as the final call. ### Permissions Required @@ -386,3 +364,27 @@ create_or_update_genie( - Add table and column comments - Include sample questions that demonstrate the vocabulary - Add instructions via the Databricks Genie UI + +### `export_genie` returns empty `serialized_space` + +Requires at least **CAN EDIT** permission on the space. + +### `import_genie` fails with permission error + +Ensure you have CREATE privileges in the target workspace folder. + +### Tables not found after migration + +Catalog name was not remapped — replace the source catalog name in `serialized_space` before calling `import_genie`. The catalog appears in table identifiers, SQL FROM clauses, join specs, and filter snippets; a single `.replace(src_catalog, tgt_catalog)` on the whole string covers all occurrences. + +### `export_genie` / `import_genie` land in the wrong workspace + +Each MCP server is workspace-scoped. Set up two named MCP server entries (one per profile) in your IDE's MCP config instead of switching a single server's profile mid-session. + +### MCP server doesn't pick up profile change + +The MCP process reads `DATABRICKS_CONFIG_PROFILE` once at startup — editing the config file requires an IDE reload to take effect. + +### `import_genie` fails with JSON parse error + +The `serialized_space` string may contain multi-line SQL arrays with `\n` escape sequences. 
Flatten SQL arrays to single-line strings before passing to avoid double-escaping issues. diff --git a/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py b/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py index 206f9adf..3aa8e5fc 100644 --- a/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py +++ b/databricks-tools-core/databricks_tools_core/agent_bricks/manager.py @@ -1004,7 +1004,7 @@ def genie_update_with_serialized_space( space_id: The Genie space ID to update serialized_space: The JSON string containing the new space configuration. Obtain from genie_export() or construct manually: - '{"version":1,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' + '{"version":2,"data_sources":{"tables":[{"identifier":"cat.schema.table"}]}}' title: Optional title override description: Optional description override warehouse_id: Optional warehouse override