From c83471e12db234605e8fbfc036913f959d72371d Mon Sep 17 00:00:00 2001 From: JC-wk <196318169+JC-wk@users.noreply.github.com> Date: Tue, 4 Nov 2025 13:04:07 +0000 Subject: [PATCH 1/3] Implement address space removal --- CHANGELOG.md | 1 + api_app/_version.py | 2 +- api_app/api/routes/workspaces.py | 21 +++++++++- .../authoring-workspace-templates.md | 2 +- .../azureml/template_schema.json | 7 ++++ .../databricks/template_schema.json | 39 +++++++++++++++---- 6 files changed, 61 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a09d0c7605..5baac30a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ BUG FIXES: * Fix issue where multiple lists in config.yaml incorrectly caused a validation error ([#4711](https://github.com/microsoft/AzureTRE/pull/4711)) * Updated config_schema.json to include missing values. ([#4712](https://github.com/microsoft/AzureTRE/issues/4712))([#4714](https://github.com/microsoft/AzureTRE/issues/4714)) * Remove workspace upgrade step from databricks template ([#4726](https://github.com/microsoft/AzureTRE/pull/4726)) +* `address_spaces` will now be removed from a workspace when a workspace service that uses an `address_space` is deleted to prevent IP address range exhaustion ([#4727](https://github.com/microsoft/AzureTRE/issues/4727)) * Update Starlette and FastAPI versions ([#4738](https://github.com/microsoft/AzureTRE/pull/4738)) ## 0.25.0 (July 18, 2025) diff --git a/api_app/_version.py b/api_app/_version.py index 71290cd566..97b0e6226c 100644 --- a/api_app/_version.py +++ b/api_app/_version.py @@ -1 +1 @@ -__version__ = "0.25.1" +__version__ = "0.25.2" diff --git a/api_app/api/routes/workspaces.py b/api_app/api/routes/workspaces.py index a2dcbe52fa..0f99b0b2b2 100644 --- a/api_app/api/routes/workspaces.py +++ b/api_app/api/routes/workspaces.py @@ -310,7 +310,26 @@ async def patch_workspace_service(resource_patch: ResourcePatch, response: Respo @workspace_services_workspace_router.delete("/workspaces/{workspace_id}/workspace-services/{service_id}", response_model=OperationInResponse, name=strings.API_DELETE_WORKSPACE_SERVICE, dependencies=[Depends(get_current_workspace_owner_user)]) -async def delete_workspace_service(response: Response, user=Depends(get_current_workspace_owner_user), workspace=Depends(get_workspace_by_id_from_path), workspace_service=Depends(get_workspace_service_by_id_from_path), workspace_service_repo=Depends(get_repository(WorkspaceServiceRepository)), user_resource_repo=Depends(get_repository(UserResourceRepository)), operations_repo=Depends(get_repository(OperationRepository)), resource_template_repo=Depends(get_repository(ResourceTemplateRepository)), resource_history_repo=Depends(get_repository(ResourceHistoryRepository))) -> OperationInResponse: +async def delete_workspace_service(response: Response, user=Depends(get_current_workspace_owner_user), workspace=Depends(get_workspace_by_id_from_path), workspace_service=Depends(get_workspace_service_by_id_from_path), workspace_service_repo=Depends(get_repository(WorkspaceServiceRepository)), user_resource_repo=Depends(get_repository(UserResourceRepository)), operations_repo=Depends(get_repository(OperationRepository)), resource_template_repo=Depends(get_repository(ResourceTemplateRepository)), resource_history_repo=Depends(get_repository(ResourceHistoryRepository)), workspace_repo=Depends(get_repository(WorkspaceRepository))) -> OperationInResponse: + # If the workspace service owns an allocated address_space, remove it from the parent workspace before uninstall + try: + address_to_free = workspace_service.properties.get("address_space") + if address_to_free: + # ensure the workspace has the address_spaces property + workspace_address_spaces = workspace.properties.get("address_spaces", []) + if address_to_free in workspace_address_spaces: + new_address_spaces = [a for a in workspace_address_spaces if a != address_to_free] + workspace_patch = ResourcePatch() + workspace_patch.properties = {"address_spaces": new_address_spaces} + try: + await workspace_repo.patch_workspace(workspace, workspace_patch, workspace.etag, resource_template_repo, resource_history_repo, user, False) + except CosmosAccessConditionFailedError: + # let the client try again or fail the uninstall flow due to etag conflict + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=strings.ETAG_CONFLICT) + except Exception: + # don't block uninstall if patching the workspace fails for unexpected reasons + logger.exception("Failed to free workspace address space before uninstall") + if await delete_validation(workspace_service, workspace_service_repo): operation = await send_uninstall_message( resource=workspace_service, diff --git a/docs/tre-workspace-authors/authoring-workspace-templates.md b/docs/tre-workspace-authors/authoring-workspace-templates.md index 23614bb29f..801b3c4423 100644 --- a/docs/tre-workspace-authors/authoring-workspace-templates.md +++ b/docs/tre-workspace-authors/authoring-workspace-templates.md @@ -109,7 +109,7 @@ The size of the `address_space` will default to `/24`, however other sizes can b The `address_space` allocation will only take place during the install phase of a deployment, as this is a breaking change to your template you should increment the major version of your template, this means a you must deploy a new resource instead of upgrading an existing one. -In your install pipeline you also need to include a workspace upgrade step for the workspace to update it's `address_spaces` property. +In your install and uninstall pipelines you also need to include a workspace upgrade step for the workspace to update it's `address_spaces` property. ```json "pipeline": { diff --git a/templates/workspace_services/azureml/template_schema.json b/templates/workspace_services/azureml/template_schema.json index 581b8993df..8c8b910e63 100644 --- a/templates/workspace_services/azureml/template_schema.json +++ b/templates/workspace_services/azureml/template_schema.json @@ -391,6 +391,13 @@ }, { "stepId": "main" + }, + { + "stepId": "f720975a-c81e-477e-854e-53fde86e5e57", + "stepTitle": "Upgrade to ensure workspace is aware of address space removal", + "resourceType": "workspace", + "resourceAction": "upgrade", + "properties": [] } ] } diff --git a/templates/workspace_services/databricks/template_schema.json b/templates/workspace_services/databricks/template_schema.json index 1e43ba0395..531d9ddb37 100644 --- a/templates/workspace_services/databricks/template_schema.json +++ b/templates/workspace_services/databricks/template_schema.json @@ -78,7 +78,9 @@ "name": "databricks", "description": "Communication with Azure Databricks dependancies.", "source_addresses": "{{ resource.properties.databricks_address_prefixes }}", - "destination_addresses": [ "AzureDatabricks"], + "destination_addresses": [ + "AzureDatabricks" + ], "destination_ports": [ "443" ], @@ -114,9 +116,15 @@ "name": "AzureAD", "description": "AAD access", "source_addresses": "{{ resource.properties.workspace_address_spaces }}", - "destination_addresses": ["AzureActiveDirectory"], - "destination_ports": ["*"], - "protocols": ["TCP"] + "destination_addresses": [ + "AzureActiveDirectory" + ], + "destination_ports": [ + "*" + ], + "protocols": [ + "TCP" + ] } ] } @@ -212,7 +220,9 @@ "name": "databricks", "description": "Communication with Azure Databricks dependancies.", "source_addresses": "{{ resource.properties.databricks_address_prefixes }}", - "destination_addresses": [ "AzureDatabricks"], + "destination_addresses": [ + "AzureDatabricks" + ], "destination_ports": [ "443" ], @@ -248,9 +258,15 @@ "name": "AzureAD", "description": "AAD access", "source_addresses": "{{ resource.properties.workspace_address_spaces }}", - "destination_addresses": ["AzureActiveDirectory"], - "destination_ports": ["*"], - "protocols": ["TCP"] + "destination_addresses": [ + "AzureActiveDirectory" + ], + "destination_ports": [ + "*" + ], + "protocols": [ + "TCP" + ] } ] } @@ -352,6 +368,13 @@ }, { "stepId": "main" + }, + { + "stepId": "9c4dc64b-8fbf-4e77-a7f6-48fb33423504", + "stepTitle": "Upgrade to ensure workspace is aware of address space removal", + "resourceType": "workspace", + "resourceAction": "upgrade", + "properties": [] } ] } From 029f5068dba30b64d87bb1f893531b59e01f2ce1 Mon Sep 17 00:00:00 2001 From: JC-wk <196318169+JC-wk@users.noreply.github.com> Date: Tue, 4 Nov 2025 13:10:53 +0000 Subject: [PATCH 2/3] update template versions --- templates/workspace_services/azureml/porter.yaml | 2 +- templates/workspace_services/databricks/porter.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/workspace_services/azureml/porter.yaml b/templates/workspace_services/azureml/porter.yaml index 0cf012d630..ff9c631345 100644 --- a/templates/workspace_services/azureml/porter.yaml +++ b/templates/workspace_services/azureml/porter.yaml @@ -1,7 +1,7 @@ --- schemaVersion: 1.0.0 name: tre-service-azureml -version: 0.10.0 +version: 0.10.1 description: "An Azure TRE service for Azure Machine Learning" registry: azuretre dockerfile: Dockerfile.tmpl diff --git a/templates/workspace_services/databricks/porter.yaml b/templates/workspace_services/databricks/porter.yaml index 8fdffa2db8..e3222da2f5 100644 --- a/templates/workspace_services/databricks/porter.yaml +++ b/templates/workspace_services/databricks/porter.yaml @@ -1,7 +1,7 @@ --- schemaVersion: 1.0.0 name: tre-service-databricks -version: 1.0.14 +version: 1.0.15 description: "An Azure TRE service for Azure Databricks." registry: azuretre dockerfile: Dockerfile.tmpl From c9f9307590e67b9870f5284195953cc48292f947 Mon Sep 17 00:00:00 2001 From: JC-wk <196318169+JC-wk@users.noreply.github.com> Date: Wed, 12 Nov 2025 17:30:13 +0000 Subject: [PATCH 3/3] Move address_space deletion to service_bus --- api_app/api/routes/workspaces.py | 21 +----------- .../service_bus/deployment_status_updater.py | 33 ++++++++++++++++++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/api_app/api/routes/workspaces.py b/api_app/api/routes/workspaces.py index 0f99b0b2b2..a2dcbe52fa 100644 --- a/api_app/api/routes/workspaces.py +++ b/api_app/api/routes/workspaces.py @@ -310,26 +310,7 @@ async def patch_workspace_service(resource_patch: ResourcePatch, response: Respo @workspace_services_workspace_router.delete("/workspaces/{workspace_id}/workspace-services/{service_id}", response_model=OperationInResponse, name=strings.API_DELETE_WORKSPACE_SERVICE, dependencies=[Depends(get_current_workspace_owner_user)]) -async def delete_workspace_service(response: Response, user=Depends(get_current_workspace_owner_user), workspace=Depends(get_workspace_by_id_from_path), workspace_service=Depends(get_workspace_service_by_id_from_path), workspace_service_repo=Depends(get_repository(WorkspaceServiceRepository)), user_resource_repo=Depends(get_repository(UserResourceRepository)), operations_repo=Depends(get_repository(OperationRepository)), resource_template_repo=Depends(get_repository(ResourceTemplateRepository)), resource_history_repo=Depends(get_repository(ResourceHistoryRepository)), workspace_repo=Depends(get_repository(WorkspaceRepository))) -> OperationInResponse: - # If the workspace service owns an allocated address_space, remove it from the parent workspace before uninstall - try: - address_to_free = workspace_service.properties.get("address_space") - if address_to_free: - # ensure the workspace has the address_spaces property - workspace_address_spaces = workspace.properties.get("address_spaces", []) - if address_to_free in workspace_address_spaces: - new_address_spaces = [a for a in workspace_address_spaces if a != address_to_free] - workspace_patch = ResourcePatch() - workspace_patch.properties = {"address_spaces": new_address_spaces} - try: - await workspace_repo.patch_workspace(workspace, workspace_patch, workspace.etag, resource_template_repo, resource_history_repo, user, False) - except CosmosAccessConditionFailedError: - # let the client try again or fail the uninstall flow due to etag conflict - raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=strings.ETAG_CONFLICT) - except Exception: - # don't block uninstall if patching the workspace fails for unexpected reasons - logger.exception("Failed to free workspace address space before uninstall") - +async def delete_workspace_service(response: Response, user=Depends(get_current_workspace_owner_user), workspace=Depends(get_workspace_by_id_from_path), workspace_service=Depends(get_workspace_service_by_id_from_path), workspace_service_repo=Depends(get_repository(WorkspaceServiceRepository)), user_resource_repo=Depends(get_repository(UserResourceRepository)), operations_repo=Depends(get_repository(OperationRepository)), resource_template_repo=Depends(get_repository(ResourceTemplateRepository)), resource_history_repo=Depends(get_repository(ResourceHistoryRepository))) -> OperationInResponse: if await delete_validation(workspace_service, workspace_service_repo): operation = await send_uninstall_message( resource=workspace_service, diff --git a/api_app/service_bus/deployment_status_updater.py b/api_app/service_bus/deployment_status_updater.py index 41670464c7..43c7852765 100644 --- a/api_app/service_bus/deployment_status_updater.py +++ b/api_app/service_bus/deployment_status_updater.py @@ -6,7 +6,7 @@ from pydantic import ValidationError, parse_obj_as from api.routes.resource_helpers import get_timestamp -from models.domain.resource import Output +from models.domain.resource import Output, ResourceType from db.repositories.resources_history import ResourceHistoryRepository from models.domain.request_action import RequestAction from db.repositories.resource_templates import ResourceTemplateRepository @@ -21,6 +21,9 @@ from models.domain.operation import DeploymentStatusUpdateMessage, Operation, OperationStep, Status from resources import strings from services.logging import logger, tracer +from db.repositories.workspaces import WorkspaceRepository +from models.schemas.resource import ResourcePatch +from azure.cosmos.exceptions import CosmosAccessConditionFailedError class DeploymentStatusUpdater(): @@ -187,6 +190,34 @@ async def update_status_in_database(self, message: DeploymentStatusUpdateMessage next_step.status = Status.UpdatingFailed await self.update_overall_operation_status(operation, next_step, is_last_step) await self.operations_repo.update_item(operation) + # If the 'main' step succeeded for an uninstall operation, free any allocated address space + # owned by a WorkspaceService resource. We trigger cleanup when the step with templateStepId == 'main' + # is successful; this ensures the primary resource has been destroyed successfully before attempting to free the ip address space + try: + # if the step that just succeeded is the main step for this operation, and this is an uninstall, + # proceed with post-uninstall cleanup. No need to scan the operation.steps list again. + if step_to_update.templateStepId == "main" and step_to_update.is_success() and operation.action == RequestAction.UnInstall: + if resource_to_persist.get("resourceType") == ResourceType.WorkspaceService: + address_to_free = resource_to_persist.get("properties", {}).get("address_space") + parent_workspace_id = resource_to_persist.get("workspaceId") + if address_to_free and parent_workspace_id: + try: + workspace_repo = await WorkspaceRepository.create() + workspace = await workspace_repo.get_workspace_by_id(parent_workspace_id) + workspace_address_spaces = workspace.properties.get("address_spaces", []) + if address_to_free in workspace_address_spaces: + new_address_spaces = [a for a in workspace_address_spaces if a != address_to_free] + workspace_patch = ResourcePatch() + workspace_patch.properties = {"address_spaces": new_address_spaces} + try: + await workspace_repo.patch_workspace(workspace, workspace_patch, workspace.etag, self.resource_template_repo, self.resource_history_repo, operation.user, False) + logger.info(f"Freed address space {address_to_free} from workspace {parent_workspace_id} after successful uninstall of {resource_id}") + except CosmosAccessConditionFailedError: + logger.exception("ETag conflict when freeing workspace address space after successful uninstall") + except Exception: + logger.exception("Failed to free workspace address space after successful uninstall") + except Exception: + logger.exception("Unexpected error during post-uninstall address space cleanup") result = True