diff --git a/runner_manager/backend/scaleway.py b/runner_manager/backend/scaleway.py
index 5cb5f098..320e92f0 100644
--- a/runner_manager/backend/scaleway.py
+++ b/runner_manager/backend/scaleway.py
@@ -1,4 +1,4 @@
-# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false
+# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false, reportAttributeAccessIssue=false
 import logging
 import os
 import re
@@ -7,17 +7,16 @@
 
 from pydantic import Field
 from redis_om import NotFoundError
-from scaleway import Client  # type: ignore[import-untyped]
-from scaleway.instance.v1 import (  # type: ignore[import-untyped]
+from scaleway import Client
+from scaleway.instance.v1 import (
     Image,
     Server,
     ServerAction,
     ServerState,
+    VolumeServerTemplate,
 )
-from scaleway.instance.v1.custom_api import (
-    InstanceUtilsV1API,  # type: ignore[import-untyped]
-)
-from scaleway.marketplace.v2 import MarketplaceV2API  # type: ignore[import-untyped]
+from scaleway.instance.v1.custom_api import InstanceUtilsV1API
+from scaleway.marketplace.v2 import MarketplaceV2API
 
 from runner_manager.backend.base import BaseBackend
 from runner_manager.models.backend import (
@@ -264,6 +263,53 @@ def create(self, runner: Runner) -> Runner:
         use_gateway = bool(self.instance_config.public_gateway_id)
         dynamic_ip_required = self.instance_config.enable_public_ip and not use_gateway
 
+        # Prepare volumes configuration
+        # Create explicit boot volume with specified size to avoid default 10GB
+        # Volume types:
+        # - l_ssd: Local SSD storage (fast, can create raw volumes)
+        # - sbs_volume: Block Storage (cannot create raw, must use base_snapshot from image)
+        volumes_config = None
+
+        if not self.instance_config.volumes:
+            # The size is passed in bytes, computed from decimal gigabytes
+            volume_size_bytes = self.instance_config.volume_size_gb * 1_000_000_000
+
+            if self.instance_config.volume_type == "l_ssd":
+                volumes_config = {
+                    "0": VolumeServerTemplate(
+                        name=f"{runner.name}-boot",
+                        size=volume_size_bytes,
+                        volume_type="l_ssd",
+                    )
+                }
+                log.info(
+                    f"Creating l_ssd boot volume: {self.instance_config.volume_size_gb}GB"
+                )
+            else:
+                img = self.get_image(self.instance_config.image)
+                if img.root_volume and img.root_volume.id:
+                    volumes_config = {
+                        "0": VolumeServerTemplate(
+                            name=f"{runner.name}-boot",
+                            size=volume_size_bytes,
+                            volume_type="sbs_volume",
+                            base_snapshot=img.root_volume.id,
+                            boot=True,
+                        )
+                    }
+                    log.info(
+                        f"Creating sbs_volume boot volume: {self.instance_config.volume_size_gb}GB "
+                        f"from snapshot {img.root_volume.id}"
+                    )
+                else:
+                    # volumes_config stays None so the image's own volume is used
+                    log.warning(
+                        f"Image {img.id} has no root_volume, using default volume from image"
+                    )
+        else:
+            # Use user-provided volumes configuration
+            volumes_config = self.instance_config.volumes
+
         # Create server using _create_server
         # Note: In SDK 2.10.3, 'protected' is a required parameter
         response = self.client._create_server(
@@ -278,6 +324,7 @@ def create(self, runner: Runner) -> Runner:
             project=self.config.project_id,
             organization=self.config.organization_id,
             security_group=security_group,
+            volumes=volumes_config,
         )
 
         server = response.server
@@ -391,6 +438,10 @@ def delete(self, runner: Runner) -> int:
             log.info(f"Server {runner.instance_id} deleted successfully")
 
             # Delete associated volumes
+            # Note: The behavior differs by volume type:
+            # - l_ssd (local storage): Usually auto-deleted with the server
+            # - sbs_volume (block storage): Persists after server deletion, must be deleted manually
+            # The Instance API manages both types, but sbs volumes need explicit cleanup
             for volume_id in volume_ids:
                 try:
                     self.client.delete_volume(
@@ -399,7 +450,18 @@ def delete(self, runner: Runner) -> int:
                     )
                     log.info(f"Volume {volume_id} deleted successfully")
                 except Exception as vol_error:
-                    log.warning(f"Failed to delete volume {volume_id}: {vol_error}")
+                    # Volume may already be deleted automatically (especially l_ssd volumes)
+                    # or might not be found if searching in wrong scope.
+                    # Accept the same "not found" spelling as the outer handler
+                    # below, plus the "not_found" error-code form.
+                    error_msg = str(vol_error).lower()
+                    not_found_markers = ("404", "not found", "not_found")
+                    if any(marker in error_msg for marker in not_found_markers):
+                        log.info(
+                            f"Volume {volume_id} not found - may have been auto-deleted with server or already cleaned up"
+                        )
+                    else:
+                        log.warning(f"Failed to delete volume {volume_id}: {vol_error}")
 
         except Exception as e:
             if "404" in str(e) or "not found" in str(e).lower():
diff --git a/runner_manager/models/backend.py b/runner_manager/models/backend.py
index 0a219027..dc32883a 100644
--- a/runner_manager/models/backend.py
+++ b/runner_manager/models/backend.py
@@ -330,3 +330,7 @@ class ScalewayInstanceConfig(InstanceConfig):
     boot_type: str = "local"
     volumes: Dict[str, str] = {}
     tags: List[str] = []
+    # Size of the boot volume in GB
+    volume_size_gb: int = 20
+    # Boot volume storage: local SSD or block storage (sbs_volume is more universal)
+    volume_type: Literal["l_ssd", "sbs_volume"] = "sbs_volume"
diff --git a/tests/unit/backend/test_scaleway.py b/tests/unit/backend/test_scaleway.py
index 3cb0e6fe..f6e8f786 100644
--- a/tests/unit/backend/test_scaleway.py
+++ b/tests/unit/backend/test_scaleway.py
@@ -443,6 +443,48 @@ def mock_wait(self, server_id, target_state, timeout=300):
     assert result == 1
 
 
+@pytest.mark.parametrize(
+    "error_message",
+    [
+        "Error 404: Volume not found",
+        "resource_not_found",
+        "Volume not found",
+    ],
+)
+def test_delete_with_volume_not_found(
+    error_message, scaleway_runner, fake_scaleway_group, caplog, monkeypatch
+):
+    """Test instance deletion when volume deletion reports a missing volume."""
+    backend = fake_scaleway_group.backend
+    scaleway_runner.instance_id = "test-server-id"
+    scaleway_runner.save()
+
+    # Mock server with volumes
+    mock_volume = MagicMock()
+    mock_volume.id = "test-volume-id"
+    mock_server = MagicMock()
+    mock_server.id = "test-server-id"
+    mock_server.state = ServerState.RUNNING
+    mock_server.volumes = {"0": mock_volume}
+
+    mock_client = backend.client
+    mock_client.get_server.return_value = MagicMock(server=mock_server)
+    mock_client.delete_volume.side_effect = Exception(error_message)
+
+    # Restore wait_for_server_state mock
+    def mock_wait(self, server_id, target_state, timeout=300):
+        return mock_server
+
+    monkeypatch.setattr(ScalewayBackend, "wait_for_server_state", mock_wait)
+
+    result = backend.delete(scaleway_runner)
+
+    # Verify info log for not_found volume (not warning)
+    assert "not found - may have been auto-deleted" in caplog.text
+    assert "Failed to delete volume" not in caplog.text
+    assert result == 1
+
+
 def test_delete_stopped_server(scaleway_runner, fake_scaleway_group):
     """Test deletion of already stopped server."""
     backend = fake_scaleway_group.backend
@@ -606,3 +648,169 @@ def test_list(scaleway_runner, scaleway_group):
         scaleway_group.backend.delete(runner)
     with pytest.raises(NotFoundError):
         scaleway_group.backend.get(runner.instance_id)
+
+
+def test_create_with_default_sbs_volume(
+    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
+):
+    """Test instance creation with default sbs_volume configuration."""
+    # Mock image with root_volume
+    mock_image = MagicMock()
+    mock_image.id = "test-image-id"
+    mock_root_volume = MagicMock()
+    mock_root_volume.id = "snapshot-id"
+    mock_image.root_volume = mock_root_volume
+
+    # Patch get_image at class level
+    def mock_get_image(self, image_name):
+        return mock_image
+
+    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
+
+    backend = fake_scaleway_group.backend
+    backend.create(scaleway_runner)
+
+    # Verify volumes parameter was passed to _create_server
+    mock_client = backend.client
+    create_call = mock_client._create_server.call_args
+    volumes = create_call.kwargs.get("volumes")
+
+    assert volumes is not None
+    assert "0" in volumes
+    assert volumes["0"].volume_type == "sbs_volume"
+    assert volumes["0"].size == 20_000_000_000  # 20GB default
+    assert volumes["0"].base_snapshot == "snapshot-id"
+
+    # Verify log message
+    assert "Creating sbs_volume boot volume: 20GB" in caplog.text
+    assert "from snapshot snapshot-id" in caplog.text
+
+
+def test_create_with_l_ssd_volume(
+    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
+):
+    """Test instance creation with l_ssd volume type."""
+    # Mock image
+    mock_image = MagicMock()
+    mock_image.id = "test-image-id"
+
+    # Patch get_image at class level
+    def mock_get_image(self, image_name):
+        return mock_image
+
+    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
+
+    # Configure l_ssd
+    fake_scaleway_group.backend.instance_config.volume_type = "l_ssd"
+    fake_scaleway_group.backend.instance_config.volume_size_gb = 80
+
+    backend = fake_scaleway_group.backend
+    backend.create(scaleway_runner)
+
+    # Verify volumes parameter
+    mock_client = backend.client
+    create_call = mock_client._create_server.call_args
+    volumes = create_call.kwargs.get("volumes")
+
+    assert volumes is not None
+    assert "0" in volumes
+    assert volumes["0"].volume_type == "l_ssd"
+    assert volumes["0"].size == 80_000_000_000  # 80GB
+    # For l_ssd, no base_snapshot should be set
+    assert getattr(volumes["0"], "base_snapshot", None) is None
+
+    # Verify log message
+    assert "Creating l_ssd boot volume: 80GB" in caplog.text
+
+
+def test_create_with_custom_volume_size(
+    scaleway_runner, fake_scaleway_group, monkeypatch
+):
+    """Test instance creation with custom volume size."""
+    mock_image = MagicMock()
+    mock_image.id = "test-image-id"
+    mock_root_volume = MagicMock()
+    mock_root_volume.id = "snapshot-id"
+    mock_image.root_volume = mock_root_volume
+
+    # Patch get_image at class level
+    def mock_get_image(self, image_name):
+        return mock_image
+
+    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
+
+    # Set custom size
+    fake_scaleway_group.backend.instance_config.volume_size_gb = 100
+    backend = fake_scaleway_group.backend
+
+    backend.create(scaleway_runner)
+
+    mock_client = backend.client
+    create_call = mock_client._create_server.call_args
+    volumes = create_call.kwargs.get("volumes")
+
+    assert volumes["0"].size == 100_000_000_000  # 100GB
+
+
+def test_create_with_no_root_volume_fallback(
+    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
+):
+    """Test fallback when image has no root_volume."""
+    mock_image = MagicMock()
+    mock_image.id = "test-image-id"
+    mock_image.root_volume = None  # No root volume
+
+    # Patch get_image at class level
+    def mock_get_image(self, image_name):
+        return mock_image
+
+    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
+
+    backend = fake_scaleway_group.backend
+    backend.create(scaleway_runner)
+
+    # Verify warning was logged
+    assert "has no root_volume, using default volume from image" in caplog.text
+
+    # Verify volumes=None was passed
+    mock_client = backend.client
+    create_call = mock_client._create_server.call_args
+    volumes = create_call.kwargs.get("volumes")
+    assert volumes is None
+
+
+def test_create_with_user_provided_volumes(
+    scaleway_runner, fake_scaleway_group, monkeypatch
+):
+    """Test instance creation with user-provided volumes configuration."""
+    from scaleway.instance.v1 import VolumeServerTemplate
+
+    # Mock image
+    mock_image = MagicMock()
+    mock_image.id = "test-image-id"
+
+    # Patch get_image at class level
+    def mock_get_image(self, image_name):
+        return mock_image
+
+    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
+
+    # Set user-provided volumes
+    custom_volumes = {
+        "0": VolumeServerTemplate(
+            volume_type="sbs_volume",
+            size=50_000_000_000,
+            base_snapshot="custom-snapshot-id",
+        )
+    }
+    fake_scaleway_group.backend.instance_config.volumes = custom_volumes
+
+    backend = fake_scaleway_group.backend
+    backend.create(scaleway_runner)
+
+    # Verify user-provided volumes were used
+    mock_client = backend.client
+    create_call = mock_client._create_server.call_args
+    volumes = create_call.kwargs.get("volumes")
+
+    assert volumes == custom_volumes