Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 66 additions & 8 deletions runner_manager/backend/scaleway.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false
# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false, reportAttributeAccessIssue=false
import logging
import os
import re
Expand All @@ -7,17 +7,16 @@

from pydantic import Field
from redis_om import NotFoundError
from scaleway import Client # type: ignore[import-untyped]
from scaleway.instance.v1 import ( # type: ignore[import-untyped]
from scaleway import Client
from scaleway.instance.v1 import (
Image,
Server,
ServerAction,
ServerState,
VolumeServerTemplate,
)
from scaleway.instance.v1.custom_api import (
InstanceUtilsV1API, # type: ignore[import-untyped]
)
from scaleway.marketplace.v2 import MarketplaceV2API # type: ignore[import-untyped]
from scaleway.instance.v1.custom_api import InstanceUtilsV1API
from scaleway.marketplace.v2 import MarketplaceV2API

from runner_manager.backend.base import BaseBackend
from runner_manager.models.backend import (
Expand Down Expand Up @@ -264,6 +263,52 @@ def create(self, runner: Runner) -> Runner:
use_gateway = bool(self.instance_config.public_gateway_id)
dynamic_ip_required = self.instance_config.enable_public_ip and not use_gateway

# Prepare volumes configuration
# Create explicit boot volume with specified size to avoid default 10GB
# Volume types:
# - l_ssd: Local SSD storage (fast, can create raw volumes)
# - sbs_volume: Block Storage (cannot create raw, must use base_snapshot from image)
volumes_config = None

if not self.instance_config.volumes:
volume_size_bytes = self.instance_config.volume_size_gb * 1000000000

if self.instance_config.volume_type == "l_ssd":
volumes_config = {
"0": VolumeServerTemplate(
name=f"{runner.name}-boot",
size=volume_size_bytes,
volume_type="l_ssd",
)
}
log.info(
f"Creating l_ssd boot volume: {self.instance_config.volume_size_gb}GB"
)
else:
img = self.get_image(self.instance_config.image)
if img.root_volume and img.root_volume.id:
volumes_config = {
"0": VolumeServerTemplate(
name=f"{runner.name}-boot",
size=volume_size_bytes,
volume_type="sbs_volume",
base_snapshot=img.root_volume.id,
boot=True,
)
}
log.info(
f"Creating sbs_volume boot volume: {self.instance_config.volume_size_gb}GB "
f"from snapshot {img.root_volume.id}"
)
else:
log.warning(
f"Image {img.id} has no root_volume, using default volume from image"
)
volumes_config = None
else:
# Use user-provided volumes configuration
volumes_config = self.instance_config.volumes

# Create server using _create_server
# Note: In SDK 2.10.3, 'protected' is a required parameter
response = self.client._create_server(
Expand All @@ -278,6 +323,7 @@ def create(self, runner: Runner) -> Runner:
project=self.config.project_id,
organization=self.config.organization_id,
security_group=security_group,
volumes=volumes_config,
)

server = response.server
Expand Down Expand Up @@ -391,6 +437,10 @@ def delete(self, runner: Runner) -> int:
log.info(f"Server {runner.instance_id} deleted successfully")

# Delete associated volumes
# Note: The behavior differs by volume type:
# - l_ssd (local storage): Usually auto-deleted with the server
# - sbs_volume (block storage): Persists after server deletion, must be deleted manually
# The Instance API manages both types, but sbs volumes need explicit cleanup
for volume_id in volume_ids:
try:
self.client.delete_volume(
Expand All @@ -399,7 +449,15 @@ def delete(self, runner: Runner) -> int:
)
log.info(f"Volume {volume_id} deleted successfully")
except Exception as vol_error:
log.warning(f"Failed to delete volume {volume_id}: {vol_error}")
error_msg = str(vol_error)
# Volume may already be deleted automatically (especially l_ssd volumes)
# or might not be found if searching in wrong scope
if "404" in error_msg or "not_found" in error_msg.lower():
log.info(
f"Volume {volume_id} not found - may have been auto-deleted with server or already cleaned up"
)
else:
log.warning(f"Failed to delete volume {volume_id}: {vol_error}")

except Exception as e:
if "404" in str(e) or "not found" in str(e).lower():
Expand Down
4 changes: 4 additions & 0 deletions runner_manager/models/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,7 @@ class ScalewayInstanceConfig(InstanceConfig):
boot_type: str = "local"
volumes: Dict[str, str] = {}
tags: List[str] = []
volume_size_gb: int = 20 # Size of boot volume in GB
volume_type: Literal["l_ssd", "sbs_volume"] = (
"sbs_volume" # Local or block storage (sbs_volume is more universal)
)
236 changes: 236 additions & 0 deletions tests/unit/backend/test_scaleway.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,74 @@ def mock_wait(self, server_id, target_state, timeout=300):
assert result == 1


def test_delete_with_volume_not_found_404(
    scaleway_runner, fake_scaleway_group, caplog, monkeypatch
):
    """Deleting a runner succeeds when volume deletion raises a 404 error.

    The volume-deletion call is made to raise an exception whose message
    contains "404"; the backend is expected to log the "not found" notice
    instead of the "Failed to delete volume" message and still return 1.
    """
    backend = fake_scaleway_group.backend
    scaleway_runner.instance_id = "test-server-id"
    scaleway_runner.save()

    # A running server that carries exactly one attached volume.
    attached_volume = MagicMock(id="test-volume-id")
    running_server = MagicMock(
        id="test-server-id",
        state=ServerState.RUNNING,
        volumes={"0": attached_volume},
    )

    client = backend.client
    client.get_server.return_value = MagicMock(server=running_server)
    client.delete_volume.side_effect = Exception("Error 404: Volume not found")

    # Short-circuit the state polling so the test never waits on a real server.
    monkeypatch.setattr(
        ScalewayBackend,
        "wait_for_server_state",
        lambda self, server_id, target_state, timeout=300: running_server,
    )

    deleted = backend.delete(scaleway_runner)

    # The not-found notice must be present and the failure message absent.
    captured = caplog.text
    assert "not found - may have been auto-deleted" in captured
    assert "Failed to delete volume" not in captured
    assert deleted == 1


def test_delete_with_volume_not_found_string(
    scaleway_runner, fake_scaleway_group, caplog, monkeypatch
):
    """Deleting a runner succeeds when the volume error mentions 'not_found'.

    Here the exception message carries no HTTP status code, only the
    "resource_not_found" marker; the backend is expected to treat it like
    the 404 case and still return 1.
    """
    backend = fake_scaleway_group.backend
    scaleway_runner.instance_id = "test-server-id"
    scaleway_runner.save()

    # A running server that carries exactly one attached volume.
    attached_volume = MagicMock(id="test-volume-id")
    running_server = MagicMock(
        id="test-server-id",
        state=ServerState.RUNNING,
        volumes={"0": attached_volume},
    )

    client = backend.client
    client.get_server.return_value = MagicMock(server=running_server)
    client.delete_volume.side_effect = Exception("resource_not_found")

    # Short-circuit the state polling so the test never waits on a real server.
    monkeypatch.setattr(
        ScalewayBackend,
        "wait_for_server_state",
        lambda self, server_id, target_state, timeout=300: running_server,
    )

    deleted = backend.delete(scaleway_runner)

    # The not-found notice must be present and the failure message absent.
    captured = caplog.text
    assert "not found - may have been auto-deleted" in captured
    assert "Failed to delete volume" not in captured
    assert deleted == 1


def test_delete_stopped_server(scaleway_runner, fake_scaleway_group):
"""Test deletion of already stopped server."""
backend = fake_scaleway_group.backend
Expand Down Expand Up @@ -606,3 +674,171 @@ def test_list(scaleway_runner, scaleway_group):
scaleway_group.backend.delete(runner)
with pytest.raises(NotFoundError):
scaleway_group.backend.get(runner.instance_id)


def test_create_with_default_sbs_volume(
    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
):
    """Test instance creation with default sbs_volume configuration.

    With no user-provided volumes, ``create`` is expected to build a boot
    volume template from the image's root volume and forward it to
    ``_create_server`` through the ``volumes`` keyword argument.
    """
    # Mock image with root_volume
    mock_image = MagicMock()
    mock_image.id = "test-image-id"
    mock_root_volume = MagicMock()
    mock_root_volume.id = "snapshot-id"
    mock_image.root_volume = mock_root_volume

    # Patch get_image at class level so no marketplace lookup is performed
    def mock_get_image(self, image_name):
        return mock_image

    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)

    backend = fake_scaleway_group.backend
    backend.create(scaleway_runner)

    # Verify volumes parameter was passed to _create_server
    mock_client = backend.client
    create_call = mock_client._create_server.call_args
    volumes = create_call.kwargs.get("volumes")

    assert volumes is not None
    assert "0" in volumes
    assert volumes["0"].volume_type == "sbs_volume"
    assert volumes["0"].size == 20_000_000_000  # 20GB default
    assert volumes["0"].base_snapshot == "snapshot-id"
    # The sbs_volume template is created as the boot volume; pin that flag
    # so a regression dropping it does not go unnoticed.
    assert volumes["0"].boot is True

    # Verify log message
    assert "Creating sbs_volume boot volume: 20GB" in caplog.text
    assert "from snapshot snapshot-id" in caplog.text


def test_create_with_l_ssd_volume(
    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
):
    """Test instance creation with l_ssd volume type.

    The instance configuration is switched to an 80GB ``l_ssd`` boot volume;
    the template forwarded to ``_create_server`` must reflect that and must
    not reference a base snapshot.
    """
    # Mock image
    mock_image = MagicMock()
    mock_image.id = "test-image-id"

    # Patch get_image at class level
    def mock_get_image(self, image_name):
        return mock_image

    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)

    backend = fake_scaleway_group.backend

    # Configure l_ssd through monkeypatch so the overrides are reverted at
    # teardown instead of lingering on the fixture's configuration object.
    monkeypatch.setattr(backend.instance_config, "volume_type", "l_ssd")
    monkeypatch.setattr(backend.instance_config, "volume_size_gb", 80)

    backend.create(scaleway_runner)

    # Verify volumes parameter
    mock_client = backend.client
    create_call = mock_client._create_server.call_args
    volumes = create_call.kwargs.get("volumes")

    assert volumes is not None
    assert "0" in volumes
    assert volumes["0"].volume_type == "l_ssd"
    assert volumes["0"].size == 80_000_000_000  # 80GB
    # For l_ssd, no base_snapshot should be set
    assert getattr(volumes["0"], "base_snapshot", None) is None

    # Verify log message
    assert "Creating l_ssd boot volume: 80GB" in caplog.text


def test_create_with_custom_volume_size(
    scaleway_runner, fake_scaleway_group, monkeypatch
):
    """Test instance creation with custom volume size.

    ``volume_size_gb`` is overridden to 100 and the boot volume template
    passed to ``_create_server`` must carry the matching size in bytes.
    """
    mock_image = MagicMock()
    mock_image.id = "test-image-id"
    mock_root_volume = MagicMock()
    mock_root_volume.id = "snapshot-id"
    mock_image.root_volume = mock_root_volume

    # Patch get_image at class level
    def mock_get_image(self, image_name):
        return mock_image

    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)

    backend = fake_scaleway_group.backend

    # Set custom size via monkeypatch so the override is undone at teardown
    # rather than left behind on the fixture's configuration object.
    monkeypatch.setattr(backend.instance_config, "volume_size_gb", 100)

    backend.create(scaleway_runner)

    mock_client = backend.client
    create_call = mock_client._create_server.call_args
    volumes = create_call.kwargs.get("volumes")

    assert volumes["0"].size == 100_000_000_000  # 100GB


def test_create_with_no_root_volume_fallback(
    scaleway_runner, fake_scaleway_group, monkeypatch, caplog
):
    """Test fallback when image has no root_volume.

    Without a root volume there is no snapshot to base a boot volume on, so
    ``create`` is expected to log a warning and forward ``volumes=None``.
    """
    mock_image = MagicMock()
    mock_image.id = "test-image-id"
    mock_image.root_volume = None  # No root volume

    # Patch get_image at class level
    def mock_get_image(self, image_name):
        return mock_image

    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)

    backend = fake_scaleway_group.backend
    backend.create(scaleway_runner)

    # Verify warning was logged
    assert "has no root_volume, using default volume from image" in caplog.text

    # Verify volumes=None was passed explicitly. Checking key presence first
    # keeps the assertion from passing vacuously if the keyword is ever
    # dropped from the _create_server call.
    mock_client = backend.client
    create_call = mock_client._create_server.call_args
    assert "volumes" in create_call.kwargs
    assert create_call.kwargs["volumes"] is None


def test_create_with_user_provided_volumes(
    scaleway_runner, fake_scaleway_group, monkeypatch
):
    """Test instance creation with user-provided volumes configuration.

    When ``instance_config.volumes`` is non-empty, ``create`` must forward
    that mapping unchanged instead of building its own boot volume.
    """
    from scaleway.instance.v1 import VolumeServerTemplate

    # Mock image
    mock_image = MagicMock()
    mock_image.id = "test-image-id"

    # Patch get_image at class level
    def mock_get_image(self, image_name):
        return mock_image

    monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)

    # Set user-provided volumes
    custom_volumes = {
        "0": VolumeServerTemplate(
            volume_type="sbs_volume",
            size=50_000_000_000,
            base_snapshot="custom-snapshot-id",
        )
    }

    backend = fake_scaleway_group.backend

    # Install the override via monkeypatch so it is reverted at teardown
    # rather than left behind on the fixture's configuration object.
    monkeypatch.setattr(backend.instance_config, "volumes", custom_volumes)

    backend.create(scaleway_runner)

    # Verify user-provided volumes were used
    mock_client = backend.client
    create_call = mock_client._create_server.call_args
    volumes = create_call.kwargs.get("volumes")

    assert volumes == custom_volumes
Loading