diff --git a/stackbox/cli/__main__.py b/stackbox/cli/__main__.py index a50e92d..a862101 100644 --- a/stackbox/cli/__main__.py +++ b/stackbox/cli/__main__.py @@ -257,6 +257,15 @@ def init( debug=verbose, ) + # Copy libvirt Dockerfile + click.echo(" Copying libvirt Dockerfile...") + libvirt_dockerfile_src = ( + Path(__file__).parent.parent / "templates" / "libvirt" / "Dockerfile" + ) + libvirt_dockerfile_dest = config_dir_path / "config" / "libvirt" / "Dockerfile" + libvirt_dockerfile_dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(libvirt_dockerfile_src, libvirt_dockerfile_dest) + # Generate docker-compose.yml click.echo(" Generating docker-compose.yml...") compose_file = config_dir_path / "docker-compose.yml" @@ -273,6 +282,39 @@ def init( click.echo(f"\nāŒ Failed to generate configuration: {e}", err=True) sys.exit(1) + # Phase 2.5: Build libvirt image + click.echo("\n🐳 Building libvirt image...") + try: + from stackbox.core.container import get_container_runtime + + runtime_cmd, _ = get_container_runtime() + + # Build libvirt image + click.echo(" Building libvirt container (this may take a few minutes)...") + result = subprocess.run( + [ + runtime_cmd, + "build", + "-t", + "stackbox-libvirt:latest", + "-f", + str(libvirt_dockerfile_dest), + str(config_dir_path), + ], + capture_output=True, + text=True, + timeout=300, # 5 minutes should be enough + ) + + if result.returncode != 0: + raise RuntimeError(f"Libvirt image build failed: {result.stderr}") + + click.echo("āœ… Libvirt image built successfully") + + except Exception as e: + click.echo(f"\nāŒ Failed to build libvirt image: {e}", err=True) + sys.exit(1) + # Phase 3: Start infrastructure services (MariaDB, RabbitMQ, etc.) click.echo("\n🐳 Starting infrastructure services...") try: @@ -496,12 +538,12 @@ def init( driver="redfish", deploy_interface="direct", ) - click.echo(f" āœ… Generated: {tempest_conf.relative_to(Path.cwd())}") + click.echo(f" āœ… Generated: {tempest_conf.relative_to(config_dir_path)}") # Generate accounts.yaml accounts_file = config_dir_path / "config" / "tempest" / "accounts.yaml" config_gen.generate_tempest_accounts(output_path=accounts_file) - click.echo(f" āœ… Generated: {accounts_file.relative_to(Path.cwd())}") + click.echo(f" āœ… Generated: {accounts_file.relative_to(config_dir_path)}") # Validate configuration if config_gen.validate_tempest_config(tempest_conf): @@ -523,20 +565,26 @@ def init( tempest_dockerfile_dest = config_dir_path / "config" / "tempest" / "Dockerfile" shutil.copy2(tempest_dockerfile_src, tempest_dockerfile_dest) - click.echo(f" āœ… Copied: {tempest_dockerfile_dest.relative_to(Path.cwd())}") + click.echo(f" āœ… Copied: {tempest_dockerfile_dest.relative_to(config_dir_path)}") # Build Tempest image click.echo(" šŸ”Ø Building Tempest image (this may take a few minutes)...") + from stackbox.core.container import get_compose_command + + compose_file = config_dir_path / "docker-compose.yml" + cmd = get_compose_command(str(compose_file)) + cmd.extend(["--profile", "testing", "build", "tempest"]) + result = subprocess.run( - ["docker-compose", "build", "tempest"], - cwd=config_dir_path, + cmd, capture_output=True, text=True, + timeout=600, # 10 minutes for image build ) if result.returncode != 0: - raise RuntimeError(f"Docker build failed: {result.stderr}") + raise RuntimeError(f"Build failed: {result.stderr}") click.echo(" āœ… Tempest image built successfully") @@ -549,7 +597,7 @@ def init( click.echo(f" Virtual node: {node_name}") click.echo(" BMC endpoint: http://localhost:8000/redfish/v1/") click.echo(f" Enrolled in Ironic: {enrolled_node['uuid']}") - click.echo(f" Tempest config: {tempest_conf.relative_to(Path.cwd())}") + click.echo(f" Tempest config: {tempest_conf.relative_to(config_dir_path)}") click.echo("\nšŸ’” Ready to run tests:") click.echo(" sb test # Run all Tempest tests") diff --git a/stackbox/core/builder.py b/stackbox/core/builder.py index 8f8c44a..f30942b 100644 --- a/stackbox/core/builder.py +++ b/stackbox/core/builder.py @@ -6,6 +6,7 @@ import os from pathlib import Path +import shutil import subprocess import time @@ -54,6 +55,17 @@ def build_ironic_image( if not dockerfile_path.exists(): raise RuntimeError(f"Dockerfile not found: {dockerfile_path}") + # Copy healthcheck.sh to build context (required by Dockerfile COPY) + healthcheck_src = Path(__file__).parent.parent / "templates" / "ironic" / "healthcheck.sh" + healthcheck_dest = ironic_source_path / "healthcheck.sh" + + # Track if we need to clean up healthcheck.sh after build + cleanup_healthcheck = False + + if healthcheck_src.exists(): + shutil.copy2(healthcheck_src, healthcheck_dest) + cleanup_healthcheck = True + click.echo(f"Building Ironic image from {ironic_source_path}") click.echo(f"Using Dockerfile: {dockerfile_path}") @@ -86,31 +98,37 @@ def build_ironic_image( start_time = time.time() try: - result = subprocess.run( - cmd, - env=env, - capture_output=not verbose, - text=True, - check=False, - timeout=timeout, - ) - - elapsed = time.time() - start_time - - if result.returncode != 0: - click.echo("\nāŒ Build failed!", err=True) - if not verbose and result.stderr: - click.echo(result.stderr, err=True) - raise RuntimeError(f"Docker build failed with exit code {result.returncode}") - - click.echo(f"\nāœ… Build completed in {elapsed:.1f}s") - - return elapsed - - except FileNotFoundError as e: - raise RuntimeError("Docker command not found. Is Docker installed and in PATH?") from e - except subprocess.TimeoutExpired as e: - raise RuntimeError(f"Build timeout after {timeout}s") from e + try: + result = subprocess.run( + cmd, + env=env, + capture_output=not verbose, + text=True, + check=False, + timeout=timeout, + ) + + elapsed = time.time() - start_time + + if result.returncode != 0: + click.echo("\nāŒ Build failed!", err=True) + if not verbose and result.stderr: + click.echo(result.stderr, err=True) + raise RuntimeError(f"Docker build failed with exit code {result.returncode}") + + click.echo(f"\nāœ… Build completed in {elapsed:.1f}s") + + return elapsed + + except FileNotFoundError as e: + raise RuntimeError("Docker command not found. Is Docker installed and in PATH?") from e + except subprocess.TimeoutExpired as e: + raise RuntimeError(f"Build timeout after {timeout}s") from e + + finally: + # Clean up healthcheck.sh if we copied it + if cleanup_healthcheck and healthcheck_dest.exists(): + healthcheck_dest.unlink() def validate_image(tag: str = "stackbox-ironic:latest") -> bool: diff --git a/stackbox/core/compose.py b/stackbox/core/compose.py index c217dcd..96a7698 100644 --- a/stackbox/core/compose.py +++ b/stackbox/core/compose.py @@ -11,6 +11,8 @@ from jinja2 import Template import yaml +from stackbox.core.container import get_compose_command, get_container_runtime + class ValidationResult(TypedDict): """Result from validate_environment().""" @@ -193,18 +195,60 @@ def start_infrastructure( # Start services try: - cmd = ["docker-compose", "-f", str(compose_file), "up", "-d"] - - # Add specific services if provided - if services: - cmd.extend(services) - - subprocess.run( - cmd, - check=True, - capture_output=True, - text=True, - ) + _, compose_cmd = get_container_runtime() + + # Workaround: podman-compose hangs when starting multiple services at once + # Start them one by one if using podman-compose + if "podman-compose" in compose_cmd and services and len(services) > 1: + for service in services: + cmd = [compose_cmd, "-f", str(compose_file), "up", "-d", service] + result = subprocess.run( + cmd, + check=False, # Don't raise on non-zero exit + capture_output=True, + text=True, + timeout=60, # Reduced timeout - container creation should be fast + ) + # Check if container was created even if compose command hung + # podman-compose often exits with timeout but container is created + if result.returncode != 0: + # Check if service is actually running + runtime_cmd, _ = get_container_runtime() + check_result = subprocess.run( + [ + runtime_cmd, + "ps", + "-a", + "--filter", + f"name={service}", + "--format", + "{{.Status}}", + ], + capture_output=True, + text=True, + check=False, + ) + if "Created" in check_result.stdout or "Up" in check_result.stdout: + # Container exists, manually start it + subprocess.run( + [runtime_cmd, "start", service], check=False, capture_output=True + ) + elif result.stderr and "port is already allocated" not in result.stderr: + # Real error, not just a timeout + raise RuntimeError(f"Failed to start {service}: {result.stderr}") + else: + cmd = [compose_cmd, "-f", str(compose_file), "up", "-d"] + + # Add specific services if provided + if services: + cmd.extend(services) + + subprocess.run( + cmd, + check=True, + capture_output=True, + text=True, + ) except subprocess.CalledProcessError as e: # Parse stderr for common issues if "port is already allocated" in e.stderr: @@ -236,7 +280,8 @@ def stop_infrastructure(compose_file: Path, remove_volumes: bool = False) -> Non # Gracefully handle missing file (already cleaned up) return - cmd = ["docker-compose", "-f", str(compose_file), "down"] + _, compose_cmd = get_container_runtime() + cmd = [compose_cmd, "-f", str(compose_file), "down"] if remove_volumes: cmd.append("-v") @@ -261,8 +306,10 @@ def wait_for_healthy(compose_file: Path, timeout: int = 120) -> None: while time.time() - start_time < timeout: # Get service status + cmd = get_compose_command(str(compose_file)) + cmd.extend(["ps", "--format", "json"]) result = subprocess.run( - ["docker", "compose", "-f", str(compose_file), "ps", "--format", "json"], + cmd, capture_output=True, text=True, check=False, @@ -272,18 +319,43 @@ def wait_for_healthy(compose_file: Path, timeout: int = 120) -> None: raise RuntimeError(f"Failed to check service status: {result.stderr}") # Parse JSON output - services = [] - for line in result.stdout.strip().split("\n"): - if line: - services.append(json.loads(line)) + # podman-compose returns JSON array: [{"Name": ...}, ...] + # docker-compose v2 returns JSONL: {"Name": ...}\n{"Name": ...} + output = result.stdout.strip() + if not output: + services = [] + elif output.startswith("["): + # JSON array format (podman-compose) + services = json.loads(output) + else: + # JSONL format (docker-compose v2) + services = [] + for line in output.split("\n"): + if line: + services.append(json.loads(line)) # Check if all services are healthy unhealthy = [] for service in services: + # Get service name (podman-compose uses "Names" list, docker-compose uses "Name" string) + name = service.get("Name") or (service.get("Names", ["unknown"])[0]) + + # Get health status + # Docker Compose v2: "Health" field + # podman-compose: parse from "Status" field like "Up 3 minutes (healthy)" health = service.get("Health", "") + if not health: + status = service.get("Status", "") + if "(healthy)" in status: + health = "healthy" + elif "(starting)" in status or "Starting" in status: + health = "starting" + elif "(unhealthy)" in status: + health = "unhealthy" + + # Check if service is running but not yet healthy if health != "healthy" and service.get("State") == "running": - # Service is running but not yet healthy - unhealthy.append(service["Name"]) + unhealthy.append(name) if not unhealthy: click.echo("āœ… All services are healthy!") diff --git a/stackbox/core/config.py b/stackbox/core/config.py index 3df5441..0e31def 100644 --- a/stackbox/core/config.py +++ b/stackbox/core/config.py @@ -26,8 +26,8 @@ def __init__(self, template_dir: Path | None = None) -> None: def generate_ironic_conf( self, output_path: Path, - database_url: str = "mysql+pymysql://ironic:ironic@mariadb/ironic?charset=utf8", - rabbitmq_url: str = "rabbit://ironic:ironic@rabbitmq:5672/", + database_url: str = "mysql+pymysql://ironic:stackbox-secret@mariadb/ironic?charset=utf8", + rabbitmq_url: str = "rabbit://stackrabbit:stackbox-secret@rabbitmq:5672/", api_host: str = "0.0.0.0", api_port: int = 6385, ) -> None: diff --git a/stackbox/core/container.py b/stackbox/core/container.py new file mode 100644 index 0000000..e035ed8 --- /dev/null +++ b/stackbox/core/container.py @@ -0,0 +1,70 @@ +"""Container runtime detection for StackBox. + +Auto-detects Docker or Podman and provides the correct commands. +""" + +import shutil + + +def get_container_runtime() -> tuple[str, str]: + """Detect available container runtime. + + Returns: + Tuple of (runtime_cmd, compose_cmd) + e.g. ("docker", "docker-compose") or ("podman", "podman-compose") + + Raises: + RuntimeError: If no container runtime found + """ + # Check for docker first (most common) + if shutil.which("docker"): + # Docker found, check for docker-compose + if shutil.which("docker-compose"): + return ("docker", "docker-compose") + # Docker found but no docker-compose, try podman-compose (Fedora/RHEL pattern) + if shutil.which("podman-compose"): + return ("docker", "podman-compose") + raise RuntimeError( + "Docker found but no compose tool. Install docker-compose or podman-compose:\n" + " Fedora/RHEL: sudo dnf install podman-compose\n" + " Ubuntu/Debian: sudo apt install docker-compose\n" + " macOS: brew install docker-compose" + ) + + # Check for podman + if shutil.which("podman"): + if shutil.which("podman-compose"): + return ("podman", "podman-compose") + raise RuntimeError( + "Podman found but podman-compose not installed:\n" + " Fedora/RHEL: sudo dnf install podman-compose\n" + " pip: pip install podman-compose" + ) + + # Neither found + raise RuntimeError( + "No container runtime found. Install Docker or Podman:\n" + " Fedora/RHEL: sudo dnf install podman-docker podman-compose\n" + " Ubuntu/Debian: sudo apt install docker.io docker-compose\n" + " macOS: brew install docker" + ) + + +def get_compose_command(compose_file: str) -> list[str]: + """Get compose command as list for subprocess. + + Args: + compose_file: Path to docker-compose.yml + + Returns: + List of command parts, e.g. ["docker-compose", "-f", "path"] + or ["docker", "compose", "-f", "path"] for Docker Compose V2 + """ + _, compose_cmd = get_container_runtime() + + # podman-compose and docker-compose use hyphenated command + if "-" in compose_cmd: + return [compose_cmd, "-f", compose_file] + + # Docker Compose V2 uses space-separated "docker compose" + return ["docker", "compose", "-f", compose_file] diff --git a/stackbox/core/enrollment.py b/stackbox/core/enrollment.py index 9af0300..ed74767 100644 --- a/stackbox/core/enrollment.py +++ b/stackbox/core/enrollment.py @@ -214,6 +214,23 @@ def get_node_power_state(self, node_id: str) -> str | None: node = self.get_node(node_id) return node.get("power_state") + def set_provision_state(self, node_id: str, target: str) -> None: + """Set node provision state. + + Args: + node_id: Node UUID or name + target: Target provision state (e.g., 'manage', 'provide', 'active') + + Raises: + requests.HTTPError: If API request fails + """ + resp = self.session.put( + f"{self.api_url}/v1/nodes/{node_id}/states/provision", + json={"target": target}, + timeout=30, + ) + resp.raise_for_status() + def enroll_node( ironic_client: IronicClient, @@ -266,6 +283,43 @@ def enroll_node( pxe_enabled=True, ) + # Transition to manageable state to enable power operations + # Node is created in 'enroll' state, must move to 'manageable' for power control + try: + click.echo(" Moving node to manageable state...") + ironic_client.set_provision_state(node["uuid"], "manage") + + # Wait for state transition + max_wait = 30 # seconds + start = time.time() + while time.time() - start < max_wait: + current_node = ironic_client.get_node(node["uuid"]) + provision_state = current_node.get("provision_state") + + if provision_state == "manageable": + click.echo(" āœ… Node is now manageable") + break + elif provision_state in ("manage failed", "error"): + raise RuntimeError( + f"Node transition to manageable failed with state: {provision_state}\n" + f"Check Ironic conductor logs for details" + ) + + time.sleep(2) + else: + # Timeout + raise RuntimeError( + f"Timeout waiting for node to become manageable. " + f"Current state: {provision_state}" + ) + except requests.HTTPError as e: + # Log warning but don't fail enrollment + click.echo( + f" āš ļø Could not transition to manageable state: {e}\n" + f" Power control may not work until manually managed", + err=True, + ) + return node diff --git a/stackbox/core/migrations.py b/stackbox/core/migrations.py index b5cf60a..39d5c70 100644 --- a/stackbox/core/migrations.py +++ b/stackbox/core/migrations.py @@ -6,6 +6,8 @@ import click +from stackbox.core.container import get_container_runtime + def wait_for_database( config_dir: Path, @@ -29,27 +31,29 @@ def wait_for_database( click.echo("Waiting for database to be ready...") + # Detect container runtime + runtime_cmd, _ = get_container_runtime() + start = time.time() while time.time() - start < timeout: - # Test database connection using oslo.db + # Test database connection using mariadb client + # Simpler and more reliable than oslo.db check cmd = [ - "docker", + runtime_cmd, "run", "--rm", "--network", "stackbox-network", - "-v", - f"{config_file.parent}:/etc/ironic:ro", - ironic_image, - "python3", - "-c", - ( - "from oslo_db.sqlalchemy import enginefacade; " - "from oslo_config import cfg; " - "CONF = cfg.CONF; " - "CONF(['--config-file', '/etc/ironic/ironic.conf']); " - "enginefacade.get_legacy_facade().get_engine().connect()" - ), + "docker.io/library/mariadb:11.2", + "mariadb", + "-h", + "stackbox-mariadb", + "-u", + "ironic", + "-pstackbox-secret", + "ironic", + "-e", + "SELECT 1;", ] result = subprocess.run( @@ -66,18 +70,18 @@ def wait_for_database( # Check for permission errors (fail fast instead of timing out) if result.stderr and ( - "Access denied" in result.stderr or "Permission denied" in result.stderr + "Access denied" in result.stderr or "Access denied for user" in result.stderr ): raise RuntimeError( "Database permission error. Check credentials in ironic.conf\n" - "Verify with: docker exec stackbox-mariadb mysql -uironic -pstackbox-secret ironic -e 'SHOW TABLES;'\n" + f"Verify with: {runtime_cmd} exec stackbox-mariadb mariadb -uironic -pstackbox-secret ironic -e 'SHOW TABLES;'\n" f"Details: {result.stderr}" ) time.sleep(2) raise RuntimeError( - f"Database not ready after {timeout}s. " f"Check: docker logs stackbox-mariadb" + f"Database not ready after {timeout}s. " f"Check: {runtime_cmd} logs stackbox-mariadb" ) @@ -103,42 +107,64 @@ def run_ironic_migrations( click.echo("Running database migrations...") + # Detect container runtime + runtime_cmd, _ = get_container_runtime() + # Run ironic-dbsync in a temporary container cmd = [ - "docker", + runtime_cmd, "run", "--rm", "--network", "stackbox-network", "-v", - f"{config_file.parent}:/etc/ironic:ro", + f"{config_file.parent.absolute()}:/etc/ironic:ro,Z", ironic_image, "ironic-dbsync", "--config-file", "/etc/ironic/ironic.conf", + "--log-file", + "", # Disable log file, output to stdout/stderr "upgrade", ] + if verbose: + click.echo(f"DEBUG: Running command: {' '.join(cmd)}") + try: + # Always capture output so we can inspect errors result = subprocess.run( cmd, - capture_output=not verbose, + capture_output=True, text=True, check=False, timeout=120, # 2 minute timeout ) + # Show output if verbose + if verbose and result.stdout: + click.echo(result.stdout) + if verbose and result.stderr: + click.echo(result.stderr, err=True) + if result.returncode == 0: click.echo("āœ… Database migrations completed successfully") else: # Check for specific error patterns stderr = result.stderr if result.stderr else "" + stdout = result.stdout if result.stdout else "" + + # Debug: show what we got + if verbose: + click.echo(f"DEBUG: returncode={result.returncode}") + click.echo(f"DEBUG: stdout={stdout}") + click.echo(f"DEBUG: stderr={stderr}") # Permission errors if "Access denied" in stderr or "Permission denied" in stderr: raise RuntimeError( "Database permission error. Check credentials in ironic.conf\n" - "Verify with: docker exec stackbox-mariadb mysql -uironic -pstackbox-secret ironic -e 'SHOW TABLES;'\n" + f"Verify with: {runtime_cmd} exec stackbox-mariadb mysql -uironic -pstackbox-secret ironic -e 'SHOW TABLES;'\n" f"Details: {stderr}" ) @@ -148,18 +174,23 @@ def run_ironic_migrations( or "version mismatch" in stderr.lower() or "alembic" in stderr.lower() ): + _, compose_cmd = get_container_runtime() raise RuntimeError( "Database version mismatch detected.\n" "Reset database with:\n" - " docker-compose -f .stackbox/docker-compose.yml down -v\n" + f" {compose_cmd} -f .stackbox/docker-compose.yml down -v\n" " sb init \n" f"Details: {stderr}" ) # Generic error error_msg = "Database migrations failed" - if not verbose and stderr: - error_msg += f"\n{stderr}" + if stderr: + error_msg += f"\nStderr: {stderr}" + if stdout: + error_msg += f"\nStdout: {stdout}" + if not stderr and not stdout: + error_msg += "\nNo error output captured. Try with --verbose flag." raise RuntimeError(error_msg) except subprocess.TimeoutExpired as e: @@ -189,19 +220,24 @@ def verify_migrations( click.echo("Verifying database schema...") + # Detect container runtime + runtime_cmd, _ = get_container_runtime() + # Check database version cmd = [ - "docker", + runtime_cmd, "run", "--rm", "--network", "stackbox-network", "-v", - f"{config_file.parent}:/etc/ironic:ro", + f"{config_file.parent.absolute()}:/etc/ironic:ro,Z", ironic_image, "ironic-dbsync", "--config-file", "/etc/ironic/ironic.conf", + "--log-file", + "", # Disable log file, output to stdout/stderr "version", ] diff --git a/stackbox/core/vm.py b/stackbox/core/vm.py index 733a900..6664798 100644 --- a/stackbox/core/vm.py +++ b/stackbox/core/vm.py @@ -10,6 +10,8 @@ import click from jinja2 import Environment, FileSystemLoader +from stackbox.core.container import get_container_runtime + class LibvirtManager: """Manage libvirt VMs inside containerized libvirt service.""" @@ -26,10 +28,28 @@ def __init__(self, config_dir: Path, container_name: str = "stackbox-libvirt"): self.vm_dir.mkdir(parents=True, exist_ok=True) self.container_name = container_name + # Detect container runtime + self.runtime_cmd, _ = get_container_runtime() + # Setup Jinja2 for XML templates template_dir = Path(__file__).parent.parent / "templates" self.env = Environment(loader=FileSystemLoader(str(template_dir))) + def vm_exists(self, name: str) -> bool: + """Check if a VM exists. + + Args: + name: VM name + + Returns: + True if VM exists, False otherwise + """ + try: + vms = self.list_vms() + return any(vm["name"] == name for vm in vms) + except RuntimeError: + return False + def create_vm( self, name: str, @@ -55,6 +75,20 @@ def create_vm( Raises: RuntimeError: If VM creation fails """ + # Delete VM if it already exists (clean slate for sb init) + if self.vm_exists(name): + click.echo(f" VM '{name}' already exists, deleting...") + try: + self.delete_vm(name, delete_disk=True) + except RuntimeError as e: + # If VM is running, force stop it first + if "running" in str(e).lower() or "active" in str(e).lower(): + click.echo(f" Stopping running VM '{name}'...") + self.stop_vm(name, force=True) + self.delete_vm(name, delete_disk=True) + else: + raise + vm_uuid = str(uuid.uuid4()) # Generate MAC if not provided @@ -116,7 +150,7 @@ def _create_disk(self, vm_name: str, size_gb: int) -> None: container_disk_path = f"/var/lib/libvirt/images/{vm_name}.qcow2" cmd = [ - "docker", + self.runtime_cmd, "exec", self.container_name, "qemu-img", @@ -162,7 +196,7 @@ def _virsh(self, args: list[str]) -> subprocess.CompletedProcess: Raises: RuntimeError: If virsh command fails """ - cmd = ["docker", "exec", self.container_name, "virsh", *args] + cmd = [self.runtime_cmd, "exec", self.container_name, "virsh", *args] result = subprocess.run(cmd, capture_output=True, text=True, check=False) @@ -182,7 +216,7 @@ def _copy_to_container(self, local_path: Path, container_path: str) -> None: RuntimeError: If copy fails """ cmd = [ - "docker", + self.runtime_cmd, "cp", str(local_path), f"{self.container_name}:{container_path}", @@ -241,7 +275,7 @@ def delete_vm(self, name: str, delete_disk: bool = True) -> None: try: subprocess.run( [ - "docker", + self.runtime_cmd, "exec", self.container_name, "rm", @@ -311,9 +345,17 @@ def ensure_default_network(self) -> None: if self.network_exists("default"): # Network exists, check if it's active result = self._virsh(["net-info", "default"]) - if "Active: yes" not in result.stdout: + # Check for "Active: yes" or "Active: yes" (flexible whitespace) + if "active: yes" not in result.stdout.lower(): click.echo("Starting default network...") - self._virsh(["net-start", "default"]) + try: + self._virsh(["net-start", "default"]) + except RuntimeError as e: + # Ignore "already active" errors + if "already active" in str(e).lower(): + click.echo(" (network was already active)") + else: + raise return click.echo("Creating default libvirt network...") @@ -335,7 +377,7 @@ def ensure_default_network(self) -> None: # Copy XML into container subprocess.run( [ - "docker", + self.runtime_cmd, "exec", "-i", self.container_name, diff --git a/stackbox/templates/docker-compose.yml.j2 b/stackbox/templates/docker-compose.yml.j2 index 2a250e6..3110a47 100644 --- a/stackbox/templates/docker-compose.yml.j2 +++ b/stackbox/templates/docker-compose.yml.j2 @@ -2,7 +2,7 @@ version: '3.8' services: mariadb: - image: mariadb:11.2 + image: docker.io/library/mariadb:11.2 container_name: stackbox-mariadb environment: MYSQL_ROOT_PASSWORD: {{ db_password }} @@ -14,7 +14,7 @@ services: ports: - "3306:3306" healthcheck: - test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-p{{ db_password }}"] + test: ["CMD", "mariadb-admin", "ping", "-h", "localhost", "-p{{ db_password }}"] interval: 10s timeout: 5s retries: 5 @@ -22,7 +22,7 @@ services: - stackbox rabbitmq: - image: rabbitmq:3.12-management + image: docker.io/library/rabbitmq:3.12-management container_name: stackbox-rabbitmq environment: RABBITMQ_DEFAULT_USER: stackrabbit @@ -38,9 +38,9 @@ services: networks: - stackbox - # Libvirt container (runs VMs inside container - Metal3 style) + # Libvirt container (runs VMs inside container - KubeVirt pattern) libvirt: - image: quay.io/metal3-io/ironic:latest + image: stackbox-libvirt:latest container_name: stackbox-libvirt privileged: true networks: @@ -52,7 +52,6 @@ services: - libvirt-images:/var/lib/libvirt/images environment: - LIBVIRT_DEFAULT_URI=qemu:///system - command: libvirtd --listen ports: - "16509:16509" healthcheck: @@ -72,7 +71,7 @@ services: ports: - "8000:8000" volumes: - - ./config/sushy:/etc/sushy:ro + - ./config/sushy:/etc/sushy:ro,Z environment: - SUSHY_EMULATOR_LIBVIRT_URI=qemu+tcp://libvirt:16509/system - SUSHY_EMULATOR_LISTEN_IP=0.0.0.0 @@ -81,7 +80,7 @@ services: libvirt: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/redfish/v1/"] + test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/redfish/v1/', timeout=5)"] interval: 10s timeout: 5s retries: 5 @@ -95,7 +94,7 @@ services: - "6385:6385" # Ironic API - "8080:8080" # HTTP boot server volumes: - - ./config/ironic.conf:/etc/ironic/ironic.conf:ro + - ./config/ironic.conf:/etc/ironic/ironic.conf:ro,Z - ironic-lib:/var/lib/ironic - ironic-log:/var/log/ironic depends_on: @@ -119,7 +118,7 @@ services: command: ironic-conductor privileged: true # Required for hardware access volumes: - - ./config/ironic.conf:/etc/ironic/ironic.conf:ro + - ./config/ironic.conf:/etc/ironic/ironic.conf:ro,Z - ironic-lib:/var/lib/ironic - ironic-log:/var/log/ironic - /dev:/dev # Hardware device access diff --git a/stackbox/templates/ironic/Dockerfile b/stackbox/templates/ironic/Dockerfile index bc368a9..b7a0fbb 100644 --- a/stackbox/templates/ironic/Dockerfile +++ b/stackbox/templates/ironic/Dockerfile @@ -51,11 +51,39 @@ RUN pip3 install --no-cache-dir \ FROM dependencies AS ironic # Copy Ironic source code -COPY . /tmp/ironic-source +COPY . /opt/ironic + +# Install Ironic (use WORKDIR and . to ensure scripts are generated) +WORKDIR /opt/ironic + +# Upgrade pip, setuptools, and wheel to latest versions +# This ensures entry points are properly registered +RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel # Install Ironic -RUN pip3 install --no-cache-dir /tmp/ironic-source && \ - rm -rf /tmp/ironic-source +RUN pip3 install --no-cache-dir . + +# Verify entry points are registered +RUN python3 -c "from stevedore import driver; d = driver.DriverManager('ironic.database.migration_backend', 'sqlalchemy'); print('āœ“ Migration backend registered')" + +# Create console script wrappers (pbr doesn't always generate them from source) +RUN echo '#!/usr/bin/env python3' > /usr/local/bin/ironic-api && \ + echo 'from ironic.command.api import main' >> /usr/local/bin/ironic-api && \ + echo 'if __name__ == "__main__":' >> /usr/local/bin/ironic-api && \ + echo ' main()' >> /usr/local/bin/ironic-api && \ + chmod +x /usr/local/bin/ironic-api + +RUN echo '#!/usr/bin/env python3' > /usr/local/bin/ironic-conductor && \ + echo 'from ironic.command.conductor import main' >> /usr/local/bin/ironic-conductor && \ + echo 'if __name__ == "__main__":' >> /usr/local/bin/ironic-conductor && \ + echo ' main()' >> /usr/local/bin/ironic-conductor && \ + chmod +x /usr/local/bin/ironic-conductor + +RUN echo '#!/usr/bin/env python3' > /usr/local/bin/ironic-dbsync && \ + echo 'from ironic.command.dbsync import main' >> /usr/local/bin/ironic-dbsync && \ + echo 'if __name__ == "__main__":' >> /usr/local/bin/ironic-dbsync && \ + echo ' main()' >> /usr/local/bin/ironic-dbsync && \ + chmod +x /usr/local/bin/ironic-dbsync # Create directories RUN mkdir -p /var/lib/ironic \ diff --git a/stackbox/templates/libvirt/Dockerfile b/stackbox/templates/libvirt/Dockerfile new file mode 100644 index 0000000..fe09d65 --- /dev/null +++ b/stackbox/templates/libvirt/Dockerfile @@ -0,0 +1,33 @@ +# Libvirt container for running VMs +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Install libvirt and QEMU +RUN apt-get update && apt-get install -y \ + libvirt-daemon-system \ + libvirt-clients \ + qemu-kvm \ + qemu-utils \ + dnsmasq \ + iproute2 \ + iptables \ + && rm -rf /var/lib/apt/lists/* + +# Enable TCP listening for remote connections +RUN mkdir -p /etc/libvirt && \ + echo 'listen_tls = 0' >> /etc/libvirt/libvirtd.conf && \ + echo 'listen_tcp = 1' >> /etc/libvirt/libvirtd.conf && \ + echo 'tcp_port = "16509"' >> /etc/libvirt/libvirtd.conf && \ + echo 'auth_tcp = "none"' >> /etc/libvirt/libvirtd.conf && \ + echo 'listen_addr = "0.0.0.0"' >> /etc/libvirt/libvirtd.conf + +# Create required directories +RUN mkdir -p /var/lib/libvirt/images \ + /var/log/libvirt \ + /run/libvirt + +EXPOSE 16509 + +# Start libvirtd +CMD ["/usr/sbin/libvirtd", "--listen"] diff --git a/stackbox/templates/sushy/sushy-emulator.conf.j2 b/stackbox/templates/sushy/sushy-emulator.conf.j2 index 5c647c8..dbf1421 100644 --- a/stackbox/templates/sushy/sushy-emulator.conf.j2 +++ b/stackbox/templates/sushy/sushy-emulator.conf.j2 @@ -1,15 +1,15 @@ # sushy-tools emulator configuration for containerized libvirt +# This is a Python file executed by Flask - no INI-style sections -[DEFAULT] # Use libvirt backend -SUSHY_EMULATOR_BACKEND = libvirt +SUSHY_EMULATOR_BACKEND = "libvirt" # Libvirt URI (TCP connection to containerized libvirt) # Note: Unix socket not available - libvirt runs in separate container -SUSHY_EMULATOR_LIBVIRT_URI = qemu+tcp://libvirt:16509/system +SUSHY_EMULATOR_LIBVIRT_URI = "qemu+tcp://libvirt:16509/system" # Listen on all interfaces inside container -SUSHY_EMULATOR_LISTEN_IP = 0.0.0.0 +SUSHY_EMULATOR_LISTEN_IP = "0.0.0.0" SUSHY_EMULATOR_LISTEN_PORT = 8000 # SSL (disabled for local dev) diff --git a/tests/unit/core/test_compose.py b/tests/unit/core/test_compose.py index 61efd8c..51182ab 100644 --- a/tests/unit/core/test_compose.py +++ b/tests/unit/core/test_compose.py @@ -14,6 +14,23 @@ ) +@pytest.fixture(autouse=True) +def mock_container_runtime() -> None: + """Mock get_container_runtime for all tests in this module.""" + # Patch both the source and the import location + with ( + patch( + "stackbox.core.container.get_container_runtime", + return_value=("docker", "docker-compose"), + ), + patch( + "stackbox.core.compose.get_container_runtime", + return_value=("docker", "docker-compose"), + ), + ): + yield + + class TestGenerateComposeFile: """Tests for generate_compose_file function.""" @@ -145,11 +162,13 @@ def test_health_checks_configured(self, tmp_path: Path) -> None: class TestStartInfrastructure: """Tests for start_infrastructure function.""" + @patch("stackbox.core.compose.get_container_runtime") @patch("subprocess.run") def test_start_calls_docker_compose_correctly( - self, mock_run: MagicMock, tmp_path: Path + self, mock_run: MagicMock, mock_runtime: MagicMock, tmp_path: Path ) -> None: """Test that start_infrastructure calls docker-compose with correct args.""" + mock_runtime.return_value = ("docker", "docker-compose") compose_file = tmp_path / "docker-compose.yml" compose_file.write_text("version: '3.8'") @@ -203,9 +222,13 @@ def test_start_handles_port_conflict(self, mock_run: MagicMock, tmp_path: Path) with pytest.raises(RuntimeError, match="Port conflict detected"): start_infrastructure(compose_file, skip_validation=True) + @patch("stackbox.core.compose.get_container_runtime") @patch("subprocess.run") - def test_start_with_specific_services(self, mock_run: MagicMock, tmp_path: Path) -> None: + def test_start_with_specific_services( + self, mock_run: MagicMock, mock_runtime: MagicMock, tmp_path: Path + ) -> None: """Test that specific services can be started.""" + mock_runtime.return_value = ("docker", "docker-compose") compose_file = tmp_path / "docker-compose.yml" compose_file.write_text("version: '3.8'") @@ -231,9 +254,13 @@ def test_start_with_specific_services(self, mock_run: MagicMock, tmp_path: Path) class TestStopInfrastructure: """Tests for stop_infrastructure function.""" + @patch("stackbox.core.compose.get_container_runtime") @patch("subprocess.run") - def test_stop_calls_docker_compose_correctly(self, mock_run: MagicMock, tmp_path: Path) -> None: + def test_stop_calls_docker_compose_correctly( + self, mock_run: MagicMock, mock_runtime: MagicMock, tmp_path: Path + ) -> None: """Test that stop_infrastructure calls docker-compose with correct args.""" + mock_runtime.return_value = ("docker", "docker-compose") compose_file = tmp_path / "docker-compose.yml" compose_file.write_text("version: '3.8'") @@ -246,9 +273,13 @@ def test_stop_calls_docker_compose_correctly(self, mock_run: MagicMock, tmp_path text=True, ) + @patch("stackbox.core.compose.get_container_runtime") @patch("subprocess.run") - def test_stop_with_remove_volumes(self, mock_run: MagicMock, tmp_path: Path) -> None: + def test_stop_with_remove_volumes( + self, mock_run: MagicMock, mock_runtime: MagicMock, tmp_path: Path + ) -> None: """Test that -v flag is added when remove_volumes=True.""" + mock_runtime.return_value = ("docker", "docker-compose") compose_file = tmp_path / "docker-compose.yml" compose_file.write_text("version: '3.8'") diff --git a/tests/unit/core/test_config.py b/tests/unit/core/test_config.py index 26993f4..63adc6a 100644 --- a/tests/unit/core/test_config.py +++ b/tests/unit/core/test_config.py @@ -36,8 +36,8 @@ def test_generate_ironic_conf_with_defaults(self, tmp_path: Path) -> None: # Should contain default database URL content = output_file.read_text() - assert "mysql+pymysql://ironic:ironic@mariadb/ironic" in content - assert "rabbit://ironic:ironic@rabbitmq:5672/" in content + assert "mysql+pymysql://ironic:stackbox-secret@mariadb/ironic" in content + assert "rabbit://stackrabbit:stackbox-secret@rabbitmq:5672/" in content assert "host = 0.0.0.0" in content assert "port = 6385" in content @@ -87,8 +87,8 @@ def test_generate_sushy_conf_with_debug_true(self, tmp_path: Path) -> None: # Should contain TCP libvirt URI content = output_file.read_text() assert "qemu+tcp://libvirt:16509/system" in content - assert "SUSHY_EMULATOR_BACKEND = libvirt" in content - assert "SUSHY_EMULATOR_LISTEN_IP = 0.0.0.0" in content + assert 'SUSHY_EMULATOR_BACKEND = "libvirt"' in content + assert 'SUSHY_EMULATOR_LISTEN_IP = "0.0.0.0"' in content assert "SUSHY_EMULATOR_LISTEN_PORT = 8000" in content assert "SUSHY_EMULATOR_DEBUG = True" in content diff --git a/tests/unit/core/test_enrollment.py b/tests/unit/core/test_enrollment.py index 0ecf63d..de45a47 100644 --- a/tests/unit/core/test_enrollment.py +++ b/tests/unit/core/test_enrollment.py @@ -333,12 +333,16 @@ class TestEnrollNode: """Tests for enroll_node helper function.""" @patch("click.echo") + @patch("stackbox.core.enrollment.IronicClient.get_node") + @patch("stackbox.core.enrollment.IronicClient.set_provision_state") @patch("stackbox.core.enrollment.IronicClient.create_port") @patch("stackbox.core.enrollment.IronicClient.create_node") def test_enroll_node_full_flow( self, mock_create_node: MagicMock, mock_create_port: MagicMock, + mock_set_provision: MagicMock, + mock_get_node: MagicMock, mock_echo: MagicMock, ) -> None: """Test that enroll_node creates node and port.""" @@ -347,6 +351,8 @@ def test_enroll_node_full_flow( "name": "test-node", "driver": "redfish", } + # Mock provision state transition + mock_get_node.return_value = {"provision_state": "manageable"} node_info = { "name": "test-node", diff --git a/tests/unit/core/test_migrations.py b/tests/unit/core/test_migrations.py index 4ac24ee..e29d5e6 100644 --- a/tests/unit/core/test_migrations.py +++ b/tests/unit/core/test_migrations.py @@ -13,6 +13,16 @@ ) +@pytest.fixture(autouse=True) +def mock_container_runtime() -> None: + """Mock get_container_runtime for all tests in this module.""" + with patch( + "stackbox.core.migrations.get_container_runtime", + return_value=("docker", "docker-compose"), + ): + yield + + class TestWaitForDatabase: """Tests for wait_for_database function.""" @@ -37,14 +47,14 @@ def test_wait_succeeds_when_database_ready( wait_for_database(config_dir) - # Should have called docker run with oslo.db test + # Should have called docker run with mariadb client test assert mock_run.called call_args = mock_run.call_args[0][0] - assert "docker" in call_args assert "run" in call_args assert "--network" in call_args assert "stackbox-network" in call_args - assert "oslo_db" in " ".join(call_args) + assert "mariadb" in " ".join(call_args) + assert "SELECT 1" in " ".join(call_args) @patch("subprocess.run") @patch("time.time") diff --git a/tests/unit/core/test_validation.py b/tests/unit/core/test_validation.py index 75b383a..4cb00d2 100644 --- a/tests/unit/core/test_validation.py +++ b/tests/unit/core/test_validation.py @@ -13,6 +13,23 @@ ) +@pytest.fixture(autouse=True) +def mock_container_runtime() -> None: + """Mock get_container_runtime for all tests in this module.""" + # Patch both the source and the import location + with ( + patch( + "stackbox.core.container.get_container_runtime", + return_value=("docker", "docker-compose"), + ), + patch( + "stackbox.core.compose.get_container_runtime", + return_value=("docker", "docker-compose"), + ), + ): + yield + + class TestPortValidation: """Tests for port availability checking.""" diff --git a/tests/unit/core/test_vm.py b/tests/unit/core/test_vm.py index c9e1be7..f64095e 100644 --- a/tests/unit/core/test_vm.py +++ b/tests/unit/core/test_vm.py @@ -10,6 +10,13 @@ from stackbox.core.vm import LibvirtManager +@pytest.fixture(autouse=True) +def mock_container_runtime() -> None: + """Mock get_container_runtime for all tests in this module.""" + with patch("stackbox.core.vm.get_container_runtime", return_value=("docker", "docker-compose")): + yield + + class TestLibvirtManager: """Tests for LibvirtManager class.""" @@ -67,8 +74,14 @@ def test_create_vm_with_defaults( xml_path = mock_copy.call_args[0][0] assert xml_path.name == "test-node.xml" - # Verify virsh define was called - mock_virsh.assert_called_once_with(["define", "/tmp/test-node.xml"]) + # Verify virsh was called for vm_exists check and define + # First call: list --all --name (for vm_exists check) + # Second call: define + from unittest.mock import call + + assert mock_virsh.call_count == 2 + assert mock_virsh.call_args_list[0] == call(["list", "--all", "--name"]) + assert mock_virsh.call_args_list[1] == call(["define", "/tmp/test-node.xml"]) # Verify XML file was saved locally saved_xml = tmp_path / "vms" / "test-node.xml" @@ -458,17 +471,19 @@ class TestEnsureDefaultNetwork: def test_ensure_network_when_active( self, mock_echo: MagicMock, mock_virsh: MagicMock, tmp_path: Path ) -> None: - """Test that ensure_default_network does nothing if network is active.""" + """Test that ensure_default_network handles already active network.""" manager = LibvirtManager(tmp_path) + # Simulate network already active - net-start will raise RuntimeError mock_virsh.side_effect = [ MagicMock(returncode=0), # net-info for exists check MagicMock(stdout="Active: yes"), # net-info for active check + RuntimeError("error: network is already active"), # net-start fails ] manager.ensure_default_network() - # Should only check network status - assert mock_virsh.call_count == 2 + # Should check status and try to start (which fails but is caught) + assert mock_virsh.call_count == 3 @patch("stackbox.core.vm.LibvirtManager._virsh") @patch("click.echo")