From db9f777bbe6eefa5865a3e3e7ccd9c0cda34ca7e Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 1 May 2026 01:18:49 -0300 Subject: [PATCH 1/3] Introduce recovery orchestrator and fail-closed backup controls --- platform/docs/BACKUP_AND_RECOVERY.md | 12 ++ .../platform/docs/BACKUP_AND_RECOVERY.md | 12 ++ src/clawops/recovery/__init__.py | 19 +++ src/clawops/recovery/backends.py | 65 ++++++++++ src/clawops/recovery/checkpoint.py | 14 +++ src/clawops/recovery/models.py | 51 ++++++++ src/clawops/recovery/orchestrator.py | 82 +++++++++++++ src/clawops/recovery/planner.py | 58 +++++++++ src/clawops/recovery/policy.py | 41 +++++++ src/clawops/recovery/retention.py | 11 ++ src/clawops/recovery/telemetry.py | 10 ++ src/clawops/strongclaw_recovery.py | 111 ++++++++++++------ .../unit/clawops/recovery/test_policy.py | 56 +++++++++ .../unit/clawops/test_strongclaw_recovery.py | 86 ++++++++++++++ 14 files changed, 590 insertions(+), 38 deletions(-) create mode 100644 src/clawops/recovery/__init__.py create mode 100644 src/clawops/recovery/backends.py create mode 100644 src/clawops/recovery/checkpoint.py create mode 100644 src/clawops/recovery/models.py create mode 100644 src/clawops/recovery/orchestrator.py create mode 100644 src/clawops/recovery/planner.py create mode 100644 src/clawops/recovery/policy.py create mode 100644 src/clawops/recovery/retention.py create mode 100644 src/clawops/recovery/telemetry.py create mode 100644 tests/suites/unit/clawops/recovery/test_policy.py diff --git a/platform/docs/BACKUP_AND_RECOVERY.md b/platform/docs/BACKUP_AND_RECOVERY.md index 548eaec..6db8d6a 100644 --- a/platform/docs/BACKUP_AND_RECOVERY.md +++ b/platform/docs/BACKUP_AND_RECOVERY.md @@ -24,6 +24,16 @@ archive traversal. OpenClaw CLI path (`openclaw-cli`) or the local tar fallback path (`tar-fallback`) so automation can tell which recovery mode actually ran. +Backup creation accepts a policy profile and an explicit fallback gate: + +- `--profile` (default `control-plane`; supported: `control-plane`, `devflow-checkpoint`, `hypermemory-fast`, `full-data-plane`) +- `--dry-run` (render deterministic include/exclude/retention plan, do not write archives) +- `--allow-fallback` (permit tar fallback if OpenClaw backup create fails) + +Default mode is fail-closed for OpenClaw backup create failures: if OpenClaw is +available but `openclaw backup create` fails, the command fails unless +`--allow-fallback` is set. + ## Scheduled maintenance StrongClaw host service activation now installs independent daily jobs: @@ -47,6 +57,8 @@ launchd agents: Commands: - `clawops recovery --home-dir backup-create` +- `clawops recovery --home-dir backup-create --profile control-plane --dry-run` +- `clawops recovery --home-dir backup-create --allow-fallback` - `clawops recovery --home-dir backup-verify latest` - `clawops recovery --home-dir prune-retention` diff --git a/src/clawops/assets/platform/docs/BACKUP_AND_RECOVERY.md b/src/clawops/assets/platform/docs/BACKUP_AND_RECOVERY.md index 548eaec..6db8d6a 100644 --- a/src/clawops/assets/platform/docs/BACKUP_AND_RECOVERY.md +++ b/src/clawops/assets/platform/docs/BACKUP_AND_RECOVERY.md @@ -24,6 +24,16 @@ archive traversal. OpenClaw CLI path (`openclaw-cli`) or the local tar fallback path (`tar-fallback`) so automation can tell which recovery mode actually ran. +Backup creation accepts a policy profile and an explicit fallback gate: + +- `--profile` (default `control-plane`; supported: `control-plane`, `devflow-checkpoint`, `hypermemory-fast`, `full-data-plane`) +- `--dry-run` (render deterministic include/exclude/retention plan, do not write archives) +- `--allow-fallback` (permit tar fallback if OpenClaw backup create fails) + +Default mode is fail-closed for OpenClaw backup create failures: if OpenClaw is +available but `openclaw backup create` fails, the command fails unless +`--allow-fallback` is set. + ## Scheduled maintenance StrongClaw host service activation now installs independent daily jobs: @@ -47,6 +57,8 @@ launchd agents: Commands: - `clawops recovery --home-dir backup-create` +- `clawops recovery --home-dir backup-create --profile control-plane --dry-run` +- `clawops recovery --home-dir backup-create --allow-fallback` - `clawops recovery --home-dir backup-verify latest` - `clawops recovery --home-dir prune-retention` diff --git a/src/clawops/recovery/__init__.py b/src/clawops/recovery/__init__.py new file mode 100644 index 0000000..d481b68 --- /dev/null +++ b/src/clawops/recovery/__init__.py @@ -0,0 +1,19 @@ +"""Policy-driven recovery orchestration helpers.""" + +from clawops.recovery.models import BackupCreateExecution, BackupPlan, RecoveryProfile +from clawops.recovery.orchestrator import create_backup_execution +from clawops.recovery.policy import ( + DEFAULT_RECOVERY_PROFILE, + RECOVERY_PROFILES, + ensure_recovery_profile, +) + +__all__ = [ + "BackupCreateExecution", + "BackupPlan", + "DEFAULT_RECOVERY_PROFILE", + "RECOVERY_PROFILES", + "RecoveryProfile", + "create_backup_execution", + "ensure_recovery_profile", +] diff --git a/src/clawops/recovery/backends.py b/src/clawops/recovery/backends.py new file mode 100644 index 0000000..e3db7c1 --- /dev/null +++ b/src/clawops/recovery/backends.py @@ -0,0 +1,65 @@ +"""Recovery backup backend strategies.""" + +from __future__ import annotations + +import pathlib +from collections.abc import Callable +from typing import Protocol + +from clawops.strongclaw_runtime import ExecResult + +type WhichFunc = Callable[..., str | bytes | None] +type RunCommandFunc = Callable[..., ExecResult] +type TarWriter = Callable[ + [pathlib.Path], + None, +] + + +class BackupBackend(Protocol): + """Interface for backup backend execution.""" + + name: str + + def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]: + """Create one archive and return status + optional failure reason.""" + ... + + +class OpenClawBackupBackend: + """OpenClaw CLI backend for backup creation.""" + + name = "openclaw-cli" + + def __init__(self, *, which: WhichFunc, run_command: RunCommandFunc) -> None: + self._which = which + self._run_command = run_command + + def is_available(self) -> bool: + """Return whether the OpenClaw executable is available.""" + return self._which("openclaw", None) is not None + + def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]: + """Create one archive through OpenClaw.""" + result = self._run_command( + ["openclaw", "backup", "create", str(archive_tmp_path)], + timeout_seconds=600, + ) + if result.ok: + return True, None + detail = result.stderr.strip() or result.stdout.strip() or "openclaw backup create failed" + return False, detail + + +class TarBackupBackend: + """StrongClaw tar fallback backend.""" + + name = "tar-fallback" + + def __init__(self, *, writer: TarWriter) -> None: + self._writer = writer + + def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]: + """Create one archive through the fallback tar writer.""" + self._writer(archive_tmp_path) + return True, None diff --git a/src/clawops/recovery/checkpoint.py b/src/clawops/recovery/checkpoint.py new file mode 100644 index 0000000..d826327 --- /dev/null +++ b/src/clawops/recovery/checkpoint.py @@ -0,0 +1,14 @@ +"""Checkpoint metadata contracts for recovery surfaces.""" + +from __future__ import annotations + +import dataclasses + + +@dataclasses.dataclass(frozen=True, slots=True) +class CheckpointRecord: + """Minimal checkpoint metadata record.""" + + checkpoint_id: str + scope: str + created_at_ms: int diff --git a/src/clawops/recovery/models.py b/src/clawops/recovery/models.py new file mode 100644 index 0000000..2389476 --- /dev/null +++ b/src/clawops/recovery/models.py @@ -0,0 +1,51 @@ +"""Data models for the recovery subsystem.""" + +from __future__ import annotations + +import dataclasses +import pathlib +from typing import Literal + +type RecoveryProfile = Literal[ + "control-plane", + "devflow-checkpoint", + "hypermemory-fast", + "full-data-plane", +] + + +@dataclasses.dataclass(frozen=True, slots=True) +class BackupPlan: + """Deterministic backup plan payload.""" + + profile: RecoveryProfile + include_roots: tuple[pathlib.Path, ...] + exclude_roots: tuple[pathlib.Path, ...] + backend_candidates: tuple[str, ...] + estimated_bytes: int + estimated_file_count: int + retention: dict[str, object] + + def to_payload(self) -> dict[str, object]: + """Render a JSON-safe payload.""" + return { + "profile": self.profile, + "include_roots": [path.as_posix() for path in self.include_roots], + "exclude_roots": [path.as_posix() for path in self.exclude_roots], + "backend_candidates": list(self.backend_candidates), + "estimated_bytes": self.estimated_bytes, + "estimated_file_count": self.estimated_file_count, + "retention": dict(self.retention), + } + + +@dataclasses.dataclass(frozen=True, slots=True) +class BackupCreateExecution: + """Result of backup orchestration.""" + + plan: BackupPlan + dry_run: bool + archive_path: pathlib.Path | None = None + mode: str | None = None + fallback_used: bool = False + fallback_reason: str | None = None diff --git a/src/clawops/recovery/orchestrator.py b/src/clawops/recovery/orchestrator.py new file mode 100644 index 0000000..6b6fb97 --- /dev/null +++ b/src/clawops/recovery/orchestrator.py @@ -0,0 +1,82 @@ +"""Recovery create/plan orchestration.""" + +from __future__ import annotations + +import pathlib +import tarfile +import time +from collections.abc import Callable + +from clawops.recovery.backends import OpenClawBackupBackend, TarBackupBackend, WhichFunc +from clawops.recovery.models import BackupCreateExecution, RecoveryProfile +from clawops.recovery.planner import build_backup_plan +from clawops.strongclaw_runtime import CommandError, ExecResult + +type RunCommandFunc = Callable[..., ExecResult] +type TarWriter = Callable[[pathlib.Path], None] +type SafeUnlink = Callable[[pathlib.Path], None] + + +def create_backup_execution( + *, + home_dir: pathlib.Path, + openclaw_state_root: pathlib.Path, + backup_root: pathlib.Path, + legacy_backup_root: pathlib.Path, + profile: RecoveryProfile, + allow_fallback: bool, + dry_run: bool, + tar_writer: TarWriter, + safe_unlink: SafeUnlink, + which: WhichFunc, + run_command: RunCommandFunc, +) -> BackupCreateExecution: + """Plan or create one recovery backup archive.""" + plan = build_backup_plan( + profile=profile, + include_root=openclaw_state_root, + backup_root=backup_root, + legacy_backup_root=legacy_backup_root, + ) + if dry_run: + return BackupCreateExecution(plan=plan, dry_run=True) + + backup_root.mkdir(parents=True, exist_ok=True) + stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime()) + archive_path = backup_root / f"openclaw-{stamp}.tar.gz" + archive_tmp_path = backup_root / f".{archive_path.name}.tmp" + openclaw_backend = OpenClawBackupBackend(which=which, run_command=run_command) + fallback_reason: str | None = None + if openclaw_backend.is_available(): + safe_unlink(archive_tmp_path) + backend_ok, backend_error = openclaw_backend.create(archive_tmp_path) + if backend_ok: + archive_tmp_path.replace(archive_path) + return BackupCreateExecution( + plan=plan, + dry_run=False, + archive_path=archive_path, + mode="openclaw-cli", + ) + safe_unlink(archive_tmp_path) + if not allow_fallback: + raise CommandError(backend_error or "openclaw backup create failed") + fallback_reason = backend_error + + safe_unlink(archive_tmp_path) + fallback_backend = TarBackupBackend(writer=tar_writer) + try: + fallback_backend.create(archive_tmp_path) + archive_tmp_path.replace(archive_path) + except (OSError, tarfile.TarError) as exc: + safe_unlink(archive_tmp_path) + safe_unlink(archive_path) + raise CommandError(f"backup creation failed: {exc}") from exc + return BackupCreateExecution( + plan=plan, + dry_run=False, + archive_path=archive_path, + mode="tar-fallback", + fallback_used=fallback_reason is not None, + fallback_reason=fallback_reason, + ) diff --git a/src/clawops/recovery/planner.py b/src/clawops/recovery/planner.py new file mode 100644 index 0000000..141bd57 --- /dev/null +++ b/src/clawops/recovery/planner.py @@ -0,0 +1,58 @@ +"""Deterministic recovery backup planning.""" + +from __future__ import annotations + +import pathlib + +from clawops.recovery.models import BackupPlan, RecoveryProfile +from clawops.recovery.policy import retention_for_profile + + +def _is_path_within(path: pathlib.Path, root: pathlib.Path) -> bool: + """Return whether *path* is contained by *root*.""" + try: + path.relative_to(root) + except ValueError: + return False + return True + + +def _estimate_plan( + include_root: pathlib.Path, exclude_roots: tuple[pathlib.Path, ...] +) -> tuple[int, int]: + """Estimate bytes and file count for the plan.""" + file_count = 0 + total_bytes = 0 + for path in sorted(include_root.rglob("*")): + if path.is_symlink() or not path.is_file(): + continue + resolved = path.resolve() + if any(_is_path_within(resolved, root) for root in exclude_roots): + continue + file_count += 1 + total_bytes += path.stat().st_size + return file_count, total_bytes + + +def build_backup_plan( + *, + profile: RecoveryProfile, + include_root: pathlib.Path, + backup_root: pathlib.Path, + legacy_backup_root: pathlib.Path, +) -> BackupPlan: + """Build one deterministic backup plan.""" + include_root_resolved = include_root.resolve() + backup_root_resolved = backup_root.resolve() + legacy_backup_root_resolved = legacy_backup_root.resolve() + exclude_roots = (backup_root_resolved, legacy_backup_root_resolved) + estimated_file_count, estimated_bytes = _estimate_plan(include_root_resolved, exclude_roots) + return BackupPlan( + profile=profile, + include_roots=(include_root_resolved,), + exclude_roots=exclude_roots, + backend_candidates=("openclaw-cli", "tar-fallback"), + estimated_bytes=estimated_bytes, + estimated_file_count=estimated_file_count, + retention=retention_for_profile(profile), + ) diff --git a/src/clawops/recovery/policy.py b/src/clawops/recovery/policy.py new file mode 100644 index 0000000..40aee2b --- /dev/null +++ b/src/clawops/recovery/policy.py @@ -0,0 +1,41 @@ +"""Recovery policy defaults and profile validation.""" + +from __future__ import annotations + +from typing import TypeGuard + +from clawops.recovery.models import RecoveryProfile + +RECOVERY_PROFILES: tuple[RecoveryProfile, ...] = ( + "control-plane", + "devflow-checkpoint", + "hypermemory-fast", + "full-data-plane", +) +DEFAULT_RECOVERY_PROFILE: RecoveryProfile = "control-plane" + + +def ensure_recovery_profile(raw_profile: str) -> RecoveryProfile: + """Validate and normalize one recovery profile.""" + if not _is_recovery_profile(raw_profile): + choices = ", ".join(RECOVERY_PROFILES) + raise ValueError( + f"unsupported recovery profile {raw_profile!r}; expected one of: {choices}" + ) + return raw_profile + + +def _is_recovery_profile(raw_profile: str) -> TypeGuard[RecoveryProfile]: + """Return whether *raw_profile* is one of the declared recovery profiles.""" + return raw_profile in RECOVERY_PROFILES + + +def retention_for_profile(profile: RecoveryProfile) -> dict[str, object]: + """Return profile-specific retention policy metadata.""" + if profile == "control-plane": + return {"daily": 7, "weekly": 2} + if profile == "devflow-checkpoint": + return {"active_run_latest": 3, "completed_runs": 5, "completed_retention_days": 7} + if profile == "hypermemory-fast": + return {"checkpoints": 5} + return {"weekly": 2} diff --git a/src/clawops/recovery/retention.py b/src/clawops/recovery/retention.py new file mode 100644 index 0000000..aa37c11 --- /dev/null +++ b/src/clawops/recovery/retention.py @@ -0,0 +1,11 @@ +"""Retention helpers for recovery profiles.""" + +from __future__ import annotations + +from clawops.recovery.models import RecoveryProfile +from clawops.recovery.policy import retention_for_profile + + +def retention_policy_payload(profile: RecoveryProfile) -> dict[str, object]: + """Return the retention payload for operator-facing plan output.""" + return retention_for_profile(profile) diff --git a/src/clawops/recovery/telemetry.py b/src/clawops/recovery/telemetry.py new file mode 100644 index 0000000..4ca5555 --- /dev/null +++ b/src/clawops/recovery/telemetry.py @@ -0,0 +1,10 @@ +"""Recovery telemetry event helpers.""" + +from __future__ import annotations + +from collections.abc import Mapping + + +def event_payload(event: str, fields: Mapping[str, object]) -> dict[str, object]: + """Build one structured recovery telemetry payload.""" + return {"event": event, **dict(fields)} diff --git a/src/clawops/strongclaw_recovery.py b/src/clawops/strongclaw_recovery.py index e6f5400..b800c7e 100644 --- a/src/clawops/strongclaw_recovery.py +++ b/src/clawops/strongclaw_recovery.py @@ -3,7 +3,6 @@ from __future__ import annotations import argparse -import dataclasses import json import pathlib import shutil @@ -12,6 +11,13 @@ from clawops.app_paths import strongclaw_state_dir from clawops.cli_roots import add_ignored_repo_root_alias, warn_ignored_repo_root_argument +from clawops.recovery.models import BackupCreateExecution +from clawops.recovery.orchestrator import create_backup_execution +from clawops.recovery.policy import ( + DEFAULT_RECOVERY_PROFILE, + RECOVERY_PROFILES, + ensure_recovery_profile, +) from clawops.strongclaw_runtime import ( CommandError, resolve_home_dir, @@ -21,14 +27,6 @@ _OPENCLAW_VERIFY_MANIFEST_MISMATCH = "Expected exactly one backup manifest entry" -@dataclasses.dataclass(frozen=True, slots=True) -class BackupCreateResult: - """Archive path plus the mechanism that created it.""" - - archive_path: pathlib.Path - mode: str - - def backups_dir(*, home_dir: pathlib.Path | None = None) -> pathlib.Path: """Return the StrongClaw-managed backup archive directory.""" resolved_home = resolve_home_dir(home_dir) @@ -90,47 +88,58 @@ def _write_tar_archive( archive.add(path, arcname=path.relative_to(include_root), recursive=False) -def _create_backup_result(*, home_dir: pathlib.Path | None = None) -> BackupCreateResult: +def _create_backup_result( + *, + home_dir: pathlib.Path | None = None, + profile: str = DEFAULT_RECOVERY_PROFILE, + allow_fallback: bool = False, + dry_run: bool = False, +) -> BackupCreateExecution: """Create one backup archive and record which backup path was used.""" resolved_home_dir = resolve_home_dir(home_dir) archive_root = backups_dir(home_dir=home_dir) - archive_root.mkdir(parents=True, exist_ok=True) - stamp = time.strftime("%Y%m%d-%H%M%S", time.localtime()) - archive_path = archive_root / f"openclaw-{stamp}.tar.gz" - archive_tmp_path = archive_root / f".{archive_path.name}.tmp" + selected_profile = ensure_recovery_profile(profile) exclude_roots: tuple[pathlib.Path, ...] = ( archive_root.resolve(), legacy_backups_dir(home_dir=home_dir).resolve(), ) - if shutil.which("openclaw") is not None: - _safe_unlink(archive_tmp_path) - result = run_command( - ["openclaw", "backup", "create", str(archive_tmp_path)], timeout_seconds=600 - ) - if result.ok: - archive_tmp_path.replace(archive_path) - return BackupCreateResult(archive_path=archive_path, mode="openclaw-cli") - _safe_unlink(archive_tmp_path) state_dir = openclaw_state_dir(home_dir=home_dir) - _safe_unlink(archive_tmp_path) - try: - _write_tar_archive( + return create_backup_execution( + home_dir=resolved_home_dir, + openclaw_state_root=state_dir, + backup_root=archive_root, + legacy_backup_root=legacy_backups_dir(home_dir=home_dir), + profile=selected_profile, + allow_fallback=allow_fallback, + dry_run=dry_run, + tar_writer=lambda archive_tmp_path: _write_tar_archive( archive_tmp_path, state_dir=state_dir, include_root=resolved_home_dir, exclude_roots=exclude_roots, - ) - archive_tmp_path.replace(archive_path) - except (OSError, tarfile.TarError) as exc: - _safe_unlink(archive_tmp_path) - _safe_unlink(archive_path) - raise CommandError(f"backup creation failed: {exc}") from exc - return BackupCreateResult(archive_path=archive_path, mode="tar-fallback") + ), + safe_unlink=_safe_unlink, + which=shutil.which, + run_command=run_command, + ) -def create_backup(*, home_dir: pathlib.Path | None = None) -> pathlib.Path: +def create_backup( + *, + home_dir: pathlib.Path | None = None, + profile: str = DEFAULT_RECOVERY_PROFILE, + allow_fallback: bool = False, +) -> pathlib.Path: """Create one backup archive, preferring the OpenClaw CLI when available.""" - return _create_backup_result(home_dir=home_dir).archive_path + result = _create_backup_result( + home_dir=home_dir, + profile=profile, + allow_fallback=allow_fallback, + dry_run=False, + ) + if result.archive_path is None: + raise CommandError("backup creation produced no archive path") + return result.archive_path def verify_backup( @@ -260,7 +269,18 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: add_ignored_repo_root_alias(parser) parser.add_argument("--home-dir", type=pathlib.Path, default=pathlib.Path.home()) subparsers = parser.add_subparsers(dest="command", required=True) - subparsers.add_parser("backup-create") + backup_create_parser = subparsers.add_parser("backup-create") + backup_create_parser.add_argument("--profile", choices=RECOVERY_PROFILES, default=None) + backup_create_parser.add_argument( + "--dry-run", + action="store_true", + help="Print the deterministic backup plan without writing archives.", + ) + backup_create_parser.add_argument( + "--allow-fallback", + action="store_true", + help="Allow fallback tar creation when OpenClaw CLI backup creation fails.", + ) verify_parser = subparsers.add_parser("backup-verify") verify_parser.add_argument("target", nargs="?", default="latest") restore_parser = subparsers.add_parser("restore") @@ -286,8 +306,23 @@ def main(argv: list[str] | None = None) -> int: ) home_dir = resolve_home_dir(args.home_dir) if args.command == "backup-create": - result = _create_backup_result(home_dir=home_dir) - payload = {"ok": True, "archive": str(result.archive_path), "mode": result.mode} + execution = _create_backup_result( + home_dir=home_dir, + profile=args.profile or DEFAULT_RECOVERY_PROFILE, + allow_fallback=bool(args.allow_fallback), + dry_run=bool(args.dry_run), + ) + payload: dict[str, object] = { + "ok": True, + **execution.plan.to_payload(), + "dry_run": execution.dry_run, + } + if execution.archive_path is not None and execution.mode is not None: + payload["archive"] = str(execution.archive_path) + payload["mode"] = execution.mode + payload["fallback_used"] = execution.fallback_used + if execution.fallback_reason is not None: + payload["fallback_reason"] = execution.fallback_reason elif args.command == "backup-verify": payload = {"ok": True, "archive": str(verify_backup(args.target, home_dir=home_dir))} elif args.command == "restore": diff --git a/tests/suites/unit/clawops/recovery/test_policy.py b/tests/suites/unit/clawops/recovery/test_policy.py new file mode 100644 index 0000000..9d436b2 --- /dev/null +++ b/tests/suites/unit/clawops/recovery/test_policy.py @@ -0,0 +1,56 @@ +"""Unit coverage for recovery policy and planning helpers.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from clawops.recovery.planner import build_backup_plan +from clawops.recovery.policy import ( + DEFAULT_RECOVERY_PROFILE, + RECOVERY_PROFILES, + ensure_recovery_profile, + retention_for_profile, +) + + +def test_recovery_profile_defaults_to_control_plane() -> None: + """Recovery should default to the control-plane profile.""" + assert DEFAULT_RECOVERY_PROFILE == "control-plane" + + +def test_recovery_profile_accepts_known_values() -> None: + """Recovery profile validation should accept all declared profiles.""" + for profile in RECOVERY_PROFILES: + assert ensure_recovery_profile(profile) == profile + + +def test_recovery_profile_rejects_unknown_value() -> None: + """Recovery profile validation should fail for unsupported values.""" + with pytest.raises(ValueError, match="unsupported recovery profile"): + ensure_recovery_profile("unknown-profile") + + +def test_backup_planner_tracks_include_exclude_roots(tmp_path: Path) -> None: + """Backup planner should include OpenClaw state and exclude backup roots.""" + home_dir = tmp_path / "home" + openclaw_state = home_dir / ".openclaw" + backup_root = home_dir / ".local" / "state" / "strongclaw" / "backups" + legacy_backup_root = openclaw_state / "backups" + (openclaw_state / "logs").mkdir(parents=True, exist_ok=True) + (openclaw_state / "logs" / "gateway.log").write_text("ready\n", encoding="utf-8") + (legacy_backup_root / "old.tar.gz").parent.mkdir(parents=True, exist_ok=True) + (legacy_backup_root / "old.tar.gz").write_text("old", encoding="utf-8") + plan = build_backup_plan( + profile="control-plane", + include_root=openclaw_state, + backup_root=backup_root, + legacy_backup_root=legacy_backup_root, + ) + + assert plan.include_roots == (openclaw_state.resolve(),) + assert plan.exclude_roots == (backup_root.resolve(), legacy_backup_root.resolve()) + assert plan.backend_candidates == ("openclaw-cli", "tar-fallback") + assert plan.estimated_file_count == 1 + assert plan.retention == retention_for_profile("control-plane") diff --git a/tests/suites/unit/clawops/test_strongclaw_recovery.py b/tests/suites/unit/clawops/test_strongclaw_recovery.py index 21c97e3..cb24a29 100644 --- a/tests/suites/unit/clawops/test_strongclaw_recovery.py +++ b/tests/suites/unit/clawops/test_strongclaw_recovery.py @@ -80,6 +80,42 @@ def test_backup_create_cli_reports_tar_fallback_and_round_trips( ) == "ready\n" +def test_backup_create_cli_dry_run_outputs_plan_without_writing_archive( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + test_context: TestContext, +) -> None: + """Dry-run backup creation should emit the manifest without writing archives.""" + home_dir = tmp_path / "home" + _init_openclaw_home(home_dir) + test_context.patch.patch_object(strongclaw_recovery.shutil, "which", new=_missing_tool) + + exit_code = strongclaw_recovery.main( + [ + "--home-dir", + str(home_dir), + "backup-create", + "--profile", + "control-plane", + "--dry-run", + ] + ) + payload = json.loads(capsys.readouterr().out) + + assert exit_code == 0 + assert payload["ok"] is True + assert payload["dry_run"] is True + assert payload["profile"] == "control-plane" + assert payload["backend_candidates"] == ["openclaw-cli", "tar-fallback"] + assert payload["include_roots"] == [str(home_dir / ".openclaw")] + assert payload["exclude_roots"] == [ + str(strongclaw_recovery.backups_dir(home_dir=home_dir)), + str(strongclaw_recovery.legacy_backups_dir(home_dir=home_dir)), + ] + backup_root = strongclaw_recovery.backups_dir(home_dir=home_dir) + assert not backup_root.exists() + + def test_backup_root_defaults_to_strongclaw_state_dir(tmp_path: Path) -> None: """Backups should default to the StrongClaw-owned state tree.""" home_dir = tmp_path / "home" @@ -137,6 +173,56 @@ def _failing_write_tar_archive(*args: object, **kwargs: object) -> None: assert remaining_files == [] +def test_backup_create_fails_closed_without_allow_fallback_when_openclaw_create_fails( + tmp_path: Path, + test_context: TestContext, +) -> None: + """Create should fail closed when the OpenClaw backend fails and fallback is not allowed.""" + home_dir = tmp_path / "home" + _init_openclaw_home(home_dir) + + def _run_command(_command: list[str], **_kwargs: object) -> ExecResult: + return ExecResult( + argv=("openclaw", "backup", "create", "target"), + returncode=1, + stdout="", + stderr="openclaw backend failed", + duration_ms=1, + ) + + test_context.patch.patch_object(strongclaw_recovery.shutil, "which", new=_openclaw_only) + test_context.patch.patch_object(strongclaw_recovery, "run_command", new=_run_command) + + with pytest.raises(CommandError, match="openclaw backend failed"): + strongclaw_recovery.create_backup(home_dir=home_dir, allow_fallback=False) + + +def test_backup_create_allows_explicit_fallback_when_openclaw_create_fails( + tmp_path: Path, + test_context: TestContext, +) -> None: + """Create should use tar fallback only when the operator opts into fallback mode.""" + home_dir = tmp_path / "home" + _init_openclaw_home(home_dir) + + def _run_command(_command: list[str], **_kwargs: object) -> ExecResult: + return ExecResult( + argv=("openclaw", "backup", "create", "target"), + returncode=1, + stdout="", + stderr="openclaw backend failed", + duration_ms=1, + ) + + test_context.patch.patch_object(strongclaw_recovery.shutil, "which", new=_openclaw_only) + test_context.patch.patch_object(strongclaw_recovery, "run_command", new=_run_command) + + archive_path = strongclaw_recovery.create_backup(home_dir=home_dir, allow_fallback=True) + assert archive_path.is_file() + test_context.patch.patch_object(strongclaw_recovery.shutil, "which", new=_missing_tool) + assert strongclaw_recovery.verify_backup(archive_path, home_dir=home_dir) == archive_path + + def test_restore_backup_rejects_traversal_members( tmp_path: Path, test_context: TestContext, From 81d56cff477c9427a9893d79ca13830474620332 Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 1 May 2026 01:39:38 -0300 Subject: [PATCH 2/3] Fix recovery OpenClaw backend availability probe --- src/clawops/recovery/backends.py | 2 +- .../unit/clawops/recovery/test_backends.py | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tests/suites/unit/clawops/recovery/test_backends.py diff --git a/src/clawops/recovery/backends.py b/src/clawops/recovery/backends.py index e3db7c1..3dc181d 100644 --- a/src/clawops/recovery/backends.py +++ b/src/clawops/recovery/backends.py @@ -37,7 +37,7 @@ def __init__(self, *, which: WhichFunc, run_command: RunCommandFunc) -> None: def is_available(self) -> bool: """Return whether the OpenClaw executable is available.""" - return self._which("openclaw", None) is not None + return self._which("openclaw") is not None def create(self, archive_tmp_path: pathlib.Path) -> tuple[bool, str | None]: """Create one archive through OpenClaw.""" diff --git a/tests/suites/unit/clawops/recovery/test_backends.py b/tests/suites/unit/clawops/recovery/test_backends.py new file mode 100644 index 0000000..bcd5b32 --- /dev/null +++ b/tests/suites/unit/clawops/recovery/test_backends.py @@ -0,0 +1,27 @@ +"""Unit coverage for recovery backend strategy helpers.""" + +from __future__ import annotations + +from clawops.recovery.backends import OpenClawBackupBackend +from clawops.strongclaw_runtime import ExecResult + + +def test_openclaw_backend_availability_uses_standard_which_signature() -> None: + """Availability checks should call which() with only the command string.""" + + def fake_which(command: str) -> str | None: + if command == "openclaw": + return "/usr/local/bin/openclaw" + return None + + def fake_run_command(*_args: object, **_kwargs: object) -> ExecResult: + return ExecResult( + argv=("openclaw", "backup", "create"), + returncode=0, + stdout="", + stderr="", + duration_ms=1, + ) + + backend = OpenClawBackupBackend(which=fake_which, run_command=fake_run_command) + assert backend.is_available() is True From 49a76489246af500570b1bbc8a753e0cf97c10e1 Mon Sep 17 00:00:00 2001 From: Juan Sugg Date: Fri, 1 May 2026 01:52:11 -0300 Subject: [PATCH 3/3] Use explicit fallback in recovery smoke workflow --- tests/suites/unit/ci/test_security_workflow.py | 5 ++++- tests/utils/helpers/_ci_workflows/security.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/suites/unit/ci/test_security_workflow.py b/tests/suites/unit/ci/test_security_workflow.py index 227f687..22e03b0 100644 --- a/tests/suites/unit/ci/test_security_workflow.py +++ b/tests/suites/unit/ci/test_security_workflow.py @@ -295,6 +295,7 @@ def test_run_recovery_smoke_executes_cli_and_fallback_modes_when_openclaw_availa ) -> None: """Recovery smoke should execute backup, verify, and restore in both modes.""" seen_openclaw_resolution: list[str | None] = [] + seen_allow_fallback_flags: list[bool] = [] archive_path = tmp_path / "archive.tar.gz" def fake_which(command: str, *_args: object, **_kwargs: object) -> str | None: @@ -302,8 +303,9 @@ def fake_which(command: str, *_args: object, **_kwargs: object) -> str | None: return "/usr/local/bin/openclaw" return None - def fake_create_backup(*, home_dir: Path) -> Path: + def fake_create_backup(*, home_dir: Path, allow_fallback: bool = False) -> Path: del home_dir + seen_allow_fallback_flags.append(allow_fallback) seen_openclaw_resolution.append(security_helpers.recovery_helpers.shutil.which("openclaw")) archive_path.write_text("archive", encoding="utf-8") return archive_path @@ -340,6 +342,7 @@ def fake_restore_backup(target: Path, *, destination: Path, home_dir: Path) -> P None, None, ] + assert seen_allow_fallback_flags == [True, True] def test_run_recovery_smoke_requires_openclaw_cli_when_requested( diff --git a/tests/utils/helpers/_ci_workflows/security.py b/tests/utils/helpers/_ci_workflows/security.py index efe834c..6c902a9 100644 --- a/tests/utils/helpers/_ci_workflows/security.py +++ b/tests/utils/helpers/_ci_workflows/security.py @@ -760,7 +760,7 @@ def _run_recovery_cycle( _force_tar_fallback_for_recovery() if force_tar_fallback else contextlib.nullcontext() ) with recovery_context: - archive_path = create_backup(home_dir=home_dir) + archive_path = create_backup(home_dir=home_dir, allow_fallback=True) verified_archive = verify_backup(archive_path, home_dir=home_dir) restore_destination = resolved_tmp_root / f"recovery-restore-{mode_label}" restore_backup(verified_archive, destination=restore_destination, home_dir=home_dir)