diff --git a/backend/handler/filesystem/base_handler.py b/backend/handler/filesystem/base_handler.py index 9ab06a4053..776f3840a3 100644 --- a/backend/handler/filesystem/base_handler.py +++ b/backend/handler/filesystem/base_handler.py @@ -16,8 +16,9 @@ from starlette.datastructures import UploadFile from config.config_manager import config_manager as cm +from logger.logger import log from models.base import FILE_NAME_MAX_LENGTH -from utils.filesystem import iter_directories, iter_files +from utils.filesystem import iter_directories, iter_files, link_or_copy_file TAG_REGEX = re.compile(r"\(([^)]+)\)|\[([^]]+)\]") EXTENSION_REGEX = re.compile(r"\.(([a-z]+\.)*\w+)$") @@ -148,13 +149,22 @@ class Asset(Enum): class FSHandler: - def __init__(self, base_path: str): + def __init__(self, base_path: str, tolerate_missing_base: bool = False): self.base_path = Path(base_path).resolve() self._locks: dict[str, asyncio.Lock] = {} self._lock_mutex = asyncio.Lock() - # Create base directory synchronously during initialization - self.base_path.mkdir(parents=True, exist_ok=True) + # Create base directory synchronously during initialization. + try: + self.base_path.mkdir(parents=True, exist_ok=True) + except OSError: + if not tolerate_missing_base: + raise + + log.warning( + f"Could not create or access {self.base_path}; " + "feature will be unavailable until the directory is writable." + ) async def _get_file_lock(self, file_path: str) -> asyncio.Lock: """Get or create a lock for a specific file path.""" @@ -487,13 +497,21 @@ async def stream_file(self, file_path: str): return await open_file(full_path, "rb") - async def copy_file(self, source_full_path: Path, dest_path: str) -> None: + async def copy_file( + self, + source_full_path: Path, + dest_path: str, + allow_link: bool = False, + ) -> None: """ Copy a file from source to destination. Args: source_full_path: Absolute path to the source file dest_path: Relative path to the destination file + allow_link: Try a hardlink first and fall back to + a copy when the link isn't possible (cross-device, + unsupported filesystem, etc.) Raises: FileNotFoundError: If source file does not exist @@ -518,7 +536,10 @@ async def copy_file(self, source_full_path: Path, dest_path: str) -> None: # Create destination directory if needed dest_parent_anyio_path = AnyioPath(str(dest_full_path.parent)) await dest_parent_anyio_path.mkdir(parents=True, exist_ok=True) - shutil.copy2(str(source_full_path), str(dest_full_path)) + if allow_link: + link_or_copy_file(source_full_path, dest_full_path) + else: + shutil.copy2(str(source_full_path), str(dest_full_path)) async def move_file_or_folder(self, source_path: str, dest_path: str) -> None: """ diff --git a/backend/handler/filesystem/resources_handler.py b/backend/handler/filesystem/resources_handler.py index f3e1f5c794..457c8730f1 100644 --- a/backend/handler/filesystem/resources_handler.py +++ b/backend/handler/filesystem/resources_handler.py @@ -129,7 +129,9 @@ async def _store_cover( log.warning(f"Cover file not found: {url_cover}") return None dest_path = f"{cover_file}/{size.value}.png" - await self.copy_file(resolved, dest_path) + # Small-size covers get resized in place, which would mutate + # the user's source image if the destination were a hardlink. + await self.copy_file(resolved, dest_path, allow_link=False) if ENABLE_SCHEDULED_CONVERT_IMAGES_TO_WEBP: self.image_converter.convert_to_webp( @@ -302,7 +304,9 @@ async def _store_screenshot(self, rom: Rom, url_screenhot: str, idx: int): if resolved is None or not await AnyioPath(resolved).exists(): log.warning(f"Screenshot file not found: {url_screenhot}") return None - await self.copy_file(resolved, f"{screenshot_path}/{idx}.jpg") + await self.copy_file( + resolved, f"{screenshot_path}/{idx}.jpg", allow_link=True + ) except Exception as exc: log.error(f"Unable to copy screenshot file {url_screenhot}: {str(exc)}") return None @@ -416,7 +420,9 @@ async def _store_manual(self, rom: Rom, url_manual: str): if resolved is None or not await AnyioPath(resolved).exists(): log.warning(f"Manual file not found: {url_manual}") return None - await self.copy_file(resolved, f"{manual_path}/{rom.id}.pdf") + await self.copy_file( + resolved, f"{manual_path}/{rom.id}.pdf", allow_link=True + ) except Exception as exc: log.error(f"Unable to copy manual file {url_manual}: {str(exc)}") return None @@ -555,7 +561,7 @@ async def store_media_file(self, url_media: str, dest_path: str) -> None: try: resolved = _resolve_local_file_uri(url_media) if resolved is not None and await AnyioPath(resolved).exists(): - await self.copy_file(resolved, dest_path) + await self.copy_file(resolved, dest_path, allow_link=True) except Exception as exc: log.error(f"Unable to copy media file {url_media}: {str(exc)}") return None diff --git a/backend/handler/filesystem/sync_handler.py b/backend/handler/filesystem/sync_handler.py index ba09ebcb20..b67a2e4ebb 100644 --- a/backend/handler/filesystem/sync_handler.py +++ b/backend/handler/filesystem/sync_handler.py @@ -12,7 +12,7 @@ class FSSyncHandler(FSHandler): """Filesystem handler for sync folder operations (File Transfer mode).""" def __init__(self) -> None: - super().__init__(base_path=SYNC_BASE_PATH) + super().__init__(base_path=SYNC_BASE_PATH, tolerate_missing_base=True) def build_incoming_path( self, device_id: str, platform_slug: str | None = None diff --git a/backend/tasks/manual/cleanup_orphaned_resources.py b/backend/tasks/manual/cleanup_orphaned_resources.py index 0ed7e660b2..ebefd22e04 100644 --- a/backend/tasks/manual/cleanup_orphaned_resources.py +++ b/backend/tasks/manual/cleanup_orphaned_resources.py @@ -87,7 +87,7 @@ async def run(self) -> dict[str, int]: platform_dirs: set[int] = { int(entry.name) async for entry in roms_resources_dir.iterdir() - if await entry.is_dir() + if entry.name.isdigit() and await entry.is_dir() } rom_dirs_by_platform: dict[int, set[int]] = {} @@ -97,7 +97,7 @@ async def run(self) -> dict[str, int]: rom_dirs_by_platform[platform_dir] = { int(entry.name) async for entry in platform_dir_path.iterdir() - if await entry.is_dir() + if entry.name.isdigit() and await entry.is_dir() } cleanup_stats.update( diff --git a/backend/tests/handler/filesystem/test_base_handler.py b/backend/tests/handler/filesystem/test_base_handler.py index 68f585acad..f8c10e243b 100644 --- a/backend/tests/handler/filesystem/test_base_handler.py +++ b/backend/tests/handler/filesystem/test_base_handler.py @@ -1,4 +1,5 @@ import asyncio +import errno import shutil import tempfile from io import BytesIO @@ -515,3 +516,119 @@ async def create_and_remove_dir(dir_name): dirs = await handler.list_directories(".") for i in range(5): assert f"test_dir_{i}" not in dirs + + async def test_copy_file_does_not_hardlink_by_default( + self, handler: FSHandler, sample_file_content + ): + """copy_file defaults to allow_link=False: the destination is a real + copy, so mutating it won't affect the source. Callers must opt in to + hardlinking explicitly.""" + source_full = handler.base_path / "source.bin" + source_full.write_bytes(sample_file_content) + + await handler.copy_file(source_full, "dest.bin") + + dest_full = handler.base_path / "dest.bin" + assert dest_full.read_bytes() == sample_file_content + assert source_full.stat().st_ino != dest_full.stat().st_ino + + async def test_copy_file_allow_link_true_hardlinks( + self, handler: FSHandler, sample_file_content + ): + """allow_link=True: when source and dest are on the same filesystem, + the destination is a hardlink to the source (same inode).""" + source_full = handler.base_path / "source.bin" + source_full.write_bytes(sample_file_content) + + await handler.copy_file(source_full, "dest.bin", allow_link=True) + + dest_full = handler.base_path / "dest.bin" + assert dest_full.read_bytes() == sample_file_content + assert source_full.stat().st_ino == dest_full.stat().st_ino + + async def test_copy_file_allow_link_false_does_real_copy( + self, handler: FSHandler, sample_file_content + ): + """allow_link=False (explicit) produces a real copy — content matches + but inodes differ. This matches the default but is exercised explicitly + to lock in the contract.""" + source_full = handler.base_path / "source.bin" + source_full.write_bytes(sample_file_content) + + await handler.copy_file(source_full, "dest.bin", allow_link=False) + + dest_full = handler.base_path / "dest.bin" + assert dest_full.read_bytes() == sample_file_content + assert source_full.stat().st_ino != dest_full.stat().st_ino + + async def test_copy_file_allow_link_falls_back_to_copy_on_exdev( + self, handler: FSHandler, sample_file_content + ): + """When allow_link=True and os.link raises EXDEV (cross-filesystem), + copy_file transparently falls back to a real copy.""" + source_full = handler.base_path / "source.bin" + source_full.write_bytes(sample_file_content) + + with patch( + "utils.filesystem.os.link", + side_effect=OSError(errno.EXDEV, "Cross-device link"), + ): + await handler.copy_file(source_full, "dest.bin", allow_link=True) + + dest_full = handler.base_path / "dest.bin" + assert dest_full.read_bytes() == sample_file_content + assert source_full.stat().st_ino != dest_full.stat().st_ino + + async def test_copy_file_nonexistent_source(self, handler: FSHandler): + """Missing source must raise FileNotFoundError regardless of link mode.""" + with pytest.raises(FileNotFoundError, match="Source file not found"): + await handler.copy_file(handler.base_path / "missing.bin", "dest.bin") + + +class TestFSHandlerTolerateMissingBase: + """Tests for the tolerate_missing_base flag, which lets optional features + (like the sync folder) come up degraded instead of crashing the whole app + when the base path can't be created.""" + + def test_default_raises_on_mkdir_failure(self): + """Default behavior: an OSError from mkdir propagates, so misconfigured + critical paths (resources, library) still fail loudly at startup.""" + with patch.object( + Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied") + ): + with pytest.raises(PermissionError): + FSHandler("/some/unwritable/path") + + def test_tolerate_missing_base_swallows_oserror(self, tmp_path, caplog): + """With tolerate_missing_base=True, mkdir failure is logged but does + not raise — the handler instance is still constructed so module-level + imports don't crash.""" + target = tmp_path / "missing" + + with patch.object( + Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied") + ): + handler = FSHandler(str(target), tolerate_missing_base=True) + + # Handler exists and base_path is set, even though the directory + # could not be created. + assert handler.base_path == target.resolve() + assert not handler.base_path.exists() + + def test_tolerate_missing_base_still_creates_when_possible(self, tmp_path): + """tolerate_missing_base=True should not change behavior when mkdir + succeeds — the directory must still be created normally.""" + target = tmp_path / "new_dir" + assert not target.exists() + + handler = FSHandler(str(target), tolerate_missing_base=True) + + assert handler.base_path.exists() + assert handler.base_path.is_dir() + + def test_tolerate_missing_base_reraises_non_oserror(self, tmp_path): + """Only OSError gets swallowed; programming errors (e.g. a RuntimeError + from corrupted state) must still surface.""" + with patch.object(Path, "mkdir", side_effect=RuntimeError("unexpected")): + with pytest.raises(RuntimeError, match="unexpected"): + FSHandler(str(tmp_path / "x"), tolerate_missing_base=True) diff --git a/backend/tests/handler/filesystem/test_sync_handler.py b/backend/tests/handler/filesystem/test_sync_handler.py index b6a156b711..658eb16872 100644 --- a/backend/tests/handler/filesystem/test_sync_handler.py +++ b/backend/tests/handler/filesystem/test_sync_handler.py @@ -1,9 +1,11 @@ """Tests for filesystem sync handler.""" +import errno import os import shutil import tempfile from pathlib import Path +from unittest.mock import patch import pytest @@ -133,3 +135,21 @@ def test_remove_incoming_file_outside_base_raises( def test_remove_incoming_file_nonexistent(self, handler: FSSyncHandler): # Should not raise for nonexistent files handler.remove_incoming_file("/nonexistent/path/file.sav") + + +class TestFSSyncHandlerStartup: + """Sync is an optional feature: if /romm/sync isn't writable (bad mount, + wrong ownership), the app must still boot. Failures should surface when + sync is actually used, not at module-import time.""" + + def test_init_does_not_raise_when_base_path_unwritable(self): + """Regression test for the PermissionError on /romm/sync at boot. + FSSyncHandler must construct successfully even when mkdir fails.""" + with patch.object( + Path, "mkdir", side_effect=PermissionError(errno.EACCES, "denied") + ): + handler = FSSyncHandler() + + assert handler is not None + # base_path is set even though the directory wasn't created. + assert isinstance(handler.base_path, Path) diff --git a/backend/tests/utils/test_filesystem.py b/backend/tests/utils/test_filesystem.py new file mode 100644 index 0000000000..de0a7939c9 --- /dev/null +++ b/backend/tests/utils/test_filesystem.py @@ -0,0 +1,182 @@ +"""Tests for utils.filesystem helpers.""" + +import errno +import os +from unittest.mock import patch + +import pytest + +from utils.filesystem import link_or_copy_file + + +class TestLinkOrCopyFile: + """Test the hardlink-with-copy-fallback helper used by importers and exporters.""" + + def test_same_filesystem_creates_hardlink(self, tmp_path): + """When source and dest are on the same filesystem, the helper creates a + hardlink — source and dest share an inode and st_nlink reflects both.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + link_or_copy_file(source, dest) + + assert dest.read_bytes() == b"payload" + # Hardlink: shared inode, link count >= 2 + assert source.stat().st_ino == dest.stat().st_ino + assert source.stat().st_nlink >= 2 + + def test_falls_back_to_copy_on_exdev(self, tmp_path): + """When os.link raises EXDEV (cross-device), the helper falls back to + shutil.copy2 — content matches but inodes differ.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + exdev = OSError(errno.EXDEV, "Cross-device link") + + with patch("utils.filesystem.os.link", side_effect=exdev): + link_or_copy_file(source, dest) + + assert dest.read_bytes() == b"payload" + # Real copy: separate inodes + assert source.stat().st_ino != dest.stat().st_ino + + def test_falls_back_to_copy_on_eperm(self, tmp_path): + """EPERM (filesystem doesn't permit hardlinks, e.g. FAT32) must also + trigger the copy fallback.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + eperm = OSError(errno.EPERM, "Operation not permitted") + + with patch("utils.filesystem.os.link", side_effect=eperm): + link_or_copy_file(source, dest) + + assert dest.read_bytes() == b"payload" + assert source.stat().st_ino != dest.stat().st_ino + + def test_reraises_non_fallback_oserror(self, tmp_path): + """An OSError that isn't in the fallback set (e.g. ENOSPC — disk full) + must propagate; we don't want to mask real disk errors.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + enospc = OSError(errno.ENOSPC, "No space left on device") + + with patch("utils.filesystem.os.link", side_effect=enospc): + with pytest.raises(OSError) as excinfo: + link_or_copy_file(source, dest) + + assert excinfo.value.errno == errno.ENOSPC + assert not dest.exists() + + def test_mutation_after_link_affects_source(self, tmp_path): + """Document hardlink semantics: writing through the dest path with + O_TRUNC truncates the shared inode and therefore mutates the source. + This is why callers that mutate dest in place (e.g. PIL resize) must + not opt into hardlinking — copy_file defaults to allow_link=False to + keep this hazard from being the easy path.""" + source = tmp_path / "source.bin" + source.write_bytes(b"original") + dest = tmp_path / "dest.bin" + + link_or_copy_file(source, dest) + + # Truncating-write through dest affects source (same inode). + with open(dest, "wb") as f: + f.write(b"mutated") + + assert source.read_bytes() == b"mutated" + + def test_mutation_after_copy_does_not_affect_source(self, tmp_path): + """When the helper falls back to copy, the inodes are independent — + mutation through dest leaves the source untouched.""" + source = tmp_path / "source.bin" + source.write_bytes(b"original") + dest = tmp_path / "dest.bin" + + with patch( + "utils.filesystem.os.link", + side_effect=OSError(errno.EXDEV, "Cross-device link"), + ): + link_or_copy_file(source, dest) + + with open(dest, "wb") as f: + f.write(b"mutated") + + assert source.read_bytes() == b"original" + + def test_dest_already_exists_is_overwritten(self, tmp_path): + """When dest already exists, link_or_copy_file atomically replaces it. + This mirrors shutil.copy2's overwrite-on-exists semantics so callers + re-fetching a resource don't have to pre-unlink to avoid EEXIST.""" + source = tmp_path / "source.bin" + source.write_bytes(b"new payload") + dest = tmp_path / "dest.bin" + dest.write_bytes(b"old payload") + + link_or_copy_file(source, dest) + + assert dest.read_bytes() == b"new payload" + # Same-fs: dest is now a hardlink to source. + assert source.stat().st_ino == dest.stat().st_ino + + def test_dest_already_exists_is_overwritten_on_copy_fallback(self, tmp_path): + """Overwrite semantics also hold when the link fails and we fall back + to copy — replace happens after the tempfile is populated either way.""" + source = tmp_path / "source.bin" + source.write_bytes(b"new payload") + dest = tmp_path / "dest.bin" + dest.write_bytes(b"old payload") + + with patch( + "utils.filesystem.os.link", + side_effect=OSError(errno.EXDEV, "Cross-device link"), + ): + link_or_copy_file(source, dest) + + assert dest.read_bytes() == b"new payload" + # Fell back to copy: separate inodes. + assert source.stat().st_ino != dest.stat().st_ino + + def test_tempfile_cleaned_up_on_failure(self, tmp_path): + """If linking and the copy fallback both fail (e.g. ENOSPC), the helper + must not leave a stray tempfile behind in dest's directory.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + enospc = OSError(errno.ENOSPC, "No space left on device") + + with patch("utils.filesystem.os.link", side_effect=enospc): + with pytest.raises(OSError) as excinfo: + link_or_copy_file(source, dest) + + assert excinfo.value.errno == errno.ENOSPC + assert not dest.exists() + # No leftover .romm_link_tmp_* file in the directory. + leftovers = [ + p for p in tmp_path.iterdir() if p.name.startswith(".romm_link_tmp_") + ] + assert leftovers == [] + + def test_helper_uses_os_link_first(self, tmp_path): + """Sanity-check that the helper calls os.link before any copy logic — + guards against a future refactor regressing to copy-only. The second + arg is a tempfile in dest's parent (not dest itself), since we + link-then-replace for atomic overwrite.""" + source = tmp_path / "source.bin" + source.write_bytes(b"payload") + dest = tmp_path / "dest.bin" + + with patch("utils.filesystem.os.link", wraps=os.link) as link_spy: + link_or_copy_file(source, dest) + + link_spy.assert_called_once() + args = link_spy.call_args.args + assert args[0] == source + assert args[1].parent == dest.parent + assert args[1].name.startswith(".romm_link_tmp_") diff --git a/backend/utils/filesystem.py b/backend/utils/filesystem.py index 7fa4ca002a..be87aead54 100644 --- a/backend/utils/filesystem.py +++ b/backend/utils/filesystem.py @@ -1,5 +1,7 @@ +import errno import os import re +import shutil from collections.abc import Iterator from pathlib import Path @@ -37,6 +39,54 @@ def iter_directories(path: str, recursive: bool = False) -> Iterator[tuple[Path, break +# errno values that mean "hardlink not possible here, fall back to copy". +# EXDEV: cross-device link. EPERM: filesystem doesn't permit/support hardlinks +# (e.g. FAT32, exFAT, some network mounts). EOPNOTSUPP/ENOTSUP: same, on BSD/macOS. +# EMLINK: source already has the maximum number of hardlinks for the filesystem. +_LINK_FALLBACK_ERRNOS: frozenset[int] = frozenset( + e + for e in ( + getattr(errno, "EXDEV", None), + getattr(errno, "EPERM", None), + getattr(errno, "EOPNOTSUPP", None), + getattr(errno, "ENOTSUP", None), + getattr(errno, "EMLINK", None), + getattr(errno, "EACCES", None), + ) + if e is not None +) + + +def link_or_copy_file(source: Path, dest: Path) -> None: + """Place ``source`` at ``dest`` via hardlink (preferred) or copy (fallback), + atomically replacing ``dest`` if it already exists. Caller is responsible + for creating ``dest.parent``. + + Hardlinking is preferred because it's instantaneous and uses no extra disk + space, but only works within a single filesystem. If linking isn't possible, + we transparently fall back to ``shutil.copy2`` (preserving metadata). + + Overwriting is atomic: we link/copy to a tempfile in dest's directory, then + rename it onto dest, which mirrors shutil.copy2's overwrite-on-exists + behavior. + """ + tmp_path = dest.parent / f".romm_link_tmp_{os.urandom(8).hex()}" + try: + try: + os.link(source, tmp_path) + except OSError as exc: + if exc.errno not in _LINK_FALLBACK_ERRNOS: + raise + shutil.copy2(source, tmp_path) + os.replace(tmp_path, dest) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + INVALID_CHARS_HYPHENS = re.compile(r"[\\/:|]") INVALID_CHARS_EMPTY = re.compile(r'[*?"<>+]') diff --git a/backend/utils/gamelist_exporter.py b/backend/utils/gamelist_exporter.py index 567625d7e7..7a688b5da2 100644 --- a/backend/utils/gamelist_exporter.py +++ b/backend/utils/gamelist_exporter.py @@ -1,4 +1,3 @@ -import shutil from datetime import UTC, datetime from pathlib import Path from xml.etree.ElementTree import ( # trunk-ignore(bandit/B405) @@ -17,6 +16,7 @@ from handler.filesystem import fs_platform_handler, fs_resource_handler from logger.logger import log from models.rom import Rom +from utils.filesystem import link_or_copy_file # Map gamelist asset keys to subdirectory names inside assets/ ASSET_DIRS: dict[str, str] = { @@ -151,14 +151,14 @@ def _build_asset_refs( return refs def _copy_asset(self, source: Path, dest: Path) -> bool: - """Copy a file from source to dest. Returns True on success.""" + """Place ``source`` at ``dest`` via hardlink (same filesystem) or copy + (otherwise). Returns True on success.""" dest.parent.mkdir(parents=True, exist_ok=True) if dest.exists(): return True try: - with open(source, "rb") as src, open(dest, "wb") as dst: - shutil.copyfileobj(src, dst) + link_or_copy_file(source, dest) return True except OSError as e: log.warning(f"Failed to copy {source} -> {dest}: {e}") diff --git a/backend/utils/pegasus_exporter.py b/backend/utils/pegasus_exporter.py index 52d457e3d1..5dedc9e607 100644 --- a/backend/utils/pegasus_exporter.py +++ b/backend/utils/pegasus_exporter.py @@ -1,4 +1,3 @@ -import shutil from datetime import UTC, datetime from pathlib import Path @@ -8,6 +7,7 @@ from handler.filesystem import fs_platform_handler, fs_resource_handler from logger.logger import log from models.rom import Rom +from utils.filesystem import link_or_copy_file # Map Pegasus asset keys to subdirectory names inside assets/ ASSET_DIRS: dict[str, str] = { @@ -177,14 +177,14 @@ def _create_game_entry( return "\n".join(lines) def _copy_asset(self, source: Path, dest: Path) -> bool: - """Copy a file from source to dest using raw read/write. Returns True on success.""" + """Place ``source`` at ``dest`` via hardlink (same filesystem) or copy + (otherwise). Returns True on success.""" dest.parent.mkdir(parents=True, exist_ok=True) if dest.exists(): return True try: - with open(source, "rb") as src, open(dest, "wb") as dst: - shutil.copyfileobj(src, dst) + link_or_copy_file(source, dest) return True except OSError as e: log.warning(f"Failed to copy {source} -> {dest}: {e}")