From 51bae65f1176c7491c3355ae707da80e403d7446 Mon Sep 17 00:00:00 2001 From: Dinu Gherman Date: Fri, 19 Jun 2026 20:10:29 +0200 Subject: [PATCH] feat: support multiple archive roots for map and other commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing two or more archive files (zip, tar, 7z, etc.) as positional arguments now works — e.g. `dirplot map foo.zip bar.zip`. Each archive is scanned independently and merged under a synthetic common-parent node, matching the existing behaviour for multiple local directory arguments. Also extends RenderingPipeline with the same logic and adds a `password` field to PipelineConfig for encrypted archives. --- .gitignore | 2 + CHANGELOG.md | 10 +++++ docs/archives.md | 11 ++++++ docs/cli.md | 4 ++ src/dirplot/helpers/scan.py | 74 +++++++++++++++++++++++++++++-------- src/dirplot/pipeline.py | 58 +++++++++++++++++++++++------ tests/test_archives.py | 74 +++++++++++++++++++++++++++++++++++++ tests/test_pipeline.py | 50 +++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 434f34a..175ee7f 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,5 @@ tests/example_dirplot.png tests/animation/ demo/ events.jsonl +NEW-TODO.md +NEW-TODO.md~ diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cccd23..456d4eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **Multiple archive roots for `map`, `diff`, and other commands** — passing two or more archive + files (zip, tar, 7z, etc.) as positional arguments now works, e.g. + `dirplot map foo.zip bar.zip`. Each archive is scanned independently and the results are combined + under a synthetic common-parent node, matching the behaviour already supported for multiple local + directory/file arguments. + ## [0.6.0] - 2026-06-09 ### Added diff --git a/docs/archives.md b/docs/archives.md index 8f18eac..505b515 100644 --- a/docs/archives.md +++ b/docs/archives.md @@ -14,6 +14,17 @@ dirplot map backup.7z --exclude node_modules dirplot map secret.zip --password-file ~/pwd.txt # password-protected ``` +## Multiple archives + +Pass two or more archive files as positional arguments to visualise them side by side. +Each archive is scanned independently and the results are combined under a synthetic +common-parent node — the same behaviour as passing multiple local directories. + +```bash +dirplot map v1.0.zip v2.0.zip +dirplot map before.tar.gz after.tar.gz --depth 2 +``` + ## Supported formats ### Standard library (no extra install) diff --git a/docs/cli.md b/docs/cli.md index 2980702..45d0af9 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -44,6 +44,10 @@ dirplot map . --include src/dirplot/fonts dirplot map src tests dirplot map src/main.py src/util.py +# Multiple archive roots — each archive scanned independently, combined under a synthetic parent +dirplot map v1.0.zip v2.0.zip +dirplot map before.tar.gz after.tar.gz --depth 2 + # Pipe tree or find output (format auto-detected) tree src/ | dirplot map tree -s src/ | dirplot map diff --git a/src/dirplot/helpers/scan.py b/src/dirplot/helpers/scan.py index 4982e42..7f07f85 100644 --- a/src/dirplot/helpers/scan.py +++ b/src/dirplot/helpers/scan.py @@ -132,29 +132,73 @@ def _emit(msg: str) -> None: is_gdrive_path, is_s3_path, is_ssh_path, - is_archive_path, is_git_ref_path, ) ): typer.echo( - f"Multiple roots are only supported for local paths, got: {r}", + f"Multiple roots are only supported for local paths and archives, got: {r}", err=True, ) raise typer.Exit(1) - root_paths = [] - for r in roots: - rp = Path(r) - if not rp.exists(): - typer.echo(f"Path does not exist: {r}", err=True) - raise typer.Exit(1) - if not rp.is_dir() and not rp.is_file(): - typer.echo(f"Not a file or directory: {r}", err=True) - raise typer.Exit(1) - root_paths.append(rp.resolve()) excluded = frozenset(exclude) - common_str = os.path.commonpath([str(p) for p in root_paths]) - _emit(f"Scanning {len(roots)} paths under {_tilde(common_str)} ...") - root_node = build_tree_multi(root_paths, excluded, depth) + archive_roots = [r for r in roots if is_archive_path(r)] + local_roots = [r for r in roots if not is_archive_path(r)] + if archive_roots and not local_roots: + sub_nodes: list[Node] = [] + for r in archive_roots: + ap = Path(r) + if not ap.exists(): + typer.echo(f"Path does not exist: {r}", err=True) + raise typer.Exit(1) + _emit(f"Reading archive {r} ...") + try: + sub_nodes.append( + build_tree_archive(ap, exclude=excluded, depth=depth, password=password) + ) + except PasswordRequired as exc: + if password is not None: + typer.echo("Error: incorrect password.", err=True) + raise typer.Exit(1) from exc + if no_input: + typer.echo( + "Error: archive requires a password." + " Pass --password or --password-file.", + err=True, + ) + raise typer.Exit(1) from exc + pw = typer.prompt("Password", hide_input=True) + try: + sub_nodes.append( + build_tree_archive(ap, exclude=excluded, depth=depth, password=pw) + ) + except PasswordRequired as exc2: + typer.echo("Error: incorrect password.", err=True) + raise typer.Exit(1) from exc2 + except (ImportError, OSError, RuntimeError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(1) from exc + common_str = os.path.commonpath([str(Path(r).resolve()) for r in archive_roots]) + root_node = Node( + name=Path(common_str).name or common_str, + path=Path(common_str), + size=sum(n.size for n in sub_nodes), + is_dir=True, + children=sub_nodes, + ) + else: + root_paths = [] + for r in local_roots: + rp = Path(r) + if not rp.exists(): + typer.echo(f"Path does not exist: {r}", err=True) + raise typer.Exit(1) + if not rp.is_dir() and not rp.is_file(): + typer.echo(f"Not a file or directory: {r}", err=True) + raise typer.Exit(1) + root_paths.append(rp.resolve()) + common_str = os.path.commonpath([str(p) for p in root_paths]) + _emit(f"Scanning {len(roots)} paths under {_tilde(common_str)} ...") + root_node = build_tree_multi(root_paths, excluded, depth) elif is_gdrive_path(root): gdrive_folder_id = parse_gdrive_path(root) label = f"gdrive://{gdrive_folder_id}" if gdrive_folder_id else "gdrive://" diff --git a/src/dirplot/pipeline.py b/src/dirplot/pipeline.py index 9ff9866..b6a41b3 100644 --- a/src/dirplot/pipeline.py +++ b/src/dirplot/pipeline.py @@ -115,6 +115,9 @@ class PipelineConfig: show: bool = True inline: bool = False + # Archive options + password: str | None = None + # Progress/logging log_callback: Callable[[str], None] | None = None console: ConsoleSession | None = None # Injected or auto-detected @@ -176,19 +179,52 @@ def scan(self) -> Node: depth=self.config.depth, ) else: - # Multiple roots - build tree under common parent - from dirplot.scanner import build_tree_multi - - root_paths = [Path(r) for r in roots] + # Multiple roots — split into archives and local paths import os - common = os.path.commonpath([str(p) for p in root_paths]) - self._log(f"Scanning {len(roots)} paths under {common} ...") - tree = build_tree_multi( - root_paths, - exclude=self.config.exclude, - depth=self.config.depth, - ) + from dirplot.archives import PasswordRequired, build_tree_archive, is_archive_path + from dirplot.scanner import Node, build_tree_multi + + archive_roots = [r for r in roots if is_archive_path(r)] + local_roots = [r for r in roots if not is_archive_path(r)] + + if archive_roots and not local_roots: + sub_nodes: list[Node] = [] + for r in archive_roots: + ap = Path(r) + if not ap.exists(): + raise FileNotFoundError(f"Path does not exist: {r}") + self._log(f"Reading archive {r} ...") + try: + sub_nodes.append( + build_tree_archive( + ap, + exclude=self.config.exclude, + depth=self.config.depth, + password=self.config.password, + ) + ) + except PasswordRequired as exc: + raise ValueError(f"Archive requires a password: {r}") from exc + except (ImportError, OSError, RuntimeError) as exc: + raise RuntimeError(f"Failed to read archive {r}: {exc}") from exc + common = os.path.commonpath([str(Path(r).resolve()) for r in archive_roots]) + tree = Node( + name=Path(common).name or common, + path=Path(common), + size=sum(n.size for n in sub_nodes), + is_dir=True, + children=sub_nodes, + ) + else: + root_paths = [Path(r) for r in local_roots] + common = os.path.commonpath([str(p) for p in root_paths]) + self._log(f"Scanning {len(local_roots)} paths under {common} ...") + tree = build_tree_multi( + root_paths, + exclude=self.config.exclude, + depth=self.config.depth, + ) t_scan = time.monotonic() - t_start self._log(f"Scan complete in {t_scan:.1f}s") diff --git a/tests/test_archives.py b/tests/test_archives.py index 29e6edb..26294a9 100644 --- a/tests/test_archives.py +++ b/tests/test_archives.py @@ -10,8 +10,10 @@ from pathlib import Path import pytest +from typer.testing import CliRunner from dirplot.archives import PasswordRequired, build_tree_archive, is_archive_path +from dirplot.main import app from tests.conftest import ENCRYPTED_PASSWORD # --------------------------------------------------------------------------- @@ -588,3 +590,75 @@ def test_rar_encrypted_wrong_password_raises(encrypted_archives: dict[str, Path] pytest.skip("encrypted rar fixture unavailable (rar CLI not found)") with pytest.raises(PasswordRequired): build_tree_archive(encrypted_archives[".rar"], password="wrong") + + +# --------------------------------------------------------------------------- +# Multiple archive roots +# --------------------------------------------------------------------------- + +_cli_runner = CliRunner() + + +def test_multi_archive_roots_two_zips(tmp_path: Path) -> None: + """Two zip archives passed as roots produce a combined synthetic parent node.""" + files_a = [("a.txt", b"x" * 100), ("sub/b.txt", b"x" * 200)] + files_b = [("c.txt", b"x" * 50)] + + def _make_zip(name: str, files: list[tuple[str, bytes]]) -> Path: + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + for n, data in files: + zf.writestr(n, data) + p = tmp_path / name + p.write_bytes(buf.getvalue()) + return p + + zip_a = _make_zip("alpha.zip", files_a) + zip_b = _make_zip("beta.zip", files_b) + + result = _cli_runner.invoke(app, ["map", str(zip_a), str(zip_b), "--no-show"]) + assert result.exit_code == 0 + + +def test_multi_archive_roots_combined_size(tmp_path: Path) -> None: + """Two zip archives as roots: combined node has two children, one per archive.""" + sizes = [100, 200] + zips = [] + for i, size in enumerate(sizes): + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr(f"file{i}.txt", b"x" * size) + p = tmp_path / f"archive{i}.zip" + p.write_bytes(buf.getvalue()) + zips.append(str(p)) + + result = _cli_runner.invoke(app, ["map", *zips, "--no-show"]) + assert result.exit_code == 0 + assert "archive0.zip" in result.output or "Found" in result.output + + +def test_multi_archive_roots_nonexistent_archive(tmp_path: Path) -> None: + """A nonexistent archive path exits with code 1 and an error message.""" + real = tmp_path / "real.zip" + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("f.txt", b"hi") + real.write_bytes(buf.getvalue()) + + result = _cli_runner.invoke(app, ["map", str(real), str(tmp_path / "ghost.zip"), "--no-show"]) + assert result.exit_code == 1 + assert "does not exist" in result.output + + +def test_multi_archive_roots_mixed_local_and_archive( + tmp_path: Path, sample_archives: dict[str, Path] +) -> None: + """Mixing a local directory with an archive falls back to local-only scanning.""" + local_dir = tmp_path / "local" + local_dir.mkdir() + (local_dir / "file.txt").write_bytes(b"x" * 10) + + result = _cli_runner.invoke( + app, ["map", str(local_dir), str(sample_archives[".zip"]), "--no-show"] + ) + assert result.exit_code in (0, 1) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index e087466..ffd0853 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -286,3 +286,53 @@ def log_fn(msg: str) -> None: assert len(logs) > 0 assert any("Scanning" in log for log in logs) + + +class TestRenderingPipelineMultiArchive: + """Test multi-archive-root support in RenderingPipeline.scan().""" + + def _make_zip(self, path: object, name: str, files: list[tuple[str, bytes]]) -> object: + import io + import zipfile + from pathlib import Path + + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + for n, data in files: + zf.writestr(n, data) + p = Path(path) / name # type: ignore[arg-type] + p.write_bytes(buf.getvalue()) + return p + + def test_scan_two_archive_roots(self, tmp_path): + """Two zip archives produce a combined synthetic root with two children.""" + zip_a = self._make_zip(tmp_path, "a.zip", [("file_a.txt", b"x" * 100)]) + zip_b = self._make_zip(tmp_path, "b.zip", [("file_b.txt", b"x" * 200)]) + + config = PipelineConfig(roots=[str(zip_a), str(zip_b)]) + tree = RenderingPipeline(config).scan() + + assert tree.is_dir is True + assert len(tree.children) == 2 + assert tree.size == 300 + # build_tree_archive names the root node by stem, not filename + child_names = {c.name for c in tree.children} + assert child_names == {"a", "b"} + + def test_scan_archive_nonexistent_raises(self, tmp_path): + """A nonexistent archive in a multi-root list raises FileNotFoundError.""" + real = self._make_zip(tmp_path, "real.zip", [("f.txt", b"hi")]) + + config = PipelineConfig(roots=[str(real), str(tmp_path / "ghost.zip")]) + with pytest.raises(FileNotFoundError, match="does not exist"): + RenderingPipeline(config).scan() + + def test_scan_archive_wrong_password_raises(self, encrypted_archives): + """A wrong password on a header-encrypted archive raises ValueError.""" + if ".rar" not in encrypted_archives: + pytest.skip("encrypted rar fixture unavailable (rar CLI not found)") + + rar = encrypted_archives[".rar"] + config = PipelineConfig(roots=[str(rar), str(rar)], password="wrong") + with pytest.raises(ValueError, match="requires a password"): + RenderingPipeline(config).scan()