From f04d773896c23d709fccb3b78ff791463679fcef Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 25 May 2026 18:12:20 +0200 Subject: [PATCH] Prevent hanging in svn+ssh --- CHANGELOG.rst | 1 + dfetch/vcs/svn.py | 86 ++++++++++++++++--- doc/explanation/threat_model_supply_chain.rst | 18 ++-- doc/explanation/threat_model_usage.rst | 46 +++++----- example/dfetch.yaml | 2 +- tests/test_svn.py | 61 +++++++++++++ 6 files changed, 169 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3f104f1a6..e4ba1bb96 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -19,6 +19,7 @@ Release 0.14.0 (unreleased) * Fix arbitrary file write via malicious tar/zip symlink (#1152) * Prevent SSH command injection (#1152) * Allow manifests with no ``projects`` key so ``dfetch add`` can bootstrap empty manifest (#1197) +* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (#0) Release 0.13.0 (released 2026-03-30) ==================================== diff --git a/dfetch/vcs/svn.py b/dfetch/vcs/svn.py index e6c44d1c0..b728eb787 100644 --- a/dfetch/vcs/svn.py +++ b/dfetch/vcs/svn.py @@ -1,12 +1,14 @@ """Svn repository.""" import contextlib +import functools import os import pathlib import re from collections.abc import Callable, Generator, Sequence from pathlib import Path from typing import NamedTuple +from urllib.parse import urlparse from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline @@ -15,6 +17,43 @@ logger = get_logger(__name__) +_SSH_HOST_KEY_MSGS = ("host key verification failed", "authenticity of host") + + +# As a cli tool, we can safely assume this remains stable during the runtime, caching for speed is better +@functools.lru_cache +def _extend_env_for_non_interactive_mode() -> dict[str, str]: + """Extend the environment vars for svn running in non-interactive mode.""" + env = os.environ.copy() + ssh_cmd = env.get("SVN_SSH", "ssh") + if "BatchMode=" not in ssh_cmd: + ssh_cmd += " -o BatchMode=yes" + else: + logger.debug('BatchMode already configured in SVN_SSH: "%s"', ssh_cmd) + env["SVN_SSH"] = ssh_cmd + return env + + +def _ssh_target_from_url(url: str) -> str: + """Return the ``[user@]host`` portion of a svn+ssh URL, or the URL itself.""" + parsed = urlparse(url) + host = parsed.hostname or url + return f"{parsed.username}@{host}" if parsed.username else host + + +def _raise_if_ssh_host_key_error(url: str, exc: SubprocessCommandError) -> None: + """Raise a helpful RuntimeError if *exc* looks like an SSH host-key failure.""" + stderr_lower = exc.stderr.lower() + if any(msg in stderr_lower for msg in _SSH_HOST_KEY_MSGS): + target = _ssh_target_from_url(url) + raise RuntimeError( + f"SSH host key verification failed while connecting to '{url}'.\n" + "Add the host to your known hosts file, for example by running:\n" + f" ssh-keyscan {target} >> ~/.ssh/known_hosts\n" + "Or test the SSH connection manually:\n" + f" ssh -T {target}" + ) from exc + def get_svn_version() -> tuple[str, str]: """Get the name and version of svn.""" @@ -49,9 +88,14 @@ def __init__(self, remote: str) -> None: def is_svn(self) -> bool: """Check if is SVN.""" try: - run_on_cmdline(logger, ["svn", "info", self._remote, "--non-interactive"]) + run_on_cmdline( + logger, + ["svn", "info", self._remote, "--non-interactive"], + env=_extend_env_for_non_interactive_mode(), + ) return True except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) if exc.stderr.startswith("svn: E170013"): raise RuntimeError( f">>>{exc.cmd}<<< failed!\n" @@ -67,20 +111,30 @@ def list_of_branches(self) -> list[str]: result = run_on_cmdline( logger, ["svn", "ls", "--non-interactive", f"{self._remote}/branches"], + env=_extend_env_for_non_interactive_mode(), ) return [ line.strip("/\r") for line in result.stdout.decode().splitlines() if line.strip("/\r") ] - except (SubprocessCommandError, RuntimeError): + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) + return [] + except RuntimeError: return [] def list_of_tags(self) -> list[str]: """Get list of all available tags.""" - result = run_on_cmdline( - logger, ["svn", "ls", "--non-interactive", f"{self._remote}/tags"] - ) + try: + result = run_on_cmdline( + logger, + ["svn", "ls", "--non-interactive", f"{self._remote}/tags"], + env=_extend_env_for_non_interactive_mode(), + ) + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) + raise return [ str(tag).strip("/\r") for tag in result.stdout.decode().split("\n") if tag ] @@ -116,7 +170,9 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]: """List immediate children of *url_path* as ``(name, is_dir)`` pairs.""" try: result = run_on_cmdline( - logger, ["svn", "ls", "--non-interactive", url_path] + logger, + ["svn", "ls", "--non-interactive", url_path], + env=_extend_env_for_non_interactive_mode(), ) entries: list[tuple[str, bool]] = [] for line in result.stdout.decode().splitlines(): @@ -126,7 +182,10 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]: is_dir = line.endswith("/") entries.append((line.rstrip("/"), is_dir)) return entries - except (SubprocessCommandError, RuntimeError): + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(url_path, exc) + return [] + except RuntimeError: return [] @@ -176,7 +235,7 @@ def externals_from_url(url: str, revision: str = "") -> list[External]: if revision: cmd += ["--revision", revision] cmd += [url] - result = run_on_cmdline(logger, cmd) + result = run_on_cmdline(logger, cmd, env=_extend_env_for_non_interactive_mode()) repo_root = SvnRepo.get_info_from_target(url)["Repository Root"] normalized = SvnRepo._normalize_url_prefix(result.stdout.decode(), url) return SvnRepo._parse_externals(normalized, repo_root) @@ -292,9 +351,12 @@ def get_info_from_target(target: str = "") -> dict[str, str]: """Get the info of the given target.""" try: result = run_on_cmdline( - logger, ["svn", "info", "--non-interactive", target.strip()] + logger, + ["svn", "info", "--non-interactive", target.strip()], + env=_extend_env_for_non_interactive_mode(), ).stdout.decode() except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(target, exc) if exc.stderr.startswith("svn: E170013"): raise RuntimeError( f">>>{exc.cmd}<<< failed!\n" @@ -335,6 +397,7 @@ def get_last_changed_revision(target: str | Path) -> str: "last-changed-revision", target_str, ], + env=_extend_env_for_non_interactive_mode(), ) .stdout.decode() .strip() @@ -382,6 +445,7 @@ def export(url: str, rev: str = "", dst: str = ".") -> None: ["svn", "export", "--non-interactive", "--force"] + (["--revision", rev] if rev else []) + [url, dst], + env=_extend_env_for_non_interactive_mode(), ) @staticmethod @@ -390,7 +454,9 @@ def files_in_path(url_path: str) -> list[str]: return [ str(line) for line in run_on_cmdline( - logger, ["svn", "list", "--non-interactive", url_path] + logger, + ["svn", "list", "--non-interactive", url_path], + env=_extend_env_for_non_interactive_mode(), ) .stdout.decode() .splitlines() diff --git a/doc/explanation/threat_model_supply_chain.rst b/doc/explanation/threat_model_supply_chain.rst index bdf34118b..cc26af7c8 100644 --- a/doc/explanation/threat_model_supply_chain.rst +++ b/doc/explanation/threat_model_supply_chain.rst @@ -546,22 +546,22 @@ Dataflows * - DF-22: PR enters code review - A-01b: GitHub Repository (feature branches / PRs) - A-04: Release Gate / Code Review - - + - * - DF-12: Main branch workflows drive CI execution - A-01: GitHub Repository (main / protected) - A-06: GitHub Actions Workflow - - + - * - DF-13a: PR CI checkout - A-01b: GitHub Repository (feature branches / PRs) - A-02: GitHub Actions Infrastructure - - + - * - DF-13b: Release CI checkout - A-01: GitHub Repository (main / protected) - A-02: GitHub Actions Infrastructure - - + - * - DF-14: CI cache restore - A-08b: GitHub Actions Build Cache @@ -571,12 +571,12 @@ Dataflows * - DF-15: Workflow triggers build step - A-06: GitHub Actions Workflow - A-08: Python Build (wheel / sdist) - - + - * - DF-15b: Built wheel/sdist artifacts - A-08: Python Build (wheel / sdist) - A-02: GitHub Actions Infrastructure - - + - * - DF-16: CI fetches build/dev deps from PyPI - A-03: PyPI / TestPyPI @@ -586,7 +586,7 @@ Dataflows * - DF-17: Build tools consumed by build step - A-07: dfetch Build / Dev Dependencies - A-08: Python Build (wheel / sdist) - - + - * - DF-18: CI cache write - A-02: GitHub Actions Infrastructure @@ -601,7 +601,7 @@ Dataflows * - DF-23: Approved merge to main - A-04: Release Gate / Code Review - A-01: GitHub Repository (main / protected) - - + - * - DF-24: Publish wheel to PyPI (OIDC) - A-02: GitHub Actions Infrastructure @@ -1022,5 +1022,3 @@ Controls - Test result attestation on source archive - DFT-31 - The CI test workflow (``test.yml``) generates an in-toto test result attestation (predicate type ``https://in-toto.io/attestation/test-result/v0.1``) for every release and main-branch commit. The attestation proves the full CI test suite ran against the exact source archive and every check passed, before any binary was produced from that source. Consumers can verify it using ``gh attestation verify dfetch-source.tar.gz`` with ``--predicate-type https://in-toto.io/attestation/test-result/v0.1`` and ``--cert-identity`` pinned to ``test.yml`` at the release tag ref. This provides an additional layer of assurance beyond build provenance: not only was the artifact produced from the official commit, but the test suite demonstrably passed on that exact source before any binary was built. ``.github/workflows/test.yml`` - - diff --git a/doc/explanation/threat_model_usage.rst b/doc/explanation/threat_model_usage.rst index 8aadefb5a..8e6da68c3 100644 --- a/doc/explanation/threat_model_usage.rst +++ b/doc/explanation/threat_model_usage.rst @@ -749,12 +749,12 @@ Dataflows * - DF-01: Invoke dfetch command - Developer - A-22: dfetch Process - - + - * - DF-02: Read manifest - A-12: dfetch Manifest - A-22: dfetch Process - - + - * - DF-03a: Fetch VCS content - HTTPS/SSH - A-22: dfetch Process @@ -799,102 +799,102 @@ Dataflows * - DF-07: Write vendored files - A-22: dfetch Process - A-13: Fetched Source Code - - + - * - DF-08: Write dependency metadata - A-22: dfetch Process - A-18: Dependency Metadata - - + - * - DF-09: Write SBOM - A-22: dfetch Process - A-15: SBOM Output (CycloneDX) - - + - * - DF-16: Read dependency metadata - A-18: Dependency Metadata - A-22: dfetch Process - - + - * - DF-10: Read patch for application - A-19: Patch Files - A-25: Patch Application (patch-ng) - - + - * - DF-10b: Write patched files to vendor directory - A-25: Patch Application (patch-ng) - A-13: Fetched Source Code - - + - * - DF-15: Vendored source to build - A-13: Fetched Source Code - A-11: Consumer Build System - - + - * - DF-11: Dispatch archive bytes to extraction - A-22: dfetch Process - A-24: Archive Extraction (tarfile / zipfile) - - + - * - DF-12: Write extracted archive to temp dir - A-24: Archive Extraction (tarfile / zipfile) - A-20: Local VCS Cache (temp) - - + - * - DF-13: Dispatch SVN export subprocess - A-22: dfetch Process - A-26: SVN Export (svn export) - - + - * - DF-14: Write SVN export to temp dir - A-26: SVN Export (svn export) - A-20: Local VCS Cache (temp) - - + - * - DF-23: Dispatch git clone subprocess - A-22: dfetch Process - A-27: Git Clone (git init / fetch / checkout) - - + - * - DF-24: Write git checkout to temp dir - A-27: Git Clone (git init / fetch / checkout) - A-20: Local VCS Cache (temp) - - + - * - DF-17: Write audit / check reports - A-22: dfetch Process - A-21: Audit / Check Reports - - + - * - DF-22: Read validated content from local VCS cache - A-20: Local VCS Cache (temp) - A-22: dfetch Process - - + - * - DF-18: Read integrity hash for archive verification - A-12: dfetch Manifest - A-22: dfetch Process - - + - * - DF-18b: Write computed hash to manifest (dfetch freeze) - A-22: dfetch Process - A-12: dfetch Manifest - - + - * - DF-20: Author / maintain dfetch.yaml - Developer - A-12: dfetch Manifest - - + - * - DF-19: VCS server publishes source attestation (not consumed by dfetch) - A-09: Remote VCS Server - A-23: Upstream Source Attestation (VSA) - - + - * - DF-21: Create / maintain patch files - Developer - A-19: Patch Files - - + - Threats @@ -1712,5 +1712,3 @@ Controls - Hash algorithm allowlist (SHA-256/384/512 only) - DFT-30 - ``integrity_hash.py`` accepts only ``sha256:``, ``sha384:``, and ``sha512:`` prefixes; any other algorithm prefix is rejected at parse time. MD5 and SHA-1 are not accepted. This directly mitigates DFT-30 (SLSA M2: exploit cryptographic hash collisions) by ensuring that integrity hashes, when present, use algorithms with no known practical collision attacks. ``dfetch/vcs/integrity_hash.py`` - - diff --git a/example/dfetch.yaml b/example/dfetch.yaml index 88641d906..f2629cadd 100644 --- a/example/dfetch.yaml +++ b/example/dfetch.yaml @@ -7,7 +7,7 @@ manifest: default: true # Set it as default - name: sourceforge - url-base: svn://svn.code.sf.net/p/ + url-base: svn+ssh://svn.code.sf.net/p/ projects: diff --git a/tests/test_svn.py b/tests/test_svn.py index 30190a7ad..aeae469fe 100644 --- a/tests/test_svn.py +++ b/tests/test_svn.py @@ -520,3 +520,64 @@ def test_externals_from_url_nonstd_layout_branch_is_space(): assert result[0].url == nonstd_url assert result[0].revision == "" assert result[0].path == "Database" + + +@pytest.mark.parametrize( + "method,url", + [ + ("is_svn", "svn+ssh://svn.code.sf.net/project"), + ("list_of_tags", "svn+ssh://svn.code.sf.net/project"), + ("list_of_branches", "svn+ssh://svn.code.sf.net/project"), + ], +) +def test_svn_remote_raises_hint_on_ssh_host_key_failure(method, url): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="known hosts"): + getattr(SvnRemote(url), method)() + + +def test_get_info_from_target_raises_hint_on_ssh_host_key_failure(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn", "info"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="known hosts"): + SvnRepo.get_info_from_target("svn+ssh://svn.code.sf.net/project") + + +def test_ssh_hint_includes_hostname(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="svn.code.sf.net"): + SvnRemote("svn+ssh://svn.code.sf.net/project").is_svn() + + +def test_ssh_hint_includes_user_when_present_in_url(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="myuser@svn.code.sf.net"): + SvnRemote("svn+ssh://myuser@svn.code.sf.net/project").is_svn() + + +def test_svn_ssh_env_has_batch_mode(): + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + env = _extend_env_for_non_interactive_mode() + assert "BatchMode=yes" in env["SVN_SSH"] + + +def test_svn_ssh_env_preserves_existing_batch_mode(monkeypatch): + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + monkeypatch.setenv("SVN_SSH", "ssh -o BatchMode=yes -i /my/key") + _extend_env_for_non_interactive_mode.cache_clear() + env = _extend_env_for_non_interactive_mode() + assert env["SVN_SSH"].count("BatchMode=yes") == 1