diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b2ec5858f..58281ac4c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -20,6 +20,7 @@ Release 0.14.0 (unreleased) * Fix arbitrary file write via malicious tar/zip symlink (#1152) * Prevent SSH command injection (#1152) * Allow manifests with no ``projects`` key so ``dfetch add`` can bootstrap empty manifest (#1197) +* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (#1230) Release 0.13.0 (released 2026-03-30) ==================================== diff --git a/dfetch/project/svnsubproject.py b/dfetch/project/svnsubproject.py index e44ac070f..50f5760e9 100644 --- a/dfetch/project/svnsubproject.py +++ b/dfetch/project/svnsubproject.py @@ -9,6 +9,7 @@ from dfetch.manifest.version import Version from dfetch.project.metadata import Dependency from dfetch.project.subproject import SubProject +from dfetch.util.cmdline import SubprocessCommandError from dfetch.util.license import is_license_file from dfetch.util.util import ( find_matching_files, @@ -159,8 +160,16 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: def _fetch_externals(self, complete_path: str, revision: str) -> list[Dependency]: """Detect and log SVN externals that were exported with the project.""" + try: + externals = SvnRepo.externals_from_url(complete_path, revision) + except SubprocessCommandError: + logger.warning( + "Could not retrieve SVN externals from '%s' — skipping report.", + complete_path, + ) + return [] vcs_deps = [] - for external in SvnRepo.externals_from_url(complete_path, revision): + for external in externals: path_display = "./" + external.path.lstrip("./") display_branch = external.branch or SvnRepo.DEFAULT_BRANCH self._log_project( diff --git a/dfetch/util/cmdline.py b/dfetch/util/cmdline.py index b9a11a586..cdc1f9068 100644 --- a/dfetch/util/cmdline.py +++ b/dfetch/util/cmdline.py @@ -40,12 +40,22 @@ def run_on_cmdline( logger: logging.Logger, cmd: list[str], env: Mapping[str, str] | None = None, + timeout: float | None = None, ) -> "subprocess.CompletedProcess[Any]": """Run a command and log the output, and raise if something goes wrong.""" logger.debug(f"Running {cmd}") try: - proc = subprocess.run(cmd, env=env, capture_output=True, check=True) # nosec + proc = subprocess.run( + cmd, env=env, capture_output=True, check=True, timeout=timeout + ) # nosec + except subprocess.TimeoutExpired as exc: + raise SubprocessCommandError( + list(exc.cmd) if isinstance(exc.cmd, (list, tuple)) else [str(exc.cmd)], + "", + f"Command timed out after {exc.timeout:.0f}s", + 124, + ) from exc except subprocess.CalledProcessError as exc: raise SubprocessCommandError( exc.cmd, diff --git a/dfetch/vcs/svn.py b/dfetch/vcs/svn.py index e6c44d1c0..cb4a6b58d 100644 --- a/dfetch/vcs/svn.py +++ b/dfetch/vcs/svn.py @@ -1,12 +1,14 @@ """Svn repository.""" import contextlib +import functools import os import pathlib import re from collections.abc import Callable, Generator, Sequence from pathlib import Path from typing import NamedTuple +from urllib.parse import urlparse from dfetch.log import get_logger from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline @@ -15,6 +17,46 @@ logger = get_logger(__name__) +_SSH_HOST_KEY_MSGS = ("host key verification failed", "authenticity of host") +_EXTERNALS_QUERY_TIMEOUT = 30 + + +# As a cli tool, we can safely assume this remains stable during the runtime, caching for speed is better +@functools.lru_cache +def _extend_env_for_non_interactive_mode() -> dict[str, str]: + """Extend the environment vars for svn running in non-interactive mode.""" + env = os.environ.copy() + ssh_cmd = env.get("SVN_SSH", "ssh") + if "BatchMode=" not in ssh_cmd: + ssh_cmd += " -o BatchMode=yes" + else: + logger.debug('BatchMode already configured in SVN_SSH: "%s"', ssh_cmd) + env["SVN_SSH"] = ssh_cmd + return env + + +def _ssh_target_from_url(url: str) -> str: + """Return the ``[user@]host`` portion of a svn+ssh URL, or the URL itself.""" + parsed = urlparse(url) + host = parsed.hostname or url + return f"{parsed.username}@{host}" if parsed.username else host + + +def _raise_if_ssh_host_key_error(url: str, exc: SubprocessCommandError) -> None: + """Raise a helpful RuntimeError if *exc* looks like an SSH host-key failure.""" + stderr_lower = exc.stderr.lower() + if any(msg in stderr_lower for msg in _SSH_HOST_KEY_MSGS): + parsed = urlparse(url) + host_only = parsed.hostname or url + target = _ssh_target_from_url(url) + raise RuntimeError( + f"SSH host key verification failed while connecting to '{url}'.\n" + "Add the host to your known hosts file, for example by running:\n" + f" ssh-keyscan {host_only} >> ~/.ssh/known_hosts\n" + "Or test the SSH connection manually:\n" + f" ssh -T {target}" + ) from exc + def get_svn_version() -> tuple[str, str]: """Get the name and version of svn.""" @@ -49,9 +91,14 @@ def __init__(self, remote: str) -> None: def is_svn(self) -> bool: """Check if is SVN.""" try: - run_on_cmdline(logger, ["svn", "info", self._remote, "--non-interactive"]) + run_on_cmdline( + logger, + ["svn", "info", self._remote, "--non-interactive"], + env=_extend_env_for_non_interactive_mode(), + ) return True except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) if exc.stderr.startswith("svn: E170013"): raise RuntimeError( f">>>{exc.cmd}<<< failed!\n" @@ -67,20 +114,30 @@ def list_of_branches(self) -> list[str]: result = run_on_cmdline( logger, ["svn", "ls", "--non-interactive", f"{self._remote}/branches"], + env=_extend_env_for_non_interactive_mode(), ) return [ line.strip("/\r") for line in result.stdout.decode().splitlines() if line.strip("/\r") ] - except (SubprocessCommandError, RuntimeError): + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) + return [] + except RuntimeError: return [] def list_of_tags(self) -> list[str]: """Get list of all available tags.""" - result = run_on_cmdline( - logger, ["svn", "ls", "--non-interactive", f"{self._remote}/tags"] - ) + try: + result = run_on_cmdline( + logger, + ["svn", "ls", "--non-interactive", f"{self._remote}/tags"], + env=_extend_env_for_non_interactive_mode(), + ) + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(self._remote, exc) + return [] return [ str(tag).strip("/\r") for tag in result.stdout.decode().split("\n") if tag ] @@ -116,7 +173,9 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]: """List immediate children of *url_path* as ``(name, is_dir)`` pairs.""" try: result = run_on_cmdline( - logger, ["svn", "ls", "--non-interactive", url_path] + logger, + ["svn", "ls", "--non-interactive", url_path], + env=_extend_env_for_non_interactive_mode(), ) entries: list[tuple[str, bool]] = [] for line in result.stdout.decode().splitlines(): @@ -126,7 +185,10 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]: is_dir = line.endswith("/") entries.append((line.rstrip("/"), is_dir)) return entries - except (SubprocessCommandError, RuntimeError): + except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(url_path, exc) + return [] + except RuntimeError: return [] @@ -146,7 +208,11 @@ def is_svn(self) -> bool: """Check if is SVN.""" try: with in_directory(self._path): - run_on_cmdline(logger, ["svn", "info", "--non-interactive"]) + run_on_cmdline( + logger, + ["svn", "info", "--non-interactive"], + env=_extend_env_for_non_interactive_mode(), + ) return True except (SubprocessCommandError, RuntimeError): return False @@ -163,6 +229,7 @@ def externals(self) -> list[External]: "svn:externals", "-R", ], + env=_extend_env_for_non_interactive_mode(), ) repo_root = SvnRepo.get_info_from_target()["Repository Root"] return SvnRepo._parse_externals( @@ -172,11 +239,23 @@ def externals(self) -> list[External]: @staticmethod def externals_from_url(url: str, revision: str = "") -> list[External]: """Get list of externals from a remote SVN URL.""" - cmd = ["svn", "--non-interactive", "propget", "svn:externals", "-R"] + cmd = [ + "svn", + "--non-interactive", + "propget", + "svn:externals", + "--depth", + "immediates", + ] if revision: cmd += ["--revision", revision] cmd += [url] - result = run_on_cmdline(logger, cmd) + result = run_on_cmdline( + logger, + cmd, + env=_extend_env_for_non_interactive_mode(), + timeout=_EXTERNALS_QUERY_TIMEOUT, + ) repo_root = SvnRepo.get_info_from_target(url)["Repository Root"] normalized = SvnRepo._normalize_url_prefix(result.stdout.decode(), url) return SvnRepo._parse_externals(normalized, repo_root) @@ -292,9 +371,12 @@ def get_info_from_target(target: str = "") -> dict[str, str]: """Get the info of the given target.""" try: result = run_on_cmdline( - logger, ["svn", "info", "--non-interactive", target.strip()] + logger, + ["svn", "info", "--non-interactive", target.strip()], + env=_extend_env_for_non_interactive_mode(), ).stdout.decode() except SubprocessCommandError as exc: + _raise_if_ssh_host_key_error(target, exc) if exc.stderr.startswith("svn: E170013"): raise RuntimeError( f">>>{exc.cmd}<<< failed!\n" @@ -335,6 +417,7 @@ def get_last_changed_revision(target: str | Path) -> str: "last-changed-revision", target_str, ], + env=_extend_env_for_non_interactive_mode(), ) .stdout.decode() .strip() @@ -382,6 +465,7 @@ def export(url: str, rev: str = "", dst: str = ".") -> None: ["svn", "export", "--non-interactive", "--force"] + (["--revision", rev] if rev else []) + [url, dst], + env=_extend_env_for_non_interactive_mode(), ) @staticmethod @@ -390,7 +474,9 @@ def files_in_path(url_path: str) -> list[str]: return [ str(line) for line in run_on_cmdline( - logger, ["svn", "list", "--non-interactive", url_path] + logger, + ["svn", "list", "--non-interactive", url_path], + env=_extend_env_for_non_interactive_mode(), ) .stdout.decode() .splitlines() @@ -452,7 +538,9 @@ def create_diff( ) with in_directory(self._path): - patch_text = run_on_cmdline(logger, cmd).stdout + patch_text = run_on_cmdline( + logger, cmd, env=_extend_env_for_non_interactive_mode() + ).stdout if not patch_text.strip(): return Patch.empty().convert_type(PatchType.SVN) @@ -471,6 +559,7 @@ def get_username(self) -> str: "author", self._path, ], + env=_extend_env_for_non_interactive_mode(), ) return str(result.stdout.decode().strip()) except SubprocessCommandError: diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 211eb24d9..edecdba0f 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -44,3 +44,13 @@ def test_run_on_cmdline(name, cmd, cmd_result, expectation): else: with pytest.raises(expectation): run_on_cmdline(logger_mock, cmd) + + +def test_run_on_cmdline_timeout_raises_subprocess_command_error(): + """A subprocess that exceeds its timeout must raise SubprocessCommandError.""" + with patch("dfetch.util.cmdline.subprocess.run") as subprocess_mock: + subprocess_mock.side_effect = subprocess.TimeoutExpired( + cmd=["svn", "propget"], timeout=30 + ) + with pytest.raises(SubprocessCommandError, match="timed out"): + run_on_cmdline(MagicMock(), ["svn", "propget"], timeout=30) diff --git a/tests/test_svn.py b/tests/test_svn.py index 30190a7ad..8b2b8f313 100644 --- a/tests/test_svn.py +++ b/tests/test_svn.py @@ -432,6 +432,29 @@ def test_normalize_url_prefix(name, output, base_url, expected): # --------------------------------------------------------------------------- +def test_externals_from_url_uses_immediates_depth(): + """externals_from_url must use --depth immediates, not -R. + + Full recursion (-R) hangs on large remote repos; one level deep covers the + common case (externals defined at root or direct children) without an + unbounded number of round-trips. + """ + with ( + patch("dfetch.vcs.svn.run_on_cmdline") as mock_run, + patch("dfetch.vcs.svn.SvnRepo.get_info_from_target") as mock_info, + ): + mock_run.return_value.stdout = b"" + mock_info.return_value = {"Repository Root": REPO_ROOT} + + SvnRepo.externals_from_url(REPO_ROOT + "/trunk") + + cmd = mock_run.call_args[0][1] + assert "-R" not in cmd + assert "--recursive" not in cmd + assert "--depth" in cmd + assert "immediates" in cmd + + def test_externals_from_url_omits_revision_when_not_given(): with ( patch("dfetch.vcs.svn.run_on_cmdline") as mock_run, @@ -520,3 +543,163 @@ def test_externals_from_url_nonstd_layout_branch_is_space(): assert result[0].url == nonstd_url assert result[0].revision == "" assert result[0].path == "Database" + + +@pytest.mark.parametrize( + "method,url", + [ + ("is_svn", "svn+ssh://svn.code.sf.net/project"), + ("list_of_tags", "svn+ssh://svn.code.sf.net/project"), + ("list_of_branches", "svn+ssh://svn.code.sf.net/project"), + ], +) +def test_svn_remote_raises_hint_on_ssh_host_key_failure(method, url): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="known hosts"): + getattr(SvnRemote(url), method)() + + +def test_get_info_from_target_raises_hint_on_ssh_host_key_failure(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn", "info"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="known hosts"): + SvnRepo.get_info_from_target("svn+ssh://svn.code.sf.net/project") + + +def test_ssh_hint_includes_hostname(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="svn.code.sf.net"): + SvnRemote("svn+ssh://svn.code.sf.net/project").is_svn() + + +def test_ssh_hint_includes_user_when_present_in_url(): + stderr = "Host key verification failed." + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="myuser@svn.code.sf.net"): + SvnRemote("svn+ssh://myuser@svn.code.sf.net/project").is_svn() + + +def test_svn_ssh_env_has_batch_mode(): + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + env = _extend_env_for_non_interactive_mode() + assert "BatchMode=yes" in env["SVN_SSH"] + + +def test_svn_ssh_env_preserves_existing_batch_mode(monkeypatch): + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + monkeypatch.setenv("SVN_SSH", "ssh -o BatchMode=yes -i /my/key") + _extend_env_for_non_interactive_mode.cache_clear() + env = _extend_env_for_non_interactive_mode() + assert env["SVN_SSH"].count("BatchMode=yes") == 1 + + +def test_authenticity_of_host_message_triggers_ssh_hint(): + """Second _SSH_HOST_KEY_MSGS variant must also surface the hint.""" + stderr = ( + "The authenticity of host 'svn.code.sf.net (192.0.2.1)' can't be established." + ) + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + with pytest.raises(RuntimeError, match="known hosts"): + SvnRemote("svn+ssh://svn.code.sf.net/project").is_svn() + + +def test_non_ssh_error_does_not_trigger_ssh_hint_for_list_of_branches(): + """A plain SubprocessCommandError must return [] without raising the SSH hint.""" + stderr = "svn: E170013: Unable to connect to a repository at URL" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + result = SvnRemote("svn+ssh://svn.code.sf.net/project").list_of_branches() + assert result == [] + + +def test_non_ssh_error_does_not_trigger_ssh_hint_for_is_svn(): + """A generic SubprocessCommandError must return False without the SSH hint.""" + stderr = "svn: E200009: Some other svn error" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + assert SvnRemote("svn+ssh://svn.code.sf.net/project").is_svn() is False + + +def test_list_of_tags_returns_empty_on_non_ssh_error(): + """list_of_tags must match list_of_branches: return [] for non-SSH failures.""" + stderr = "svn: E200009: Could not list all targets because some targets don't exist" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.side_effect = SubprocessCommandError(["svn"], "", stderr, 1) + + result = SvnRemote("svn+ssh://svn.code.sf.net/project").list_of_tags() + assert result == [] + + +def test_svn_repo_is_svn_passes_batch_mode_env(): + """SvnRepo.is_svn() must inject BatchMode=yes so svn+ssh:// WCs don't hang.""" + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + with ( + patch("dfetch.vcs.svn.run_on_cmdline") as mock_run, + patch("dfetch.vcs.svn.in_directory"), + ): + mock_run.return_value.stdout = b"" + SvnRepo(".").is_svn() + assert "BatchMode=yes" in mock_run.call_args.kwargs["env"]["SVN_SSH"] + + +def test_svn_repo_externals_passes_batch_mode_env(): + """SvnRepo.externals() must inject BatchMode=yes for the propget call.""" + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + with ( + patch("dfetch.vcs.svn.run_on_cmdline") as mock_run, + patch("dfetch.vcs.svn.in_directory"), + patch.object( + SvnRepo, + "get_info_from_target", + return_value={"Repository Root": "svn+ssh://h/r"}, + ), + ): + mock_run.return_value.stdout = b"" + SvnRepo(".").externals() + assert "BatchMode=yes" in mock_run.call_args_list[0].kwargs["env"]["SVN_SSH"] + + +def test_svn_repo_create_diff_passes_batch_mode_env(): + """SvnRepo.create_diff() must inject BatchMode=yes when contacting the server.""" + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + with ( + patch("dfetch.vcs.svn.run_on_cmdline") as mock_run, + patch("dfetch.vcs.svn.in_directory"), + ): + mock_run.return_value.stdout = b"" + SvnRepo(".").create_diff(old_revision="42", new_revision=None, ignore=[]) + assert "BatchMode=yes" in mock_run.call_args.kwargs["env"]["SVN_SSH"] + + +def test_svn_repo_get_username_passes_batch_mode_env(): + """SvnRepo.get_username() must inject BatchMode=yes so it doesn't hang.""" + from dfetch.vcs.svn import _extend_env_for_non_interactive_mode + + _extend_env_for_non_interactive_mode.cache_clear() + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.return_value.stdout = b"alice" + SvnRepo(".").get_username() + assert "BatchMode=yes" in mock_run.call_args.kwargs["env"]["SVN_SSH"] diff --git a/tests/test_svnsubproject.py b/tests/test_svnsubproject.py index 87ca45cc5..b18f140a2 100644 --- a/tests/test_svnsubproject.py +++ b/tests/test_svnsubproject.py @@ -100,6 +100,21 @@ def test_fetch_externals_preserves_tag_in_dependency(): assert result[0]["source_type"] == "svn-external" +def test_fetch_externals_returns_empty_on_subprocess_error(): + """A slow/unresponsive SVN server must not abort the update — return [] with a warning.""" + from dfetch.util.cmdline import SubprocessCommandError + + with patch("dfetch.project.svnsubproject.SvnRepo.externals_from_url") as mock_ext: + mock_ext.side_effect = SubprocessCommandError( + ["svn"], "", "Command timed out after 30s", 124 + ) + subproject = SvnSubProject( + ProjectEntry({"name": "myproject", "url": REPO_ROOT}) + ) + result = subproject._fetch_externals(REPO_ROOT + "/trunk", "42") + assert result == [] + + def test_fetch_externals_nonstd_layout_preserves_space_branch(): # A non-standard-layout external (URL contains no /trunk/, /branches/, or # /tags/) is represented with branch=" " by _split_url. That sentinel must