Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Release 0.14.0 (unreleased)
* Fix arbitrary file write via malicious tar/zip symlink (#1152)
* Prevent SSH command injection (#1152)
* Allow manifests with no ``projects`` key so ``dfetch add`` can bootstrap empty manifest (#1197)
* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (#1230)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Update placeholder issue reference.

The changelog entry references (#0), which appears to be a placeholder. Based on the PR objectives, this should reference issue or PR #1230.

📝 Proposed fix
-* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (`#0`)
+* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (`#1230`)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (#0)
* Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (`#1230`)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@CHANGELOG.rst` at line 23, Update the CHANGELOG line that currently reads "*
Run ``svn+ssh://`` connections in non-interactive mode to prevent hanging (`#0`)"
to reference the correct issue/PR number; replace the placeholder "(`#0`)" with
"(`#1230`)" so the entry reads "...prevent hanging (`#1230`)". Ensure only the
numeric reference is changed and formatting (backticks and punctuation) is
preserved.


Release 0.13.0 (released 2026-03-30)
====================================
Expand Down
86 changes: 76 additions & 10 deletions dfetch/vcs/svn.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Svn repository."""

import contextlib
import functools
import os
import pathlib
import re
from collections.abc import Callable, Generator, Sequence
from pathlib import Path
from typing import NamedTuple
from urllib.parse import urlparse

from dfetch.log import get_logger
from dfetch.util.cmdline import SubprocessCommandError, run_on_cmdline
Expand All @@ -15,6 +17,43 @@

logger = get_logger(__name__)

_SSH_HOST_KEY_MSGS = ("host key verification failed", "authenticity of host")


# A CLI process sees a stable environment for its whole runtime, so compute this once and reuse it.
@functools.lru_cache
def _extend_env_for_non_interactive_mode() -> dict[str, str]:
    """Build the environment used to run svn without interactive SSH prompts.

    A copy of ``os.environ`` is returned in which ``SVN_SSH`` (defaulting to
    ``ssh``) carries ``-o BatchMode=yes``, unless the existing value already
    mentions ``BatchMode=``. The result is cached, so every call hands back
    the same dict object.
    """
    environment = os.environ.copy()
    svn_ssh = environment.get("SVN_SSH", "ssh")
    if "BatchMode=" in svn_ssh:
        # The user already chose a BatchMode setting; leave their command alone.
        logger.debug('BatchMode already configured in SVN_SSH: "%s"', svn_ssh)
    else:
        svn_ssh = f"{svn_ssh} -o BatchMode=yes"
    environment["SVN_SSH"] = svn_ssh
    return environment


def _ssh_target_from_url(url: str) -> str:
    """Extract the SSH destination (``[user@]host``) from a svn+ssh URL.

    When no hostname can be parsed out of *url*, *url* itself is used in
    place of the host.
    """
    parts = urlparse(url)
    hostname = parts.hostname if parts.hostname else url
    if not parts.username:
        return hostname
    return f"{parts.username}@{hostname}"


def _raise_if_ssh_host_key_error(url: str, exc: SubprocessCommandError) -> None:
    """Raise a helpful RuntimeError if *exc* looks like an SSH host-key failure.

    Args:
        url: The remote that was being contacted when *exc* occurred.
        exc: The failed command; its ``stderr`` is searched case-insensitively
            for the messages listed in ``_SSH_HOST_KEY_MSGS``.

    Raises:
        RuntimeError: If one of the host-key messages is found; chained from
            *exc*. Otherwise the function returns normally so the caller can
            continue with its own error handling.
    """
    stderr_lower = exc.stderr.lower()
    if not any(msg in stderr_lower for msg in _SSH_HOST_KEY_MSGS):
        return

    target = _ssh_target_from_url(url)
    # ssh-keyscan only accepts host names/addresses; a ``user@`` prefix would be
    # treated as part of the host name and make the suggested command fail.
    host = urlparse(url).hostname or url
    raise RuntimeError(
        f"SSH host key verification failed while connecting to '{url}'.\n"
        "Add the host to your known hosts file, for example by running:\n"
        f"  ssh-keyscan {host} >> ~/.ssh/known_hosts\n"
        "Or test the SSH connection manually:\n"
        f"  ssh -T {target}"
    ) from exc
Comment on lines +37 to +55
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

🧩 Analysis chain

🌐 Web query:

Does ssh-keyscan accept targets in user@host form, or only hostnames/IP addresses?

💡 Result:

The ssh-keyscan utility does not accept targets in the user@host format [1][2][3]. It is designed specifically to gather the public SSH host keys of servers, and its arguments expect only hostnames, IP addresses, or CIDR network ranges [1][4][5]. According to the official documentation, the synopsis for ssh-keyscan is [1][3]: ssh-keyscan [-46cDHqv] [-f file] [-O option] [-p port] [-T timeout] [-t type] [host | addrlist namelist] If you provide a string in the user@host format, ssh-keyscan will attempt to resolve that entire string as a hostname, which will fail if the string is not a valid, resolvable host identifier [1][4]. The tool's purpose is to facilitate the creation and verification of ssh_known_hosts files by connecting to the server's public key infrastructure, not to authenticate as a specific user [5][6]. If you need to manage keys for a target accessed as user@host, you should pass only the host portion (the hostname or IP address) to ssh-keyscan [1][7]. Top results: [1][4][3]

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail
ls -la
sed -n '1,220p' dfetch/vcs/svn.py | nl -ba | sed -n '1,220p'

Repository: dfetch-org/dfetch

Length of output: 1911


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '1,140p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '25,90p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '25,90p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '25,90p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '90,190p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail
nl -ba dfetch/vcs/svn.py | sed -n '30,80p'

Repository: dfetch-org/dfetch

Length of output: 104


🏁 Script executed:

#!/bin/bash
set -euo pipefail

wc -l dfetch/vcs/svn.py
echo "----"
cat -n dfetch/vcs/svn.py | sed -n '1,120p'
echo "----"
cat -n dfetch/vcs/svn.py | sed -n '120,220p'

Repository: dfetch-org/dfetch

Length of output: 9233


Fix SSH host-key remediation guidance to use host-only for ssh-keyscan
dfetch/vcs/svn.py formats target as user@host and then uses it in ssh-keyscan {target}; ssh-keyscan expects a hostname/IP/CIDR, so including user@ can make the suggested command fail. Use parsed.hostname for ssh-keyscan, and keep user@host only for the ssh -T example.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@dfetch/vcs/svn.py` around lines 37 - 55, The remediation message currently
uses the output of _ssh_target_from_url (which returns "user@host") for both
commands; change _raise_if_ssh_host_key_error to parse the URL (urlparse(url))
locally, build host_only = parsed.hostname or url for the ssh-keyscan
suggestion, and keep target = _ssh_target_from_url(url) (user@host) for the ssh
-T suggestion so the ssh-keyscan line uses only the hostname/IP while the ssh -T
line still shows user@host.



def get_svn_version() -> tuple[str, str]:
"""Get the name and version of svn."""
Expand Down Expand Up @@ -49,9 +88,14 @@ def __init__(self, remote: str) -> None:
def is_svn(self) -> bool:
"""Check if is SVN."""
try:
run_on_cmdline(logger, ["svn", "info", self._remote, "--non-interactive"])
run_on_cmdline(
logger,
["svn", "info", self._remote, "--non-interactive"],
env=_extend_env_for_non_interactive_mode(),
)
return True
except SubprocessCommandError as exc:
_raise_if_ssh_host_key_error(self._remote, exc)
if exc.stderr.startswith("svn: E170013"):
raise RuntimeError(
f">>>{exc.cmd}<<< failed!\n"
Expand All @@ -67,20 +111,30 @@ def list_of_branches(self) -> list[str]:
result = run_on_cmdline(
logger,
["svn", "ls", "--non-interactive", f"{self._remote}/branches"],
env=_extend_env_for_non_interactive_mode(),
)
return [
line.strip("/\r")
for line in result.stdout.decode().splitlines()
if line.strip("/\r")
]
except (SubprocessCommandError, RuntimeError):
except SubprocessCommandError as exc:
_raise_if_ssh_host_key_error(self._remote, exc)
return []
except RuntimeError:
return []

def list_of_tags(self) -> list[str]:
"""Get list of all available tags."""
result = run_on_cmdline(
logger, ["svn", "ls", "--non-interactive", f"{self._remote}/tags"]
)
try:
result = run_on_cmdline(
logger,
["svn", "ls", "--non-interactive", f"{self._remote}/tags"],
env=_extend_env_for_non_interactive_mode(),
)
except SubprocessCommandError as exc:
_raise_if_ssh_host_key_error(self._remote, exc)
raise
return [
str(tag).strip("/\r") for tag in result.stdout.decode().split("\n") if tag
]
Expand Down Expand Up @@ -116,7 +170,9 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]:
"""List immediate children of *url_path* as ``(name, is_dir)`` pairs."""
try:
result = run_on_cmdline(
logger, ["svn", "ls", "--non-interactive", url_path]
logger,
["svn", "ls", "--non-interactive", url_path],
env=_extend_env_for_non_interactive_mode(),
)
entries: list[tuple[str, bool]] = []
for line in result.stdout.decode().splitlines():
Expand All @@ -126,7 +182,10 @@ def ls_tree(self, url_path: str) -> list[tuple[str, bool]]:
is_dir = line.endswith("/")
entries.append((line.rstrip("/"), is_dir))
return entries
except (SubprocessCommandError, RuntimeError):
except SubprocessCommandError as exc:
_raise_if_ssh_host_key_error(url_path, exc)
return []
except RuntimeError:
return []


Expand Down Expand Up @@ -176,7 +235,7 @@ def externals_from_url(url: str, revision: str = "") -> list[External]:
if revision:
cmd += ["--revision", revision]
cmd += [url]
result = run_on_cmdline(logger, cmd)
result = run_on_cmdline(logger, cmd, env=_extend_env_for_non_interactive_mode())
repo_root = SvnRepo.get_info_from_target(url)["Repository Root"]
normalized = SvnRepo._normalize_url_prefix(result.stdout.decode(), url)
return SvnRepo._parse_externals(normalized, repo_root)
Expand Down Expand Up @@ -292,9 +351,12 @@ def get_info_from_target(target: str = "") -> dict[str, str]:
"""Get the info of the given target."""
try:
result = run_on_cmdline(
logger, ["svn", "info", "--non-interactive", target.strip()]
logger,
["svn", "info", "--non-interactive", target.strip()],
env=_extend_env_for_non_interactive_mode(),
).stdout.decode()
except SubprocessCommandError as exc:
_raise_if_ssh_host_key_error(target, exc)
if exc.stderr.startswith("svn: E170013"):
raise RuntimeError(
f">>>{exc.cmd}<<< failed!\n"
Expand Down Expand Up @@ -335,6 +397,7 @@ def get_last_changed_revision(target: str | Path) -> str:
"last-changed-revision",
target_str,
],
env=_extend_env_for_non_interactive_mode(),
)
.stdout.decode()
.strip()
Expand Down Expand Up @@ -382,6 +445,7 @@ def export(url: str, rev: str = "", dst: str = ".") -> None:
["svn", "export", "--non-interactive", "--force"]
+ (["--revision", rev] if rev else [])
+ [url, dst],
env=_extend_env_for_non_interactive_mode(),
)

@staticmethod
Expand All @@ -390,7 +454,9 @@ def files_in_path(url_path: str) -> list[str]:
return [
str(line)
for line in run_on_cmdline(
logger, ["svn", "list", "--non-interactive", url_path]
logger,
["svn", "list", "--non-interactive", url_path],
env=_extend_env_for_non_interactive_mode(),
)
.stdout.decode()
.splitlines()
Expand Down
2 changes: 1 addition & 1 deletion example/dfetch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ manifest:
default: true # Set it as default

- name: sourceforge
url-base: svn://svn.code.sf.net/p/
url-base: svn+ssh://svn.code.sf.net/p/
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | 💤 Low value

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Description: Verify the example SVN+SSH URL is accessible (read-only check)
# Note: This will fail if SSH keys are not configured for svn.code.sf.net

svn info --non-interactive svn+ssh://svn.code.sf.net/p/tortoisesvn/code 2>&1 | head -20

Repository: dfetch-org/dfetch

Length of output: 103


🏁 Script executed:

#!/bin/bash
set -euo pipefail
# Inspect the referenced example config to confirm the final URL and any dependent references
sed -n '1,120p' example/dfetch.yaml | cat -n

Repository: dfetch-org/dfetch

Length of output: 2527


🌐 Web query:

SourceForge svn.code.sf.net svn+ssh:// authentication SSH key non-interactive svn clients

💡 Result:

To achieve non-interactive authentication for svn+ssh on SourceForge, you must rely on SSH key-based authentication rather than Subversion-level credential passing [1][2][3]. Because svn+ssh uses an external SSH client to tunnel the connection, standard Subversion --username/--password flags do not pass credentials to the underlying SSH process [4][5]. Follow these steps to configure your environment for non-interactive access: 1. Register your SSH public key with your SourceForge account via the Account SSH page (accessible under your User Account settings) [2][3][6]. 2. Ensure your local SSH client is configured to use this key automatically [2][3]. - On Linux/macOS (OpenSSH), verify your key is added to your ssh-agent [6]. You can also use ~/.ssh/config to explicitly map the host svn.code.sf.net to your identity file: Host svn.code.sf.net IdentityFile ~/.ssh/your_private_key IdentitiesOnly yes - On Windows (PuTTY/Plink), load your private key (.ppk) into Pageant [2][3]. Alternatively, you can point your Subversion configuration to Plink using the SVN_SSH environment variable [2][3]: set SVN_SSH="C:\path\to\plink.exe -batch -i C:\path\to\your\key.ppk" 3. Test your configuration with a simple command like svn info svn+ssh://username@svn.code.sf.net/p/PROJECTNAME/code. If configured correctly, the SSH client will authenticate using your key without requiring a passphrase or interactive prompt [2][3]. Important Considerations: - Protocol Limitations: The Subversion --non-interactive flag only instructs the SVN client to suppress its own prompts; it cannot prevent the underlying SSH client from prompting for a passphrase if your key is encrypted or if no agent is available [4][5][7]. - Security: Avoid using tools like sshpass or passing credentials as plain text in scripts [8][4]. Using an ssh-agent or an unencrypted key (protected by filesystem permissions) is the standard, secure approach for automation [9][10]. 
- Host Verification: When connecting for the first time, you must accept the server's host key so that subsequent non-interactive connections do not fail due to host verification prompts [2][3][6][11].

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail
rg -n "svn\+ssh|sourceforge|tortoisesvn|SSH key|SSH keys|svn\.code\.sf\.net" -S . || true

Repository: dfetch-org/dfetch

Length of output: 5102


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Show the README section mentioning svn:// vs svn+ssh, plus the changelog entry.
sed -n '60,130p' README.md | cat -n
echo "----"
sed -n '1,80p' CHANGELOG.rst | cat -n

# Show the specific example YAML lines already referenced.
sed -n '1,60p' example/dfetch.yaml | cat -n

# Inspect the svn+ssh-related tests referenced by rg output.
sed -n '480,620p' tests/test_svn.py | cat -n

Repository: dfetch-org/dfetch

Length of output: 14793


🏁 Script executed:

#!/bin/bash
set -euo pipefail
# Also inspect the purl/security references around svn+ssh to understand stated intent.
sed -n '330,430p' security/tm_usage.py | cat -n
sed -n '1,120p' dfetch/vcs/svn.py | cat -n

Repository: dfetch-org/dfetch

Length of output: 10431


🏁 Script executed:

#!/bin/bash
set -euo pipefail
# Locate where manifest 'remotes' url-base + project repo-path are combined (especially for 'vcs: svn').
rg -n "url-base|repo-path|remote:|vcs:\s*svn|svn\+" dfetch example doc tests -S

Repository: dfetch-org/dfetch

Length of output: 20485


Clarify SSH key/known-host setup for the SourceForge svn+ssh example

example/dfetch.yaml switches url-base to svn+ssh://svn.code.sf.net/p/, so the tortoisesvn/code projects will be fetched via svn+ssh (SSH-key authentication, not the plaintext svn:// transport). DFetch already forces non-interactive SSH (SVN_SSH uses BatchMode=yes) and raises a clear “known hosts”/ssh-keyscan ... >> ~/.ssh/known_hosts hint on host-key verification failures—add a brief note to the example about registering an SSH key with SourceForge and accepting the host key, and do a quick smoke test that tortoisesvn/code is reachable via svn+ssh in typical environments.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@example/dfetch.yaml` at line 10, Update the example near the url-base entry
(url-base: svn+ssh://svn.code.sf.net/p/) to add a brief note explaining that
SourceForge requires users to register an SSH public key in their account and to
accept the host's SSH key (add guidance to add the host key to
~/.ssh/known_hosts or run ssh-keyscan for the host), and include a short
smoke-test instruction to verify the repository is reachable via svn+ssh (e.g.,
attempt an svn list/ls of the tortoisesvn/code path) so reviewers know how to
validate the ssh-key/known-hosts setup non-interactively.


projects:

Expand Down
61 changes: 61 additions & 0 deletions tests/test_svn.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,3 +520,64 @@ def test_externals_from_url_nonstd_layout_branch_is_space():
assert result[0].url == nonstd_url
assert result[0].revision == ""
assert result[0].path == "Database"


@pytest.mark.parametrize(
    "method,url",
    [
        ("is_svn", "svn+ssh://svn.code.sf.net/project"),
        ("list_of_tags", "svn+ssh://svn.code.sf.net/project"),
        ("list_of_branches", "svn+ssh://svn.code.sf.net/project"),
    ],
)
def test_svn_remote_raises_hint_on_ssh_host_key_failure(method, url):
    """Each SvnRemote query turns an SSH host-key failure into a 'known hosts' hint."""
    host_key_failure = SubprocessCommandError(
        ["svn"], "", "Host key verification failed.", 1
    )
    with patch("dfetch.vcs.svn.run_on_cmdline", side_effect=host_key_failure):
        with pytest.raises(RuntimeError, match="known hosts"):
            getattr(SvnRemote(url), method)()


def test_get_info_from_target_raises_hint_on_ssh_host_key_failure():
    """``SvnRepo.get_info_from_target`` reports host-key failures with a 'known hosts' hint."""
    host_key_failure = SubprocessCommandError(
        ["svn", "info"], "", "Host key verification failed.", 1
    )
    with patch("dfetch.vcs.svn.run_on_cmdline", side_effect=host_key_failure):
        with pytest.raises(RuntimeError, match="known hosts"):
            SvnRepo.get_info_from_target("svn+ssh://svn.code.sf.net/project")


def test_ssh_hint_includes_hostname():
    """The host-key hint names the host taken from the remote URL."""
    host_key_failure = SubprocessCommandError(
        ["svn"], "", "Host key verification failed.", 1
    )
    with patch("dfetch.vcs.svn.run_on_cmdline", side_effect=host_key_failure):
        with pytest.raises(RuntimeError, match="svn.code.sf.net"):
            SvnRemote("svn+ssh://svn.code.sf.net/project").is_svn()


def test_ssh_hint_includes_user_when_present_in_url():
    """A user name in the remote URL shows up as ``user@host`` in the hint."""
    host_key_failure = SubprocessCommandError(
        ["svn"], "", "Host key verification failed.", 1
    )
    with patch("dfetch.vcs.svn.run_on_cmdline", side_effect=host_key_failure):
        with pytest.raises(RuntimeError, match="myuser@svn.code.sf.net"):
            SvnRemote("svn+ssh://myuser@svn.code.sf.net/project").is_svn()


def test_svn_ssh_env_has_batch_mode(monkeypatch):
    """With no SVN_SSH configured, svn is run with plain ``ssh`` in batch mode."""
    from dfetch.vcs.svn import _extend_env_for_non_interactive_mode

    # Pin the starting state: an ambient SVN_SSH (for example one that sets
    # ``BatchMode=no``) would otherwise decide the outcome of this test.
    monkeypatch.delenv("SVN_SSH", raising=False)
    _extend_env_for_non_interactive_mode.cache_clear()
    try:
        env = _extend_env_for_non_interactive_mode()
        assert env["SVN_SSH"] == "ssh -o BatchMode=yes"
    finally:
        # The cached dict was built from the patched environment; drop it so
        # later tests do not see it after monkeypatch restores the variable.
        _extend_env_for_non_interactive_mode.cache_clear()


def test_svn_ssh_env_preserves_existing_batch_mode(monkeypatch):
    """An SVN_SSH that already sets BatchMode is passed through unchanged."""
    from dfetch.vcs.svn import _extend_env_for_non_interactive_mode

    monkeypatch.setenv("SVN_SSH", "ssh -o BatchMode=yes -i /my/key")
    _extend_env_for_non_interactive_mode.cache_clear()
    try:
        env = _extend_env_for_non_interactive_mode()
        assert env["SVN_SSH"].count("BatchMode=yes") == 1
        # The user's own options must survive untouched.
        assert env["SVN_SSH"] == "ssh -o BatchMode=yes -i /my/key"
    finally:
        # monkeypatch restores SVN_SSH after the test, but the lru_cache would
        # keep serving the patched value to later tests unless it is cleared.
        _extend_env_for_non_interactive_mode.cache_clear()
Loading