Skip to content
92 changes: 71 additions & 21 deletions src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,15 +1224,39 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re
return content
# All raw attempts failed — fall through to API path which
# handles private repos, rate-limit messaging, and SAML errors.


# Try raw URL for generic hosts (Gitea, GitLab, etc.)
if host.lower() not in ("github.com",) and not host.lower().endswith(".ghe.com"):
raw_url = f"https://{host}/{owner}/{repo}/raw/{ref}/{file_path}"
raw_headers = {}
if token:
raw_headers['Authorization'] = f'token {token}'
try:
response = self._resilient_get(raw_url, headers=raw_headers, timeout=30)
if response.status_code == 200:
if verbose_callback:
verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}")
return response.content
except (requests.RequestException, OSError):
pass

# --- Contents API path (authenticated, enterprise, or raw fallback) ---
# Build GitHub API URL - format differs by host type
# Build API URL candidates - format differs by host type
if host == "github.com":
api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
api_url_candidates = [
f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
]
elif host.lower().endswith(".ghe.com"):
api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
api_url_candidates = [
f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
]
else:
api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
# Generic host: negotiate Gitea/Gogs-style contents API versions.
api_url_candidates = [
f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={ref}",
f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}",
Comment thread
ganesanviji marked this conversation as resolved.
]
api_url = api_url_candidates[0]

# Set up authentication headers
headers = {
Expand All @@ -1250,33 +1274,59 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re
return response.content
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
# Try fallback branches if the specified ref fails
# For generic hosts, try remaining API version candidates before ref fallback
for candidate_url in api_url_candidates[1:]:
try:
candidate_resp = self._resilient_get(candidate_url, headers=headers, timeout=30)
candidate_resp.raise_for_status()
if verbose_callback:
verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}")
return candidate_resp.content
except requests.exceptions.HTTPError as ce:
if ce.response.status_code != 404:
raise RuntimeError(
f"Failed to download {file_path}: HTTP {ce.response.status_code}"
)
# 404 on this version too -- try next

# All API versions returned 404 -- try fallback ref
if ref not in ["main", "master"]:
# If original ref failed, don't try fallbacks - it might be a specific version
raise RuntimeError(f"File not found: {file_path} at ref '{ref}' in {dep_ref.repo_url}")

# Try the other default branch
fallback_ref = "master" if ref == "main" else "main"

# Build fallback API URL
# Build fallback URL candidates
if host == "github.com":
fallback_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}"
fallback_url_candidates = [
f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}"
]
elif host.lower().endswith(".ghe.com"):
fallback_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}"
fallback_url_candidates = [
f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}"
]
else:
fallback_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}"
fallback_url_candidates = [
f"https://{host}/api/v1/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}",
f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}",
f"https://{host}/api/v4/repos/{owner}/{repo}/contents/{file_path}?ref={fallback_ref}",
Comment thread
ganesanviji marked this conversation as resolved.
]

try:
response = self._resilient_get(fallback_url, headers=headers, timeout=30)
response.raise_for_status()
if verbose_callback:
verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}")
return response.content
except requests.exceptions.HTTPError:
raise RuntimeError(
f"File not found: {file_path} in {dep_ref.repo_url} "
f"(tried refs: {ref}, {fallback_ref})"
)
for fallback_url in fallback_url_candidates:
try:
response = self._resilient_get(fallback_url, headers=headers, timeout=30)
response.raise_for_status()
if verbose_callback:
verbose_callback(f"Downloaded file: {host}/{dep_ref.repo_url}/{file_path}")
return response.content
except requests.exceptions.HTTPError:
pass # Try next version or ref

raise RuntimeError(
f"File not found: {file_path} in {dep_ref.repo_url} "
f"(tried refs: {ref}, {fallback_ref})"
)
elif e.response.status_code == 401 or e.response.status_code == 403:
# Distinguish rate limiting from auth failure.
# GitHub returns 403 with X-RateLimit-Remaining: 0 when the
Expand Down
13 changes: 11 additions & 2 deletions src/apm_cli/models/dependency/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
is_artifactory_path,
is_azure_devops_hostname,
is_github_hostname,
is_gitlab_hostname,
is_supported_git_host,
parse_artifactory_path,
unsupported_host_error,
Expand Down Expand Up @@ -580,10 +581,18 @@ def _detect_virtual_package(cls, dependency_str: str):
for seg in path_segments
)
has_collection = "collections" in path_segments
# GitLab supports nested groups (group/subgroup/repo), so the full
# path is the repo -- no shorthand subdirectory splitting.
# Use https://gitlab.com/group/subgroup/repo.git for GitLab nested
# groups; shorthand subdirectory syntax is not supported for GitLab.
# All other generic hosts (Gitea, Bitbucket, self-hosted, etc.) use
# the owner/repo convention, so extra segments are a virtual subdir.
Comment on lines +584 to +589
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR changes how virtual packages are detected/handled for generic FQDN hosts (and introduces GitLab-specific nested-group behavior). The Starlight docs and the apm-guide usage doc currently document virtual package rules and the "dict form required when shorthand is ambiguous" note, but they don't describe the generic-host behavior being introduced here (e.g., whether subdirectory virtual packages are supported via shorthand on non-GitHub hosts, or require object form). Please update the relevant docs pages so users of Gitea/self-hosted hosts know which syntax is supported and when they must use the object form.

Copilot uses AI. Check for mistakes.
if has_virtual_ext or has_collection:
min_base_segments = 2
else:
elif is_gitlab_hostname(validated_host):
min_base_segments = len(path_segments)
else:
min_base_segments = 2
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocking: This sets min_base_segments = 2 for all non-GitLab generic hosts (Gitea, Bitbucket, self-hosted git). That means gitea.myorg.com/group/subgroup/repo parses as repo_url="group/subgroup" + virtual_path="repo" + is_virtual=True.

But your tests (test_three_segment_gitea_path_is_not_virtual, test_four_segment_generic_path_without_indicators_is_not_virtual) expect is_virtual=False with repo_url="group/subgroup/repo".

This is the root cause of the 4 CI failures. Either:

  • Change this to min_base_segments = len(path_segments) (all generic hosts treat full path as repo, require dict form for virtual), or
  • Update the tests to match the current 2-segment split behavior.

The Copilot bot suggested the first option -- I agree that's safer, since Gitea also supports nested orgs/groups.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are correct. but when I go with this logic and try to use the below structure of gitea repo to install, I am facing the issue and installation process is not completing. So, I have added this logic to work with the below structure of gitea repo to mention in apm.yml file.

apm install gitea.host.com/group/repo/skills/create-pull-request#Skill_Feature

else:
min_base_segments = 2

Expand Down Expand Up @@ -734,7 +743,7 @@ def _parse_standard_url(
user_repo = "/".join(parts[1:])
else:
user_repo = "/".join(parts[1:3])
elif len(parts) >= 2 and "." not in parts[0]:
elif len(parts) >= 2 and ("." not in parts[0] or validated_host is not None):
if not host:
host = default_host()
if is_azure_devops_hostname(host) and len(parts) >= 3:
Expand Down
17 changes: 17 additions & 0 deletions src/apm_cli/utils/github_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@ def is_azure_devops_hostname(hostname: Optional[str]) -> bool:
return False


def is_gitlab_hostname(hostname: Optional[str]) -> bool:
"""Return True if hostname is GitLab (cloud or self-hosted).

GitLab supports nested groups (group/subgroup/repo), so paths with
more than two segments should be treated as repo paths, not virtual
subdirectory packages.

Accepts:
- gitlab.com
- Any hostname starting with 'gitlab.' (common self-hosted convention)
"""
if not hostname:
return False
h = hostname.lower()
return h == "gitlab.com" or h.startswith("gitlab.")


def is_github_hostname(hostname: Optional[str]) -> bool:
"""Return True if hostname should be treated as GitHub (cloud or enterprise).

Expand Down
142 changes: 142 additions & 0 deletions tests/test_github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1656,5 +1656,147 @@ def test_try_raw_download_returns_content_on_200(self):
assert result == b'hello world'


# ---------------------------------------------------------------------------
# Generic host (Gitea / GitLab) download tests
# ---------------------------------------------------------------------------

def _make_resp(status_code: int, content: bytes = b"") -> Mock:
"""Build a minimal mock requests.Response."""
resp = Mock()
resp.status_code = status_code
resp.content = content
if status_code >= 400:
resp.raise_for_status = Mock(
side_effect=requests_lib.exceptions.HTTPError(response=resp)
)
else:
resp.raise_for_status = Mock()
return resp


class TestGiteaRawUrlDownload:
"""Gitea raw URL path: /{owner}/{repo}/raw/{ref}/{file}."""

def setup_method(self):
with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH:
self.downloader = GitHubPackageDownloader()

def test_raw_url_succeeds_on_first_attempt(self):
"""Raw URL returns 200 -- content returned without calling the API."""
dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo")
expected = b"# README content"
raw_ok = _make_resp(200, expected)

with patch.object(self.downloader, "_resilient_get", return_value=raw_ok) as mock_get:
result = self.downloader.download_raw_file(dep_ref, "README.md", "main")

assert result == expected
first_url = mock_get.call_args_list[0][0][0]
assert first_url == "https://gitea.myorg.com/owner/repo/raw/main/README.md"
assert mock_get.call_count == 1

def test_raw_url_with_token_adds_auth_header(self):
"""Token is forwarded as Authorization header in the raw URL request.

Token resolution is lazy, so the env patch must stay active for the
duration of the download call.
"""
dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo")
raw_ok = _make_resp(200, b"data")

with patch.dict(os.environ, {"GITHUB_APM_PAT": "gta-tok"}, clear=True):
with _CRED_FILL_PATCH:
downloader = GitHubPackageDownloader()
with patch.object(downloader, "_resilient_get", return_value=raw_ok) as mock_get:
downloader.download_raw_file(dep_ref, "README.md", "main")

raw_headers = mock_get.call_args_list[0][1].get("headers", {})
assert "Authorization" in raw_headers

def test_falls_back_to_api_v1_when_raw_returns_non_200(self):
"""When the raw URL returns 404, the API v1 path is tried next."""
dep_ref = DependencyReference.parse("gitea.myorg.com/owner/repo")
expected = b"file via API"

with patch.object(
self.downloader, "_resilient_get",
side_effect=[_make_resp(404), _make_resp(200, expected)]
) as mock_get:
result = self.downloader.download_raw_file(dep_ref, "README.md", "main")

assert result == expected
urls = [c[0][0] for c in mock_get.call_args_list]
assert urls[0] == "https://gitea.myorg.com/owner/repo/raw/main/README.md"
assert "/api/v1/" in urls[1]


class TestGitLabApiVersionNegotiation:
"""API version negotiation: v1 -> v3 -> v4 for generic hosts."""

def setup_method(self):
with patch.dict(os.environ, {}, clear=True), _CRED_FILL_PATCH:
self.downloader = GitHubPackageDownloader()

def test_gitlab_v4_reached_after_v1_and_v3_return_404(self):
"""GitLab uses /api/v4/ -- negotiation must try v1, v3, then v4."""
dep_ref = DependencyReference.parse("gitlab.myorg.com/owner/repo")
expected = b"gitlab file content"

side_effects = [
_make_resp(404), # raw URL
_make_resp(404), # v1
_make_resp(404), # v3
_make_resp(200, expected), # v4
]
with patch.object(self.downloader, "_resilient_get", side_effect=side_effects) as mock_get:
result = self.downloader.download_raw_file(dep_ref, "skill.md", "main")

assert result == expected
urls = [c[0][0] for c in mock_get.call_args_list]
assert "/api/v1/" in urls[1]
assert "/api/v3/" in urls[2]
assert "/api/v4/" in urls[3]

Comment on lines +1733 to +1759
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests assume a GitLab fallback of '/api/v4/repos/{owner}/{repo}/contents/...', but that's not a valid GitLab API shape (GitLab uses /api/v4/projects/.../repository/files...). As written, this test suite will lock in behavior that won't work against real GitLab instances and may hide regressions. Either remove the GitLab framing (treat this as "try v1 then v3 for Gitea/Gogs") or update both implementation and tests to use GitLab's actual endpoints.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

def test_gitea_v1_succeeds_without_trying_v3_or_v4(self):
"""When v1 returns 200, v3 and v4 must never be called."""
dep_ref = DependencyReference.parse("gitea.example.com/owner/repo")
expected = b"gitea content"

with patch.object(
self.downloader, "_resilient_get",
side_effect=[_make_resp(404), _make_resp(200, expected)]
) as mock_get:
result = self.downloader.download_raw_file(dep_ref, "file.md", "main")

assert result == expected
urls = [c[0][0] for c in mock_get.call_args_list]
assert all("/api/v3/" not in u and "/api/v4/" not in u for u in urls)

def test_all_api_versions_404_raises_runtime_error(self):
"""When every API version returns 404 for both refs, a clear error is raised."""
dep_ref = DependencyReference.parse("git.example.com/owner/repo")
# raw + v1 + v3 + v4 for 'main', then v1 + v3 + v4 for 'master' fallback
side_effects = [_make_resp(404)] * 8

with patch.object(self.downloader, "_resilient_get", side_effect=side_effects):
with pytest.raises(RuntimeError, match="File not found"):
self.downloader.download_raw_file(dep_ref, "missing.md", "main")

def test_github_com_uses_api_github_com_not_api_v4(self):
"""github.com must still use api.github.com, never /api/v4/."""
dep_ref = DependencyReference.parse("owner/repo")
expected = b"github content"
api_ok = _make_resp(200, expected)

with patch.object(self.downloader, "_try_raw_download", return_value=None):
with patch.object(self.downloader, "_resilient_get", return_value=api_ok) as mock_get:
result = self.downloader.download_raw_file(dep_ref, "README.md", "main")

assert result == expected
url_called = mock_get.call_args_list[0][0][0]
assert url_called.startswith("https://api.github.com/")
assert "/api/v4/" not in url_called


if __name__ == '__main__':
pytest.main([__file__])
59 changes: 59 additions & 0 deletions tests/unit/test_generic_git_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,62 @@ def test_https_nested_group_with_virtual_ext_rejected(self):
"""HTTPS URLs can't embed virtual paths even with nested groups."""
with pytest.raises(ValueError, match="virtual file extension"):
DependencyReference.parse("https://gitlab.com/group/subgroup/file.prompt.md")


class TestGiteaVirtualPackageDetection:
"""Gitea-specific virtual package detection -- supplements TestFQDNVirtualPaths
and TestNestedGroupSupport with Gitea host fixtures and regression guards
for the len(path_segments) > 2 over-trigger."""

# --- Must NOT be virtual (nested-group repo, no virtual indicators) ---

def test_three_segment_gitea_path_is_not_virtual(self):
"""group/subgroup/repo on Gitea is a nested-group repo, not virtual."""
dep = DependencyReference.parse("gitea.myorg.com/group/subgroup/repo")
assert dep.host == "gitea.myorg.com"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Once the reference.py logic is fixed to match these expectations, consider also adding a test for the dict form to verify that users who do want virtual subdirs on Gitea can use the explicit syntax:

dep = DependencyReference.parse_from_dict({
    "git": "gitea.myorg.com/group/subgroup/repo",
    "path": "prompts/review.prompt.md",
})
assert dep.is_virtual is True

This would confirm both paths work: shorthand = repo, dict form = virtual.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert dep.repo_url == "group/subgroup/repo"
assert dep.is_virtual is False

def test_two_segment_gitea_path_is_not_virtual(self):
"""Simple owner/repo on a Gitea host is never virtual."""
dep = DependencyReference.parse("gitea.myorg.com/owner/repo")
assert dep.host == "gitea.myorg.com"
assert dep.repo_url == "owner/repo"
assert dep.is_virtual is False

def test_four_segment_generic_path_without_indicators_is_not_virtual(self):
"""Deep nested groups without file extensions or /collections/ are not virtual."""
dep = DependencyReference.parse("git.company.internal/team/skills/brand-guidelines")
assert dep.is_virtual is False
assert dep.repo_url == "team/skills/brand-guidelines"

Comment on lines +687 to +713
Copy link

Copilot AI Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new expectations here (e.g., gitea.myorg.com/group/subgroup/repo and other deep FQDN paths are not virtual unless there are explicit indicators) don't match the current generic-host heuristic in DependencyReference._detect_virtual_package(), which still marks any >2-segment generic-host path as virtual. Unless the parsing logic is updated accordingly, these tests will fail and the feature will still mis-parse nested-group repos on generic hosts.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

# --- Must be virtual (explicit virtual indicators) ---

def test_gitea_virtual_file_extension(self):
"""Path with virtual file extension on Gitea is detected as virtual."""
dep = DependencyReference.parse("gitea.myorg.com/owner/repo/file.prompt.md")
assert dep.host == "gitea.myorg.com"
assert dep.repo_url == "owner/repo"
assert dep.virtual_path == "file.prompt.md"
assert dep.is_virtual is True
assert dep.is_virtual_file() is True

def test_gitea_collections_path_is_virtual(self):
"""Path with /collections/ on Gitea is detected as a virtual collection."""
dep = DependencyReference.parse("gitea.myorg.com/owner/repo/collections/security")
assert dep.host == "gitea.myorg.com"
assert dep.repo_url == "owner/repo"
assert dep.virtual_path == "collections/security"
assert dep.is_virtual is True
assert dep.is_virtual_collection() is True

def test_dict_format_virtual_on_gitea(self):
"""Dict format with path= on Gitea host yields a virtual package."""
dep = DependencyReference.parse_from_dict({
"git": "gitea.myorg.com/owner/repo",
"path": "prompts/review.prompt.md",
})
assert dep.host == "gitea.myorg.com"
assert dep.repo_url == "owner/repo"
assert dep.virtual_path == "prompts/review.prompt.md"
assert dep.is_virtual is True
Loading