Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions src/fetchcode/composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# fetchcode is a free software tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and http://aboutcode.org
#
# This software is licensed under the Apache License version 2.0.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at:
# http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from packageurl import PackageURL

from fetchcode import fetch_json_response


class Composer:
    """
    This class handles Composer PURLs.
    """

    purl_pattern = "pkg:composer/.*"
    base_url = "https://repo.packagist.org"

    @classmethod
    def get_download_url(cls, purl):
        """
        Return the download URL for a Composer PURL.

        The package metadata is fetched from ``{base_url}/p2/{name}.json`` and
        the entries listed under ``packages[name]`` are scanned for one whose
        ``version`` or ``version_normalized`` equals the PURL version, with or
        without a leading ``v``.

        Return the ``dist.url`` of the first matching entry that has one, or
        None when the metadata has no usable ``packages`` mapping, the package
        is not listed, no version matches, or no matching entry carries a
        dist URL.

        Raise ValueError when the PURL has no name or no version.
        """
        purl = PackageURL.from_string(purl)

        if not purl.name or not purl.version:
            raise ValueError("Composer PURL must specify a name and version")

        name = f"{purl.namespace}/{purl.name}" if purl.namespace else purl.name

        url = f"{cls.base_url}/p2/{name}.json"
        data = fetch_json_response(url)

        # Be tolerant of an empty/None payload or an unexpected "packages" shape.
        packages = data.get("packages") if isinstance(data, dict) else None
        if not isinstance(packages, dict):
            return None

        # Tags are commonly published both as "1.2.3" and "v1.2.3".
        wanted_versions = {purl.version, f"v{purl.version}"}

        for package in packages.get(name) or []:
            if (
                package.get("version") not in wanted_versions
                and package.get("version_normalized") not in wanted_versions
            ):
                continue

            # "dist" may be absent or null for an entry; do not crash on it and
            # keep looking for another matching entry that has a URL.
            dist = package.get("dist") or {}
            download_url = dist.get("url")
            if download_url:
                return download_url

        return None
58 changes: 58 additions & 0 deletions src/fetchcode/cpan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# fetchcode is a free software tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and http://aboutcode.org
#
# This software is licensed under the Apache License version 2.0.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at:
# http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import urllib.parse

from packageurl import PackageURL

from fetchcode import fetch_json_response
from fetchcode.utils import _http_exists


class CPAN:
    """
    This class handles CPAN PURLs.
    """

    purl_pattern = "pkg:cpan/.*"
    base_url = "https://cpan.metacpan.org/"

    @classmethod
    def get_download_url(cls, purl: str):
        """
        Resolve a CPAN PURL to a verified, downloadable archive URL.
        Strategy: MetaCPAN API -> verified URL; fallback to author-based path if available.

        Return None when the PURL has no name or version, or when no candidate
        URL passes the ``_http_exists`` check.
        """
        p = PackageURL.from_string(purl)
        if not p.name or not p.version:
            return None

        parsed_name = urllib.parse.quote(p.name)
        parsed_version = urllib.parse.quote(p.version)
        api = f"https://fastapi.metacpan.org/v1/release/{parsed_name}/{parsed_version}"
        if _http_exists(api):
            # Fetch release data from MetaCPAN API
            # Example: https://fastapi.metacpan.org/v1/release/Some-Module/1.2.3
            # Guard against an empty/None payload before reading keys from it.
            data = fetch_json_response(url=api) or {}
            url = data.get("download_url") or data.get("archive")
            if url and _http_exists(url):
                return url

        # Fallback: build the authors/id/<A>/<AB>/<AUTHOR>/ path from the PURL
        # namespace, which is used here as the author id.
        author = p.namespace
        if not author:
            return None

        auth = author.upper()
        # base_url ends with "/", so strip it to avoid a double slash.
        authors_root = f"{cls.base_url.rstrip('/')}/authors/id"
        for ext in (".tar.gz", ".zip"):
            # Slicing already copes with a one-character author id.
            url = f"{authors_root}/{auth[0]}/{auth[:2]}/{auth}/{p.name}-{p.version}{ext}"
            if _http_exists(url):
                return url

        return None
46 changes: 46 additions & 0 deletions src/fetchcode/cran.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# fetchcode is a free software tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and http://aboutcode.org
#
# This software is licensed under the Apache License version 2.0.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at:
# http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from packageurl import PackageURL

from fetchcode.utils import _http_exists


class CRAN:
    """
    Resolver for CRAN package URLs (``pkg:cran/...``).
    """

    purl_pattern = "pkg:cran/.*"
    base_url = "https://cran.r-project.org"

    @classmethod
    def get_download_url(cls, purl: str):
        """
        Return the URL of the source tarball for a CRAN PURL, or None.

        The current ``src/contrib`` location is probed first and the per-package
        ``Archive`` location second; the first one accepted by ``_http_exists``
        is returned. None is returned when the PURL has no name or version, or
        when neither location exists.
        """
        package_url = PackageURL.from_string(purl)
        if not (package_url.name and package_url.version):
            return None

        contrib_root = f"{cls.base_url}/src/contrib"
        tarball = f"{package_url.name}_{package_url.version}.tar.gz"
        candidates = (
            f"{contrib_root}/{tarball}",
            f"{contrib_root}/Archive/{package_url.name}/{tarball}",
        )

        # Probe lazily, in order: the archive is only checked if contrib misses.
        for candidate in candidates:
            if _http_exists(candidate):
                return candidate
        return None
8 changes: 5 additions & 3 deletions src/fetchcode/download_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router

from fetchcode.composer import Composer
from fetchcode.cpan import CPAN
from fetchcode.cran import CRAN
from fetchcode.huggingface import Huggingface
from fetchcode.pypi import Pypi

package_registry = [
Pypi,
]
package_registry = [Pypi, CRAN, CPAN, Huggingface, Composer]

router = Router()

Expand Down
53 changes: 53 additions & 0 deletions src/fetchcode/huggingface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# fetchcode is a free software tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and http://aboutcode.org
#
# This software is licensed under the Apache License version 2.0.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at:
# http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from packageurl import PackageURL

from fetchcode import fetch_json_response


class Huggingface:
    """
    This class handles huggingface PURLs.
    """

    purl_pattern = "pkg:huggingface/.*"

    @classmethod
    def get_download_url(cls, purl: str):
        """
        Return the download URL for a Hugging Face PURL.

        The model listing is fetched from
        ``https://huggingface.co/api/models/{model_id}?revision={revision}``,
        where the revision is the PURL version or ``main`` when there is none.
        The first entry of ``siblings`` whose ``rfilename`` ends with an allowed
        extension (``.bin``) is turned into a ``/resolve/`` URL.

        Return None when the PURL has no name or when no sibling qualifies.
        """
        p = PackageURL.from_string(purl)
        if not p.name:
            return None

        revision = p.version or "main"
        model_id = f"{p.namespace}/{p.name}" if p.namespace else p.name

        api_url = f"https://huggingface.co/api/models/{model_id}?revision={revision}"
        # Tolerate an empty/None payload and a null "siblings" value.
        data = fetch_json_response(api_url) or {}
        siblings = data.get("siblings") or []

        ALLOWED_EXECUTABLE_EXTS = (".bin",)

        for sib in siblings:
            file_name = sib.get("rfilename")
            # An entry without "rfilename" would otherwise crash on .endswith().
            if not file_name or not file_name.endswith(ALLOWED_EXECUTABLE_EXTS):
                continue
            return f"https://huggingface.co/{model_id}/resolve/{revision}/{file_name}"

        return None
2 changes: 1 addition & 1 deletion src/fetchcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

class Pypi:
"""
This class handles Cargo PURLs.
This class handles Pypi PURLs.
"""

purl_pattern = "pkg:pypi/.*"
Expand Down
11 changes: 11 additions & 0 deletions src/fetchcode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,14 @@ def get_first_three_md5_hash_characters(podname):
create a hash (using md5) of it and take the first three characters."
"""
return md5_hasher(podname.encode("utf-8")).hexdigest()[0:3]


def _http_exists(url: str) -> bool:
"""
Lightweight existence check using a ranged GET so CDNs/servers that ignore HEAD still work.
"""
try:
resp = make_head_request(url, headers={"Range": "bytes=0-0"})
return resp is not None and resp.status_code in (200, 206)
except Exception:
return False
81 changes: 81 additions & 0 deletions tests/test_composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# fetchcode is a free software tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/fetchcode for support and download.
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# http://nexb.com and http://aboutcode.org
#
# This software is licensed under the Apache License version 2.0.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at:
# http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

from unittest.mock import patch

import pytest

from fetchcode.composer import Composer


def test_valid_composer_package_with_namespace():
    """A namespaced PURL resolves to the dist URL of the matching version."""
    name = "laravel/framework"
    dist_url = "https://github.com/laravel/framework/archive/refs/tags/v10.0.0.zip"
    metadata = {
        "packages": {
            name: [{"version": "10.0.0", "dist": {"url": dist_url}}],
        }
    }

    with patch("fetchcode.composer.fetch_json_response", return_value=metadata) as fetch_mock:
        resolved = Composer.get_download_url("pkg:composer/laravel/framework@10.0.0")

    assert resolved == dist_url
    # The metadata must be requested exactly once, from the p2 endpoint.
    fetch_mock.assert_called_once_with(f"https://repo.packagist.org/p2/{name}.json")


def test_valid_composer_package_without_namespace():
    """A PURL without a namespace is looked up by its bare package name."""
    name = "some-package"
    dist_url = "https://example.org/some-package-1.0.0.zip"
    metadata = {
        "packages": {
            name: [{"version": "1.0.0", "dist": {"url": dist_url}}],
        }
    }

    with patch("fetchcode.composer.fetch_json_response", return_value=metadata) as fetch_mock:
        resolved = Composer.get_download_url("pkg:composer/some-package@1.0.0")

    assert resolved == dist_url
    # The metadata must be requested exactly once, from the p2 endpoint.
    fetch_mock.assert_called_once_with(f"https://repo.packagist.org/p2/{name}.json")


def test_version_not_found_returns_none():
    """None is returned when the package is listed but the version is not."""
    name = "laravel/framework"
    only_older_release = [{"version": "9.0.0", "dist": {"url": "https://old.zip"}}]
    metadata = {"packages": {name: only_older_release}}

    with patch("fetchcode.composer.fetch_json_response", return_value=metadata):
        resolved = Composer.get_download_url("pkg:composer/laravel/framework@10.0.0")

    assert resolved is None


def test_missing_packages_key_returns_none():
    """None is returned when the metadata has no "packages" key at all."""
    empty_metadata = {}

    with patch("fetchcode.composer.fetch_json_response", return_value=empty_metadata):
        resolved = Composer.get_download_url("pkg:composer/laravel/framework@10.0.0")

    assert resolved is None


def test_missing_package_name_in_data_returns_none():
    """None is returned when "packages" only lists some other package."""
    metadata = {"packages": {"some/other": []}}

    with patch("fetchcode.composer.fetch_json_response", return_value=metadata):
        resolved = Composer.get_download_url("pkg:composer/laravel/framework@10.0.0")

    assert resolved is None


def test_missing_version_raises():
    """A PURL without a version is rejected with a ValueError."""
    versionless_purl = "pkg:composer/laravel/framework"
    expected_message = "Composer PURL must specify a name and version"

    with pytest.raises(ValueError, match=expected_message):
        Composer.get_download_url(versionless_purl)
Loading