From e153a4020df378d7611b2da86afb2d3902487e90 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 17:59:31 +0530 Subject: [PATCH 01/11] Add batch 2 support for purl to download URL Signed-off-by: Tushar Goel --- src/fetchcode/cran.py | 42 +++++++++++++++++++++ src/fetchcode/utils.py | 11 ++++++ tests/test_cran.py | 86 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 src/fetchcode/cran.py create mode 100644 tests/test_cran.py diff --git a/src/fetchcode/cran.py b/src/fetchcode/cran.py new file mode 100644 index 0000000..402d7ac --- /dev/null +++ b/src/fetchcode/cran.py @@ -0,0 +1,42 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from packageurl import PackageURL + +from fetchcode.utils import _http_exists + + +class Cran: + def get_download_url(purl: str): + """ + Resolve a CRAN PURL to a verified, downloadable source tarball URL. + Tries current contrib first, then Archive. + """ + p = PackageURL.from_string(purl) + if not p.name or not p.version: + return None + + current_url = f"https://cran.r-project.org/src/contrib/{p.name}_{p.version}.tar.gz" + if _http_exists(current_url): + return current_url + + archive_url = ( + f"https://cran.r-project.org/src/contrib/Archive/{p.name}/{p.name}_{p.version}.tar.gz" + ) + if _http_exists(archive_url): + return archive_url + + return None diff --git a/src/fetchcode/utils.py b/src/fetchcode/utils.py index 81ac9df..43345d9 100644 --- a/src/fetchcode/utils.py +++ b/src/fetchcode/utils.py @@ -243,3 +243,14 @@ def get_first_three_md5_hash_characters(podname): create a hash (using md5) of it and take the first three characters." """ return md5_hasher(podname.encode("utf-8")).hexdigest()[0:3] + + +def _http_exists(url: str) -> bool: + """ + Lightweight existence check using a ranged GET so CDNs/servers that ignore HEAD still work. + """ + try: + resp = make_head_request(url, headers={"Range": "bytes=0-0"}) + return resp is not None and resp.status_code in (200, 206) + except Exception: + return False diff --git a/tests/test_cran.py b/tests/test_cran.py new file mode 100644 index 0000000..7aa27e1 --- /dev/null +++ b/tests/test_cran.py @@ -0,0 +1,86 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from unittest.mock import patch + +import pytest + +from fetchcode.cran import Cran + +get_download_url = Cran.get_download_url + + +@pytest.fixture +def valid_purl(): + return "pkg:cran/dplyr@1.0.0" + + +def test_current_url_exists(valid_purl): + current_url = "https://cran.r-project.org/src/contrib/dplyr_1.0.0.tar.gz" + + with patch("fetchcode.cran._http_exists", return_value=True) as mock_check: + result = get_download_url(valid_purl) + assert result == current_url + mock_check.assert_called_once_with(current_url) + + +def test_fallback_to_archive(valid_purl): + current_url = "https://cran.r-project.org/src/contrib/dplyr_1.0.0.tar.gz" + archive_url = "https://cran.r-project.org/src/contrib/Archive/dplyr/dplyr_1.0.0.tar.gz" + + def side_effect(url): + return url == archive_url + + with patch("fetchcode.cran._http_exists", side_effect=side_effect) as mock_check: + result = get_download_url(valid_purl) + assert result == archive_url + assert mock_check.call_count == 2 + mock_check.assert_any_call(current_url) + mock_check.assert_any_call(archive_url) + + +def test_neither_url_exists(valid_purl): + with patch("fetchcode.cran._http_exists", return_value=False) as mock_check: + result = get_download_url(valid_purl) + assert result is None + assert mock_check.call_count == 2 + + +def test_missing_version_returns_none(): + result = get_download_url("pkg:cran/dplyr") + assert result is None + + +def test_version_with_dash(): + purl = "pkg:cran/somepkg@1.2-3" + + with patch("fetchcode.cran._http_exists", return_value=True) as mock_check: + result = get_download_url(purl) + assert result == "https://cran.r-project.org/src/contrib/somepkg_1.2-3.tar.gz" + mock_check.assert_called_once_with( + "https://cran.r-project.org/src/contrib/somepkg_1.2-3.tar.gz" + ) + + +def test_name_with_dot(): + purl = "pkg:cran/foo.bar@2.0.1" + + with patch("fetchcode.cran._http_exists", return_value=True) as mock_check: + result = get_download_url(purl) + assert result == "https://cran.r-project.org/src/contrib/foo.bar_2.0.1.tar.gz" + mock_check.assert_called_once_with( + "https://cran.r-project.org/src/contrib/foo.bar_2.0.1.tar.gz" + ) From ffdf780153f7e72e3c39b3429ffc696ce05b7795 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 18:03:26 +0530 Subject: [PATCH 02/11] Fix typos and structure of CRAN class Signed-off-by: Tushar Goel --- src/fetchcode/cran.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/fetchcode/cran.py b/src/fetchcode/cran.py index 402d7ac..7dca8a5 100644 --- a/src/fetchcode/cran.py +++ b/src/fetchcode/cran.py @@ -20,7 +20,15 @@ class Cran: - def get_download_url(purl: str): + """ + This class handles CRAN PURLs. + """ + + purl_pattern = "pkg:cran/.*" + base_url = "https://cran.r-project.org" + + @classmethod + def get_download_url(cls, purl: str): """ Resolve a CRAN PURL to a verified, downloadable source tarball URL. Tries current contrib first, then Archive. @@ -29,13 +37,11 @@ def get_download_url(purl: str): if not p.name or not p.version: return None - current_url = f"https://cran.r-project.org/src/contrib/{p.name}_{p.version}.tar.gz" + current_url = f"{cls.base_url}/src/contrib/{p.name}_{p.version}.tar.gz" if _http_exists(current_url): return current_url - archive_url = ( - f"https://cran.r-project.org/src/contrib/Archive/{p.name}/{p.name}_{p.version}.tar.gz" - ) + archive_url = f"{cls.base_url}/src/contrib/Archive/{p.name}/{p.name}_{p.version}.tar.gz" if _http_exists(archive_url): return archive_url From 03e1dfe64aea4730990eb1b102acae3171700593 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 18:04:03 +0530 Subject: [PATCH 03/11] Fix typos Signed-off-by: Tushar Goel --- src/fetchcode/pypi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fetchcode/pypi.py b/src/fetchcode/pypi.py index 96b6916..b8c4038 100644 --- a/src/fetchcode/pypi.py +++ b/src/fetchcode/pypi.py @@ -23,7 +23,7 @@ class Pypi: """ - This class handles Cargo PURLs. + This class handles Pypi PURLs. """ purl_pattern = "pkg:pypi/.*" From 786756bd72ab7cb6af13cbdde98a1137088f9481 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 19:28:56 +0530 Subject: [PATCH 04/11] Add support for composer download URLs Signed-off-by: Tushar Goel --- src/fetchcode/composer.py | 52 +++++++++++++++++++++++++++++++++ src/fetchcode/cpan.py | 53 ++++++++++++++++++++++++++++++++++ src/fetchcode/cran.py | 4 +-- src/fetchcode/download_urls.py | 8 +++-- src/fetchcode/huggingface.py | 53 ++++++++++++++++++++++++++++++++++ 5 files changed, 164 insertions(+), 6 deletions(-) create mode 100644 src/fetchcode/composer.py create mode 100644 src/fetchcode/cpan.py create mode 100644 src/fetchcode/huggingface.py diff --git a/src/fetchcode/composer.py b/src/fetchcode/composer.py new file mode 100644 index 0000000..af38167 --- /dev/null +++ b/src/fetchcode/composer.py @@ -0,0 +1,52 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from packageurl import PackageURL + +from fetchcode import fetch_json_response + + +class Composer: + + purl_pattern = "pkg:composer/.*" + base_url = "https://repo.packagist.org" + + @classmethod + def get_download_url(cls, purl): + + """ + Return the download URL for a Composer PURL. + """ + purl = PackageURL.from_string(purl) + + if not purl.name or not purl.version: + raise ValueError("Composer PURL must specify a name and version") + + name = f"{purl.namespace}/{purl.name}" if purl.namespace else purl.name + + url = f"{cls.base_url}/p2/{name}.json " + data = fetch_json_response(url) + + if "packages" not in data: + return + + if name not in data["packages"]: + return + + for package in data["packages"][name]: + if package["version"] == purl.version: + download_url = package["dist"].get("url") + return download_url diff --git a/src/fetchcode/cpan.py b/src/fetchcode/cpan.py new file mode 100644 index 0000000..68f2425 --- /dev/null +++ b/src/fetchcode/cpan.py @@ -0,0 +1,53 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from packageurl import PackageURL + +from fetchcode import fetch_json_response +from fetchcode.utils import _http_exists + + +class CPAN: + purl_pattern = "pkg:cpan/.*" + base_url = "https://cpan.metacpan.org/" + + def get_download_url(purl: str): + """ + Resolve a CPAN PURL to a verified, downloadable archive URL. + Strategy: MetaCPAN API -> verified URL; fallback to author-based path if available. + """ + p = PackageURL.from_string(purl) + if not p.name or not p.version: + return None + + try: + api = f"https://fastapi.metacpan.org/v1/release/{urllib.parse.quote(p.name)}/{urllib.parse.quote(p.version)}" + data = fetch_json_response(api, stream=False, timeout=20) + url = data.get("download_url") or data.get("archive") + if url and _http_exists(url): + return url + except Exception: + pass + + author = p.namespace + if author: + auth = author.upper() + a = auth[0] + ab = auth[:2] if len(auth) >= 2 else auth + for ext in (".tar.gz", ".zip"): + url = f"https://cpan.metacpan.org/authors/id/{a}/{ab}/{auth}/{p.name}-{p.version}{ext}" + if _http_exists(url): + return url diff --git a/src/fetchcode/cran.py b/src/fetchcode/cran.py index 7dca8a5..52a49ed 100644 --- a/src/fetchcode/cran.py +++ b/src/fetchcode/cran.py @@ -19,7 +19,7 @@ from fetchcode.utils import _http_exists -class Cran: +class CRAN: """ This class handles CRAN PURLs. """ @@ -44,5 +44,3 @@ def get_download_url(cls, purl: str): archive_url = f"{cls.base_url}/src/contrib/Archive/{p.name}/{p.name}_{p.version}.tar.gz" if _http_exists(archive_url): return archive_url - - return None diff --git a/src/fetchcode/download_urls.py b/src/fetchcode/download_urls.py index 8594006..c0e89b3 100644 --- a/src/fetchcode/download_urls.py +++ b/src/fetchcode/download_urls.py @@ -17,11 +17,13 @@ from packageurl.contrib.route import NoRouteAvailable from packageurl.contrib.route import Router +from fetchcode.composer import Composer +from fetchcode.cpan import CPAN +from fetchcode.cran import CRAN +from fetchcode.huggingface import Huggingface from fetchcode.pypi import Pypi -package_registry = [ - Pypi, -] +package_registry = [Pypi, CRAN, CPAN, Huggingface, Composer] router = Router() diff --git a/src/fetchcode/huggingface.py b/src/fetchcode/huggingface.py new file mode 100644 index 0000000..2740f9a --- /dev/null +++ b/src/fetchcode/huggingface.py @@ -0,0 +1,53 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from packageurl import PackageURL + +from fetchcode import fetch_json_response + + +class Huggingface: + """ + This class handles huggingface PURLs. + """ + + purl_pattern = "pkg:huggingface/.*" + + @classmethod + def get_download_url(cls, purl: str): + """ + Return the download URL for a Hugging Face PURL. + """ + p = PackageURL.from_string(purl) + if not p.name: + return None + + revision = p.version or "main" + model_id = p.name + q = p.qualifiers or {} + + api_url = f"https://huggingface.co/api/models/{model_id}?revision={revision}" + data = fetch_json_response(api_url) + siblings = data.get("siblings", []) + + ALLOWED_EXECUTABLE_EXTS = (".bin",) + + for sib in siblings: + file_name = sib.get("rfilename") + if not file_name.endswith(ALLOWED_EXECUTABLE_EXTS): + continue + url = f"https://huggingface.co/{model_id}/resolve/{revision}/{file_name}" + return url From a1bb500e6b638749d63d60e426c7a5f5649e85d8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 19:32:11 +0530 Subject: [PATCH 05/11] Fix CRAN tests Signed-off-by: Tushar Goel --- tests/test_cran.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cran.py b/tests/test_cran.py index 7aa27e1..021df51 100644 --- a/tests/test_cran.py +++ b/tests/test_cran.py @@ -18,9 +18,9 @@ import pytest -from fetchcode.cran import Cran +from fetchcode.cran import CRAN -get_download_url = Cran.get_download_url +get_download_url = CRAN.get_download_url @pytest.fixture From 2add3e3b2817ac8219279b24cf4528712bfdb5ac Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 20:11:05 +0530 Subject: [PATCH 06/11] Add tests Signed-off-by: Tushar Goel --- src/fetchcode/cpan.py | 8 +++- tests/test_composer.py | 81 +++++++++++++++++++++++++++++++++++++++ tests/test_cpan.py | 86 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 tests/test_composer.py create mode 100644 tests/test_cpan.py diff --git a/src/fetchcode/cpan.py b/src/fetchcode/cpan.py index 68f2425..41c76a4 100644 --- a/src/fetchcode/cpan.py +++ b/src/fetchcode/cpan.py @@ -14,6 +14,8 @@ # CONDITIONS OF ANY KIND, either express or implied. See the License for the # specific language governing permissions and limitations under the License. +import urllib.parse + from packageurl import PackageURL from fetchcode import fetch_json_response @@ -34,8 +36,10 @@ def get_download_url(purl: str): return None try: - api = f"https://fastapi.metacpan.org/v1/release/{urllib.parse.quote(p.name)}/{urllib.parse.quote(p.version)}" - data = fetch_json_response(api, stream=False, timeout=20) + parsed_name = urllib.parse.quote(p.name) + parsed_version = urllib.parse.quote(p.version) + api = f"https://fastapi.metacpan.org/v1/release/{parsed_name}/{parsed_version}" + data = fetch_json_response(url=api) url = data.get("download_url") or data.get("archive") if url and _http_exists(url): return url diff --git a/tests/test_composer.py b/tests/test_composer.py new file mode 100644 index 0000000..bcfcbd6 --- /dev/null +++ b/tests/test_composer.py @@ -0,0 +1,81 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from unittest.mock import patch + +import pytest + +from fetchcode.composer import Composer + + +def test_valid_composer_package_with_namespace(): + purl = "pkg:composer/laravel/framework@10.0.0" + name = "laravel/framework" + expected_url = f"https://repo.packagist.org/p2/{name}.json " + download_url = "https://github.com/laravel/framework/archive/refs/tags/v10.0.0.zip" + + mock_data = {"packages": {name: [{"version": "10.0.0", "dist": {"url": download_url}}]}} + + with patch("fetchcode.composer.fetch_json_response", return_value=mock_data) as mock_fetch: + result = Composer.get_download_url(purl) + assert result == download_url + mock_fetch.assert_called_once_with(expected_url) + + +def test_valid_composer_package_without_namespace(): + purl = "pkg:composer/some-package@1.0.0" + name = "some-package" + expected_url = f"https://repo.packagist.org/p2/{name}.json " + download_url = "https://example.org/some-package-1.0.0.zip" + + mock_data = {"packages": {name: [{"version": "1.0.0", "dist": {"url": download_url}}]}} + + with patch("fetchcode.composer.fetch_json_response", return_value=mock_data) as mock_fetch: + result = Composer.get_download_url(purl) + assert result == download_url + mock_fetch.assert_called_once_with(expected_url) + + +def test_version_not_found_returns_none(): + purl = "pkg:composer/laravel/framework@10.0.0" + name = "laravel/framework" + mock_data = {"packages": {name: [{"version": "9.0.0", "dist": {"url": "https://old.zip"}}]}} + + with patch("fetchcode.composer.fetch_json_response", return_value=mock_data): + result = Composer.get_download_url(purl) + assert result is None + + +def test_missing_packages_key_returns_none(): + purl = "pkg:composer/laravel/framework@10.0.0" + with patch("fetchcode.composer.fetch_json_response", return_value={}): + result = Composer.get_download_url(purl) + assert result is None + + +def test_missing_package_name_in_data_returns_none(): + purl = "pkg:composer/laravel/framework@10.0.0" + mock_data = {"packages": {"some/other": []}} + + with patch("fetchcode.composer.fetch_json_response", return_value=mock_data): + result = Composer.get_download_url(purl) + assert result is None + + +def test_missing_version_raises(): + purl = "pkg:composer/laravel/framework" + with pytest.raises(ValueError, match="Composer PURL must specify a name and version"): + Composer.get_download_url(purl) diff --git a/tests/test_cpan.py b/tests/test_cpan.py new file mode 100644 index 0000000..7e487f3 --- /dev/null +++ b/tests/test_cpan.py @@ -0,0 +1,86 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from unittest.mock import patch + +import pytest + +from fetchcode.cpan import CPAN + +get_download_url = CPAN.get_download_url + + +@pytest.fixture +def valid_purl(): + return "pkg:cpan/EXAMPLE/Some-Module@1.2.3" + + +def test_success_from_metacpan_api(valid_purl): + expected_url = "https://cpan.metacpan.org/authors/id/E/EX/EXAMPLE/Some-Module-1.2.3.tar.gz" + + with patch("fetchcode.cpan.fetch_json_response") as mock_fetch, patch( + "fetchcode.cpan._http_exists" + ) as mock_exists: + mock_fetch.return_value = {"download_url": expected_url} + mock_exists.return_value = True + result = get_download_url(valid_purl) + assert result == expected_url + mock_fetch.assert_called_once() + mock_exists.assert_called_once_with(expected_url) + + +def test_fallback_to_author_path(valid_purl): + expected_url = "https://cpan.metacpan.org/authors/id/E/EX/EXAMPLE/Some-Module-1.2.3.tar.gz" + + with patch("fetchcode.cpan.fetch_json_response", side_effect=Exception("API error")), patch( + "fetchcode.cpan._http_exists" + ) as mock_exists: + + mock_exists.side_effect = lambda url: url.endswith(".tar.gz") + + result = get_download_url(valid_purl) + assert result == expected_url + assert mock_exists.call_count >= 1 + + +def test_author_zip_fallback(valid_purl): + tar_url = "https://cpan.metacpan.org/authors/id/E/EX/EXAMPLE/Some-Module-1.2.3.tar.gz" + zip_url = "https://cpan.metacpan.org/authors/id/E/EX/EXAMPLE/Some-Module-1.2.3.zip" + + with patch("fetchcode.cpan.fetch_json_response", return_value={}), patch( + "fetchcode.cpan._http_exists" + ) as mock_exists: + + mock_exists.side_effect = lambda url: url == zip_url + + result = get_download_url(valid_purl) + assert result == zip_url + assert mock_exists.call_count == 2 + assert tar_url in [call[0][0] for call in mock_exists.call_args_list] + + +def test_neither_api_nor_fallback_works(valid_purl): + with patch("fetchcode.cpan.fetch_json_response", return_value={}), patch( + "fetchcode.cpan._http_exists", return_value=False + ) as mock_exists: + + result = get_download_url(valid_purl) + assert result is None + assert mock_exists.call_count == 2 + + +def test_missing_name_or_version(): + assert get_download_url("pkg:cpan/EXAMPLE/Some-Module") is None From abe6416321a3a4d57602811b35ec56173ba6abc1 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 20:12:32 +0530 Subject: [PATCH 07/11] Add tests for huggingface Signed-off-by: Tushar Goel --- src/fetchcode/huggingface.py | 2 +- tests/test_huggingface.py | 65 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tests/test_huggingface.py diff --git a/src/fetchcode/huggingface.py b/src/fetchcode/huggingface.py index 2740f9a..c3b63c1 100644 --- a/src/fetchcode/huggingface.py +++ b/src/fetchcode/huggingface.py @@ -36,7 +36,7 @@ def get_download_url(cls, purl: str): return None revision = p.version or "main" - model_id = p.name + model_id = f"{p.namespace}/{p.name}" if p.namespace else p.name q = p.qualifiers or {} api_url = f"https://huggingface.co/api/models/{model_id}?revision={revision}" diff --git a/tests/test_huggingface.py b/tests/test_huggingface.py new file mode 100644 index 0000000..45fbf6c --- /dev/null +++ b/tests/test_huggingface.py @@ -0,0 +1,65 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from unittest.mock import patch + +from fetchcode.huggingface import Huggingface + + +def test_returns_bin_file_url(): + purl = "pkg:huggingface/facebook/opt-350m" + revision = "main" + expected_url = "https://huggingface.co/facebook/opt-350m/resolve/main/pytorch_model.bin" + + mock_data = { + "siblings": [ + {"rfilename": "config.json"}, + {"rfilename": "pytorch_model.bin"}, + ] + } + + with patch("fetchcode.huggingface.fetch_json_response", return_value=mock_data): + result = Huggingface.get_download_url(purl) + assert result == expected_url + + +def test_no_executable_files_returns_none(): + purl = "pkg:huggingface/facebook/opt-350m" + mock_data = { + "siblings": [ + {"rfilename": "config.json"}, + {"rfilename": "tokenizer.json"}, + ] + } + + with patch("fetchcode.huggingface.fetch_json_response", return_value=mock_data): + result = Huggingface.get_download_url(purl) + assert result is None + + +def test_custom_revision_in_purl(): + purl = "pkg:huggingface/facebook/opt-350m@v1.0" + expected_url = "https://huggingface.co/facebook/opt-350m/resolve/v1.0/pytorch_model.bin" + + mock_data = { + "siblings": [ + {"rfilename": "pytorch_model.bin"}, + ] + } + + with patch("fetchcode.huggingface.fetch_json_response", return_value=mock_data): + result = Huggingface.get_download_url(purl) + assert result == expected_url From 8a203729960ee07712887cb2ab52165310617514 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 31 Jul 2025 20:24:36 +0530 Subject: [PATCH 08/11] Add tests for download URLs Signed-off-by: Tushar Goel --- tests/test_download_urls.py | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/test_download_urls.py diff --git a/tests/test_download_urls.py b/tests/test_download_urls.py new file mode 100644 index 0000000..464de85 --- /dev/null +++ b/tests/test_download_urls.py @@ -0,0 +1,46 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from unittest.mock import patch + +import pytest +from packageurl.contrib.route import NoRouteAvailable + +from fetchcode.download_urls import download_url +from fetchcode.download_urls import router + + +def test_right_class_being_called_for_the_purls(): + purls = [ + "pkg:pypi/requests@2.31.0", + "pkg:cpan/EXAMPLE/Some-Module@1.2.3", + "pkg:composer/laravel/framework@10.0.0", + "pkg:cran/dplyr@1.0.0", + ] + + with patch("fetchcode.download_urls.Router.process") as mock_fetch: + for purl in purls: + assert download_url(purl) is not None, f"Failed for purl: {purl}" + + +def test_with_invalid_purls(): + invalid_purls = [ + "pkg:invalid/requests", + "pkg:xyz/dplyr", + ] + for purl in invalid_purls: + with pytest.raises(NoRouteAvailable): + router.process(purl) From d3146274e02eecbc1c76139c4766d937cefc1be0 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 1 Aug 2025 09:06:53 +0530 Subject: [PATCH 09/11] Address review comments Signed-off-by: Tushar Goel --- src/fetchcode/cpan.py | 29 +++++++++++++++-------------- tests/test_cpan.py | 6 +++--- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/fetchcode/cpan.py b/src/fetchcode/cpan.py index 41c76a4..8e446ec 100644 --- a/src/fetchcode/cpan.py +++ b/src/fetchcode/cpan.py @@ -35,23 +35,24 @@ def get_download_url(purl: str): if not p.name or not p.version: return None - try: - parsed_name = urllib.parse.quote(p.name) - parsed_version = urllib.parse.quote(p.version) - api = f"https://fastapi.metacpan.org/v1/release/{parsed_name}/{parsed_version}" + parsed_name = urllib.parse.quote(p.name) + parsed_version = urllib.parse.quote(p.version) + api = f"https://fastapi.metacpan.org/v1/release/{parsed_name}/{parsed_version}" + if _http_exists(api): + # Fetch release data from MetaCPAN API + # Example: https://fastapi.metacpan.org/v1/release/Some-Module/1.2.3 data = fetch_json_response(url=api) url = data.get("download_url") or data.get("archive") if url and _http_exists(url): return url - except Exception: - pass author = p.namespace - if author: - auth = author.upper() - a = auth[0] - ab = auth[:2] if len(auth) >= 2 else auth - for ext in (".tar.gz", ".zip"): - url = f"https://cpan.metacpan.org/authors/id/{a}/{ab}/{auth}/{p.name}-{p.version}{ext}" - if _http_exists(url): - return url + if not author: + return + auth = author.upper() + a = auth[0] + ab = auth[:2] if len(auth) >= 2 else auth + for ext in (".tar.gz", ".zip"): + url = f"https://cpan.metacpan.org/authors/id/{a}/{ab}/{auth}/{p.name}-{p.version}{ext}" + if _http_exists(url): + return url diff --git a/tests/test_cpan.py b/tests/test_cpan.py index 7e487f3..374af41 100644 --- a/tests/test_cpan.py +++ b/tests/test_cpan.py @@ -39,7 +39,7 @@ def test_success_from_metacpan_api(valid_purl): result = get_download_url(valid_purl) assert result == expected_url mock_fetch.assert_called_once() - mock_exists.assert_called_once_with(expected_url) + assert mock_exists.call_count == 2 def test_fallback_to_author_path(valid_purl): @@ -68,7 +68,7 @@ def test_author_zip_fallback(valid_purl): result = get_download_url(valid_purl) assert result == zip_url - assert mock_exists.call_count == 2 + assert mock_exists.call_count == 3 assert tar_url in [call[0][0] for call in mock_exists.call_args_list] @@ -79,7 +79,7 @@ def test_neither_api_nor_fallback_works(valid_purl): result = get_download_url(valid_purl) assert result is None - assert mock_exists.call_count == 2 + assert mock_exists.call_count == 3 def test_missing_name_or_version(): From db8a4feea60ee680c949ff6003f7476772a1889b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 1 Aug 2025 11:41:49 +0530 Subject: [PATCH 10/11] Address review comments Signed-off-by: Tushar Goel --- src/fetchcode/composer.py | 2 +- tests/test_composer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fetchcode/composer.py b/src/fetchcode/composer.py index af38167..8e66371 100644 --- a/src/fetchcode/composer.py +++ b/src/fetchcode/composer.py @@ -37,7 +37,7 @@ def get_download_url(cls, purl): name = f"{purl.namespace}/{purl.name}" if purl.namespace else purl.name - url = f"{cls.base_url}/p2/{name}.json " + url = f"{cls.base_url}/p2/{name}.json" data = fetch_json_response(url) if "packages" not in data: diff --git a/tests/test_composer.py b/tests/test_composer.py index bcfcbd6..443c681 100644 --- a/tests/test_composer.py +++ b/tests/test_composer.py @@ -24,7 +24,7 @@ def test_valid_composer_package_with_namespace(): purl = "pkg:composer/laravel/framework@10.0.0" name = "laravel/framework" - expected_url = f"https://repo.packagist.org/p2/{name}.json " + expected_url = f"https://repo.packagist.org/p2/{name}.json" download_url = "https://github.com/laravel/framework/archive/refs/tags/v10.0.0.zip" mock_data = {"packages": {name: [{"version": "10.0.0", "dist": {"url": download_url}}]}} @@ -38,7 +38,7 @@ def test_valid_composer_package_with_namespace(): def test_valid_composer_package_without_namespace(): purl = "pkg:composer/some-package@1.0.0" name = "some-package" - expected_url = f"https://repo.packagist.org/p2/{name}.json " + expected_url = f"https://repo.packagist.org/p2/{name}.json" download_url = "https://example.org/some-package-1.0.0.zip" mock_data = {"packages": {name: [{"version": "1.0.0", "dist": {"url": download_url}}]}} From 58b3c0ba39965ae405bad237638dee2634a4311e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 1 Aug 2025 12:06:11 +0530 Subject: [PATCH 11/11] Add version handling for composer Signed-off-by: Tushar Goel --- src/fetchcode/composer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/fetchcode/composer.py b/src/fetchcode/composer.py index 8e66371..32b73f0 100644 --- a/src/fetchcode/composer.py +++ b/src/fetchcode/composer.py @@ -47,6 +47,11 @@ def get_download_url(cls, purl): return for package in data["packages"][name]: - if package["version"] == purl.version: + if ( + package.get("version") == purl.version + or package.get("version") == f"v{purl.version}" + or package.get("version_normalized") == purl.version + or package.get("version_normalized") == f"v{purl.version}" + ): download_url = package["dist"].get("url") return download_url