From b6140b036cc5f7afe6dae8bebab078be52b140e9 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 29 Jul 2025 12:48:26 +0530 Subject: [PATCH 1/5] Add Pypi download URL support Signed-off-by: Tushar Goel --- src/fetchcode/__init__.py | 15 +++++++ src/fetchcode/download_urls.py | 41 +++++++++++++++++ src/fetchcode/pypi.py | 62 ++++++++++++++++++++++++++ tests/test_pypi.py | 80 ++++++++++++++++++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 src/fetchcode/download_urls.py create mode 100644 src/fetchcode/pypi.py create mode 100644 tests/test_pypi.py diff --git a/src/fetchcode/__init__.py b/src/fetchcode/__init__.py index 5d05242..82523d6 100644 --- a/src/fetchcode/__init__.py +++ b/src/fetchcode/__init__.py @@ -44,6 +44,7 @@ def fetch_http(url, location): `url` URL string saving the content in a file at `location` """ r = requests.get(url) + with open(location, "wb") as f: f.write(r.content) @@ -106,3 +107,17 @@ def fetch(url): return fetchers.get(scheme)(url, location) raise Exception("Not a supported/known scheme.") + + +def fetch_json_response(url): + """ + Fetch a JSON response from the given URL and return the parsed JSON data. + """ + response = requests.get(url) + if response.status_code != 200: + raise Exception(f"Failed to fetch {url}: {response.status_code} {response.reason}") + + try: + return response.json() + except ValueError as e: + raise Exception(f"Failed to parse JSON from {url}: {str(e)}") diff --git a/src/fetchcode/download_urls.py b/src/fetchcode/download_urls.py new file mode 100644 index 0000000..8594006 --- /dev/null +++ b/src/fetchcode/download_urls.py @@ -0,0 +1,41 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. 
+# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +from packageurl.contrib.route import NoRouteAvailable +from packageurl.contrib.route import Router + +from fetchcode.pypi import Pypi + +package_registry = [ + Pypi, +] + +router = Router() + +for pkg_class in package_registry: + router.append(pattern=pkg_class.purl_pattern, endpoint=pkg_class.get_download_url) + + +def download_url(purl): + """ + Return the download URL for a PURL. + Return None if there is no PURL, or the PURL is not supported. + """ + if purl: + try: + return router.process(purl) + except NoRouteAvailable: + return diff --git a/src/fetchcode/pypi.py b/src/fetchcode/pypi.py new file mode 100644 index 0000000..089e5e4 --- /dev/null +++ b/src/fetchcode/pypi.py @@ -0,0 +1,62 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/aboutcode-org/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
+ +from urllib.parse import urljoin + +from packageurl import PackageURL + +from fetchcode import fetch_json_response + + +class Pypi: + """ + This class handles Pypi PURLs. + """ + + purl_pattern = "pkg:pypi/.*" + base_url = "https://pypi.org/pypi/" + + @classmethod + def get_download_url(cls, purl): + """ + Return the download URL for a Pypi PURL. + """ + purl = PackageURL.from_string(purl) + + name = purl.name + version = purl.version + + if not name or not version: + raise ValueError("Pypi PURL must specify a name and version") + + url = urljoin(cls.base_url, f"{name}/{version}.json") + + data = fetch_json_response(url) + + download_urls = data.get("urls", [{}]) + + if not download_urls: + raise ValueError(f"No download URLs found for {name} version {version}") + + download_url = next( + (url["url"] for url in download_urls if url.get("url")), + None + ) + + if not download_url: + raise ValueError(f"No download URL found for {name} version {version}") + + return download_url diff --git a/tests/test_pypi.py b/tests/test_pypi.py new file mode 100644 index 0000000..d5e1944 --- /dev/null +++ b/tests/test_pypi.py @@ -0,0 +1,80 @@ +import unittest +from unittest.mock import patch +from fetchcode.pypi import Pypi + +class TestGetDownloadURL(unittest.TestCase): + + @patch("fetchcode.pypi.fetch_json_response") + def test_valid_purl_returns_download_url(self, mock_fetch_json_response): + mock_response = { + "urls": [ + { + "url": "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz" + } + ] + } + mock_fetch_json_response.return_value = mock_response + + purl = "pkg:pypi/requests@2.31.0" + result = Pypi.get_download_url(purl) + self.assertEqual( + result, + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz" + ) + + @patch("fetchcode.pypi.fetch_json_response") + def test_missing_version_raises_value_error(self, mock_fetch_json_response): + purl = "pkg:pypi/requests" + with self.assertRaises(ValueError) as 
context: + Pypi.get_download_url(purl) + self.assertIn("Pypi PURL must specify a name and version", str(context.exception)) + + @patch("fetchcode.pypi.fetch_json_response") + def test_missing_name_raises_value_error(self, mock_fetch_json_response): + purl = "pkg:pypi/@2.31.0" + with self.assertRaises(ValueError) as context: + Pypi.get_download_url(purl) + self.assertIn("purl is missing the required name component", str(context.exception)) + + @patch("fetchcode.pypi.fetch_json_response") + def test_missing_urls_field_raises_value_error(self, mock_fetch_json_response): + mock_fetch_json_response.return_value = {} + purl = "pkg:pypi/requests@2.31.0" + with self.assertRaises(ValueError) as context: + Pypi.get_download_url(purl) + self.assertIn("No download URL found", str(context.exception)) + + @patch("fetchcode.pypi.fetch_json_response") + def test_empty_urls_list_raises_value_error(self, mock_fetch_json_response): + mock_fetch_json_response.return_value = {"urls": []} + purl = "pkg:pypi/requests@2.31.0" + with self.assertRaises(ValueError) as context: + Pypi.get_download_url(purl) + self.assertIn("No download URLs found", str(context.exception)) + + @patch("fetchcode.pypi.fetch_json_response") + def test_first_url_object_missing_url_key(self, mock_fetch_json_response): + mock_fetch_json_response.return_value = { + "urls": [{}] + } + purl = "pkg:pypi/requests@2.31.0" + with self.assertRaises(ValueError) as context: + Pypi.get_download_url(purl) + self.assertIn("No download URL found", str(context.exception)) + + @patch("fetchcode.pypi.fetch_json_response") + def test_url_fallback_when_multiple_urls_provided(self, mock_fetch_json_response): + mock_fetch_json_response.return_value = { + "urls": [ + {}, + {"url": "https://example.com/fallback-url.tar.gz"} + ] + } + + purl = "pkg:pypi/requests@2.31.0" + download_url = Pypi.get_download_url(purl) + self.assertEqual(download_url, "https://example.com/fallback-url.tar.gz") + + def test_malformed_purl_raises_exception(self): 
+ with self.assertRaises(ValueError): + Pypi.get_download_url("this-is-not-a-valid-purl") From cd713f392f4b6b670d91bf9c8dce13daa1bc8587 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 29 Jul 2025 12:49:13 +0530 Subject: [PATCH 2/5] Fix linting issues Signed-off-by: Tushar Goel --- src/fetchcode/pypi.py | 7 ++----- tests/test_pypi.py | 14 +++++--------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/fetchcode/pypi.py b/src/fetchcode/pypi.py index 089e5e4..95cde2a 100644 --- a/src/fetchcode/pypi.py +++ b/src/fetchcode/pypi.py @@ -50,11 +50,8 @@ def get_download_url(cls, purl): if not download_urls: raise ValueError(f"No download URLs found for {name} version {version}") - - download_url = next( - (url["url"] for url in download_urls if url.get("url")), - None - ) + + download_url = next((url["url"] for url in download_urls if url.get("url")), None) if not download_url: raise ValueError(f"No download URL found for {name} version {version}") diff --git a/tests/test_pypi.py b/tests/test_pypi.py index d5e1944..93b9eb2 100644 --- a/tests/test_pypi.py +++ b/tests/test_pypi.py @@ -1,9 +1,10 @@ import unittest from unittest.mock import patch + from fetchcode.pypi import Pypi -class TestGetDownloadURL(unittest.TestCase): +class TestGetDownloadURL(unittest.TestCase): @patch("fetchcode.pypi.fetch_json_response") def test_valid_purl_returns_download_url(self, mock_fetch_json_response): mock_response = { @@ -19,7 +20,7 @@ def test_valid_purl_returns_download_url(self, mock_fetch_json_response): result = Pypi.get_download_url(purl) self.assertEqual( result, - "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz" + "https://files.pythonhosted.org/packages/source/r/requests/requests-2.31.0.tar.gz", ) @patch("fetchcode.pypi.fetch_json_response") @@ -54,9 +55,7 @@ def test_empty_urls_list_raises_value_error(self, mock_fetch_json_response): @patch("fetchcode.pypi.fetch_json_response") def test_first_url_object_missing_url_key(self, 
mock_fetch_json_response): - mock_fetch_json_response.return_value = { - "urls": [{}] - } + mock_fetch_json_response.return_value = {"urls": [{}]} purl = "pkg:pypi/requests@2.31.0" with self.assertRaises(ValueError) as context: Pypi.get_download_url(purl) @@ -65,10 +64,7 @@ def test_first_url_object_missing_url_key(self, mock_fetch_json_response): @patch("fetchcode.pypi.fetch_json_response") def test_url_fallback_when_multiple_urls_provided(self, mock_fetch_json_response): mock_fetch_json_response.return_value = { - "urls": [ - {}, - {"url": "https://example.com/fallback-url.tar.gz"} - ] + "urls": [{}, {"url": "https://example.com/fallback-url.tar.gz"}] } purl = "pkg:pypi/requests@2.31.0" From 5c4fa2ac9ccb9eac1e5c5466b67643f504a1dfe6 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 29 Jul 2025 20:52:35 +0530 Subject: [PATCH 3/5] Fix URL construction Signed-off-by: Tushar Goel --- src/fetchcode/pypi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fetchcode/pypi.py b/src/fetchcode/pypi.py index 95cde2a..32fbb08 100644 --- a/src/fetchcode/pypi.py +++ b/src/fetchcode/pypi.py @@ -42,8 +42,8 @@ def get_download_url(cls, purl): if not name or not version: raise ValueError("Pypi PURL must specify a name and version") - url = urljoin(cls.base_url, f"{name}/{version}.json") - + url = urljoin(cls.base_url, f"{name}/{version}/json") + breakpoint() data = fetch_json_response(url) download_urls = data.get("urls", [{}]) From d8015e688157d9d444023a64482320d2e803624a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 29 Jul 2025 20:54:16 +0530 Subject: [PATCH 4/5] Remove macos12 and ubuntu20 Signed-off-by: Tushar Goel --- azure-pipelines.yml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 40ace8b..8557222 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -5,13 +5,6 @@ ################################################################################ jobs: - - template: 
etc/ci/azure-posix.yml - parameters: - job_name: ubuntu20_cpython - image_name: ubuntu-20.04 - python_versions: ['3.8', '3.9', '3.10', '3.11', '3.12'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - template: etc/ci/azure-posix.yml parameters: @@ -21,14 +14,6 @@ jobs: test_suites: all: venv/bin/pytest -n 2 -vvs - - template: etc/ci/azure-posix.yml - parameters: - job_name: macos12_cpython - image_name: macOS-12 - python_versions: ['3.8', '3.9', '3.10', '3.11', '3.12'] - test_suites: - all: venv/bin/pytest -n 2 -vvs - - template: etc/ci/azure-posix.yml parameters: job_name: macos13_cpython From 6e9792c14085b3d002383789c33c7dece6ea0b26 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 29 Jul 2025 20:57:28 +0530 Subject: [PATCH 5/5] Remove breakpoint Signed-off-by: Tushar Goel --- src/fetchcode/pypi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fetchcode/pypi.py b/src/fetchcode/pypi.py index 32fbb08..96b6916 100644 --- a/src/fetchcode/pypi.py +++ b/src/fetchcode/pypi.py @@ -43,7 +43,6 @@ def get_download_url(cls, purl): raise ValueError("Pypi PURL must specify a name and version") url = urljoin(cls.base_url, f"{name}/{version}/json") - breakpoint() data = fetch_json_response(url) download_urls = data.get("urls", [{}])