Skip to content

Commit e523da9

Browse files
feat: chronologically likeliest setuptools version will be inferred when necessary
Signed-off-by: Abhinav Pradeep <abhinav.pradeep@oracle.com>
1 parent c32f539 commit e523da9

File tree

5 files changed

+101
-12
lines changed

5 files changed

+101
-12
lines changed

src/macaron/build_spec_generator/common_spec/pypi_spec.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ def resolve_fields(self, purl: PackageURL) -> None:
122122
python_version_set: set[str] = set()
123123
wheel_name_python_version_list: list[str] = []
124124
wheel_name_platforms: set[str] = set()
125+
# Precautionary fallback to default version
126+
chronologically_likeliest_version: str = defaults.get("heuristic.pypi", "default_setuptools")
125127

126128
if pypi_package_json is not None:
127129
if pypi_package_json.package_json or pypi_package_json.download(dest=""):
@@ -152,6 +154,9 @@ def resolve_fields(self, purl: PackageURL) -> None:
152154
parsed_build_requires["setuptools"] = "==" + defaults.get(
153155
"heuristic.pypi", "setuptools_version_emitting_platform_unknown"
154156
)
157+
chronologically_likeliest_version = (
158+
pypi_package_json.get_chronologically_suitable_setuptools_version()
159+
)
155160
except SourceCodeError:
156161
logger.debug("Could not find pure wheel matching this PURL")
157162

@@ -167,6 +172,10 @@ def resolve_fields(self, purl: PackageURL) -> None:
167172
requires = json_extract(content, ["build-system", "requires"], list)
168173
if requires:
169174
build_requires_set.update(elem.replace(" ", "") for elem in requires)
175+
# If we cannot find [build-system] requires, we lean on the fact that setuptools
176+
# was the de-facto build tool, and infer a setuptools version to include.
177+
else:
178+
build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
170179
# If we have hatch as a build_tool, we will
171180
if "hatch" in self.data["build_tools"]:
172181
# Look for [tool.hatch.build.hooks.*]
@@ -195,6 +204,10 @@ def resolve_fields(self, purl: PackageURL) -> None:
195204
build_requires_set,
196205
build_backends_set,
197206
)
207+
# Here we have successfully analyzed the pyproject.toml file. Now, if we have a setup.py/cfg,
208+
# we also need to infer a setuptools version to include.
209+
if pypi_package_json.file_exists("setup.py") or pypi_package_json.file_exists("setup.cfg"):
210+
build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
198211
except TypeError as error:
199212
logger.debug(
200213
"Found a type error while reading the pyproject.toml file from the sdist: %s", error
@@ -203,6 +216,9 @@ def resolve_fields(self, purl: PackageURL) -> None:
203216
logger.debug("Failed to read the pyproject.toml file from the sdist: %s", error)
204217
except SourceCodeError as error:
205218
logger.debug("No pyproject.toml found: %s", error)
219+
# Here we do not have a pyproject.toml file. Instead, we lean on the fact that setuptools
220+
# was the de-facto build tool, and infer a setuptools version to include.
221+
build_requires_set.add(f"setuptools=={chronologically_likeliest_version}")
206222
except SourceCodeError as error:
207223
logger.debug("No source distribution found: %s", error)
208224

src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
7474
False,
7575
pypi_package_json.pypi_registry,
7676
{},
77-
"",
78-
"",
79-
"",
8077
PyPIInspectorAsset("", [], {}),
8178
)
8279
if not adjacent_pypi_json.download(""):

src/macaron/repo_finder/repo_finder_pypi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def find_repo(
5959
if not pypi_registry:
6060
return "", RepoFinderInfo.PYPI_NO_REGISTRY
6161
pypi_asset = PyPIPackageJsonAsset(
62-
purl.name, purl.version, False, pypi_registry, {}, "", "", "", PyPIInspectorAsset("", [], {})
62+
purl.name, purl.version, False, pypi_registry, {}, PyPIInspectorAsset("", [], {})
6363
)
6464

6565
if not pypi_asset:

src/macaron/slsa_analyzer/package_registry/pypi_registry.py

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""The module provides abstractions for the pypi package registry."""
55
from __future__ import annotations
66

7+
import bisect
78
import hashlib
89
import logging
910
import os
@@ -15,7 +16,7 @@
1516
import zipfile
1617
from collections.abc import Callable, Generator, Iterator
1718
from contextlib import contextmanager
18-
from dataclasses import dataclass
19+
from dataclasses import dataclass, field
1920
from datetime import datetime
2021
from typing import TYPE_CHECKING
2122

@@ -502,6 +503,42 @@ def get_maintainer_join_date(self, username: str) -> datetime | None:
502503

503504
return res.replace(tzinfo=None) if res else None
504505

506+
def get_matching_setuptools_version(self, package_release_datetime: datetime) -> str:
    """Find the setuptools release that was the latest one at the input datetime.

    The setuptools package JSON is downloaded from the registry, every release
    with a parseable upload time is collected, and the most recent release
    uploaded strictly before ``package_release_datetime`` is selected.

    Parameters
    ----------
    package_release_datetime: datetime
        Release datetime of a package we wish to rebuild.
        NOTE(review): upload times are parsed without timezone information, so
        this is presumably expected to be a naive datetime as well -- confirm
        against callers.

    Returns
    -------
    str
        The matching version of setuptools, or the configured default
        (``heuristic.pypi`` / ``default_setuptools``) when no release metadata
        is usable or no setuptools release predates the input datetime.
    """
    setuptools_endpoint = urllib.parse.urljoin(self.registry_url, "pypi/setuptools/json")
    setuptools_json = self.download_package_json(setuptools_endpoint)
    releases = json_extract(setuptools_json, ["releases"], dict)
    if not releases:
        # This realistically cannot happen: it would mean setuptools has no releases.
        # Return default just in case.
        return defaults.get("heuristic.pypi", "default_setuptools")

    # Collect (upload datetime, version) pairs, skipping releases that have no files
    # or whose upload time is missing or malformed, so that sorting and bisecting
    # below only ever compare datetimes.
    dated_releases: list[tuple[datetime, str]] = []
    for version, release_info in releases.items():
        if not release_info:
            continue
        upload_time = release_info[0].get("upload_time")
        if not isinstance(upload_time, str):
            continue
        try:
            uploaded_at = datetime.strptime(upload_time, "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            continue
        dated_releases.append((uploaded_at, str(version)))

    # Cannot assume this is sorted, as releases is just a dict.
    dated_releases.sort(key=lambda release: release[0])

    # bisect_left gives the position at which package_release_datetime would be inserted
    # to maintain order, so the release just before that position is the latest earlier one.
    position = bisect.bisect_left(dated_releases, package_release_datetime, key=lambda release: release[0])
    if position == 0:
        # Either no usable release data, or the package predates every setuptools release.
        # Indexing with position - 1 here would wrap around to the newest release.
        return defaults.get("heuristic.pypi", "default_setuptools")

    return dated_releases[position - 1][1]
541+
505542
@staticmethod
506543
def extract_attestation(attestation_data: dict) -> dict | None:
507544
"""Extract the first attestation file from a PyPI attestation response.
@@ -618,13 +655,16 @@ class PyPIPackageJsonAsset:
618655
package_json: dict
619656

620657
#: The source code temporary location name.
621-
package_sourcecode_path: str
658+
package_sourcecode_path: str = field(init=False)
622659

623660
#: The wheel temporary location name.
624-
wheel_path: str
661+
wheel_path: str = field(init=False)
625662

626663
#: Name of the wheel file.
627-
wheel_filename: str
664+
wheel_filename: str = field(init=False)
665+
666+
#: The datetime that the wheel was uploaded.
667+
wheel_upload_time: datetime = field(init=False)
628668

629669
#: The pypi inspector information about this package
630670
inspector_asset: PyPIInspectorAsset
@@ -779,6 +819,7 @@ def get_wheel_url(self, tag: str = "none-any") -> str | None:
779819
# Continue to getting url
780820
wheel_url: str = distribution.get("url") or ""
781821
if wheel_url:
822+
self.wheel_upload_time = datetime.strptime(distribution.get("upload_time") or "", "%Y-%m-%dT%H:%M:%S")
782823
try:
783824
parsed_url = urllib.parse.urlparse(wheel_url)
784825
except ValueError:
@@ -919,6 +960,33 @@ def get_sourcecode_file_contents(self, path: str) -> bytes:
919960
logger.debug(error_msg)
920961
raise SourceCodeError(error_msg) from read_error
921962

963+
def file_exists(self, path: str) -> bool:
    """Report whether a file is present in the downloaded source code.

    Relative paths are resolved against the package_sourcecode_path attribute;
    absolute paths are checked as given.

    Parameters
    ----------
    path: str
        Absolute path, or path relative to package_sourcecode_path, to look for.

    Returns
    -------
    bool
        True if something exists at the resolved path, False otherwise
        (including when no source code location is set).
    """
    source_root = self.package_sourcecode_path
    if not source_root:
        # No source code files were downloaded
        return False

    candidate = path if os.path.isabs(path) else os.path.join(source_root, path)
    return os.path.exists(candidate)
989+
922990
def iter_sourcecode(self) -> Iterator[tuple[str, bytes]]:
923991
"""
924992
Iterate through all source code files.
@@ -1054,6 +1122,16 @@ def get_inspector_src_preview_links(self) -> bool:
10541122
# If all distributions were invalid and went along a 'continue' path.
10551123
return bool(self.inspector_asset)
10561124

1125+
def get_chronologically_suitable_setuptools_version(self) -> str:
    """Find version of setuptools that would be "latest" for this package.

    The lookup is delegated to the registry, using the upload time of this
    package's wheel as the reference point.

    Returns
    -------
    str
        Chronologically likeliest setuptools version, or the configured default
        (``heuristic.pypi`` / ``default_setuptools``) when the wheel upload time
        has not been recorded on this asset.
    """
    # wheel_upload_time is not an __init__ argument, so it may be unset on this
    # instance; read it defensively instead of raising AttributeError.
    upload_time = getattr(self, "wheel_upload_time", None)
    if upload_time is None:
        return defaults.get("heuristic.pypi", "default_setuptools")
    return self.pypi_registry.get_matching_setuptools_version(upload_time)
1134+
10571135

10581136
def find_or_create_pypi_asset(
10591137
asset_name: str, asset_version: str | None, pypi_registry_info: PackageRegistryInfo
@@ -1091,8 +1169,6 @@ def find_or_create_pypi_asset(
10911169
logger.debug("Failed to create PyPIPackageJson asset.")
10921170
return None
10931171

1094-
asset = PyPIPackageJsonAsset(
1095-
asset_name, asset_version, False, package_registry, {}, "", "", "", PyPIInspectorAsset("", [], {})
1096-
)
1172+
asset = PyPIPackageJsonAsset(asset_name, asset_version, False, package_registry, {}, PyPIInspectorAsset("", [], {}))
10971173
pypi_registry_info.metadata.append(asset)
10981174
return asset

tests/malware_analyzer/pypi/test_wheel_absence.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def test_get_inspector_src_preview_links(mock_send_head_http_raw: MagicMock) ->
125125
mock_send_head_http_raw.return_value = MagicMock() # Assume valid URL for testing purposes.
126126

127127
pypi_package_json = PyPIPackageJsonAsset(
128-
package_name, version, False, pypi_registry, package_json, "", "", "", PyPIInspectorAsset("", [], {})
128+
package_name, version, False, pypi_registry, package_json, PyPIInspectorAsset("", [], {})
129129
)
130130

131131
assert pypi_package_json.get_inspector_src_preview_links() is True

0 commit comments

Comments
 (0)