diff --git a/scanpipe/models.py b/scanpipe/models.py
index 6f3c5f550c..3e96baee27 100644
--- a/scanpipe/models.py
+++ b/scanpipe/models.py
@@ -2498,7 +2498,11 @@ def macho_binaries(self):
         )

     def executable_binaries(self):
-        return self.union(self.win_exes(), self.macho_binaries(), self.elfs())
+        return (
+            self.win_exes()
+            .order_by()
+            .union(self.macho_binaries().order_by(), self.elfs().order_by())
+        )

     def with_has_children(self):
         """
diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py
index 5046a61b09..94464aa0da 100644
--- a/scanpipe/pipes/d2d.py
+++ b/scanpipe/pipes/d2d.py
@@ -164,22 +164,35 @@ def map_checksum(project, checksum_field, logger=None):
 def _map_jvm_to_class_resource(
     to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
 ):
-    """
-    Map the ``to_resource`` .class file Resource with a Resource in
-    ``from_resources`` source files, using the ``from_classes_index`` index of
-    from/ fully qualified binary files.
-    """
     for extension in jvm_lang.source_extensions:
         normalized_path = jvm_lang.get_normalized_path(
             path=to_resource.path, extension=extension
         )

+        match = pathmap.find_paths(path=normalized_path, index=from_classes_index)
+
+        if not match and jvm_lang.name == "scala":
+            package_path = str(Path(to_resource.path).parent)
+            potential_sources = from_resources.filter(
+                path__startswith=package_path.replace("to/", "from/"),
+                extension__in=jvm_lang.source_extensions,
+            )
+            for from_resource in potential_sources:
+                from_source_root_parts = from_resource.path.strip("/").split("/")
+                from_source_root = "/".join(from_source_root_parts[:-1])
+                pipes.make_relation(
+                    from_resource=from_resource,
+                    to_resource=to_resource,
+                    map_type=jvm_lang.binary_map_type,
+                    extra_data={"from_source_root": f"{from_source_root}/"},
+                )
+            continue
+
         if not match:
-            return
+            continue

         for resource_id in match.resource_ids:
             from_resource = from_resources.get(id=resource_id)
-            # compute the root of the packages on the source side
             from_source_root_parts = from_resource.path.strip("/").split("/")
             from_source_root = "/".join(
                 from_source_root_parts[: -match.matched_path_length]
diff --git a/scanpipe/pipes/jvm.py b/scanpipe/pipes/jvm.py
index 5cd420a274..3ab6ff79fc 100644
--- a/scanpipe/pipes/jvm.py
+++ b/scanpipe/pipes/jvm.py
@@ -182,6 +182,22 @@ class ScalaLanguage(JvmLanguage):
     package_regex = re.compile(r"^\s*package\s+([\w\.]+)\s*;?")
     binary_map_type = "scala_to_class"

+    @classmethod
+    def get_normalized_path(cls, path, extension):
+        if not path.endswith(cls.binary_extensions):
+            raise ValueError(
+                f"Only path ending with {cls.binary_extensions} are supported."
+            )
+        path_obj = Path(path.strip("/"))
+        class_name = path_obj.name
+
+        if "$" in class_name:
+            class_name, _, _ = class_name.partition("$")
+        else:
+            class_name, _, _ = class_name.partition(".")
+
+        return str(path_obj.parent / f"{class_name}{extension}")
+

 class KotlinLanguage(JvmLanguage):
     name = "kotlin"
diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py
index 0a409dd88c..4ca4cbb6a4 100644
--- a/scanpipe/pipes/resolve.py
+++ b/scanpipe/pipes/resolve.py
@@ -81,6 +81,81 @@ def get_dependencies_from_manifest(resource):
     return dependencies


+def _group_manifests_by_type(manifest_resources):
+    """Group manifest resources by their package type."""
+    manifests_by_type = {}
+    for resource in manifest_resources:
+        package_type = get_default_package_type(resource.location)
+        if package_type:
+            if package_type not in manifests_by_type:
+                manifests_by_type[package_type] = []
+            manifests_by_type[package_type].append(resource)
+    return manifests_by_type
+
+
+def _resolve_pypi_manifests(
+    project, package_registry, pypi_resources, resolved_packages, sboms_headers, model
+):
+    """Resolve PyPI manifest resources."""
+    pypi_locations = [resource.location for resource in pypi_resources]
+    resolver = package_registry.get("pypi")
+    if not resolver:
+        return
+
+    try:
+        packages = resolver(input_locations=pypi_locations)
+        if packages:
+            for package_data in packages:
+                package_data["codebase_resources"] = pypi_resources
+            resolved_packages.extend(packages)
+            for resource in pypi_resources:
+                if headers := get_manifest_headers(resource):
+                    sboms_headers[resource.name] = headers
+        else:
+            for resource in pypi_resources:
+                project.add_error(
+                    description="No packages could be resolved",
+                    model=model,
+                    object_instance=resource,
+                )
+    except Exception as e:
+        for resource in pypi_resources:
+            project.add_error(
+                description=f"Error resolving packages: {e}",
+                model=model,
+                object_instance=resource,
+            )
+
+
+def _resolve_other_manifests(
+    project,
+    package_registry,
+    manifests_by_type,
+    resolved_packages,
+    resolved_dependencies,
+    sboms_headers,
+    model,
+):
+    """Resolve non-PyPI manifest resources."""
+    for package_type, resources in manifests_by_type.items():
+        for resource in resources:
+            packages = resolve_manifest_resources(resource, package_registry)
+            if packages:
+                resolved_packages.extend(packages)
+                if headers := get_manifest_headers(resource):
+                    sboms_headers[resource.name] = headers
+            else:
+                project.add_error(
+                    description="No packages could be resolved",
+                    model=model,
+                    object_instance=resource,
+                )
+
+            dependencies = get_dependencies_from_manifest(resource)
+            if dependencies:
+                resolved_dependencies.extend(dependencies)
+
+
 def get_data_from_manifests(project, package_registry, manifest_resources, model=None):
     """
     Get package and dependency data from package manifests/lockfiles/SBOMs or
@@ -97,22 +172,28 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model
         )
         return []

-    for resource in manifest_resources:
-        packages = resolve_manifest_resources(resource, package_registry)
-        if packages:
-            resolved_packages.extend(packages)
-            if headers := get_manifest_headers(resource):
-                sboms_headers[resource.name] = headers
-        else:
-            project.add_error(
-                description="No packages could be resolved",
-                model=model,
-                object_instance=resource,
-            )
+    manifests_by_type = _group_manifests_by_type(manifest_resources)

-        dependencies = get_dependencies_from_manifest(resource)
-        if dependencies:
-            resolved_dependencies.extend(dependencies)
+    if "pypi" in manifests_by_type:
+        _resolve_pypi_manifests(
+            project,
+            package_registry,
+            manifests_by_type["pypi"],
+            resolved_packages,
+            sboms_headers,
+            model,
+        )
+        del manifests_by_type["pypi"]
+
+    _resolve_other_manifests(
+        project,
+        package_registry,
+        manifests_by_type,
+        resolved_packages,
+        resolved_dependencies,
+        sboms_headers,
+        model,
+    )

     if sboms_headers:
         project.update_extra_data({"sboms_headers": sboms_headers})
@@ -222,13 +303,31 @@ def get_manifest_resources(project):
     return project.codebaseresources.filter(status=flag.APPLICATION_PACKAGE)


-def resolve_pypi_packages(input_location):
-    """Resolve the PyPI packages from the ``input_location`` requirements file."""
+def resolve_pypi_packages(input_location=None, input_locations=None):
+    """
+    Resolve the PyPI packages from requirement file(s).
+
+    Args:
+        input_location: Single requirement file path (for backward compatibility)
+        input_locations: List of requirement file paths (for batch processing)
+
+    Returns:
+        List of resolved package data dictionaries
+
+    """
+    # Handle both single file and multiple files
+    if input_locations:
+        requirement_files = input_locations
+    elif input_location:
+        requirement_files = [input_location]
+    else:
+        raise ValueError("Either input_location or input_locations must be provided")
+
     python_version = f"{sys.version_info.major}{sys.version_info.minor}"
     operating_system = "linux"

     resolution_output = python_inspector.resolve_dependencies(
-        requirement_files=[input_location],
+        requirement_files=requirement_files,
         python_version=python_version,
         operating_system=operating_system,
         # Prefer source distributions over binary distributions,
diff --git a/scanpipe/tests/pipes/test_d2d.py b/scanpipe/tests/pipes/test_d2d.py
index 4d8433498e..8570b28c63 100644
--- a/scanpipe/tests/pipes/test_d2d.py
+++ b/scanpipe/tests/pipes/test_d2d.py
@@ -633,6 +633,43 @@ def test_scanpipe_pipes_d2d_scala_ignore_pattern(self):
         expected = "Ignoring 2 to/ resources with ecosystem specific configurations."
         self.assertIn(expected, buffer.getvalue())

+    def test_scanpipe_pipes_d2d_map_scala_case_classes_to_source(self):
+        from1 = make_resource_file(
+            self.project1,
+            path="from/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/"
+            "ClusterShardingQuery.scala",
+            extra_data={"scala_package": "org.apache.pekko.cluster.sharding.typed"},
+        )
+        to1 = make_resource_file(
+            self.project1,
+            path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/"
+            "GetClusterShardingStats.class",
+        )
+        to2 = make_resource_file(
+            self.project1,
+            path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/"
+            "GetShardRegionState.class",
+        )
+        to3 = make_resource_file(
+            self.project1,
+            path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/"
+            "ClusterShardingQuery.class",
+        )
+
+        buffer = io.StringIO()
+        d2d.map_jvm_to_class(
+            self.project1, logger=buffer.write, jvm_lang=jvm.ScalaLanguage
+        )
+
+        expected = "Mapping 3 .class resources to 1 ('.scala',)"
+        self.assertIn(expected, buffer.getvalue())
+        self.assertEqual(3, self.project1.codebaserelations.count())
+
+        for to_resource in [to1, to2, to3]:
+            relation = self.project1.codebaserelations.get(to_resource=to_resource)
+            self.assertEqual(from1, relation.from_resource)
+            self.assertEqual("scala_to_class", relation.map_type)
+
     def test_scanpipe_pipes_d2d_map_jar_to_kotlin_source(self):
         from1 = make_resource_file(
             self.project1,
diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py
index 2c7aa33bcb..5510f73550 100644
--- a/scanpipe/tests/pipes/test_resolve.py
+++ b/scanpipe/tests/pipes/test_resolve.py
@@ -136,7 +136,7 @@ def test_scanpipe_pipes_resolve_resolve_pypi_packages(self, mock_resolve):

         mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"])

-        packages = resolve.resolve_pypi_packages("")
+        packages = resolve.resolve_pypi_packages("requirements.txt")
         self.assertEqual(2, len(packages))
         package_data = packages[0]
         self.assertEqual("pip", package_data["name"])
@@ -376,3 +376,51 @@ def test_scanpipe_resolve_get_manifest_headers(self):
         ]
         headers = resolve.get_manifest_headers(resource)
         self.assertEqual(expected, list(headers.keys()))
+
+    @mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies")
+    def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve):
+        """Test that resolve_pypi_packages can handle multiple requirement files."""
+        # Generated with:
+        # $ python-inspector --python-version 3.12 --operating-system linux \
+        #   --specifier pip==25.0.1 --json -
+        inspector_output_location = (
+            self.data / "resolve" / "python_inspector_resolve_dependencies.json"
+        )
+        with open(inspector_output_location) as f:
+            inspector_output = json.loads(f.read())
+
+        mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"])
+
+        req_files = ["requirements1.txt", "requirements2.txt"]
+        packages = resolve.resolve_pypi_packages(input_locations=req_files)
+
+        mock_resolve.assert_called_once()
+        call_args = mock_resolve.call_args
+        self.assertEqual(req_files, call_args.kwargs["requirement_files"])
+
+        self.assertEqual(2, len(packages))
+        self.assertEqual("pip", packages[0]["name"])
+
+    @mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies")
+    def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility(
+        self, mock_resolve
+    ):
+        """
+        Test that resolve_pypi_packages still works with single file
+        (backward compatibility).
+        """
+        inspector_output_location = (
+            self.data / "resolve" / "python_inspector_resolve_dependencies.json"
+        )
+        with open(inspector_output_location) as f:
+            inspector_output = json.loads(f.read())
+
+        mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"])
+
+        packages = resolve.resolve_pypi_packages(input_location="requirements.txt")
+
+        mock_resolve.assert_called_once()
+        call_args = mock_resolve.call_args
+        self.assertEqual(["requirements.txt"], call_args.kwargs["requirement_files"])
+
+        self.assertEqual(2, len(packages))
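
Usage sketch, not part of the patch above: a minimal illustration of how the two new entry points touched by this change could be called, assuming scanpipe is importable, that JvmLanguage.binary_extensions includes ".class", and that the requirement file names below are placeholders.

    from scanpipe.pipes import resolve
    from scanpipe.pipes.jvm import ScalaLanguage

    # Scala companion objects compile to "<Name>$.class"; the new
    # ScalaLanguage.get_normalized_path() keeps only the part before the
    # first "$" so the binary maps back to the single <Name>.scala source.
    normalized = ScalaLanguage.get_normalized_path(
        path="to/lib/org/example/Config$.class", extension=".scala"
    )
    # normalized == "to/lib/org/example/Config.scala"

    # resolve_pypi_packages() now accepts either a single requirement file
    # (backward compatible) or a batch resolved in one python-inspector run.
    resolve.resolve_pypi_packages(input_location="requirements.txt")
    resolve.resolve_pypi_packages(
        input_locations=["requirements.txt", "requirements-dev.txt"]
    )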