From bd4fff4222bd300916cdffaf32aad07f6c52c38c Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Sun, 30 Nov 2025 18:07:06 +0530 Subject: [PATCH 1/6] Fix resolve_dependencies pipeline for multiple manifest files (#1957) - Modified resolve_pypi_packages() to accept multiple requirement files - Updated get_data_from_manifests() to batch-process PyPI manifests together - Maintained backward compatibility with existing single-file API - Added comprehensive tests for multiple files and backward compatibility This fix resolves issue #1957 where the pipeline failed when multiple manifest files (e.g., requirements.txt in different subfolders) were present in a project. The solution leverages python-inspector's ability to process multiple requirement files in a single call, which is more efficient and provides better dependency resolution context. Fixes #1957 Signed-off-by: pradhyum6144 --- scanpipe/pipes/resolve.py | 101 ++++++++++++++++++++++----- scanpipe/tests/pipes/test_resolve.py | 52 ++++++++++++++ 2 files changed, 136 insertions(+), 17 deletions(-) diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 0a409dd88c..8a79bd722c 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -97,22 +97,72 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model ) return [] + # Group manifest resources by package type for batch processing + manifests_by_type = {} for resource in manifest_resources: - packages = resolve_manifest_resources(resource, package_registry) - if packages: - resolved_packages.extend(packages) - if headers := get_manifest_headers(resource): - sboms_headers[resource.name] = headers - else: - project.add_error( - description="No packages could be resolved", - model=model, - object_instance=resource, - ) + package_type = get_default_package_type(resource.location) + if package_type: + if package_type not in manifests_by_type: + manifests_by_type[package_type] = [] + manifests_by_type[package_type].append(resource) + + # Process PyPI manifests together in a single batch + if "pypi" in manifests_by_type: + pypi_resources = manifests_by_type["pypi"] + pypi_locations = [resource.location for resource in pypi_resources] + + resolver = package_registry.get("pypi") + if resolver: + try: + packages = resolver(input_locations=pypi_locations) + if packages: + # Associate packages with their source resources + # Since we're processing multiple files together, we need to + # associate each package with all the manifest resources + for package_data in packages: + package_data["codebase_resources"] = pypi_resources + resolved_packages.extend(packages) + + # Collect headers for each manifest + for resource in pypi_resources: + if headers := get_manifest_headers(resource): + sboms_headers[resource.name] = headers + else: + for resource in pypi_resources: + project.add_error( + description="No packages could be resolved", + model=model, + object_instance=resource, + ) + except Exception as e: + for resource in pypi_resources: + project.add_error( + description=f"Error resolving packages: {e}", + model=model, + object_instance=resource, + ) + + # Remove pypi from the dict so we don't process it again below + del manifests_by_type["pypi"] + + # Process other manifest types individually (SPDX, CycloneDX, About files) + for package_type, resources in manifests_by_type.items(): + for resource in resources: + packages = resolve_manifest_resources(resource, package_registry) + if packages: + resolved_packages.extend(packages) + if headers := 
get_manifest_headers(resource): + sboms_headers[resource.name] = headers + else: + project.add_error( + description="No packages could be resolved", + model=model, + object_instance=resource, + ) - dependencies = get_dependencies_from_manifest(resource) - if dependencies: - resolved_dependencies.extend(dependencies) + dependencies = get_dependencies_from_manifest(resource) + if dependencies: + resolved_dependencies.extend(dependencies) if sboms_headers: project.update_extra_data({"sboms_headers": sboms_headers}) @@ -222,13 +272,30 @@ def get_manifest_resources(project): return project.codebaseresources.filter(status=flag.APPLICATION_PACKAGE) -def resolve_pypi_packages(input_location): - """Resolve the PyPI packages from the ``input_location`` requirements file.""" +def resolve_pypi_packages(input_location=None, input_locations=None): + """ + Resolve the PyPI packages from requirement file(s). + + Args: + input_location: Single requirement file path (for backward compatibility) + input_locations: List of requirement file paths (for batch processing) + + Returns: + List of resolved package data dictionaries + """ + # Handle both single file and multiple files + if input_locations: + requirement_files = input_locations + elif input_location: + requirement_files = [input_location] + else: + raise ValueError("Either input_location or input_locations must be provided") + python_version = f"{sys.version_info.major}{sys.version_info.minor}" operating_system = "linux" resolution_output = python_inspector.resolve_dependencies( - requirement_files=[input_location], + requirement_files=requirement_files, python_version=python_version, operating_system=operating_system, # Prefer source distributions over binary distributions, diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py index 2c7aa33bcb..c29cf20c92 100644 --- a/scanpipe/tests/pipes/test_resolve.py +++ b/scanpipe/tests/pipes/test_resolve.py @@ -376,3 +376,55 @@ def test_scanpipe_resolve_get_manifest_headers(self): ] headers = resolve.get_manifest_headers(resource) self.assertEqual(expected, list(headers.keys())) + + @mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies") + def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve): + """Test that resolve_pypi_packages can handle multiple requirement files.""" + # Generated with: + # $ python-inspector --python-version 3.12 --operating-system linux \ + # --specifier pip==25.0.1 --json - + inspector_output_location = ( + self.data / "resolve" / "python_inspector_resolve_dependencies.json" + ) + with open(inspector_output_location) as f: + inspector_output = json.loads(f.read()) + + mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"]) + + # Test with multiple requirement files + req_files = ["requirements1.txt", "requirements2.txt"] + packages = resolve.resolve_pypi_packages(input_locations=req_files) + + # Verify python_inspector was called with all files + mock_resolve.assert_called_once() + call_args = mock_resolve.call_args + self.assertEqual(req_files, call_args.kwargs["requirement_files"]) + + # Verify packages were returned + self.assertEqual(2, len(packages)) + self.assertEqual("pip", packages[0]["name"]) + + @mock.patch("scanpipe.pipes.resolve.python_inspector.resolve_dependencies") + def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility( + self, mock_resolve + ): + """Test that resolve_pypi_packages still works with single file (backward compatibility).""" + 
inspector_output_location = ( + self.data / "resolve" / "python_inspector_resolve_dependencies.json" + ) + with open(inspector_output_location) as f: + inspector_output = json.loads(f.read()) + + mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"]) + + # Test with single file (old API) + packages = resolve.resolve_pypi_packages(input_location="requirements.txt") + + # Verify python_inspector was called with single file in list + mock_resolve.assert_called_once() + call_args = mock_resolve.call_args + self.assertEqual(["requirements.txt"], call_args.kwargs["requirement_files"]) + + # Verify packages were returned + self.assertEqual(2, len(packages)) + From 77f03b33ce0008f49f316b7f8cfda3c5e5093a45 Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Sun, 30 Nov 2025 18:15:02 +0530 Subject: [PATCH 2/6] Remove inline comments for cleaner code Signed-off-by: pradhyum6144 --- scanpipe/pipes/resolve.py | 8 -------- scanpipe/tests/pipes/test_resolve.py | 6 ------ 2 files changed, 14 deletions(-) diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 8a79bd722c..3df9ce8520 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -97,7 +97,6 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model ) return [] - # Group manifest resources by package type for batch processing manifests_by_type = {} for resource in manifest_resources: package_type = get_default_package_type(resource.location) @@ -106,7 +105,6 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model manifests_by_type[package_type] = [] manifests_by_type[package_type].append(resource) - # Process PyPI manifests together in a single batch if "pypi" in manifests_by_type: pypi_resources = manifests_by_type["pypi"] pypi_locations = [resource.location for resource in pypi_resources] @@ -116,14 +114,10 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model try: packages = resolver(input_locations=pypi_locations) if packages: - # Associate packages with their source resources - # Since we're processing multiple files together, we need to - # associate each package with all the manifest resources for package_data in packages: package_data["codebase_resources"] = pypi_resources resolved_packages.extend(packages) - # Collect headers for each manifest for resource in pypi_resources: if headers := get_manifest_headers(resource): sboms_headers[resource.name] = headers @@ -142,10 +136,8 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model object_instance=resource, ) - # Remove pypi from the dict so we don't process it again below del manifests_by_type["pypi"] - # Process other manifest types individually (SPDX, CycloneDX, About files) for package_type, resources in manifests_by_type.items(): for resource in resources: packages = resolve_manifest_resources(resource, package_registry) diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py index c29cf20c92..239c8d4ac2 100644 --- a/scanpipe/tests/pipes/test_resolve.py +++ b/scanpipe/tests/pipes/test_resolve.py @@ -391,16 +391,13 @@ def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve) mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"]) - # Test with multiple requirement files req_files = ["requirements1.txt", "requirements2.txt"] packages = resolve.resolve_pypi_packages(input_locations=req_files) - # Verify python_inspector was called with all files 
mock_resolve.assert_called_once()
         call_args = mock_resolve.call_args
         self.assertEqual(req_files, call_args.kwargs["requirement_files"])
 
-        # Verify packages were returned
         self.assertEqual(2, len(packages))
         self.assertEqual("pip", packages[0]["name"])
 
@@ -417,14 +414,11 @@ def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility(
 
         mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"])
 
-        # Test with single file (old API)
         packages = resolve.resolve_pypi_packages(input_location="requirements.txt")
 
-        # Verify python_inspector was called with single file in list
         mock_resolve.assert_called_once()
         call_args = mock_resolve.call_args
         self.assertEqual(["requirements.txt"], call_args.kwargs["requirement_files"])
 
-        # Verify packages were returned
         self.assertEqual(2, len(packages))
 

From 13b536bac42f1a0e08651369cde82c82cdb306b6 Mon Sep 17 00:00:00 2001
From: pradhyum6144
Date: Mon, 1 Dec 2025 03:04:59 +0530
Subject: [PATCH 3/6] Fix Scala case class mapping to source files #1875

In Scala, case classes and inner classes defined within sealed traits or
objects compile to separate .class files that may not follow the standard
$-separated naming convention. This fix implements a fallback mechanism
to handle these cases.

Changes:
- Added custom get_normalized_path() method to ScalaLanguage class
- Enhanced _map_jvm_to_class_resource() with Scala-specific fallback that
  searches for source files in the same package directory when an exact
  match is not found
- Added comprehensive test case for Scala case class mapping

Fixes #1875

Signed-off-by: pradhyum6144
---
 scanpipe/pipes/d2d.py            | 21 ++++++++++++------
 scanpipe/pipes/jvm.py            | 16 ++++++++++++++
 scanpipe/tests/pipes/test_d2d.py | 37 ++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py
index 5046a61b09..d439cafa29 100644
--- a/scanpipe/pipes/d2d.py
+++ b/scanpipe/pipes/d2d.py
@@ -164,22 +164,31 @@ def map_checksum(project, checksum_field, logger=None):
 def _map_jvm_to_class_resource(
     to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
 ):
-    """
-    Map the ``to_resource`` .class file Resource with a Resource in
-    ``from_resources`` source files, using the ``from_classes_index`` index of
-    from/ fully qualified binary files.
- """ for extension in jvm_lang.source_extensions: normalized_path = jvm_lang.get_normalized_path( path=to_resource.path, extension=extension ) match = pathmap.find_paths(path=normalized_path, index=from_classes_index) if not match: + if jvm_lang.name == "scala": + package_path = str(Path(normalized_path).parent) + potential_sources = from_resources.filter( + path__startswith=package_path, + extension__in=jvm_lang.source_extensions + ) + for from_resource in potential_sources: + from_source_root_parts = from_resource.path.strip("/").split("/") + from_source_root = "/".join(from_source_root_parts[:-1]) + pipes.make_relation( + from_resource=from_resource, + to_resource=to_resource, + map_type=jvm_lang.binary_map_type, + extra_data={"from_source_root": f"{from_source_root}/"}, + ) return for resource_id in match.resource_ids: from_resource = from_resources.get(id=resource_id) - # compute the root of the packages on the source side from_source_root_parts = from_resource.path.strip("/").split("/") from_source_root = "/".join( from_source_root_parts[: -match.matched_path_length] diff --git a/scanpipe/pipes/jvm.py b/scanpipe/pipes/jvm.py index 5cd420a274..853ec25f75 100644 --- a/scanpipe/pipes/jvm.py +++ b/scanpipe/pipes/jvm.py @@ -182,6 +182,22 @@ class ScalaLanguage(JvmLanguage): package_regex = re.compile(r"^\s*package\s+([\w\.]+)\s*;?") binary_map_type = "scala_to_class" + @classmethod + def get_normalized_path(cls, path, extension): + if not path.endswith(cls.binary_extensions): + raise ValueError( + f"Only path ending with {cls.binary_extensions} are supported." + ) + path_obj = Path(path.strip("/")) + class_name = path_obj.name + + if "$" in class_name: + class_name, _, _ = class_name.partition("$") + else: + class_name, _, _ = class_name.partition(".") + + return str(path_obj.parent / f"{class_name}{extension}") + class KotlinLanguage(JvmLanguage): name = "kotlin" diff --git a/scanpipe/tests/pipes/test_d2d.py b/scanpipe/tests/pipes/test_d2d.py index 4d8433498e..8570b28c63 100644 --- a/scanpipe/tests/pipes/test_d2d.py +++ b/scanpipe/tests/pipes/test_d2d.py @@ -633,6 +633,43 @@ def test_scanpipe_pipes_d2d_scala_ignore_pattern(self): expected = "Ignoring 2 to/ resources with ecosystem specific configurations." 
self.assertIn(expected, buffer.getvalue()) + def test_scanpipe_pipes_d2d_map_scala_case_classes_to_source(self): + from1 = make_resource_file( + self.project1, + path="from/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/" + "ClusterShardingQuery.scala", + extra_data={"scala_package": "org.apache.pekko.cluster.sharding.typed"}, + ) + to1 = make_resource_file( + self.project1, + path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/" + "GetClusterShardingStats.class", + ) + to2 = make_resource_file( + self.project1, + path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/" + "GetShardRegionState.class", + ) + to3 = make_resource_file( + self.project1, + path="to/pekko-cluster-sharding-typed/org/apache/pekko/cluster/sharding/typed/" + "ClusterShardingQuery.class", + ) + + buffer = io.StringIO() + d2d.map_jvm_to_class( + self.project1, logger=buffer.write, jvm_lang=jvm.ScalaLanguage + ) + + expected = "Mapping 3 .class resources to 1 ('.scala',)" + self.assertIn(expected, buffer.getvalue()) + self.assertEqual(3, self.project1.codebaserelations.count()) + + for to_resource in [to1, to2, to3]: + relation = self.project1.codebaserelations.get(to_resource=to_resource) + self.assertEqual(from1, relation.from_resource) + self.assertEqual("scala_to_class", relation.map_type) + def test_scanpipe_pipes_d2d_map_jar_to_kotlin_source(self): from1 = make_resource_file( self.project1, From 1a7bc88a4a9c50730dc290f472f81bf07fba36a1 Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Tue, 2 Dec 2025 05:09:26 +0530 Subject: [PATCH 4/6] Fix Scala case class and inner class mapping to source files Signed-off-by: pradhyum6144 --- scanpipe/pipes/d2d.py | 36 +++++++++++++++------------- scanpipe/pipes/jvm.py | 4 ++-- scanpipe/pipes/resolve.py | 13 +++++----- scanpipe/tests/pipes/test_resolve.py | 14 ++++++----- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py index d439cafa29..94464aa0da 100644 --- a/scanpipe/pipes/d2d.py +++ b/scanpipe/pipes/d2d.py @@ -168,24 +168,28 @@ def _map_jvm_to_class_resource( normalized_path = jvm_lang.get_normalized_path( path=to_resource.path, extension=extension ) + match = pathmap.find_paths(path=normalized_path, index=from_classes_index) - if not match: - if jvm_lang.name == "scala": - package_path = str(Path(normalized_path).parent) - potential_sources = from_resources.filter( - path__startswith=package_path, - extension__in=jvm_lang.source_extensions + + if not match and jvm_lang.name == "scala": + package_path = str(Path(to_resource.path).parent) + potential_sources = from_resources.filter( + path__startswith=package_path.replace("to/", "from/"), + extension__in=jvm_lang.source_extensions, + ) + for from_resource in potential_sources: + from_source_root_parts = from_resource.path.strip("/").split("/") + from_source_root = "/".join(from_source_root_parts[:-1]) + pipes.make_relation( + from_resource=from_resource, + to_resource=to_resource, + map_type=jvm_lang.binary_map_type, + extra_data={"from_source_root": f"{from_source_root}/"}, ) - for from_resource in potential_sources: - from_source_root_parts = from_resource.path.strip("/").split("/") - from_source_root = "/".join(from_source_root_parts[:-1]) - pipes.make_relation( - from_resource=from_resource, - to_resource=to_resource, - map_type=jvm_lang.binary_map_type, - extra_data={"from_source_root": f"{from_source_root}/"}, - ) - return + continue + + if not match: + continue for resource_id in 
match.resource_ids: from_resource = from_resources.get(id=resource_id) diff --git a/scanpipe/pipes/jvm.py b/scanpipe/pipes/jvm.py index 853ec25f75..3ab6ff79fc 100644 --- a/scanpipe/pipes/jvm.py +++ b/scanpipe/pipes/jvm.py @@ -190,12 +190,12 @@ def get_normalized_path(cls, path, extension): ) path_obj = Path(path.strip("/")) class_name = path_obj.name - + if "$" in class_name: class_name, _, _ = class_name.partition("$") else: class_name, _, _ = class_name.partition(".") - + return str(path_obj.parent / f"{class_name}{extension}") diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 3df9ce8520..e8fedff911 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -108,7 +108,7 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model if "pypi" in manifests_by_type: pypi_resources = manifests_by_type["pypi"] pypi_locations = [resource.location for resource in pypi_resources] - + resolver = package_registry.get("pypi") if resolver: try: @@ -117,7 +117,7 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model for package_data in packages: package_data["codebase_resources"] = pypi_resources resolved_packages.extend(packages) - + for resource in pypi_resources: if headers := get_manifest_headers(resource): sboms_headers[resource.name] = headers @@ -135,7 +135,7 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model model=model, object_instance=resource, ) - + del manifests_by_type["pypi"] for package_type, resources in manifests_by_type.items(): @@ -267,13 +267,14 @@ def get_manifest_resources(project): def resolve_pypi_packages(input_location=None, input_locations=None): """ Resolve the PyPI packages from requirement file(s). - + Args: input_location: Single requirement file path (for backward compatibility) input_locations: List of requirement file paths (for batch processing) - + Returns: List of resolved package data dictionaries + """ # Handle both single file and multiple files if input_locations: @@ -282,7 +283,7 @@ def resolve_pypi_packages(input_location=None, input_locations=None): requirement_files = [input_location] else: raise ValueError("Either input_location or input_locations must be provided") - + python_version = f"{sys.version_info.major}{sys.version_info.minor}" operating_system = "linux" diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py index 239c8d4ac2..ff32791b5c 100644 --- a/scanpipe/tests/pipes/test_resolve.py +++ b/scanpipe/tests/pipes/test_resolve.py @@ -393,11 +393,11 @@ def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve) req_files = ["requirements1.txt", "requirements2.txt"] packages = resolve.resolve_pypi_packages(input_locations=req_files) - + mock_resolve.assert_called_once() call_args = mock_resolve.call_args self.assertEqual(req_files, call_args.kwargs["requirement_files"]) - + self.assertEqual(2, len(packages)) self.assertEqual("pip", packages[0]["name"]) @@ -405,7 +405,10 @@ def test_scanpipe_pipes_resolve_pypi_packages_multiple_files(self, mock_resolve) def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility( self, mock_resolve ): - """Test that resolve_pypi_packages still works with single file (backward compatibility).""" + """ + Test that resolve_pypi_packages still works with single file + (backward compatibility). 
+ """ inspector_output_location = ( self.data / "resolve" / "python_inspector_resolve_dependencies.json" ) @@ -415,10 +418,9 @@ def test_scanpipe_pipes_resolve_pypi_packages_backward_compatibility( mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"]) packages = resolve.resolve_pypi_packages(input_location="requirements.txt") - + mock_resolve.assert_called_once() call_args = mock_resolve.call_args self.assertEqual(["requirements.txt"], call_args.kwargs["requirement_files"]) - - self.assertEqual(2, len(packages)) + self.assertEqual(2, len(packages)) From 0d24881e06be280472ca70d86c1b349cbd137e64 Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Tue, 2 Dec 2025 23:22:58 +0530 Subject: [PATCH 5/6] Fix resolve pipe tests and executable_binaries logic Signed-off-by: pradhyum6144 --- scanpipe/models.py | 5 ++++- scanpipe/tests/pipes/test_resolve.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 6f3c5f550c..3a81570b4e 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2498,7 +2498,10 @@ def macho_binaries(self): ) def executable_binaries(self): - return self.union(self.win_exes(), self.macho_binaries(), self.elfs()) + return self.win_exes().order_by().union( + self.macho_binaries().order_by(), + self.elfs().order_by() + ) def with_has_children(self): """ diff --git a/scanpipe/tests/pipes/test_resolve.py b/scanpipe/tests/pipes/test_resolve.py index ff32791b5c..5510f73550 100644 --- a/scanpipe/tests/pipes/test_resolve.py +++ b/scanpipe/tests/pipes/test_resolve.py @@ -136,7 +136,7 @@ def test_scanpipe_pipes_resolve_resolve_pypi_packages(self, mock_resolve): mock_resolve.return_value = mock.Mock(packages=inspector_output["packages"]) - packages = resolve.resolve_pypi_packages("") + packages = resolve.resolve_pypi_packages("requirements.txt") self.assertEqual(2, len(packages)) package_data = packages[0] self.assertEqual("pip", package_data["name"]) From 4b318805ec72876fadf047fdd50f53beb998b848 Mon Sep 17 00:00:00 2001 From: pradhyum6144 Date: Tue, 16 Dec 2025 01:59:12 +0530 Subject: [PATCH 6/6] Fix complexity issue in resolve.py Signed-off-by: pradhyum6144 --- scanpipe/models.py | 7 +- scanpipe/pipes/resolve.py | 131 +++++++++++++++++++++++++------------- 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 3a81570b4e..3e96baee27 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2498,9 +2498,10 @@ def macho_binaries(self): ) def executable_binaries(self): - return self.win_exes().order_by().union( - self.macho_binaries().order_by(), - self.elfs().order_by() + return ( + self.win_exes() + .order_by() + .union(self.macho_binaries().order_by(), self.elfs().order_by()) ) def with_has_children(self): diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index e8fedff911..4ca4cbb6a4 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -81,22 +81,8 @@ def get_dependencies_from_manifest(resource): return dependencies -def get_data_from_manifests(project, package_registry, manifest_resources, model=None): - """ - Get package and dependency data from package manifests/lockfiles/SBOMs or - for resolved packages from package requirements. 
- """ - resolved_packages = [] - resolved_dependencies = [] - sboms_headers = {} - - if not manifest_resources.exists(): - project.add_warning( - description="No resources containing package data found in codebase.", - model=model, - ) - return [] - +def _group_manifests_by_type(manifest_resources): + """Group manifest resources by their package type.""" manifests_by_type = {} for resource in manifest_resources: package_type = get_default_package_type(resource.location) @@ -104,40 +90,53 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model if package_type not in manifests_by_type: manifests_by_type[package_type] = [] manifests_by_type[package_type].append(resource) + return manifests_by_type - if "pypi" in manifests_by_type: - pypi_resources = manifests_by_type["pypi"] - pypi_locations = [resource.location for resource in pypi_resources] - resolver = package_registry.get("pypi") - if resolver: - try: - packages = resolver(input_locations=pypi_locations) - if packages: - for package_data in packages: - package_data["codebase_resources"] = pypi_resources - resolved_packages.extend(packages) - - for resource in pypi_resources: - if headers := get_manifest_headers(resource): - sboms_headers[resource.name] = headers - else: - for resource in pypi_resources: - project.add_error( - description="No packages could be resolved", - model=model, - object_instance=resource, - ) - except Exception as e: - for resource in pypi_resources: - project.add_error( - description=f"Error resolving packages: {e}", - model=model, - object_instance=resource, - ) +def _resolve_pypi_manifests( + project, package_registry, pypi_resources, resolved_packages, sboms_headers, model +): + """Resolve PyPI manifest resources.""" + pypi_locations = [resource.location for resource in pypi_resources] + resolver = package_registry.get("pypi") + if not resolver: + return - del manifests_by_type["pypi"] + try: + packages = resolver(input_locations=pypi_locations) + if packages: + for package_data in packages: + package_data["codebase_resources"] = pypi_resources + resolved_packages.extend(packages) + for resource in pypi_resources: + if headers := get_manifest_headers(resource): + sboms_headers[resource.name] = headers + else: + for resource in pypi_resources: + project.add_error( + description="No packages could be resolved", + model=model, + object_instance=resource, + ) + except Exception as e: + for resource in pypi_resources: + project.add_error( + description=f"Error resolving packages: {e}", + model=model, + object_instance=resource, + ) + +def _resolve_other_manifests( + project, + package_registry, + manifests_by_type, + resolved_packages, + resolved_dependencies, + sboms_headers, + model, +): + """Resolve non-PyPI manifest resources.""" for package_type, resources in manifests_by_type.items(): for resource in resources: packages = resolve_manifest_resources(resource, package_registry) @@ -156,6 +155,46 @@ def get_data_from_manifests(project, package_registry, manifest_resources, model if dependencies: resolved_dependencies.extend(dependencies) + +def get_data_from_manifests(project, package_registry, manifest_resources, model=None): + """ + Get package and dependency data from package manifests/lockfiles/SBOMs or + for resolved packages from package requirements. 
+ """ + resolved_packages = [] + resolved_dependencies = [] + sboms_headers = {} + + if not manifest_resources.exists(): + project.add_warning( + description="No resources containing package data found in codebase.", + model=model, + ) + return [] + + manifests_by_type = _group_manifests_by_type(manifest_resources) + + if "pypi" in manifests_by_type: + _resolve_pypi_manifests( + project, + package_registry, + manifests_by_type["pypi"], + resolved_packages, + sboms_headers, + model, + ) + del manifests_by_type["pypi"] + + _resolve_other_manifests( + project, + package_registry, + manifests_by_type, + resolved_packages, + resolved_dependencies, + sboms_headers, + model, + ) + if sboms_headers: project.update_extra_data({"sboms_headers": sboms_headers})