diff --git a/component_catalog/admin.py b/component_catalog/admin.py
index 3e96cf22..edb32988 100644
--- a/component_catalog/admin.py
+++ b/component_catalog/admin.py
@@ -884,6 +884,7 @@ class PackageAdmin(
"parties",
"datasource_id",
"file_references",
+ "package_content",
)
},
),
diff --git a/component_catalog/api.py b/component_catalog/api.py
index b7054945..602624ea 100644
--- a/component_catalog/api.py
+++ b/component_catalog/api.py
@@ -687,6 +687,7 @@ class Meta:
"parties",
"datasource_id",
"file_references",
+ "package_content",
"external_references",
"created_date",
"last_modified_date",
diff --git a/component_catalog/forms.py b/component_catalog/forms.py
index eb824a0a..a586bdc7 100644
--- a/component_catalog/forms.py
+++ b/component_catalog/forms.py
@@ -339,6 +339,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
+ "package_content",
"collect_data",
]
widgets = {
@@ -407,7 +408,7 @@ def helper(self):
HTML("
"),
Group("description", "keywords"),
Group("primary_language", "cpe"),
- Group("size", "release_date"),
+ Group("package_content", "size", "release_date"),
Group("dependencies", "notes"),
HTML("
"),
Group("homepage_url", "code_view_url"),
@@ -1183,6 +1184,7 @@ class Meta:
"version",
"qualifiers",
"subpath",
+ "package_content",
]
diff --git a/component_catalog/migrations/0013_package_package_content.py b/component_catalog/migrations/0013_package_package_content.py
new file mode 100644
index 00000000..ccf667de
--- /dev/null
+++ b/component_catalog/migrations/0013_package_package_content.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.8 on 2025-11-24 12:00
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ # Adds the optional (null/blank allowed) `package_content` integer choice field to `package`.
+ dependencies = [
+ ('component_catalog', '0012_alter_component_children'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='package',
+ name='package_content',
+ field=models.IntegerField(blank=True, choices=[(1, 'curation'), (2, 'patch'), (3, 'source_repo'), (4, 'source_archive'), (5, 'binary'), (6, 'test'), (7, 'doc')], help_text='Content of this Package as one of: curation, patch, source_repo, source_archive, binary, test, doc', null=True),
+ ),
+ ]
diff --git a/component_catalog/models.py b/component_catalog/models.py
index e0a5f46a..5f72e942 100644
--- a/component_catalog/models.py
+++ b/component_catalog/models.py
@@ -59,6 +59,7 @@
from dejacode_toolkit.download import collect_package_data
from dejacode_toolkit.purldb import PurlDB
from dejacode_toolkit.purldb import pick_purldb_entry
+from dejacode_toolkit.purldb import pick_source_package
from dejacode_toolkit.scancodeio import ScanCodeIO
from dje import urn
from dje.copier import post_copy
@@ -1653,6 +1654,44 @@ def __str__(self):
return self.label
+class PackageContentFieldMixin(models.Model):
+ """
+ Abstract model mixin providing the `package_content` field, extracted from the
+ `purldb.packagedb.models.Package` model. It needs to stay aligned with upstream PurlDB.
+ """
+
+ class PackageContentType(models.IntegerChoices):
+ CURATION = 1, "curation"
+ PATCH = 2, "patch"
+ SOURCE_REPO = 3, "source_repo"
+ SOURCE_ARCHIVE = 4, "source_archive"
+ BINARY = 5, "binary"
+ TEST = 6, "test"
+ DOC = 7, "doc"
+
+ package_content = models.IntegerField(
+ null=True,
+ blank=True,
+ choices=PackageContentType.choices,
+ help_text=_(  # NOTE(review): .format() runs before _(), so the msgid is the formatted text.
+ "Content of this Package as one of: {}".format(", ".join(PackageContentType.labels))
+ ),
+ )
+
+ class Meta:
+ abstract = True
+
+ @classmethod
+ def get_package_content_value_from_label(cls, label):
+ """Return the integer value for a label such as "binary"; None when empty or unknown."""
+ if not label:
+ return None
+ try:  # Enum members are looked up by name, which is the upper-cased label.
+ return cls.PackageContentType[label.upper()].value
+ except (KeyError, AttributeError):  # Unknown label, or `label` has no `.upper()`.
+ return None
+
PACKAGE_URL_FIELDS = ["type", "namespace", "name", "version", "qualifiers", "subpath"]
@@ -1792,6 +1831,7 @@ class Package(
URLFieldsMixin,
HashFieldsMixin,
PackageURLMixin,
+ PackageContentFieldMixin,
DataspacedModel,
):
filename = models.CharField(
@@ -2496,7 +2536,7 @@ def create_from_url(cls, url, user):
package_for_match = cls(download_url=download_url)
package_for_match.set_package_url(package_url)
purldb_entries = package_for_match.get_purldb_entries(user)
- # Look for one ith the same exact purl in that case
+ # Look for one with the same exact purl in that case
if purldb_data := pick_purldb_entry(purldb_entries, purl=url):
# The format from PurlDB is "2019-11-18T00:00:00Z" from DateTimeField
if release_date := purldb_data.get("release_date"):
@@ -2589,6 +2629,8 @@ def update_from_purldb(self, user):
- Retrieves matching entries from PurlDB using the given user.
- If exactly one match is found, its data is used directly.
+ - If multiple entries are found, leverage the package_content value when
+ available to select a "source" package.
- If multiple entries are found, only values that are non-empty and
common across all entries are merged and used to update the Package.
"""
@@ -2599,6 +2641,11 @@ def update_from_purldb(self, user):
purldb_entries_count = len(purldb_entries)
if purldb_entries_count == 1:
package_data = purldb_entries[0]
+ elif source_package := pick_source_package(purldb_entries):
+ package_data = source_package
+ package_data["package_content"] = Package.get_package_content_value_from_label(
+ package_data["package_content"]
+ )
else:
package_data = merge_common_non_empty_values(purldb_entries)
@@ -2639,6 +2686,12 @@ def update_from_purldb(self, user):
override=False,
override_unknown=True,
)
+
+ if updated_fields:
+ msg = f"Automatically updated {', '.join(updated_fields)} from PurlDB."
+ logger.debug(f"PurlDB: {msg}")
+ History.log_change(user, self, message=msg)
+
return updated_fields
def update_from_scan(self, user, update_products=False):
diff --git a/component_catalog/tests/test_models.py b/component_catalog/tests/test_models.py
index a97afe3e..899d5d5c 100644
--- a/component_catalog/tests/test_models.py
+++ b/component_catalog/tests/test_models.py
@@ -1366,6 +1366,7 @@ def test_component_catalog_models_get_exclude_candidates_fields(self):
"file_references",
"other_license_expression",
"parties",
+ "package_content",
],
),
)
diff --git a/component_catalog/views.py b/component_catalog/views.py
index 00198a08..8e18d7be 100644
--- a/component_catalog/views.py
+++ b/component_catalog/views.py
@@ -1140,6 +1140,7 @@ class PackageDetailsView(
"parties",
"datasource_id",
"file_references",
+ "package_content",
],
},
"components": {
@@ -1293,6 +1294,7 @@ def tab_others(self):
TabField("parties"),
TabField("datasource_id"),
TabField("file_references"),
+ TabField("package_content"),
]
fields = self.get_tab_fields(tab_fields)
@@ -1930,6 +1932,12 @@ def get_initial(self):
if purldb_entry := self.get_entry_from_purldb():
# Duplicate the declared_license_expression as the "concluded" license_expression
purldb_entry["license_expression"] = purldb_entry.get("declared_license_expression")
+
+ # Convert package_content string label to integer value
+ if content_label := purldb_entry.pop("package_content", None):
+ if content_value := Package.get_package_content_value_from_label(content_label):
+ purldb_entry["package_content"] = content_value
+
model_fields = [field.name for field in Package._meta.get_fields()]
initial_from_purldb_entry = {
field_name: value
diff --git a/dejacode_toolkit/purldb.py b/dejacode_toolkit/purldb.py
index 0f63d7a2..83db1ab4 100644
--- a/dejacode_toolkit/purldb.py
+++ b/dejacode_toolkit/purldb.py
@@ -61,6 +61,8 @@ def get_package_by_purl(self, package_url):
def find_packages(self, payload, timeout=None):
"""Get Packages details using provided `payload` filters on the PurlDB package list."""
+ payload.update({"sort": "package_content"})
+
response = self.request_get(self.package_api_url, params=payload, timeout=timeout)
if response and response.get("count") > 0:
return response.get("results")
@@ -88,3 +90,17 @@ def pick_purldb_entry(purldb_entries, purl=None):
matches = [entry for entry in purldb_entries if entry.get("purl") == purl]
if len(matches) == 1:
return matches[0]
+
+
+def pick_source_package(purldb_entries):
+ """Return the only entry, else the first "source_archive" entry, else None."""
+ if not purldb_entries:
+ return
+
+ if len(purldb_entries) == 1:
+ return purldb_entries[0]
+
+ for entry in purldb_entries:
+ package_content = entry.get("package_content")
+ if package_content and package_content.lower() == "source_archive":  # Case-insensitive.
+ return entry
diff --git a/dje/tests/testfiles/test_dataset_cc_only.json b/dje/tests/testfiles/test_dataset_cc_only.json
index b80388ec..cab86eff 100644
--- a/dje/tests/testfiles/test_dataset_cc_only.json
+++ b/dje/tests/testfiles/test_dataset_cc_only.json
@@ -292,12 +292,13 @@
"vcs_url": "",
"code_view_url": "",
"bug_tracking_url": "",
+ "md5": "",
+ "sha1": "",
"sha256": "",
"sha512": "",
+ "package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
- "sha1": "",
- "md5": "",
"size": null,
"release_date": null,
"primary_language": "",
diff --git a/dje/tests/testfiles/test_dataset_pp_only.json b/dje/tests/testfiles/test_dataset_pp_only.json
index 988add2d..264e1bff 100644
--- a/dje/tests/testfiles/test_dataset_pp_only.json
+++ b/dje/tests/testfiles/test_dataset_pp_only.json
@@ -30,6 +30,7 @@
"sha1": "",
"sha256": "",
"sha512": "",
+ "package_content": null,
"filename": "systemu-2.5.2.gem",
"download_url": "https://s3.amazonaws.com/production.s3.rubygems.org/gems/systemu-2.5.2.gem",
"size": null,