From 42fb9b8aef4d5b5c461351a7065a7906b2655ba1 Mon Sep 17 00:00:00 2001 From: Danny Eiselt Date: Wed, 14 May 2025 14:03:47 +0200 Subject: [PATCH] Added Support for syncing DEP11 files This change aims to add support for syncing of DEP11 metadata files, enabling the use of application stores like KDE Discover which are based on Appstream. --- CHANGES/1276.feature | 1 + pulp_deb/app/tasks/publishing.py | 67 ++++++++++++++++++++++++++++- pulp_deb/app/tasks/synchronizing.py | 56 ++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 CHANGES/1276.feature diff --git a/CHANGES/1276.feature b/CHANGES/1276.feature new file mode 100644 index 000000000..3794f72c5 --- /dev/null +++ b/CHANGES/1276.feature @@ -0,0 +1 @@ +Added support to sync DEP11 metadata files if available in the source repo. \ No newline at end of file diff --git a/pulp_deb/app/tasks/publishing.py b/pulp_deb/app/tasks/publishing.py index f4b612669..cddca8020 100644 --- a/pulp_deb/app/tasks/publishing.py +++ b/pulp_deb/app/tasks/publishing.py @@ -8,7 +8,7 @@ from datetime import datetime, timezone from debian import deb822 -from gzip import GzipFile +import gzip import tempfile from django.conf import settings @@ -23,6 +23,7 @@ PublishedMetadata, RemoteArtifact, RepositoryVersion, + ContentArtifact, ) from pulp_deb.app.constants import NULL_VALUE @@ -38,6 +39,7 @@ AptReleaseSigningService, SourcePackage, SourcePackageReleaseComponent, + GenericContent, ) from pulp_deb.app.serializers import ( @@ -244,8 +246,59 @@ def publish( release=release, temp_dir=temp_dir, signing_service=signing_service, + dep11_file_paths=[], ) + log.info("publish(): looking for dep11 files ...") + dep11_files = GenericContent.objects.filter( + pk__in=repo_version.content.order_by("-pulp_created"), + relative_path__contains="/dep11/", + ) + + for dep11_file in dep11_files: + release_helper.dep11_file_paths.append(dep11_file.relative_path) + # make sure that there actually are artifacts for dep11 
files + try: + artifact = ContentArtifact.objects.get( + content_id=dep11_file.content_ptr_id + ) + except Exception as e: + log.warning( + f"DEP11: artifact not found for {dep11_file}: {e}, skipping" + ) + continue + + artifact_path = f"{settings.MEDIA_ROOT}/{artifact.artifact.file}" + dep11_metadata = PublishedMetadata.create_from_file( + publication=publication, + file=File(open(artifact_path, "rb")), + relative_path=dep11_file.relative_path, + ) + dep11_metadata.save() + release_helper.add_metadata(dep11_metadata) + + # this is a "hack" because we need a mention of the + # uncompressed files in the Release file, + # for Appstream to find them + # We normally don't care about the artifact of the + # uncompressed files but every logic like + # sync and publish relies on the availability of an artifact. + # We also need to decompress those files to avoid hash mismatch errors + if "CID-Index" not in dep11_file.relative_path: + if dep11_file.relative_path.endswith(".gz"): + dep11_file_uncompressed = dep11_file.relative_path[: -len(".gz")] + with gzip.open(artifact_path, "rb") as f_in: + with open(dep11_file_uncompressed, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + + dep11_metadata_uncompressed = PublishedMetadata.create_from_file( + publication=publication, + file=File(open(dep11_file_uncompressed, "rb")), + relative_path=dep11_file_uncompressed, + ) + dep11_metadata_uncompressed.save() + release_helper.add_metadata(dep11_metadata_uncompressed) + package_release_components = PackageReleaseComponent.objects.filter( pk__in=repo_version.content.order_by("-pulp_created"), release_component__in=release_components_filtered, @@ -301,6 +354,8 @@ def __init__(self, parent, component): self.plain_component = os.path.basename(component) self.package_index_files = {} self.source_index_file_info = None + self.dep11_path = None + self.dep11_file_paths = [] for architecture in self.parent.architectures: package_index_path = os.path.join( @@ -329,6 +384,12 @@ def 
__init__(self, parent, component): source_index_path, ) + # DEP11 directory + self.dep11_dir = os.path.join( + "dists", self.parent.dists_subfolder, self.plain_component, "dep11" + ) + os.makedirs(self.dep11_dir, exist_ok=True) + def add_packages(self, packages, artifact_dict, remote_artifact_dict): published_artifacts = [] package_data = [] @@ -471,6 +532,7 @@ def __init__( release, temp_dir, signing_service=None, + dep11_file_paths=None, ): self.publication = publication self.temp_env = {"PULP_TEMP_WORKING_DIR": _create_random_directory(temp_dir)} @@ -508,6 +570,7 @@ def __init__( self.architectures = architectures self.components = {component: _ComponentHelper(self, component) for component in components} self.signing_service = publication.signing_service or signing_service + self.dep11_file_paths = dep11_file_paths def add_metadata(self, metadata): artifact = metadata._artifacts.get() @@ -573,7 +636,7 @@ def save_signed_metadata(self): def _zip_file(file_path): gz_file_path = file_path + ".gz" with open(file_path, "rb") as f_in: - with GzipFile(gz_file_path, "wb") as f_out: + with gzip.GzipFile(gz_file_path, "wb") as f_out: shutil.copyfileobj(f_in, f_out) return gz_file_path diff --git a/pulp_deb/app/tasks/synchronizing.py b/pulp_deb/app/tasks/synchronizing.py index 36f4daa6a..3619feee3 100644 --- a/pulp_deb/app/tasks/synchronizing.py +++ b/pulp_deb/app/tasks/synchronizing.py @@ -843,6 +843,10 @@ async def _handle_component( pending_tasks.extend( [self._handle_source_index(release_file, release_component, file_references)] ) + + pending_tasks.extend( + [self._handle_dep11_files(release_file, release_component, file_references)] + ) await asyncio.gather(*pending_tasks) async def _handle_flat_repo(self, file_references, release_file, distribution): @@ -871,6 +875,58 @@ async def _handle_flat_repo(self, file_references, release_file, distribution): # Await all tasks await asyncio.gather(*pending_tasks) + async def _handle_dep11_files(self, release_file, 
release_component, file_references): + dep11_dir = os.path.join(release_component.plain_component, "dep11") + paths = [path for path in file_references.keys() if path.startswith(dep11_dir)] + + if paths: + # CID-Index-amd64.json.gz is missing in file_references (not in Release file) + # Inject it manually? + dep11s = {} + supported_artifacts = [ + "CID-Index-amd64.json.gz", + "Components-amd64.yml.gz", + "Components-amd64.yml.xz", + "icons-48x48.tar.gz", + "icons-48x48@2.tar.gz", + "icons-64x64.tar.gz", + "icons-64x64@2.tar.gz", + "icons-128x128.tar.gz", + "icons-128x128@2.tar.gz", + ] + + for path in paths: + relative_path = os.path.join(os.path.dirname(release_file.relative_path), path) + basename = os.path.basename(relative_path) + + if basename not in supported_artifacts: + log.warning(f"DEP11: {basename} is not in supported artifacts, skipping") + continue + + d_artifact = self._to_d_artifact(relative_path, file_references[path]) + key = relative_path + + if key not in dep11s: + sha256 = d_artifact.artifact.sha256 + dep11s[key] = {"sha256": sha256, "d_artifacts": []} + log.debug(f"_handle_dep11_files: adding key={key}, sha256={sha256}") + + dep11s[key]["d_artifacts"].append(d_artifact) + + # handle CID-Index-amd64.json.gz separately + # because it is not listed in upstream Release file + cid_file_path = os.path.join( + os.path.dirname(release_file.relative_path), dep11_dir, "CID-Index-amd64.json.gz" + ) + artifact = self._to_d_artifact(cid_file_path) + dep11s[cid_file_path] = {"sha256": artifact.artifact.sha256, "d_artifacts": []} + dep11s[cid_file_path]["d_artifacts"].append(artifact) + for relative_path, dep11 in dep11s.items(): + content_unit = GenericContent(sha256=dep11["sha256"], relative_path=relative_path) + await self.put( + DeclarativeContent(content=content_unit, d_artifacts=dep11["d_artifacts"]) + ) + async def _handle_package_index( self, release_file,