Skip to content

Commit f27df71

Browse files
mikolalysenko and claude committed
feat(pypi): narrow/broad release breadth + multi-variant remove/rollback fixes
PyPI is the only ecosystem that carries a Socket-specific `artifact_id` PURL qualifier, so a single `package@version` can resolve to several patch variants (one per wheel/sdist release). Only the installed distribution can ever apply, but scan/get downloaded every variant and remove/rollback mishandled the qualified keys.

Feature — download-time release breadth (narrow default):
- Add `--all-releases` (env `SOCKET_ALL_RELEASES`, default off) to `scan` and `get`. Narrow keeps only the variant matching the installed distribution; broad keeps every variant (portable across environments).
- `select_installed_variant` (core `patch/apply.rs`) picks the variant whose first patched file is Ready/AlreadyPatched against the on-disk package — shared by the narrow filter and rollback dedupe.
- `filter_to_installed_releases` (commands/get.rs) runs in the shared download path; non-PyPI ecosystems and single-variant packages pass through untouched. Falls back to broad (with a warning surfaced in the JSON `warnings` array) when the package is not installed or no variant matches the on-disk bytes.

Correctness fixes surfaced by multi-release manifests:
- Base-PURL matching (`purl_matches_identifier`, core utils/purl.rs): `remove`/`rollback` of a base PURL now affect every release variant; a qualified PURL or UUID still targets exactly one. Previously a base PURL matched nothing because manifest keys are qualified.
- Rollback variant dedupe: rollback groups discovered packages by base PURL and rolls back only the installed-dist variant, ending the spurious HashMismatch failures that broad manifests would have caused (the non-installed variants resolve to the same on-disk file).
- `remove` lists each variant when a base PURL expands, so the blast radius is visible before confirmation.
- `detect_prunable` compares on stripped base PURLs, so `scan --all-releases --sync` no longer prunes the very variants it just downloaded.

Tests:
- New in_process_pypi_multi_release.rs: real `pip install six` + a three-variant wiremock exercising narrow-keeps-one, broad-keeps-all, remove-base-clears-all-and-rolls-back, and rollback-all-succeeds.
- Unit tests for purl_matches_identifier, find_patches_to_rollback, remove_patch_from_manifest, detect_prunable (PyPI keep/prune), and `--all-releases` parse defaults for scan + get.

Full workspace suite: 1573 passed, 0 failed; clippy clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 3835974 commit f27df71

16 files changed

Lines changed: 1171 additions & 48 deletions

crates/socket-patch-cli/src/commands/get.rs

Lines changed: 204 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,21 @@ use socket_patch_core::api::client::{
66
use socket_patch_core::api::types::{
77
PatchResponse, PatchSearchResult, SearchResponse, VulnerabilityResponse,
88
};
9-
use socket_patch_core::crawlers::CrawlerOptions;
9+
use socket_patch_core::crawlers::{CrawlerOptions, Ecosystem};
1010
use socket_patch_core::manifest::operations::{read_manifest, write_manifest};
1111
use socket_patch_core::manifest::schema::{
1212
PatchFileInfo, PatchManifest, PatchRecord, VulnerabilityInfo,
1313
};
14+
use socket_patch_core::patch::apply::select_installed_variant;
1415
use socket_patch_core::utils::fuzzy_match::fuzzy_match_packages;
15-
use socket_patch_core::utils::purl::is_purl;
16+
use socket_patch_core::utils::purl::{is_purl, strip_purl_qualifiers};
1617
use socket_patch_core::utils::telemetry::{track_patch_fetch_failed, track_patch_fetched};
1718
use std::collections::HashMap;
1819
use std::fmt;
1920
use std::path::{Path, PathBuf};
2021

2122
use crate::args::{apply_env_toggles, GlobalArgs};
22-
use crate::ecosystem_dispatch::crawl_all_ecosystems;
23+
use crate::ecosystem_dispatch::{crawl_all_ecosystems, find_packages_for_rollback, partition_purls};
2324
use crate::output::{confirm, select_one, SelectError};
2425

2526
/// Best-effort ecosystem extractor for a `pkg:<eco>/...` PURL. Used as
@@ -327,6 +328,19 @@ pub struct GetArgs {
327328
/// Apply patch immediately without saving to .socket folder.
328329
#[arg(long = "one-off", env = "SOCKET_ONE_OFF", default_value_t = false)]
329330
pub one_off: bool,
331+
332+
/// Download patches for every release/distribution (artifact_id) of
333+
/// a matched package, not just the one matching the locally-
334+
/// installed distribution. Only affects PyPI today — the only
335+
/// ecosystem with per-release artifact_id variants. Off by default:
336+
/// only the patch for the installed dist is fetched.
337+
#[arg(
338+
long = "all-releases",
339+
env = "SOCKET_ALL_RELEASES",
340+
default_value_t = false,
341+
value_parser = clap::builder::BoolishValueParser::new(),
342+
)]
343+
pub all_releases: bool,
330344
}
331345

332346
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -508,11 +522,170 @@ pub struct DownloadParams {
508522
/// client constructed here. Without this, `download_and_apply_patches`
509523
/// would only honor env vars and ignore the user's flags.
510524
pub api_overrides: socket_patch_core::api::client::ApiClientEnvOverrides,
525+
/// When `false` (the default — narrow), a PyPI package with multiple
526+
/// release variants (`?artifact_id=...`) is filtered down to the one
527+
/// matching the locally-installed distribution before download. When
528+
/// `true` (`--all-releases`), every variant is downloaded. No effect
529+
/// on ecosystems without per-release artifact_id variants.
530+
pub all_releases: bool,
511531
}
512532

513533
/// Narrow a selection of patches down to the release variant matching
/// each locally-installed distribution.
///
/// A PyPI `package@version` can resolve to several patch variants — one
/// per `?artifact_id=...` release (wheel/sdist). Only one distribution
/// is ever installed in a given environment, so only one variant can
/// apply. With `--all-releases` off (the default) we keep just the
/// variant that `select_installed_variant` reports as matching the
/// on-disk package, dropping the rest so they are never downloaded or
/// written to the manifest. Selections that are not PyPI, and PyPI bases
/// with a single variant, pass through untouched.
///
/// Fallbacks (keep all variants of the base, i.e. behave as broad):
///   * the base package is not installed on disk (nothing to match
///     against — e.g. `get` for an absent package), or
///   * the installed distribution matches none of the variants (a local
///     modification, or no patch exists for the installed release).
///
/// Both fallbacks push a human-readable warning.
///
/// # Ordering
///
/// The kept patches are returned in the same relative order as
/// `selected`, and warnings are emitted in the order their base PURL
/// first appears in `selected`. Nothing depends on `HashMap` iteration
/// order, so repeated runs over the same input produce the same output.
///
/// # Returns
///
/// The kept patches plus any warnings to surface to the caller.
async fn filter_to_installed_releases(
    selected: &[PatchSearchResult],
    params: &DownloadParams,
    api_client: &socket_patch_core::api::client::ApiClient,
    org: Option<&str>,
) -> (Vec<PatchSearchResult>, Vec<String>) {
    let mut warnings: Vec<String> = Vec::new();

    // Group the PyPI selections by their base PURL (qualifiers stripped),
    // recording indices into `selected` rather than clones. `base_order`
    // remembers first-seen order so later iteration is deterministic.
    let mut base_order: Vec<String> = Vec::new();
    let mut groups: HashMap<String, Vec<usize>> = HashMap::new();
    for (idx, sr) in selected.iter().enumerate() {
        if Ecosystem::from_purl(&sr.purl) != Some(Ecosystem::Pypi) {
            continue;
        }
        let base = strip_purl_qualifiers(&sr.purl).to_string();
        if !groups.contains_key(&base) {
            base_order.push(base.clone());
        }
        groups.entry(base).or_default().push(idx);
    }

    // Singleton PyPI bases have nothing to disambiguate. Only bases with
    // two or more variants need installed-dist resolution.
    let multi: Vec<(String, Vec<usize>)> = base_order
        .into_iter()
        .filter_map(|base| {
            let members = groups.remove(&base)?;
            (members.len() > 1).then_some((base, members))
        })
        .collect();

    if multi.is_empty() {
        return (selected.to_vec(), warnings);
    }

    // Discover the on-disk path for each multi-variant base. Every
    // qualified variant PURL is handed to the lookup; the result map is
    // probed per variant below.
    let mut all_qualified: Vec<String> = Vec::new();
    for (_, members) in &multi {
        all_qualified.extend(members.iter().map(|&i| selected[i].purl.clone()));
    }
    // All collected PURLs are PyPI; no ecosystem filter needed.
    let partitioned = partition_purls(&all_qualified, None);
    let crawler_options = CrawlerOptions {
        cwd: params.cwd.clone(),
        global: params.global,
        global_prefix: params.global_prefix.clone(),
        batch_size: 100,
    };
    let paths = find_packages_for_rollback(&partitioned, &crawler_options, true).await;

    // `keep[i]` says whether `selected[i]` survives narrowing. Everything
    // starts kept; only a successful match clears the losing variants.
    let mut keep = vec![true; selected.len()];

    for (base, members) in &multi {
        // Any variant's resolved path works — they all map to the single
        // installed distribution.
        let pkg_path = members
            .iter()
            .find_map(|&i| paths.get(&selected[i].purl))
            .cloned();
        let Some(pkg_path) = pkg_path else {
            // Not installed: cannot determine the relevant release. Keep
            // every variant so the patch is still obtainable.
            warnings.push(format!(
                "{base} is not installed locally; keeping all {} release variant(s).",
                members.len()
            ));
            continue;
        };

        // Fetch each variant's file hashes so the installed distribution
        // can be hash-matched. On a fetch error/miss the variant still
        // takes part with an empty file map.
        let mut candidates: Vec<(&str, HashMap<String, PatchFileInfo>)> =
            Vec::with_capacity(members.len());
        for &i in members {
            let variant = &selected[i];
            let files = match api_client.fetch_patch(org, &variant.uuid).await {
                Ok(Some(patch)) => files_for_selection(&patch),
                _ => HashMap::new(),
            };
            candidates.push((variant.purl.as_str(), files));
        }

        let refs: Vec<(&str, &HashMap<String, PatchFileInfo>)> = candidates
            .iter()
            .map(|(purl, files)| (*purl, files))
            .collect();

        // Checked lookup: an out-of-range index from the selector is
        // treated as "no match" instead of panicking.
        let winner = select_installed_variant(&pkg_path, &refs)
            .await
            .and_then(|idx| refs.get(idx))
            .map(|(purl, _)| *purl);

        match winner {
            Some(winner) => {
                for &i in members {
                    keep[i] = selected[i].purl == winner;
                }
            }
            None => {
                // Installed, but no variant matches the on-disk bytes.
                // Fall back to broad rather than silently dropping a
                // package the user asked about.
                warnings.push(format!(
                    "No release variant of {base} matches the installed distribution; keeping all {} variant(s).",
                    members.len()
                ));
            }
        }
    }

    let kept: Vec<PatchSearchResult> = selected
        .iter()
        .zip(keep)
        .filter_map(|(sr, keep)| keep.then(|| sr.clone()))
        .collect();

    (kept, warnings)
}

/// Build the before/after-hash map used for installed-distribution
/// matching.
///
/// Only files that carry both a `before_hash` and an `after_hash` are
/// included; a file missing either hash is skipped. A hash that is
/// present but empty still counts as present, so such files are kept.
fn files_for_selection(patch: &PatchResponse) -> HashMap<String, PatchFileInfo> {
    patch
        .files
        .iter()
        .filter_map(|(file_path, file_info)| {
            let before = file_info.before_hash.as_ref()?;
            let after = file_info.after_hash.as_ref()?;
            Some((
                file_path.clone(),
                PatchFileInfo {
                    before_hash: before.clone(),
                    after_hash: after.clone(),
                },
            ))
        })
        .collect()
}

/// Download and apply a set of selected patches.
///
/// Used by both `get` and `scan` commands. Returns (exit_code, json_result).
516689
pub async fn download_and_apply_patches(
517690
selected: &[PatchSearchResult],
518691
params: &DownloadParams,
@@ -545,6 +718,26 @@ pub async fn download_and_apply_patches(
545718
_ => PatchManifest::new(),
546719
};
547720

721+
// Narrow PyPI multi-release selections to the installed distribution
722+
// unless --all-releases was passed. `filter_to_installed_releases`
723+
// is a no-op for non-PyPI ecosystems and single-variant packages.
724+
let mut narrow_warnings: Vec<String> = Vec::new();
725+
let selected_owned: Vec<PatchSearchResult>;
726+
let selected: &[PatchSearchResult] = if params.all_releases {
727+
selected
728+
} else {
729+
let (kept, warns) =
730+
filter_to_installed_releases(selected, params, &api_client, effective_org).await;
731+
if !params.json && !params.silent {
732+
for w in &warns {
733+
eprintln!(" [note] {w}");
734+
}
735+
}
736+
narrow_warnings = warns;
737+
selected_owned = kept;
738+
&selected_owned
739+
};
740+
548741
if !params.json && !params.silent {
549742
eprintln!("\nDownloading {} patch(es)...", selected.len());
550743
}
@@ -735,7 +928,7 @@ pub async fn download_and_apply_patches(
735928
}
736929
}
737930

738-
let result_json = serde_json::json!({
931+
let mut result_json = serde_json::json!({
739932
"status": if patches_failed > 0 { "partial_failure" } else { "success" },
740933
"found": selected.len(),
741934
"downloaded": patches_added,
@@ -745,6 +938,12 @@ pub async fn download_and_apply_patches(
745938
"updated": updates.len(),
746939
"patches": downloaded_patches,
747940
});
941+
// Surface release-narrowing fallbacks (uninstalled package / no
942+
// matching variant) so JSON consumers can see why all variants were
943+
// kept. Omitted entirely when narrowing was clean.
944+
if !narrow_warnings.is_empty() {
945+
result_json["warnings"] = serde_json::json!(narrow_warnings);
946+
}
748947

749948
let exit_code = if patches_failed > 0 || (!apply_succeeded && patches_added > 0 && !params.save_only) { 1 } else { 0 };
750949
(exit_code, result_json)
@@ -1127,6 +1326,7 @@ pub async fn run(args: GetArgs) -> i32 {
11271326
silent: false,
11281327
download_mode: args.common.download_mode.clone(),
11291328
api_overrides: args.common.api_client_overrides(),
1329+
all_releases: args.all_releases,
11301330
};
11311331

11321332
let (code, result_json) = download_and_apply_patches(&selected, &params).await;

0 commit comments

Comments
 (0)