Skip to content

Commit 2af4f2c

Browse files
mikolalysenkoclaude
andcommitted
fix: address code-review findings + bring in the variant-apply-failure test
Adversarial review (PR #105) surfaced real bugs in the new code; fix them and make the swept-in WIP test pass reliably so it can land. VEX / setup-state (property 7): - configured_ecosystems detected Python via plan_python, which applies the --ecosystems filter — so `vex --ecosystems cargo` in a python-set-up repo reported python as NOT set up and dropped its patches. Detect python on-disk state directly (is_python_project + choose_python_manifests + deps_contain_hook), bypassing the filter like every other ecosystem in that probe. - ecosystem_from_manual_name was missing maven/nuget/deno — the apply-only ecosystems that are the *primary* use of `setup.manual` — so declaring them manual silently dropped their patches from VEX. Add them (feature-gated) + a unit test covering every compiled-in ecosystem. Launchers (supply-chain hardening): - The RubyGems + Composer launchers followed redirects without enforcing HTTPS; a MitM could redirect github.com -> http:// and serve a malicious binary AND a matching SHA256SUMS (both attacker-controlled), defeating the checksum. Enforce HTTPS on every hop: Ruby refuses a non-HTTPS (initial/redirect) URL and resolves relative redirects with URI.join; PHP sets CURLOPT_PROTOCOLS/REDIR_PROTOCOLS=HTTPS and the no-curl fallback follows redirects manually, vetting each hop's scheme. Documented the (accepted, user-owned) cache-trust model at the cache-hit path. Cargo recursive glob: - collect_manifests_recursive (crates/**) followed symlinked dirs via metadata, so a loop symlink could re-add the workspace root as a duplicate member and an escaping symlink could make setup EDIT an out-of-tree Cargo.toml (breaking the in-repo-only contract). Skip symlinked dirs in the recursive walk (matches the `glob` crate); single-level crates/* still follows a symlinked member. +unix test. variant-apply-failure test (was unrelated broken WIP, now fixed + included): - Fixed the `purl` borrow-after-move, then rewrote it to invoke the binary as a subprocess and capture the child's stdout instead of an in-process `gag::BufferRedirect` (which races libtest's stdout capture and failed deterministically under default `cargo test`). No `gag` dep; passes reliably. Verified: clippy --workspace --all-features -D warnings exits 0; the full setup/vex/cargo suites + the variant test pass; ruby `gem build` + `ruby -c` clean; PHP `php -l` clean (docker php:8.2). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 1fcac7c commit 2af4f2c

6 files changed

Lines changed: 218 additions & 60 deletions

File tree

composer/socket-patch/bin/socket-patch

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,27 @@ function sp_cache_dir()
117117
return $base . DIRECTORY_SEPARATOR . 'socket-patch' . DIRECTORY_SEPARATOR . 'bin';
118118
}
119119

120-
/** Download `$url`; write to `$dest` if given, else return the body. */
120+
/**
121+
* Download `$url`; write to `$dest` if given, else return the body. HTTPS is
122+
* enforced including across redirects: GitHub release downloads redirect to a
123+
* CDN (still HTTPS), but a redirect to http:// would let a network attacker
124+
* serve a malicious binary AND a matching SHA256SUMS (both attacker-controlled),
125+
* defeating the checksum check — so a non-HTTPS URL (initial or redirect target)
126+
* is refused.
127+
*/
121128
function sp_http_get($url, $dest = null)
122129
{
130+
if (stripos($url, 'https://') !== 0) {
131+
sp_fail("refusing non-HTTPS URL: $url");
132+
}
123133
if (function_exists('curl_init')) {
124134
$ch = curl_init($url);
125135
curl_setopt_array($ch, [
126136
CURLOPT_FOLLOWLOCATION => true,
127137
CURLOPT_MAXREDIRS => 10,
138+
// Only fetch/redirect over HTTPS — curl refuses an http:// redirect.
139+
CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
140+
CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTPS,
128141
CURLOPT_RETURNTRANSFER => true,
129142
CURLOPT_FAILONERROR => true,
130143
CURLOPT_USERAGENT => 'socket-patch-composer',
@@ -137,15 +150,10 @@ function sp_http_get($url, $dest = null)
137150
}
138151
curl_close($ch);
139152
} else {
140-
$ctx = stream_context_create(['http' => [
141-
'follow_location' => 1,
142-
'max_redirects' => 10,
143-
'user_agent' => 'socket-patch-composer',
144-
]]);
145-
$data = @file_get_contents($url, false, $ctx);
146-
if ($data === false) {
147-
sp_fail("download failed for $url");
148-
}
153+
// No curl: follow redirects manually so each hop's scheme can be checked
154+
// (PHP streams can't whitelist redirect protocols). TLS verification is
155+
// on by default; assert it explicitly.
156+
$data = sp_stream_get($url, 10);
149157
}
150158
if ($dest !== null) {
151159
file_put_contents($dest, $data);
@@ -154,6 +162,44 @@ function sp_http_get($url, $dest = null)
154162
return $data;
155163
}
156164

165+
/** Manual, HTTPS-only redirect-following GET for the no-curl fallback. */
166+
function sp_stream_get($url, $redirects)
167+
{
168+
if ($redirects < 0) {
169+
sp_fail("too many redirects for $url");
170+
}
171+
if (stripos($url, 'https://') !== 0) {
172+
sp_fail("refusing non-HTTPS URL: $url");
173+
}
174+
$ctx = stream_context_create([
175+
'http' => [
176+
'follow_location' => 0, // we follow manually to vet each hop
177+
'ignore_errors' => true,
178+
'user_agent' => 'socket-patch-composer',
179+
],
180+
'ssl' => ['verify_peer' => true, 'verify_peer_name' => true],
181+
]);
182+
$http_response_header = [];
183+
$data = @file_get_contents($url, false, $ctx);
184+
$status = 0;
185+
$location = null;
186+
foreach ($http_response_header as $h) {
187+
if (preg_match('#^HTTP/\S+\s+(\d+)#', $h, $m)) {
188+
$status = (int) $m[1];
189+
} elseif (stripos($h, 'Location:') === 0) {
190+
$location = trim(substr($h, strlen('Location:')));
191+
}
192+
}
193+
if ($status >= 300 && $status < 400 && $location !== null) {
194+
// GitHub uses absolute HTTPS redirect targets; reject anything else.
195+
return sp_stream_get($location, $redirects - 1);
196+
}
197+
if ($data === false || $status >= 400) {
198+
sp_fail("download failed ($status) for $url");
199+
}
200+
return $data;
201+
}
202+
157203
/** SHA256SUMS lines are "<hex> <filename>" (filename may be `*`-prefixed). */
158204
function sp_verify_sha256($path, $archive, $sums)
159205
{
@@ -219,6 +265,10 @@ function sp_resolve_binary()
219265
$exe = SP_BINARY . (PHP_OS_FAMILY === 'Windows' ? '.exe' : '');
220266
$cached = sp_cache_dir() . DIRECTORY_SEPARATOR . $ver
221267
. DIRECTORY_SEPARATOR . $target . DIRECTORY_SEPARATOR . $exe;
268+
// Cache hit: verified when first downloaded, under the user's own cache dir.
269+
// Trusted without re-verification (re-verifying needs a network fetch each
270+
// run), matching npx / pip / rustup; an attacker able to write here can
271+
// already replace the installed package or the binary itself.
222272
if (is_executable($cached)) {
223273
return $cached;
224274
}

crates/socket-patch-cli/src/commands/setup.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,13 @@ pub(crate) async fn configured_ecosystems(
318318
}
319319

320320
// pypi: a chosen python manifest carries the `socket-patch[hook]` dep.
321-
if let Some(plan) = plan_python(common).await {
322-
for (path, _) in &plan.manifests {
323-
if let Ok(content) = tokio::fs::read_to_string(path).await {
321+
// Detect on-disk state DIRECTLY — not via `plan_python`, which applies the
322+
// `--ecosystems` filter; this probe must report real state regardless of it
323+
// (e.g. `vex --ecosystems cargo` must still see a set-up python project).
324+
if is_python_project(&common.cwd).await {
325+
let pm = detect_python_pm(&common.cwd).await;
326+
for (path, _) in choose_python_manifests(&common.cwd, pm).await {
327+
if let Ok(content) = tokio::fs::read_to_string(&path).await {
324328
if deps_contain_hook(&content) {
325329
set.insert(Ecosystem::Pypi);
326330
break;

crates/socket-patch-cli/src/commands/vex.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ fn ecosystem_from_manual_name(name: &str) -> Option<Ecosystem> {
262262
"golang" | "go" => Some(Ecosystem::Golang),
263263
#[cfg(feature = "composer")]
264264
"composer" | "php" => Some(Ecosystem::Composer),
265+
// The apply-only ecosystems are the primary use of `manual` (hand-applied
266+
// patches with no auto-install hook); they must map too, feature-gated to
267+
// match the compiled-in `Ecosystem` variants `from_purl` can return.
268+
#[cfg(feature = "maven")]
269+
"maven" | "java" => Some(Ecosystem::Maven),
270+
#[cfg(feature = "nuget")]
271+
"nuget" | "dotnet" => Some(Ecosystem::Nuget),
272+
#[cfg(feature = "deno")]
273+
"deno" | "jsr" => Some(Ecosystem::Deno),
265274
_ => None,
266275
}
267276
}
@@ -568,6 +577,30 @@ mod tests {
568577
use super::*;
569578
use clap::Parser;
570579

580+
// Property 7: every ecosystem the build can classify a PURL for must also be
581+
// declarable `manual`. Apply-only maven/nuget/deno are the *primary* use of
582+
// `manual`; they were missing originally, silently dropping their patches.
583+
#[test]
584+
fn ecosystem_from_manual_name_maps_compiled_in_ecosystems() {
585+
assert_eq!(ecosystem_from_manual_name("npm"), Some(Ecosystem::Npm));
586+
assert_eq!(ecosystem_from_manual_name("PyPI"), Some(Ecosystem::Pypi)); // case-insensitive
587+
assert_eq!(ecosystem_from_manual_name("python"), Some(Ecosystem::Pypi));
588+
assert_eq!(ecosystem_from_manual_name("ruby"), Some(Ecosystem::Gem));
589+
assert_eq!(ecosystem_from_manual_name("nonsense"), None);
590+
#[cfg(feature = "cargo")]
591+
assert_eq!(ecosystem_from_manual_name("cargo"), Some(Ecosystem::Cargo));
592+
#[cfg(feature = "golang")]
593+
assert_eq!(ecosystem_from_manual_name("go"), Some(Ecosystem::Golang));
594+
#[cfg(feature = "composer")]
595+
assert_eq!(ecosystem_from_manual_name("composer"), Some(Ecosystem::Composer));
596+
#[cfg(feature = "maven")]
597+
assert_eq!(ecosystem_from_manual_name("maven"), Some(Ecosystem::Maven));
598+
#[cfg(feature = "nuget")]
599+
assert_eq!(ecosystem_from_manual_name("nuget"), Some(Ecosystem::Nuget));
600+
#[cfg(feature = "deno")]
601+
assert_eq!(ecosystem_from_manual_name("deno"), Some(Ecosystem::Deno));
602+
}
603+
571604
#[derive(Parser)]
572605
struct Wrap {
573606
#[command(subcommand)]

crates/socket-patch-cli/tests/in_process_variant_apply_failure.rs

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,16 @@
2222
use std::path::{Path, PathBuf};
2323
use std::process::Command;
2424

25-
use serial_test::serial;
2625
use sha2::{Digest, Sha256};
27-
use socket_patch_cli::commands::apply::{run as apply_run, ApplyArgs};
2826

2927
const PYPI_PACKAGE: &str = "six";
3028
const PYPI_VERSION: &str = "1.16.0";
3129
const UUID: &str = "12121212-1212-4121-8121-121212121212";
3230

31+
fn binary() -> PathBuf {
32+
env!("CARGO_BIN_EXE_socket-patch").into()
33+
}
34+
3335
fn git_sha256(content: &[u8]) -> String {
3436
let header = format!("blob {}\0", content.len());
3537
let mut hasher = Sha256::new();
@@ -108,9 +110,8 @@ fn install_six(tmp: &Path) -> PathBuf {
108110
/// blob does not hash to its declared `afterHash` (so apply fails) must
109111
/// produce exactly one `failed` event and NO `package_not_installed`
110112
/// `skipped` event for the same PURL.
111-
#[tokio::test]
112-
#[serial]
113-
async fn failed_installed_variant_is_not_also_reported_not_installed() {
113+
#[test]
114+
fn failed_installed_variant_is_not_also_reported_not_installed() {
114115
if find_python().is_none() {
115116
println!("SKIP: python3 not on PATH");
116117
return;
@@ -140,9 +141,12 @@ async fn failed_installed_variant_is_not_also_reported_not_installed() {
140141
.expect("write decoy blob");
141142

142143
let purl = format!("pkg:pypi/{PYPI_PACKAGE}@{PYPI_VERSION}");
144+
// `serde_json::json!` consumes the key expression, so clone for the key and
145+
// keep `purl` itself for the assertions further down.
146+
let manifest_key = purl.clone();
143147
let manifest = serde_json::json!({
144148
"patches": {
145-
purl: {
149+
manifest_key: {
146150
"uuid": UUID,
147151
"exportedAt": "2024-01-01T00:00:00Z",
148152
"files": {
@@ -161,37 +165,33 @@ async fn failed_installed_variant_is_not_also_reported_not_installed() {
161165
)
162166
.expect("write manifest");
163167

164-
// Capture stdout (the JSON envelope) from the in-process run.
165-
let apply_args = ApplyArgs {
166-
common: socket_patch_cli::args::GlobalArgs {
167-
cwd: tmp.path().to_path_buf(),
168-
dry_run: false,
169-
silent: false,
170-
manifest_path: ".socket/manifest.json".to_string(),
171-
offline: true,
172-
global: false,
173-
global_prefix: None,
174-
ecosystems: Some(vec!["pypi".to_string()]),
175-
json: true,
176-
verbose: false,
177-
download_mode: "diff".to_string(),
178-
..socket_patch_cli::args::GlobalArgs::default()
179-
},
180-
// NOT forced: exercises the variant-matches-installed path, which
181-
// is exactly where the misreport happened.
182-
force: false,
183-
check: false,
184-
vex: Default::default(),
185-
};
186-
187-
let buf = gag::BufferRedirect::stdout().expect("redirect stdout");
188-
let code = apply_run(apply_args).await;
189-
let mut out = String::new();
190-
{
191-
use std::io::Read;
192-
let mut buf = buf;
193-
buf.read_to_string(&mut out).expect("read captured stdout");
194-
}
168+
// Run the real binary as a subprocess and capture its JSON envelope from the
169+
// child's stdout. This is reliable under cargo's test-output capture, unlike
170+
// an in-process `gag`-based stdout redirect (which races libtest's own
171+
// capture). NOT `--force`: exercises the variant-matches-installed path,
172+
// exactly where the misreport happened. SOCKET_* are scrubbed so the flags
173+
// decide behaviour.
174+
let output = Command::new(binary())
175+
.args([
176+
"apply",
177+
"--offline",
178+
"--ecosystems",
179+
"pypi",
180+
"--json",
181+
"--cwd",
182+
tmp.path().to_str().unwrap(),
183+
])
184+
.env_remove("SOCKET_API_TOKEN")
185+
.env_remove("SOCKET_OFFLINE")
186+
.env_remove("SOCKET_ECOSYSTEMS")
187+
.env_remove("SOCKET_JSON")
188+
.env_remove("SOCKET_FORCE")
189+
.env_remove("SOCKET_CWD")
190+
.env_remove("SOCKET_MANIFEST_PATH")
191+
.output()
192+
.expect("run socket-patch apply");
193+
let code = output.status.code().unwrap_or(-1);
194+
let out = String::from_utf8_lossy(&output.stdout).to_string();
195195

196196
// The apply failed, so the command exits non-zero (partial failure).
197197
assert_eq!(code, 1, "a failed apply must exit 1; stdout: {out}");

crates/socket-patch-core/src/cargo_setup/discover.rs

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,16 +173,26 @@ async fn collect_manifests_recursive(dir: &Path, depth: usize, out: &mut Vec<Pat
173173
Err(_) => return,
174174
};
175175
while let Ok(Some(entry)) = rd.next_entry().await {
176-
let path = entry.path();
177-
// Stat (not `file_type`) so symlinked dirs are followed, mirroring `glob_dir`.
178-
if !fs::metadata(&path).await.map(|m| m.is_dir()).unwrap_or(false) {
176+
// Use the dir-entry's own type (does NOT follow symlinks): a `**` walk
177+
// must not traverse a symlinked dir — it could loop back to an ancestor
178+
// (so the workspace root's own `Cargo.toml` reappears as a duplicate
179+
// member) or escape the repo entirely (so `setup` would edit an
180+
// out-of-tree `Cargo.toml`, breaking the in-repo-only contract). The
181+
// `glob` crate's `**` likewise does not follow symlinks. (The
182+
// single-level `glob_dir` still follows a symlinked direct member.)
183+
let ft = match entry.file_type().await {
184+
Ok(ft) => ft,
185+
Err(_) => continue,
186+
};
187+
if ft.is_symlink() || !ft.is_dir() {
179188
continue;
180189
}
181190
let name = entry.file_name();
182191
let name = name.to_string_lossy();
183192
if name.starts_with('.') || name == "target" {
184193
continue;
185194
}
195+
let path = entry.path();
186196
let manifest = path.join("Cargo.toml");
187197
if fs::metadata(&manifest).await.is_ok() {
188198
out.push(manifest);
@@ -295,6 +305,48 @@ mod tests {
295305
assert_eq!(proj.members.len(), 2, "exactly leaf + top: {:?}", proj.members);
296306
}
297307

308+
// A recursive `crates/**` glob must NOT follow symlinked directories: a
309+
// loop symlink back to the root would re-add the workspace manifest as a
310+
// duplicate member, and an escaping symlink would let `setup` edit an
311+
// out-of-tree `Cargo.toml`. (Contrast the single-level `crates/*` case,
312+
// which intentionally follows a symlinked direct member.)
313+
#[cfg(unix)]
314+
#[tokio::test]
315+
async fn test_recursive_glob_does_not_follow_symlinks() {
316+
let dir = tempfile::tempdir().unwrap();
317+
let root = dir.path();
318+
write(
319+
&root.join("Cargo.toml"),
320+
"[workspace]\nmembers = [\"crates/**\"]\n",
321+
)
322+
.await;
323+
write(
324+
&root.join("crates/real/Cargo.toml"),
325+
"[package]\nname=\"real\"\nversion=\"0.1.0\"\n",
326+
)
327+
.await;
328+
fs::create_dir_all(root.join("crates")).await.unwrap();
329+
// Loop: crates/loop -> the workspace root (would re-discover root Cargo.toml).
330+
std::os::unix::fs::symlink(root, root.join("crates/loop")).unwrap();
331+
// Escape: crates/escape -> an unrelated dir OUTSIDE the repo.
332+
let outside = tempfile::tempdir().unwrap();
333+
write(
334+
&outside.path().join("Cargo.toml"),
335+
"[package]\nname=\"outside\"\nversion=\"0.1.0\"\n",
336+
)
337+
.await;
338+
std::os::unix::fs::symlink(outside.path(), root.join("crates/escape")).unwrap();
339+
340+
let proj = discover_cargo_project(root).await.unwrap();
341+
assert_eq!(
342+
proj.members,
343+
vec![root.join("crates/real/Cargo.toml")],
344+
"recursive `**` must find only the real nested member — never the root \
345+
via the loop symlink, never the out-of-tree crate via the escape symlink; got {:?}",
346+
proj.members
347+
);
348+
}
349+
298350
#[tokio::test]
299351
async fn test_virtual_manifest_explicit_members() {
300352
let dir = tempfile::tempdir().unwrap();

0 commit comments

Comments
 (0)