From 6b086e76180fed454eaeb3052c19b1680a319c8e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 28 May 2026 22:02:31 +0000 Subject: [PATCH 1/2] fix: warn on disabled TLS verification and cap response body size - Log a warning when verify_ssl is disabled so the security implication of danger_accept_invalid_certs is visible - Add FetcherConfig.max_body_bytes (default 50 MiB) and stream the response body in chunks, aborting if Content-Length or the accumulated bytes exceed the cap, so a hostile server cannot exhaust memory Closes #4 Closes #7 https://claude.ai/code/session_012RmdaovmNWZVAim4XxCWwn --- src/fetchers/client.rs | 41 ++++++++++++++++++++++++++++++++++++++-- src/fetchers/config.rs | 11 +++++++++++ tests/fetchers_config.rs | 11 +++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/fetchers/client.rs b/src/fetchers/client.rs index 41398bd..09504c4 100644 --- a/src/fetchers/client.rs +++ b/src/fetchers/client.rs @@ -22,6 +22,13 @@ pub enum FetcherError { impl Fetcher { pub fn new(config: FetcherConfig) -> Self { + if !config.verify_ssl { + log::warn!( + "SSL certificate verification is DISABLED. This is insecure \ + and must not be used in production." + ); + } + let (clients, rotator) = if config.proxy_list.is_empty() { // No rotation: a single client honouring `proxy` and the // per-protocol `proxies` map. @@ -166,7 +173,7 @@ impl Fetcher { } match req.send().await { - Ok(resp) => { + Ok(mut resp) => { let status_code = resp.status().as_u16(); let final_url = resp.url().to_string(); @@ -184,7 +191,37 @@ impl Fetcher { .cloned() .unwrap_or_default(); - let body_text = resp.text().await.unwrap_or_default(); + // Reject obviously-too-large bodies up front when the + // server advertised a Content-Length. + let max_body = self.config.max_body_bytes; + if let Some(len) = resp.content_length() { + if len as usize > max_body { + return Err(FetcherError::RequestFailed(format!( + "response body too large: {} bytes (max {})", + len, max_body + ))); + } + } + + // Stream the body and cap the accumulated size so a + // server that lies about (or omits) Content-Length still + // cannot exhaust memory. + let mut bytes: Vec = Vec::new(); + let mut overflow = false; + while let Ok(Some(chunk)) = resp.chunk().await { + if bytes.len() + chunk.len() > max_body { + overflow = true; + break; + } + bytes.extend_from_slice(&chunk); + } + if overflow { + return Err(FetcherError::RequestFailed(format!( + "response body exceeded {} bytes", + max_body + ))); + } + let body_text = String::from_utf8_lossy(&bytes).into_owned(); return Ok(Response::new( status_code, diff --git a/src/fetchers/config.rs b/src/fetchers/config.rs index 8230188..40e6c99 100644 --- a/src/fetchers/config.rs +++ b/src/fetchers/config.rs @@ -19,6 +19,9 @@ pub struct FetcherConfig { pub headers: HashMap, pub stealthy_headers: bool, pub user_agent: Option, + /// Maximum response body size in bytes. Responses larger than this are + /// aborted to protect against OOM. Defaults to 50 MiB. + pub max_body_bytes: usize, } impl Default for FetcherConfig { @@ -36,6 +39,7 @@ impl Default for FetcherConfig { headers: HashMap::new(), stealthy_headers: true, user_agent: None, + max_body_bytes: 50 * 1024 * 1024, } } } @@ -205,6 +209,13 @@ impl FetcherConfigBuilder { self } + /// Cap on response body size in bytes. Responses larger than this are + /// aborted before reading them fully into memory. + pub fn max_body_bytes(mut self, bytes: usize) -> Self { + self.inner.max_body_bytes = bytes; + self + } + pub fn build(self) -> FetcherConfig { self.inner } diff --git a/tests/fetchers_config.rs b/tests/fetchers_config.rs index ad496a7..c2a064b 100644 --- a/tests/fetchers_config.rs +++ b/tests/fetchers_config.rs @@ -250,3 +250,14 @@ fn builder_rotating_proxies_populates_list() { fn default_proxy_list_is_empty() { assert!(FetcherConfig::default().proxy_list.is_empty()); } + +#[test] +fn default_max_body_bytes_is_50_mib() { + assert_eq!(FetcherConfig::default().max_body_bytes, 50 * 1024 * 1024); +} + +#[test] +fn builder_max_body_bytes_overrides_default() { + let cfg = FetcherConfig::builder().max_body_bytes(1024).build(); + assert_eq!(cfg.max_body_bytes, 1024); +} From 148e74d57e070b846f93af27fe5267f743c813d1 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 28 May 2026 22:10:37 +0000 Subject: [PATCH 2/2] fix: surface chunk read errors and use checked arithmetic for body cap Address CodeRabbit review on #20: - Replace `while let Ok(Some(chunk))` with a `match` that returns Err on chunk read errors, so a mid-body error no longer silently produces a truncated body - Use usize::try_from for the u64 Content-Length conversion so a value that does not fit a 32-bit usize is treated as too large rather than silently truncated - Use checked_add when accumulating chunk sizes to detect overflow https://claude.ai/code/session_012RmdaovmNWZVAim4XxCWwn --- src/fetchers/client.rs | 43 +++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/fetchers/client.rs b/src/fetchers/client.rs index 09504c4..038d36f 100644 --- a/src/fetchers/client.rs +++ b/src/fetchers/client.rs @@ -192,10 +192,15 @@ impl Fetcher { .unwrap_or_default(); // Reject obviously-too-large bodies up front when the - // server advertised a Content-Length. + // server advertised a Content-Length. Use try_from so the + // u64 -> usize cast cannot truncate on 32-bit targets. let max_body = self.config.max_body_bytes; if let Some(len) = resp.content_length() { - if len as usize > max_body { + let too_large = match usize::try_from(len) { + Ok(n) => n > max_body, + Err(_) => true, + }; + if too_large { return Err(FetcherError::RequestFailed(format!( "response body too large: {} bytes (max {})", len, max_body @@ -205,21 +210,29 @@ impl Fetcher { // Stream the body and cap the accumulated size so a // server that lies about (or omits) Content-Length still - // cannot exhaust memory. + // cannot exhaust memory. Chunk read errors surface as a + // request failure rather than a truncated success. let mut bytes: Vec = Vec::new(); - let mut overflow = false; - while let Ok(Some(chunk)) = resp.chunk().await { - if bytes.len() + chunk.len() > max_body { - overflow = true; - break; + loop { + match resp.chunk().await { + Ok(Some(chunk)) => { + let new_len = bytes.len().checked_add(chunk.len()); + if new_len.map(|n| n > max_body).unwrap_or(true) { + return Err(FetcherError::RequestFailed(format!( + "response body exceeded {} bytes", + max_body + ))); + } + bytes.extend_from_slice(&chunk); + } + Ok(None) => break, + Err(e) => { + return Err(FetcherError::RequestFailed(format!( + "chunk read error: {}", + e + ))); + } } - bytes.extend_from_slice(&chunk); - } - if overflow { - return Err(FetcherError::RequestFailed(format!( - "response body exceeded {} bytes", - max_body - ))); } let body_text = String::from_utf8_lossy(&bytes).into_owned();