Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions src/fetchers/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ pub enum FetcherError {

impl Fetcher {
pub fn new(config: FetcherConfig) -> Self {
if !config.verify_ssl {
log::warn!(
"SSL certificate verification is DISABLED. This is insecure \
and must not be used in production."
);
}

let (clients, rotator) = if config.proxy_list.is_empty() {
// No rotation: a single client honouring `proxy` and the
// per-protocol `proxies` map.
Expand Down Expand Up @@ -166,7 +173,7 @@ impl Fetcher {
}

match req.send().await {
Ok(resp) => {
Ok(mut resp) => {
let status_code = resp.status().as_u16();
let final_url = resp.url().to_string();

Expand All @@ -184,7 +191,50 @@ impl Fetcher {
.cloned()
.unwrap_or_default();

let body_text = resp.text().await.unwrap_or_default();
// Reject obviously-too-large bodies up front when the
// server advertised a Content-Length. Use try_from so the
// u64 -> usize cast cannot truncate on 32-bit targets.
let max_body = self.config.max_body_bytes;
if let Some(len) = resp.content_length() {
let too_large = match usize::try_from(len) {
Ok(n) => n > max_body,
Err(_) => true,
};
if too_large {
return Err(FetcherError::RequestFailed(format!(
"response body too large: {} bytes (max {})",
len, max_body
)));
}
}

// Stream the body and cap the accumulated size so a
// server that lies about (or omits) Content-Length still
// cannot exhaust memory. Chunk read errors surface as a
// request failure rather than a truncated success.
let mut bytes: Vec<u8> = Vec::new();
loop {
match resp.chunk().await {
Ok(Some(chunk)) => {
let new_len = bytes.len().checked_add(chunk.len());
if new_len.map(|n| n > max_body).unwrap_or(true) {
return Err(FetcherError::RequestFailed(format!(
"response body exceeded {} bytes",
max_body
)));
}
bytes.extend_from_slice(&chunk);
}
Ok(None) => break,
Err(e) => {
return Err(FetcherError::RequestFailed(format!(
"chunk read error: {}",
e
)));
}
}
}
let body_text = String::from_utf8_lossy(&bytes).into_owned();

return Ok(Response::new(
status_code,
Expand Down
11 changes: 11 additions & 0 deletions src/fetchers/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub struct FetcherConfig {
pub headers: HashMap<String, String>,
pub stealthy_headers: bool,
pub user_agent: Option<String>,
/// Maximum response body size in bytes. Responses larger than this are
/// aborted to protect against OOM. Defaults to 50 MiB.
pub max_body_bytes: usize,
}

impl Default for FetcherConfig {
Expand All @@ -36,6 +39,7 @@ impl Default for FetcherConfig {
headers: HashMap::new(),
stealthy_headers: true,
user_agent: None,
max_body_bytes: 50 * 1024 * 1024,
}
}
}
Expand Down Expand Up @@ -205,6 +209,13 @@ impl FetcherConfigBuilder {
self
}

/// Sets the maximum response body size, in bytes.
///
/// Responses whose body exceeds this cap are aborted rather than being
/// read fully into memory. The builder is returned so calls can be chained.
pub fn max_body_bytes(self, bytes: usize) -> Self {
    let mut builder = self;
    builder.inner.max_body_bytes = bytes;
    builder
}

pub fn build(self) -> FetcherConfig {
self.inner
}
Expand Down
11 changes: 11 additions & 0 deletions tests/fetchers_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,14 @@ fn builder_rotating_proxies_populates_list() {
fn default_proxy_list_is_empty() {
    // A default configuration carries no proxies in `proxy_list`.
    let defaults = FetcherConfig::default();
    assert!(defaults.proxy_list.is_empty());
}

#[test]
fn default_max_body_bytes_is_50_mib() {
    // 50 MiB expressed in bytes.
    const FIFTY_MIB: usize = 50 * 1024 * 1024;

    let defaults = FetcherConfig::default();
    assert_eq!(defaults.max_body_bytes, FIFTY_MIB);
}

#[test]
fn builder_max_body_bytes_overrides_default() {
    // A value set through the builder must be the one found on the built config.
    let limit: usize = 1024;

    let built = FetcherConfig::builder().max_body_bytes(limit).build();
    assert_eq!(built.max_body_bytes, limit);
}
Loading