From bba9d110efa84c31fc6b68a3b108076d29392cbc Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 5 Mar 2026 23:29:51 +0100 Subject: [PATCH 1/3] Avoid panics when content is longer than content length header Fixes https://github.com/astral-sh/uv/issues/18316 This fix handles two cases: * The server sends more bytes than initially promised in the `Content-Length` header * The server sends more or less bytes than the `content-range` indicated As a side effect, we remove the requirement that the initial response must be towards the end (`-len:`). --- src/error.rs | 6 +- src/lib.rs | 184 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 175 insertions(+), 15 deletions(-) diff --git a/src/error.rs b/src/error.rs index 19e078e..edae7c5 100644 --- a/src/error.rs +++ b/src/error.rs @@ -32,8 +32,12 @@ pub enum AsyncHttpRangeReaderError { MemoryMapError(#[source] Arc), /// Error from `http-content-range` - #[error("Invalid Content-Range header: {0}")] + #[error("invalid Content-Range header: {0}")] ContentRangeParser(String), + + /// The server returned fewer or more bytes than the range request asked for + #[error("expected {expected} bytes from range response, got {actual}")] + ContentLengthMismatch { expected: u64, actual: u64 }, } impl From for AsyncHttpRangeReaderError { diff --git a/src/lib.rs b/src/lib.rs index 8a3c6e3..c5b3194 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,8 @@ struct Inner { streamer_state_rx: WatchStream, /// A channel sender to send range requests to the background task + /// + /// Contract: All ranges sent must be inside the range of the memory map request_tx: tokio::sync::mpsc::Sender>, /// An optional object to reserve a slot in the `request_tx` sender. 
When in the process of @@ -196,7 +198,7 @@ impl AsyncHttpRangeReader { } /// Initialize the reader from [`AsyncHttpRangeReader::initial_tail_request`] (or a user - /// provided response that also has a range of bytes from the end as body) + /// provided range response) pub async fn from_tail_response( client: impl Into, tail_request_response: Response, @@ -212,10 +214,11 @@ impl AsyncHttpRangeReader { .ok_or(AsyncHttpRangeReaderError::ContentRangeMissing)? .to_str() .map_err(|_err| AsyncHttpRangeReaderError::ContentRangeMissing)?; + // The parser ensures finish < complete_length let content_range = ContentRange::parse(content_range_header).ok_or_else(|| { AsyncHttpRangeReaderError::ContentRangeParser(content_range_header.to_string()) })?; - let (start, finish, complete_length) = match content_range { + let (start, end_inclusive, complete_length) = match content_range { ContentRange::Bytes(ContentRangeBytes { first_byte, last_byte, @@ -236,8 +239,7 @@ impl AsyncHttpRangeReader { let memory_map_slice = unsafe { std::slice::from_raw_parts(memory_map.as_ptr(), memory_map.len()) }; - let requested_range = - SparseRange::from_range(complete_length - (finish - start)..complete_length); + let requested_range = SparseRange::from_range(start..end_inclusive + 1); // adding more than 2 entries to the channel would block the sender. 
I assumed two would // suffice because I would want to 1) prefetch a certain range and 2) read stuff via the @@ -249,7 +251,7 @@ impl AsyncHttpRangeReader { client, url, extra_headers, - Some((tail_request_response, start)), + Some((tail_request_response, start, end_inclusive + 1)), memory_map, state_tx, request_rx, @@ -259,7 +261,7 @@ impl AsyncHttpRangeReader { let mut streamer_state = StreamerState::default(); streamer_state .requested_ranges - .push(complete_length - (finish - start)..complete_length); + .push(start..end_inclusive + 1); let reader = Self { len: memory_map_slice.len() as u64, @@ -416,23 +418,22 @@ async fn run_streamer( client: reqwest_middleware::ClientWithMiddleware, url: Url, extra_headers: HeaderMap, - initial_tail_response: Option<(Response, u64)>, + initial_tail_response: Option<(Response, u64, u64)>, mut memory_map: MmapMut, mut state_tx: Sender, mut request_rx: tokio::sync::mpsc::Receiver>, ) { let mut state = StreamerState::default(); - if let Some((response, response_start)) = initial_tail_response { + if let Some((response, start, end_exclusive)) = initial_tail_response { // Add the initial range to the state - state - .requested_ranges - .push(response_start..memory_map.len() as u64); + state.requested_ranges.push(start..memory_map.len() as u64); // Stream the initial data in memory if !stream_response( response, - response_start, + start, + end_exclusive, &mut memory_map, &mut state_tx, &mut state, @@ -497,6 +498,7 @@ async fn run_streamer( if !stream_response( response, *range.start(), + *range.end() + 1, &mut memory_map, &mut state_tx, &mut state, @@ -512,13 +514,25 @@ async fn run_streamer( /// Streams the data from the specified response to the memory map updating progress in between. /// Returns `true` if everything went fine, `false` if anything went wrong. The error state, if any, /// is stored in `state_tx` so the "frontend" will consume it. 
+/// +/// The response must return bytes for the range of precisely `start..end_exclusive`. async fn stream_response( tail_request_response: Response, - mut offset: u64, + start: u64, + end_exclusive: u64, memory_map: &mut MmapMut, state_tx: &mut Sender, state: &mut StreamerState, ) -> bool { + // Enforce request channel contract + assert!( + (end_exclusive as usize) <= memory_map.len(), + "end is outside of memory map {} > {}", + end_exclusive, + memory_map.len() + ); + + let mut offset = start; let mut byte_stream = tail_request_response.bytes_stream(); while let Some(bytes) = byte_stream.next().await { let bytes = match bytes { @@ -534,7 +548,17 @@ async fn stream_response( let byte_range = offset..offset + bytes.len() as u64; // Update the offset - offset = byte_range.end; + offset += bytes.len() as u64; + + // Prevent the server from sending more bytes than advertised in a response + if offset > end_exclusive { + state.error = Some(AsyncHttpRangeReaderError::ContentLengthMismatch { + expected: end_exclusive - start, + actual: offset - start, + }); + let _ = state_tx.send(state.clone()); + return false; + } // Copy the data from the stream to memory memory_map[byte_range.start as usize..byte_range.end as usize] @@ -551,6 +575,16 @@ async fn stream_response( } } + // Prevent the server from sending less bytes than advertised in a response + if offset != end_exclusive { + state.error = Some(AsyncHttpRangeReaderError::ContentLengthMismatch { + expected: end_exclusive - start, + actual: offset - start, + }); + let _ = state_tx.send(state.clone()); + return false; + } + true } @@ -658,7 +692,12 @@ mod test { use crate::static_directory_server::StaticDirectoryServer; use assert_matches::assert_matches; use async_zip::tokio::read::seek::ZipFileReader; + use axum::body::Body; + use axum::extract::Request; + use axum::response::IntoResponse; use futures::AsyncReadExt; + use reqwest::header; + use reqwest::Method; use reqwest::{Client, StatusCode}; use rstest::*; use 
std::path::Path; @@ -854,4 +893,121 @@ mod test { err, AsyncHttpRangeReaderError::HttpError(err) if err.status() == Some(StatusCode::NOT_FOUND) ); } + + /// Spawn a server where the HEAD response reports `head_content_length` bytes, and range + /// requests always claim to be `pretend_size` bytes, while actually serving `actual_size`. + async fn spawn_mismatch_server( + head_content_length: usize, + pretend_size: usize, + actual_size: usize, + ) -> Url { + let app = + axum::Router::new().fallback(async move |request: Request| match *request.method() { + Method::HEAD => { + let headers = [ + (header::CONTENT_LENGTH, head_content_length.to_string()), + (header::ACCEPT_RANGES, "bytes".to_string()), + ]; + (StatusCode::OK, headers).into_response() + } + Method::GET => { + let range_header = request + .headers() + .get(header::RANGE) + .unwrap() + .to_str() + .unwrap() + .to_string(); + + let range_spec = range_header.strip_prefix("bytes=").unwrap(); + let (start_str, _end_str) = range_spec.split_once('-').unwrap(); + let start = start_str.parse::<usize>().unwrap(); + // The end is inclusive + let end = start + pretend_size - 1; + + axum::response::Response::builder() + .status(StatusCode::PARTIAL_CONTENT) + // Note that the client ignores this value currently, it only checks the + // actual size + .header( + header::CONTENT_RANGE, + format!("bytes {start}-{end}/{head_content_length}"), + ) + .body(Body::from(vec![1u8; actual_size])) + .unwrap() + .into_response() + } + _ => StatusCode::METHOD_NOT_ALLOWED.into_response(), + }); + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let local_addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app.into_make_service()) + .await + .unwrap(); + }); + + Url::parse(&format!("http://localhost:{}/file", local_addr.port())).unwrap() + } + + /// HEAD says 512 bytes, but range responses return 1024 bytes — overflows + /// the memory map. 
+ #[tokio::test] + async fn test_content_length_response_beyond_content_length() { + let cases = [ + // Baseline + (512, 512, 512, true), + // The requested and declared length is 512, while the actual content is 1024 + (512, 512, 1024, false), + // The declared total length is 512, but it says and sends a range of 1024 + (512, 1024, 1024, false), + // We ignore the response range end header is lying, we're getting the 512 we ordered + (512, 1024, 512, true), + // Baseline + (1024, 512, 512, true), + // We requested 512, but we're getting 1024 + (1024, 512, 1024, false), + // We requested 512, but we're getting 1024 + (1024, 1024, 1024, false), + // We ignore the response range end header is lying, we're getting the 512 we ordered + (1024, 1024, 512, true), + ]; + for (head_content_length, range_header_length, range_actual_length, is_ok) in cases { + let url = spawn_mismatch_server( + head_content_length, + range_header_length, + range_actual_length, + ) + .await; + + let (mut reader, _) = AsyncHttpRangeReader::new( + Client::new(), + url, + CheckSupportMethod::Head, + HeaderMap::default(), + ) + .await + .unwrap(); + + assert_eq!(reader.len(), head_content_length as u64); + reader.prefetch(0..512).await; + + let mut buf = vec![0u8; 512]; + let result = reader.read(&mut buf).await; + if is_ok { + assert_matches!( + result, + Ok(_), + "{head_content_length} {range_header_length} {range_actual_length}" + ); + } else { + assert_matches!( + result, + Err(_), + "{head_content_length} {range_header_length} {range_actual_length}" + ); + } + } + } } From 785c99fe9a7a2f55643e83ed1315ea111bfe110d Mon Sep 17 00:00:00 2001 From: konsti Date: Mon, 9 Mar 2026 11:45:38 +0100 Subject: [PATCH 2/3] Update src/lib.rs Co-authored-by: Tomasz Kramkowski --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5b3194..ed18b1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -427,7 +427,7 @@ async fn run_streamer( if let Some((response, 
start, end_exclusive)) = initial_tail_response { // Add the initial range to the state - state.requested_ranges.push(start..memory_map.len() as u64); + state.requested_ranges.push(start..end_exclusive); // Stream the initial data in memory if !stream_response( From 7d6c1dcfa4e9f26b5db97df36ed8ac8293e84c50 Mon Sep 17 00:00:00 2001 From: konstin Date: Mon, 9 Mar 2026 12:44:28 +0100 Subject: [PATCH 3/3] Review --- src/error.rs | 23 ++++++- src/lib.rs | 190 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 175 insertions(+), 38 deletions(-) diff --git a/src/error.rs b/src/error.rs index edae7c5..cac6204 100644 --- a/src/error.rs +++ b/src/error.rs @@ -35,9 +35,28 @@ pub enum AsyncHttpRangeReaderError { #[error("invalid Content-Range header: {0}")] ContentRangeParser(String), - /// The server returned fewer or more bytes than the range request asked for + /// The server returned an invalid range response + #[error( + "request and response range mismatch, \ + expected {expected_start}-{expected_end_inclusive}/{expected_complete_length}, \ + got {actual_start}-{actual_end_inclusive}/{actual_complete_length}" + )] + RangeMismatch { + expected_start: u64, + expected_end_inclusive: u64, + expected_complete_length: usize, + actual_start: u64, + actual_end_inclusive: u64, + actual_complete_length: u64, + }, + + /// The server returned more bytes than the range request asked for + #[error("range response returned more than the expected {expected} bytes")] + ResponseTooLong { expected: u64 }, + + /// The server returned fewer bytes than the range request asked for #[error("expected {expected} bytes from range response, got {actual}")] - ContentLengthMismatch { expected: u64, actual: u64 }, + ResponseTooShort { expected: u64, actual: u64 }, } impl From for AsyncHttpRangeReaderError { diff --git a/src/lib.rs b/src/lib.rs index ed18b1b..ba6e00c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,7 +50,7 @@ pub use error::AsyncHttpRangeReaderError; /// The general 
entrypoint is [`AsyncHttpRangeReader::new`]. Depending on the /// [`CheckSupportMethod`], this will either call [`AsyncHttpRangeReader::initial_tail_request`] or /// [`AsyncHttpRangeReader::initial_head_request`] to send the initial request and then -/// [`AsyncHttpRangeReader::from_tail_response`] or [`AsyncHttpRangeReader::from_head_response`] to +/// [`AsyncHttpRangeReader::from_range_response`] or [`AsyncHttpRangeReader::from_head_response`] to /// initialize the async reader. If you want to apply a caching layer, you can send the initial head /// (or tail) request yourself with your cache headers (e.g. through the /// [http-cache-semantics](https://docs.rs/http-cache-semantics) crate): @@ -158,7 +158,7 @@ impl AsyncHttpRangeReader { ) .await?; let response_headers = response.headers().clone(); - let self_ = Self::from_tail_response(client, response, url, extra_headers).await?; + let self_ = Self::from_range_response(client, response, url, extra_headers).await?; Ok((self_, response_headers)) } CheckSupportMethod::Head => { @@ -197,18 +197,28 @@ impl AsyncHttpRangeReader { Ok(tail_response) } - /// Initialize the reader from [`AsyncHttpRangeReader::initial_tail_request`] (or a user - /// provided range response) + #[deprecated(note = "use `from_range_response` instead")] pub async fn from_tail_response( client: impl Into, tail_request_response: Response, url: Url, extra_headers: HeaderMap, + ) -> Result { + Self::from_range_response(client, tail_request_response, url, extra_headers).await + } + + /// Initialize the reader from [`AsyncHttpRangeReader::initial_tail_request`] (or a user + /// provided range response) + pub async fn from_range_response( + client: impl Into, + response: Response, + url: Url, + extra_headers: HeaderMap, ) -> Result { let client = client.into(); // Get the size of the file from this initial request - let content_range_header = tail_request_response + let content_range_header = response .headers() .get(reqwest::header::CONTENT_RANGE) 
.ok_or(AsyncHttpRangeReaderError::ContentRangeMissing)? @@ -251,7 +261,7 @@ impl AsyncHttpRangeReader { client, url, extra_headers, - Some((tail_request_response, start, end_inclusive + 1)), + Some((response, start, end_inclusive + 1)), memory_map, state_tx, request_rx, @@ -300,7 +310,7 @@ impl AsyncHttpRangeReader { } /// Initialize the reader from [`AsyncHttpRangeReader::initial_head_request`] (or a user - /// provided response the) + /// provided response) pub async fn from_head_response( client: impl Into, head_response: Response, @@ -418,14 +428,14 @@ async fn run_streamer( client: reqwest_middleware::ClientWithMiddleware, url: Url, extra_headers: HeaderMap, - initial_tail_response: Option<(Response, u64, u64)>, + response: Option<(Response, u64, u64)>, mut memory_map: MmapMut, mut state_tx: Sender, mut request_rx: tokio::sync::mpsc::Receiver>, ) { let mut state = StreamerState::default(); - if let Some((response, start, end_exclusive)) = initial_tail_response { + if let Some((response, start, end_exclusive)) = response { // Add the initial range to the state state.requested_ranges.push(start..end_exclusive); @@ -487,6 +497,14 @@ async fn run_streamer( Ok(response) => response, }; + if let Err(err) = + validate_content_range(&response, *range.start(), *range.end(), memory_map.len()) + { + state.error = Some(err); + let _ = state_tx.send(state); + break 'outer; + } + // If the server returns a successful, but non-206 response (e.g., 200), then it // doesn't support range requests (even if the `Accept-Ranges` header is set). 
if response.status() != reqwest::StatusCode::PARTIAL_CONTENT { @@ -511,6 +529,47 @@ async fn run_streamer( } } +/// Ensure that the response range headers match the request range headers +fn validate_content_range( + response: &Response, + expected_start: u64, + expected_end_inclusive: u64, + expected_complete_length: usize, +) -> Result<(), AsyncHttpRangeReaderError> { + let content_range_header = response + .headers() + .get(reqwest::header::CONTENT_RANGE) + .ok_or(AsyncHttpRangeReaderError::ContentRangeMissing)? + .to_str() + .map_err(|_err| AsyncHttpRangeReaderError::ContentRangeMissing)?; + let content_range = ContentRange::parse(content_range_header).ok_or_else(|| { + AsyncHttpRangeReaderError::ContentRangeParser(content_range_header.to_string()) + })?; + let (actual_start, actual_end_inclusive, actual_complete_length) = match content_range { + ContentRange::Bytes(ContentRangeBytes { + first_byte, + last_byte, + complete_length, + }) => (first_byte, last_byte, complete_length), + _ => return Err(AsyncHttpRangeReaderError::HttpRangeRequestUnsupported), + }; + if expected_start != actual_start + || expected_end_inclusive != actual_end_inclusive + || expected_complete_length as u64 != actual_complete_length + { + return Err(AsyncHttpRangeReaderError::RangeMismatch { + expected_start, + expected_end_inclusive, + expected_complete_length, + actual_start, + actual_end_inclusive, + actual_complete_length, + }); + } + + Ok(()) +} + /// Streams the data from the specified response to the memory map updating progress in between. /// Returns `true` if everything went fine, `false` if anything went wrong. The error state, if any, /// is stored in `state_tx` so the "frontend" will consume it. 
@@ -552,9 +611,8 @@ async fn stream_response( // Prevent the server from sending more bytes than advertised in a response if offset > end_exclusive { - state.error = Some(AsyncHttpRangeReaderError::ContentLengthMismatch { + state.error = Some(AsyncHttpRangeReaderError::ResponseTooLong { expected: end_exclusive - start, - actual: offset - start, }); let _ = state_tx.send(state.clone()); return false; @@ -577,7 +635,7 @@ async fn stream_response( // Prevent the server from sending less bytes than advertised in a response if offset != end_exclusive { - state.error = Some(AsyncHttpRangeReaderError::ContentLengthMismatch { + state.error = Some(AsyncHttpRangeReaderError::ResponseTooShort { expected: end_exclusive - start, actual: offset - start, }); @@ -955,25 +1013,83 @@ mod test { /// the memory map. #[tokio::test] async fn test_content_length_response_beyond_content_length() { - let cases = [ + /// Extract the [`AsyncHttpRangeReaderError`] from an `io::Error` returned by `read`. + fn into_range_error(err: std::io::Error) -> AsyncHttpRangeReaderError { + err.into_inner() + .unwrap() + .downcast::<AsyncHttpRangeReaderError>() + .map(|e| *e) + .unwrap() + } + + let cases: Vec<(usize, usize, usize, Option<AsyncHttpRangeReaderError>)> = vec![ // Baseline - (512, 512, 512, true), + (512, 512, 512, None), // The requested and declared length is 512, while the actual content is 1024 - (512, 512, 1024, false), + ( + 512, + 512, + 1024, + Some(AsyncHttpRangeReaderError::ResponseTooLong { expected: 512 }), + ), // The declared total length is 512, but it says and sends a range of 1024 - (512, 1024, 1024, false), - // We ignore the response range end header is lying, we're getting the 512 we ordered - (512, 1024, 512, true), + ( + 512, + 1024, + 1024, + Some(AsyncHttpRangeReaderError::ContentRangeParser( + "bytes 0-1023/512".to_string(), + )), + ), + // The declared total length is 512, but it says a range of 1024 + ( + 512, + 1024, + 512, + Some(AsyncHttpRangeReaderError::ContentRangeParser( + "bytes 0-1023/512".to_string(), + )), 
+ ), // Baseline - (1024, 512, 512, true), + (1024, 512, 512, None), // We requested 512, but we're getting 1024 - (1024, 512, 1024, false), + ( + 1024, + 512, + 1024, + Some(AsyncHttpRangeReaderError::ResponseTooLong { expected: 512 }), + ), // We requested 512, but we're getting 1024 - (1024, 1024, 1024, false), - // We ignore the response range end header is lying, we're getting the 512 we ordered - (1024, 1024, 512, true), + ( + 1024, + 1024, + 1024, + Some(AsyncHttpRangeReaderError::RangeMismatch { + expected_start: 0, + expected_end_inclusive: 511, + expected_complete_length: 1024, + actual_start: 0, + actual_end_inclusive: 1023, + actual_complete_length: 1024, + }), + ), + // We requested 512, but the header says 1024 + ( + 1024, + 1024, + 512, + Some(AsyncHttpRangeReaderError::RangeMismatch { + expected_start: 0, + expected_end_inclusive: 511, + expected_complete_length: 1024, + actual_start: 0, + actual_end_inclusive: 1023, + actual_complete_length: 1024, + }), + ), ]; - for (head_content_length, range_header_length, range_actual_length, is_ok) in cases { + for (head_content_length, range_header_length, range_actual_length, expected_error) in cases + { let url = spawn_mismatch_server( head_content_length, range_header_length, @@ -995,18 +1111,20 @@ mod test { let mut buf = vec![0u8; 512]; let result = reader.read(&mut buf).await; - if is_ok { - assert_matches!( - result, - Ok(_), - "{head_content_length} {range_header_length} {range_actual_length}" - ); - } else { - assert_matches!( - result, - Err(_), - "{head_content_length} {range_header_length} {range_actual_length}" - ); + let label = + format!("{head_content_length} {range_header_length} {range_actual_length}"); + match expected_error { + None => { + assert_matches!(result, Ok(_), "{label}"); + } + Some(expected) => { + // The nested errors don't support `PartialEq`, so compare their `Display` output + assert_eq!( + into_range_error(result.unwrap_err()).to_string(), + expected.to_string(), + "{label}" + ); + } } } }