From bc2790dbe36326b9e696b60d36d420554631cae3 Mon Sep 17 00:00:00 2001 From: Lilith River Date: Wed, 6 May 2026 04:58:42 -0600 Subject: [PATCH 1/4] feat(limits): add ResourceLimits::for_untrusted_input safer-defaults helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce ResourceLimits::for_untrusted_input() (alias safe_default()) with sane caps for processing untrusted input: 100 MP per frame, 200 MP total, 16384×16384 max dims, 1 GiB memory, 256 MiB input, 65536 frames, 1h duration. ResourceLimits::default() continues to return all-None (no limits) for backwards compatibility — the new helper is the recommended starting point for services accepting bytes from the network or end users. Adds 6 tests verifying the caps reject oversized input, accept typical 4K/12 MP images, and that default() behavior is unchanged. --- src/limits.rs | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/limits.rs b/src/limits.rs index 2a10847..cc62e63 100644 --- a/src/limits.rs +++ b/src/limits.rs @@ -271,6 +271,63 @@ impl ResourceLimits { Self::default() } + /// Safer default caps for processing **untrusted input**. + /// + /// [`ResourceLimits::default()`] has every field `None` (no limits) for + /// backwards compatibility — that is fine for trusted, controlled input + /// but is **resource-management DoS by default** when feeding bytes + /// from the network or end users. Prefer this helper when limits are + /// not explicitly tuned by the caller. + /// + /// Caps applied (chosen conservatively for typical web image workloads): + /// - `max_pixels`: 100 MP per frame (covers e.g. 12000 × 8000 stills) + /// - `max_total_pixels`: 200 MP across all frames of an animation + /// - `max_width` / `max_height`: 16384 each (typical decoder hardware ceiling) + /// - `max_memory_bytes`: 1 GiB + /// - `max_input_bytes`: 256 MiB + /// - `max_frames`: 65 536 + /// - `max_animation_ms`: 1 hour + /// + /// Threading is left at the default ([`ThreadingPolicy::Parallel`]). + /// + /// These are intentionally **generous** — large enough that legitimate + /// inputs are not rejected, small enough that an adversarial input + /// cannot consume the whole machine. Tighten further for your specific + /// workload (e.g. a thumbnail server may want `max_pixels = 4_000_000`). + /// + /// # Example + /// + /// ``` + /// use zencodec::ResourceLimits; + /// + /// // Recommended starting point for a public image-decode service. + /// let limits = ResourceLimits::for_untrusted_input(); + /// assert!(limits.max_pixels.is_some()); + /// assert!(limits.max_input_bytes.is_some()); + /// ``` + pub fn for_untrusted_input() -> Self { + Self { + max_pixels: Some(100_000_000), + max_total_pixels: Some(200_000_000), + max_width: Some(16384), + max_height: Some(16384), + max_memory_bytes: Some(1024 * 1024 * 1024), + max_input_bytes: Some(256 * 1024 * 1024), + max_output_bytes: None, + max_frames: Some(65_536), + max_animation_ms: Some(60 * 60 * 1000), + threading: ThreadingPolicy::Parallel, + } + } + + /// Alias for [`for_untrusted_input`](Self::for_untrusted_input). + /// + /// Provided for callers who prefer the `safe_default` naming convention + /// (mirrors the pattern used in some other crates). + pub fn safe_default() -> Self { + Self::for_untrusted_input() + } + /// Set maximum total pixels. pub fn with_max_pixels(mut self, max: u64) -> Self { self.max_pixels = Some(max); @@ -940,6 +997,76 @@ mod tests { ); } + #[test] + fn for_untrusted_input_has_caps() { + let limits = ResourceLimits::for_untrusted_input(); + assert!(limits.has_any()); + assert!(limits.max_pixels.is_some()); + assert!(limits.max_total_pixels.is_some()); + assert!(limits.max_width.is_some()); + assert!(limits.max_height.is_some()); + assert!(limits.max_memory_bytes.is_some()); + assert!(limits.max_input_bytes.is_some()); + assert!(limits.max_frames.is_some()); + assert!(limits.max_animation_ms.is_some()); + } + + #[test] + fn for_untrusted_input_rejects_oversized_image() { + use crate::{ImageFormat, ImageInfo}; + let limits = ResourceLimits::for_untrusted_input(); + // 30000×30000 = 900 MP, far above the 100 MP per-frame cap. + let info = ImageInfo::new(30000, 30000, ImageFormat::Jpeg); + let err = limits.check_image_info(&info).unwrap_err(); + // Width is the first cap we trip (16384 < 30000). + assert!(matches!(err, LimitExceeded::Width { .. })); + + // Smaller width but still huge pixel count. + let info = ImageInfo::new(10000, 12000, ImageFormat::Jpeg); + let err = limits.check_image_info(&info).unwrap_err(); + assert!(matches!(err, LimitExceeded::Pixels { .. })); + } + + #[test] + fn for_untrusted_input_accepts_typical_image() { + use crate::{ImageFormat, ImageInfo}; + let limits = ResourceLimits::for_untrusted_input(); + // 4K image — should pass. + let info = ImageInfo::new(3840, 2160, ImageFormat::Jpeg); + assert!(limits.check_image_info(&info).is_ok()); + // 12 MP photo — should pass. + let info = ImageInfo::new(4000, 3000, ImageFormat::Jpeg); + assert!(limits.check_image_info(&info).is_ok()); + } + + #[test] + fn for_untrusted_input_rejects_oversized_input() { + let limits = ResourceLimits::for_untrusted_input(); + // 1 GiB input is definitely too big. + assert!(limits.check_input_size(1024 * 1024 * 1024).is_err()); + // 16 MiB input is fine. + assert!(limits.check_input_size(16 * 1024 * 1024).is_ok()); + } + + #[test] + fn safe_default_alias_matches_for_untrusted_input() { + assert_eq!( + ResourceLimits::safe_default(), + ResourceLimits::for_untrusted_input() + ); + } + + #[test] + fn default_remains_no_limits_for_backwards_compat() { + // Per the crate's stability guarantee, ResourceLimits::default() + // continues to mean "no limits" — switching to safer caps is + // opt-in via for_untrusted_input(). + let limits = ResourceLimits::default(); + assert!(!limits.has_any()); + assert!(limits.max_pixels.is_none()); + assert!(limits.max_input_bytes.is_none()); + } + #[test] fn total_pixels_display() { use alloc::format; From 468073d22bc1239b1b4b3e12e7cfd083097315f9 Mon Sep 17 00:00:00 2001 From: Lilith River Date: Wed, 6 May 2026 05:00:36 -0600 Subject: [PATCH 2/4] docs(policy): promote DecodePolicy::strict for untrusted input --- src/policy.rs | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/policy.rs b/src/policy.rs index 9d34d50..c29d907 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -4,13 +4,32 @@ //! All fields default to `None`, meaning the codec uses its own default. //! `Some(true)` explicitly allows; `Some(false)` explicitly denies. //! +//! # Choosing a starting point +//! +//! For **untrusted input** (network bytes, user uploads, third-party data), +//! prefer [`DecodePolicy::strict()`] as the starting point and selectively +//! re-enable features the application actually needs. This is the +//! recommended default for any service that processes bytes it did not +//! produce itself. Pair this with +//! [`ResourceLimits::for_untrusted_input`](crate::ResourceLimits::for_untrusted_input) +//! for resource caps. +//! +//! For **trusted input** (your own pipeline, internal tools), use +//! [`DecodePolicy::none()`] (all defaults) or [`DecodePolicy::permissive()`] +//! to keep all features available. +//! //! # Named levels //! +//! - [`DecodePolicy::strict()`] — **recommended for untrusted input.** +//! Minimal attack surface: no ICC/EXIF/XMP extraction, no progressive, +//! no animation, strict spec parsing, no truncated input. //! - [`DecodePolicy::none()`] / [`EncodePolicy::none()`] — all defaults -//! - [`DecodePolicy::strict()`] — minimal attack surface (no metadata, no progressive, no animation) -//! - [`DecodePolicy::permissive()`] — allow everything +//! (each codec picks its own behavior). +//! - [`DecodePolicy::permissive()`] — allow everything (use only for +//! trusted input). //! -//! Individual flags can be overridden after constructing a named level. +//! Individual flags can be overridden after constructing a named level +//! — e.g. `DecodePolicy::strict().with_allow_icc(true)` for strict-but-with-color. /// Decode security policy. /// @@ -18,6 +37,15 @@ /// untrusted input. Codecs check these flags and skip or reject /// accordingly; unrecognized flags are ignored. /// +/// # Recommended: start strict for untrusted input +/// +/// When decoding bytes from the network, end users, or any third-party +/// source, use [`DecodePolicy::strict()`] as the starting point and +/// selectively enable the features the application actually needs. +/// `DecodePolicy::default()` returns [`DecodePolicy::none()`] (all +/// `None` — each codec's own default applies) for backwards compatibility, +/// but this is **not** the safest choice for untrusted input. +/// /// # Example /// /// ``` From 141238f7a7b275c977073516a4232d97ceffeb61 Mon Sep 17 00:00:00 2001 From: Lilith River Date: Wed, 6 May 2026 05:00:36 -0600 Subject: [PATCH 3/4] refactor(metadata): delegate parse_exif_orientation to helpers::exif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The metadata.rs orientation parser was a looser duplicate of helpers/exif.rs — it only read u16 at +8 regardless of TIFF type field, missing TIFF_LONG (type 4) values that store data in different bytes for big-endian, and it lacked the IFD entry-count cap and tag-sort early exit that helpers/exif.rs provides as DoS protection. Have metadata::parse_exif_orientation delegate to the canonical impl in helpers/exif.rs. Adds two tests verifying TIFF_LONG type is now handled correctly (BE and LE) — the previous parser silently missed those values. --- src/metadata.rs | 91 ++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/src/metadata.rs b/src/metadata.rs index 2ea3b15..00d9d57 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -153,51 +153,17 @@ impl From<&crate::ImageInfo> for Metadata { /// Parse the EXIF Orientation tag (0x0112) from a TIFF/EXIF blob. /// +/// Delegates to the canonical implementation in +/// [`helpers::parse_exif_orientation`](crate::helpers::parse_exif_orientation), +/// which performs full bounds-checking, supports both `SHORT` and `LONG` +/// TIFF types, validates the TIFF magic, and caps IFD entry count to +/// prevent DoS from malformed data. +/// /// Handles both little-endian (`II*\0`) and big-endian (`MM\0*`) byte -/// orders. Walks IFD0 and returns the first Orientation entry found. -/// Returns `None` if the blob is malformed or no Orientation tag exists. +/// orders. Returns `None` if the blob is malformed or no Orientation +/// tag exists. fn parse_exif_orientation(blob: &[u8]) -> Option { - if blob.len() < 8 { - return None; - } - let little_endian = match &blob[0..4] { - [b'I', b'I', 0x2a, 0x00] => true, - [b'M', b'M', 0x00, 0x2a] => false, - _ => return None, - }; - let read_u16 = |offset: usize| -> Option { - let bytes = blob.get(offset..offset + 2)?; - Some(if little_endian { - u16::from_le_bytes([bytes[0], bytes[1]]) - } else { - u16::from_be_bytes([bytes[0], bytes[1]]) - }) - }; - let read_u32 = |offset: usize| -> Option { - let bytes = blob.get(offset..offset + 4)?; - Some(if little_endian { - u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) - } else { - u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) - }) - }; - - let ifd0_offset = read_u32(4)? as usize; - let entry_count = read_u16(ifd0_offset)? as usize; - let entries_start = ifd0_offset.checked_add(2)?; - - for i in 0..entry_count { - let entry_offset = entries_start.checked_add(i.checked_mul(12)?)?; - let tag = read_u16(entry_offset)?; - if tag == 0x0112 { - // Orientation tag: type SHORT (3), count 1, value inline at +8. - let value = read_u16(entry_offset + 8)?; - if value > 0 && value <= 8 { - return Orientation::from_exif(value as u8); - } - } - } - None + crate::helpers::parse_exif_orientation(blob) } #[cfg(test)] @@ -372,6 +338,45 @@ mod tests { assert_eq!(meta.orientation, Orientation::Rotate270); } + /// Build TIFF with the orientation tag stored as TIFF_LONG (type 4) + /// instead of SHORT (type 3). The previous loose parser in this file + /// only read u16 at +8 regardless of type, so for big-endian LONG it + /// would read the high zero bytes and miss the value. The delegated + /// helper handles both types correctly. + fn build_exif_with_long_orientation(value: u32, big_endian: bool) -> alloc::vec::Vec { + let mut v = alloc::vec::Vec::new(); + if big_endian { + v.extend_from_slice(b"MM\x00\x2a"); + v.extend_from_slice(&8u32.to_be_bytes()); + v.extend_from_slice(&1u16.to_be_bytes()); + v.extend_from_slice(&0x0112u16.to_be_bytes()); + v.extend_from_slice(&4u16.to_be_bytes()); // type = LONG + v.extend_from_slice(&1u32.to_be_bytes()); + v.extend_from_slice(&value.to_be_bytes()); + } else { + v.extend_from_slice(b"II\x2a\x00"); + v.extend_from_slice(&8u32.to_le_bytes()); + v.extend_from_slice(&1u16.to_le_bytes()); + v.extend_from_slice(&0x0112u16.to_le_bytes()); + v.extend_from_slice(&4u16.to_le_bytes()); // type = LONG + v.extend_from_slice(&1u32.to_le_bytes()); + v.extend_from_slice(&value.to_le_bytes()); + } + v + } + + #[test] + fn parse_exif_orientation_accepts_long_type_be() { + let blob = build_exif_with_long_orientation(6, true); + assert_eq!(parse_exif_orientation(&blob), Some(Orientation::Rotate90)); + } + + #[test] + fn parse_exif_orientation_accepts_long_type_le() { + let blob = build_exif_with_long_orientation(8, false); + assert_eq!(parse_exif_orientation(&blob), Some(Orientation::Rotate270)); + } + #[test] fn with_exif_does_not_override_explicit_orientation() { let blob = build_minimal_exif_with_orientation(6, false); From 82673833f3ec90190b1e2aba723594f50d77b050 Mon Sep 17 00:00:00 2001 From: Lilith River Date: Wed, 6 May 2026 05:01:45 -0600 Subject: [PATCH 4/4] fix(traits): debug_assert dyn-job setters not called after consumption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DynDecodeJob and DynEncodeJob shims silently no-op'd if a setter was called after the inner job was moved out by an into_* method. This is unreachable through the public API (every into_* method takes Box), but the silent fallback masked the bug if the path were ever reached through internal misuse or a future refactor. Consolidate the setter bodies through a try_apply helper that fires debug_assert! when the inner job is missing, so any future regression that exposes the consumed-after path fails loudly in tests and dev builds. Release-build behaviour is unchanged (silent no-op preserved) to avoid any chance of a panic in production code. The DynDecodeJob/DynEncodeJob trait signatures are unchanged — these remain infallible setters for backwards compatibility per the audit's guidance to avoid breaking changes to public types in this foundation crate. --- CHANGELOG.md | 29 +++++++++++++++++++++ src/traits/dyn_decoding.rs | 53 ++++++++++++++++++++------------------ src/traits/dyn_encoding.rs | 45 ++++++++++++++++++-------------- 3 files changed, 82 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca5d4e1..5321904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,35 @@ All notable changes to zencodec are documented here. ## [Unreleased] +### Added + +- `ResourceLimits::for_untrusted_input()` (with `safe_default()` alias) — a + safer starting point than `ResourceLimits::default()` for services + accepting bytes from the network or end users. Caps: 100 MP per frame, + 200 MP across an animation, 16384×16384 max dims, 1 GiB memory, 256 MiB + input, 65536 frames, 1 hour duration. `ResourceLimits::default()` + continues to mean "no limits" for backwards compatibility (bc2790d). + +### Changed + +- `metadata::parse_exif_orientation` now delegates to the canonical + `helpers::parse_exif_orientation`. The previous local implementation was + a looser duplicate that read the orientation value as `u16` regardless + of TIFF type, missing `TIFF_LONG` (type 4) values for big-endian inputs + and lacking the IFD entry-count cap and tag-sort early-exit DoS + protections present in the helper (141238f). +- `DynDecodeJob` and `DynEncodeJob` shim setters now `debug_assert!` when + called after the inner job has been consumed by an `into_*` method, + catching the (structurally unreachable) misuse path loudly in tests and + dev builds. Release behaviour is unchanged (silent no-op). Trait + signatures are unchanged (a5b782e). + +### Documentation + +- Module-level docs in `policy.rs` now recommend `DecodePolicy::strict()` + as the starting point for untrusted input, paired with + `ResourceLimits::for_untrusted_input` (468073d). + ## [0.1.20] - 2026-04-21 ### Added diff --git a/src/traits/dyn_decoding.rs b/src/traits/dyn_decoding.rs index dfe61a0..97ae552 100644 --- a/src/traits/dyn_decoding.rs +++ b/src/traits/dyn_decoding.rs @@ -278,15 +278,32 @@ impl DecodeJobShim { .ok_or_else(|| "DecodeJobShim: job already consumed (double take)".into()) } - fn put(&mut self, job: J) { - self.0 = Some(job); - } - fn as_ref(&self) -> Result<&J, BoxedError> { self.0 .as_ref() .ok_or_else(|| "DecodeJobShim: job already consumed (double take)".into()) } + + /// Apply `f` to the inner job if it has not been consumed. + /// + /// `DynDecodeJob` setters return `()` for ergonomics and backwards + /// compatibility, so a setter call after the inner job has been + /// consumed by an `into_*` method has no return path. We + /// `debug_assert!` here so the misuse fires in tests and dev builds; + /// in release the call still silently no-ops, but any subsequent + /// `into_*` call will return the "job already consumed" error so the + /// problem surfaces at the next observable boundary. + fn try_apply J>(&mut self, f: F) { + match self.0.take() { + Some(job) => self.0 = Some(f(job)), + None => { + debug_assert!( + false, + "DynDecodeJob setter called after the inner job was consumed by an into_* method; the call has no effect" + ); + } + } + } } impl<'a, J> DynDecodeJob<'a> for DecodeJobShim @@ -296,21 +313,15 @@ where J::AnimationFrameDec: Send, { fn set_stop(&mut self, stop: StopToken) { - if let Ok(job) = self.take() { - self.put(job.with_stop(stop)); - } + self.try_apply(|job| job.with_stop(stop)); } fn set_limits(&mut self, limits: ResourceLimits) { - if let Ok(job) = self.take() { - self.put(job.with_limits(limits)); - } + self.try_apply(|job| job.with_limits(limits)); } fn set_policy(&mut self, policy: crate::DecodePolicy) { - if let Ok(job) = self.take() { - self.put(job.with_policy(policy)); - } + self.try_apply(|job| job.with_policy(policy)); } fn probe(&self, data: &[u8]) -> Result { @@ -326,27 +337,19 @@ where } fn set_crop_hint(&mut self, x: u32, y: u32, width: u32, height: u32) { - if let Ok(job) = self.take() { - self.put(job.with_crop_hint(x, y, width, height)); - } + self.try_apply(|job| job.with_crop_hint(x, y, width, height)); } fn set_orientation(&mut self, hint: OrientationHint) { - if let Ok(job) = self.take() { - self.put(job.with_orientation(hint)); - } + self.try_apply(|job| job.with_orientation(hint)); } fn set_start_frame_index(&mut self, index: u32) { - if let Ok(job) = self.take() { - self.put(job.with_start_frame_index(index)); - } + self.try_apply(|job| job.with_start_frame_index(index)); } fn set_extract_gain_map(&mut self, extract: bool) { - if let Ok(job) = self.take() { - self.put(job.with_extract_gain_map(extract)); - } + self.try_apply(|job| job.with_extract_gain_map(extract)); } fn extensions(&self) -> Option<&dyn Any> { diff --git a/src/traits/dyn_encoding.rs b/src/traits/dyn_encoding.rs index da8f454..40bb89d 100644 --- a/src/traits/dyn_encoding.rs +++ b/src/traits/dyn_encoding.rs @@ -261,8 +261,25 @@ impl EncodeJobShim { .ok_or_else(|| "EncodeJobShim: job already consumed (double take)".into()) } - fn put(&mut self, job: J) { - self.0 = Some(job); + /// Apply `f` to the inner job if it has not been consumed. + /// + /// `DynEncodeJob` setters return `()` for ergonomics and backwards + /// compatibility, so a setter call after the inner job has been + /// consumed by an `into_*` method has no return path. We + /// `debug_assert!` here so the misuse fires in tests and dev builds; + /// in release the call still silently no-ops, but any subsequent + /// `into_*` call will return the "job already consumed" error so the + /// problem surfaces at the next observable boundary. + fn try_apply J>(&mut self, f: F) { + match self.0.take() { + Some(job) => self.0 = Some(f(job)), + None => { + debug_assert!( + false, + "DynEncodeJob setter called after the inner job was consumed by an into_* method; the call has no effect" + ); + } + } } } @@ -273,39 +290,27 @@ where J::AnimationFrameEnc: AnimationFrameEncoder, { fn set_stop(&mut self, stop: StopToken) { - if let Ok(job) = self.take() { - self.put(job.with_stop(stop)); - } + self.try_apply(|job| job.with_stop(stop)); } fn set_limits(&mut self, limits: ResourceLimits) { - if let Ok(job) = self.take() { - self.put(job.with_limits(limits)); - } + self.try_apply(|job| job.with_limits(limits)); } fn set_policy(&mut self, policy: crate::EncodePolicy) { - if let Ok(job) = self.take() { - self.put(job.with_policy(policy)); - } + self.try_apply(|job| job.with_policy(policy)); } fn set_metadata(&mut self, meta: Metadata) { - if let Ok(job) = self.take() { - self.put(job.with_metadata(meta)); - } + self.try_apply(|job| job.with_metadata(meta)); } fn set_canvas_size(&mut self, width: u32, height: u32) { - if let Ok(job) = self.take() { - self.put(job.with_canvas_size(width, height)); - } + self.try_apply(|job| job.with_canvas_size(width, height)); } fn set_loop_count(&mut self, count: Option) { - if let Ok(job) = self.take() { - self.put(job.with_loop_count(count)); - } + self.try_apply(|job| job.with_loop_count(count)); } fn extensions(&self) -> Option<&dyn Any> {