From 58f6364fb7a1963cc3ad48f8e2cfb7d22d018d97 Mon Sep 17 00:00:00 2001 From: Martin Risell Lilja Date: Wed, 8 Oct 2025 23:24:28 +0200 Subject: [PATCH 1/2] Adds option to ignore EOF errors in certain situations when deserializing. --- Cargo.toml | 2 + src/de.rs | 137 +++++++++++++++++++++++++++++-- src/read.rs | 208 ++++++++++++++++++++++++++++++++++++----------- tests/partial.rs | 70 ++++++++++++++++ 4 files changed, 361 insertions(+), 56 deletions(-) create mode 100644 tests/partial.rs diff --git a/Cargo.toml b/Cargo.toml index aa8e4d87f..78e72883d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,3 +93,5 @@ raw_value = [] # overflow the stack after deserialization has completed, including, but not # limited to, Display and Debug and Drop impls. unbounded_depth = [] + +partial_parsing = [] diff --git a/src/de.rs b/src/de.rs index 4080c54ac..80161edd6 100644 --- a/src/de.rs +++ b/src/de.rs @@ -36,6 +36,12 @@ pub struct Deserializer { single_precision: bool, #[cfg(feature = "unbounded_depth")] disable_recursion_limit: bool, + #[cfg(feature = "partial_parsing")] + allow_partial_list: bool, + #[cfg(feature = "partial_parsing")] + allow_partial_object: bool, + #[cfg(feature = "partial_parsing")] + allow_partial_string: bool, } impl<'de, R> Deserializer @@ -65,6 +71,12 @@ where single_precision: false, #[cfg(feature = "unbounded_depth")] disable_recursion_limit: false, + #[cfg(feature = "partial_parsing")] + allow_partial_list: false, + #[cfg(feature = "partial_parsing")] + allow_partial_object: false, + #[cfg(feature = "partial_parsing")] + allow_partial_string: false, } } } @@ -216,6 +228,77 @@ impl<'de, R: Read<'de>> Deserializer { self.disable_recursion_limit = true; } + /// Allows lists to be partial without resulting in an EOF error. 
+ /// + /// # Examples + /// + /// ``` + /// use serde::Deserialize; + /// use serde_json::{Value, json}; + /// + /// fn main() { + /// let json = r#"["test", "list""#; + /// + /// let mut deserializer = serde_json::Deserializer::from_str(&json); + /// deserializer.allow_partial_list(); + /// let value = Value::deserialize(&mut deserializer).unwrap(); + /// assert_eq!(value, json!(["test", "list"])); + /// } + /// ``` + #[cfg(feature = "partial_parsing")] + #[cfg_attr(docsrs, doc(cfg(feature = "partial_parsing")))] + pub fn allow_partial_list(&mut self) { + self.allow_partial_list = true; + } + + /// Allows objects to be partial without resulting in an EOF error. + /// + /// # Examples + /// + /// ``` + /// use serde::Deserialize; + /// use serde_json::{Value, json}; + /// + /// fn main() { + /// let json = r#"{"test": "value""#; + /// + /// let mut deserializer = serde_json::Deserializer::from_str(&json); + /// deserializer.allow_partial_object(); + /// let value = Value::deserialize(&mut deserializer).unwrap(); + /// assert_eq!(value, json!({"test": "value"})); + /// } + /// ``` + #[cfg(feature = "partial_parsing")] + #[cfg_attr(docsrs, doc(cfg(feature = "partial_parsing")))] + pub fn allow_partial_object(&mut self) { + self.allow_partial_object = true; + } + + /// Allows strings to be partial without resulting in an EOF error. + /// + /// # Examples + /// + /// ``` + /// use serde::Deserialize; + /// use serde_json::{Value, json}; + /// + /// fn main() { + /// // Note that the final quote is part of Rust's raw string syntax, not of our JSON data. 
+ /// let json = r#"{"test": "value"#; + /// + /// let mut deserializer = serde_json::Deserializer::from_str(&json); + /// deserializer.allow_partial_object(); + /// deserializer.allow_partial_string(); + /// let value = Value::deserialize(&mut deserializer).unwrap(); + /// assert_eq!(value, json!({"test": "value"})); + /// } + /// ``` + #[cfg(feature = "partial_parsing")] + #[cfg_attr(docsrs, doc(cfg(feature = "partial_parsing")))] + pub fn allow_partial_string(&mut self) { + self.allow_partial_string = true; + } + pub(crate) fn peek(&mut self) -> Result> { self.read.peek() } @@ -303,7 +386,11 @@ impl<'de, R: Read<'de>> Deserializer { b'"' => { self.eat_char(); self.scratch.clear(); - match self.read.parse_str(&mut self.scratch) { + match self.read.parse_str( + &mut self.scratch, + #[cfg(feature = "partial_parsing")] + self.allow_partial_string, + ) { Ok(s) => de::Error::invalid_type(Unexpected::Str(&s), exp), Err(err) => return err, } @@ -1081,6 +1168,8 @@ impl<'de, R: Read<'de>> Deserializer { } } Some(_) => Err(self.peek_error(ErrorCode::TrailingCharacters)), + #[cfg(feature = "partial_parsing")] + None if self.allow_partial_list => Ok(()), None => Err(self.peek_error(ErrorCode::EofWhileParsingList)), } } @@ -1093,6 +1182,8 @@ impl<'de, R: Read<'de>> Deserializer { } Some(b',') => Err(self.peek_error(ErrorCode::TrailingComma)), Some(_) => Err(self.peek_error(ErrorCode::TrailingCharacters)), + #[cfg(feature = "partial_parsing")] + None if self.allow_partial_object => Ok(()), None => Err(self.peek_error(ErrorCode::EofWhileParsingObject)), } } @@ -1136,7 +1227,10 @@ impl<'de, R: Read<'de>> Deserializer { } b'"' => { self.eat_char(); - tri!(self.read.ignore_str()); + tri!(self.read.ignore_str( + #[cfg(feature = "partial_parsing")] + self.allow_partial_string + )); None } frame @ (b'[' | b'{') => { @@ -1200,7 +1294,10 @@ impl<'de, R: Read<'de>> Deserializer { Some(_) => return Err(self.peek_error(ErrorCode::KeyMustBeAString)), None => return 
Err(self.peek_error(ErrorCode::EofWhileParsingObject)), } - tri!(self.read.ignore_str()); + tri!(self.read.ignore_str( + #[cfg(feature = "partial_parsing")] + self.allow_partial_string + )); match tri!(self.parse_whitespace()) { Some(b':') => self.eat_char(), Some(_) => return Err(self.peek_error(ErrorCode::ExpectedColon)), @@ -1423,7 +1520,11 @@ impl<'de, R: Read<'de>> de::Deserializer<'de> for &mut Deserializer { b'"' => { self.eat_char(); self.scratch.clear(); - match tri!(self.read.parse_str(&mut self.scratch)) { + match tri!(self.read.parse_str( + &mut self.scratch, + #[cfg(feature = "partial_parsing")] + self.allow_partial_string + )) { Reference::Borrowed(s) => visitor.visit_borrowed_str(s), Reference::Copied(s) => visitor.visit_str(s), } @@ -1534,7 +1635,11 @@ impl<'de, R: Read<'de>> de::Deserializer<'de> for &mut Deserializer { b'"' => { self.eat_char(); self.scratch.clear(); - match tri!(self.read.parse_str(&mut self.scratch)) { + match tri!(self.read.parse_str( + &mut self.scratch, + #[cfg(feature = "partial_parsing")] + self.allow_partial_string + )) { Reference::Borrowed(s) => visitor.visit_borrowed_str(s), Reference::Copied(s) => visitor.visit_str(s), } @@ -1643,7 +1748,11 @@ impl<'de, R: Read<'de>> de::Deserializer<'de> for &mut Deserializer { b'"' => { self.eat_char(); self.scratch.clear(); - match tri!(self.read.parse_str_raw(&mut self.scratch)) { + match tri!(self.read.parse_str_raw( + &mut self.scratch, + #[cfg(feature = "partial_parsing")] + self.allow_partial_string + )) { Reference::Borrowed(b) => visitor.visit_borrowed_bytes(b), Reference::Copied(b) => visitor.visit_bytes(b), } @@ -1937,6 +2046,8 @@ impl<'de, 'a, R: Read<'de> + 'a> de::SeqAccess<'de> for SeqAccess<'a, R> { ) -> Result { let peek = match tri!(seq.de.parse_whitespace()) { Some(b) => b, + #[cfg(feature = "partial_parsing")] + None if seq.de.allow_partial_list => return Ok(false), None => { return Err(seq.de.peek_error(ErrorCode::EofWhileParsingList)); } @@ -1988,6 +2099,8 @@ 
impl<'de, 'a, R: Read<'de> + 'a> de::MapAccess<'de> for MapAccess<'a, R> { fn has_next_key<'de, 'a, R: Read<'de> + 'a>(map: &mut MapAccess<'a, R>) -> Result { let peek = match tri!(map.de.parse_whitespace()) { Some(b) => b, + #[cfg(feature = "partial_parsing")] + None if map.de.allow_partial_object => return Ok(false), None => { return Err(map.de.peek_error(ErrorCode::EofWhileParsingObject)); } @@ -2206,7 +2319,11 @@ where { self.de.eat_char(); self.de.scratch.clear(); - match tri!(self.de.read.parse_str(&mut self.de.scratch)) { + match tri!(self.de.read.parse_str( + &mut self.de.scratch, + #[cfg(feature = "partial_parsing")] + self.de.allow_partial_string + )) { Reference::Borrowed(s) => visitor.visit_borrowed_str(s), Reference::Copied(s) => visitor.visit_str(s), } @@ -2252,7 +2369,11 @@ where } _ => { self.de.scratch.clear(); - let s = tri!(self.de.read.parse_str(&mut self.de.scratch)); + let s = tri!(self.de.read.parse_str( + &mut self.de.scratch, + #[cfg(feature = "partial_parsing")] + self.de.allow_partial_string + )); Err(de::Error::invalid_type(Unexpected::Str(&s), &visitor)) } }; diff --git a/src/read.rs b/src/read.rs index f90d9f74a..7851248d9 100644 --- a/src/read.rs +++ b/src/read.rs @@ -64,7 +64,11 @@ pub trait Read<'de>: private::Sealed { /// string until the next quotation mark using the given scratch space if /// necessary. The scratch space is initially empty. #[doc(hidden)] - fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec) -> Result>; + fn parse_str<'s>( + &'s mut self, + scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result>; /// Assumes the previous byte was a quotation mark. 
Parses a JSON-escaped /// string until the next quotation mark using the given scratch space if @@ -76,12 +80,14 @@ pub trait Read<'de>: private::Sealed { fn parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result>; /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped /// string until the next quotation mark but discards the data. #[doc(hidden)] - fn ignore_str(&mut self) -> Result<()>; + fn ignore_str(&mut self, #[cfg(feature = "partial_parsing")] allow_partial: bool) + -> Result<()>; /// Assumes the previous byte was a hex escape sequence ('\u') in a string. /// Parses next hexadecimal sequence. @@ -220,13 +226,20 @@ where scratch: &'s mut Vec, validate: bool, result: F, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result where T: 's, F: FnOnce(&'s Self, &'s [u8]) -> Result, { loop { - let ch = tri!(next_or_eof(self)); + let ch = tri!(next_or_eof( + self, + #[cfg(feature = "partial_parsing")] + allow_partial, + #[cfg(not(feature = "partial_parsing"))] + false + )); if !is_escape(ch, true) { scratch.push(ch); continue; @@ -332,22 +345,49 @@ where } } - fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec) -> Result> { - self.parse_str_bytes(scratch, true, as_str) - .map(Reference::Copied) + fn parse_str<'s>( + &'s mut self, + scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result> { + self.parse_str_bytes( + scratch, + true, + as_str, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) + .map(Reference::Copied) } fn parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result> { - self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) - .map(Reference::Copied) - } - - fn ignore_str(&mut self) -> Result<()> { + self.parse_str_bytes( + scratch, + false, + |_, bytes| Ok(bytes), + #[cfg(feature = "partial_parsing")] + allow_partial, + ) + 
.map(Reference::Copied) + } + + fn ignore_str( + &mut self, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result<()> { loop { - let ch = tri!(next_or_eof(self)); + let ch = tri!(next_or_eof( + self, + #[cfg(feature = "partial_parsing")] + allow_partial, + #[cfg(not(feature = "partial_parsing"))] + false, + )); + if !is_escape(ch, true) { continue; } @@ -366,10 +406,10 @@ where } fn decode_hex_escape(&mut self) -> Result { - let a = tri!(next_or_eof(self)); - let b = tri!(next_or_eof(self)); - let c = tri!(next_or_eof(self)); - let d = tri!(next_or_eof(self)); + let a = tri!(next_or_eof(self, false)); + let b = tri!(next_or_eof(self, false)); + let c = tri!(next_or_eof(self, false)); + let d = tri!(next_or_eof(self, false)); match decode_four_hex_digits(a, b, c, d) { Some(val) => Ok(val), None => error(self, ErrorCode::InvalidEscape), @@ -496,6 +536,7 @@ impl<'a> SliceRead<'a> { scratch: &'s mut Vec, validate: bool, result: F, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result> where T: ?Sized + 's, @@ -506,10 +547,13 @@ impl<'a> SliceRead<'a> { loop { self.skip_to_escape(validate); - if self.index == self.slice.len() { - return error(self, ErrorCode::EofWhileParsingString); - } - match self.slice[self.index] { + let ch = match self.slice.get(self.index) { + Some(&ch) => ch, + #[cfg(feature = "partial_parsing")] + None if allow_partial => b'"', + None => return error(self, ErrorCode::EofWhileParsingString), + }; + match ch { b'"' => { if scratch.is_empty() { // Fast path: return a slice of the raw JSON without any @@ -584,24 +628,47 @@ impl<'a> Read<'a> for SliceRead<'a> { self.index } - fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec) -> Result> { - self.parse_str_bytes(scratch, true, as_str) + fn parse_str<'s>( + &'s mut self, + scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result> { + self.parse_str_bytes( + scratch, + true, + as_str, + #[cfg(feature = "partial_parsing")] + 
allow_partial, + ) } fn parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result> { - self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) - } - - fn ignore_str(&mut self) -> Result<()> { + self.parse_str_bytes( + scratch, + false, + |_, bytes| Ok(bytes), + #[cfg(feature = "partial_parsing")] + allow_partial, + ) + } + + fn ignore_str( + &mut self, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result<()> { loop { self.skip_to_escape(true); - if self.index == self.slice.len() { - return error(self, ErrorCode::EofWhileParsingString); - } - match self.slice[self.index] { + let ch = match self.slice.get(self.index) { + Some(ch) => ch, + #[cfg(feature = "partial_parsing")] + None if allow_partial => return Ok(()), + None => return error(self, ErrorCode::EofWhileParsingString), + }; + match ch { b'"' => { self.index += 1; return Ok(()); @@ -706,24 +773,45 @@ impl<'a> Read<'a> for StrRead<'a> { self.delegate.byte_offset() } - fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec) -> Result> { - self.delegate.parse_str_bytes(scratch, true, |_, bytes| { - // The deserialization input came in as &str with a UTF-8 guarantee, - // and the \u-escapes are checked along the way, so don't need to - // check here. - Ok(unsafe { str::from_utf8_unchecked(bytes) }) - }) + fn parse_str<'s>( + &'s mut self, + scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result> { + self.delegate.parse_str_bytes( + scratch, + true, + |_, bytes| { + // The deserialization input came in as &str with a UTF-8 guarantee, + // and the \u-escapes are checked along the way, so don't need to + // check here. 
+ Ok(unsafe { str::from_utf8_unchecked(bytes) }) + }, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } fn parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result> { - self.delegate.parse_str_raw(scratch) + self.delegate.parse_str_raw( + scratch, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } - fn ignore_str(&mut self) -> Result<()> { - self.delegate.ignore_str() + fn ignore_str( + &mut self, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result<()> { + self.delegate.ignore_str( + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } fn decode_hex_escape(&mut self) -> Result { @@ -787,19 +875,41 @@ where R::byte_offset(self) } - fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec) -> Result> { - R::parse_str(self, scratch) + fn parse_str<'s>( + &'s mut self, + scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result> { + R::parse_str( + self, + scratch, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } fn parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec, + #[cfg(feature = "partial_parsing")] allow_partial: bool, ) -> Result> { - R::parse_str_raw(self, scratch) + R::parse_str_raw( + self, + scratch, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } - fn ignore_str(&mut self) -> Result<()> { - R::ignore_str(self) + fn ignore_str( + &mut self, + #[cfg(feature = "partial_parsing")] allow_partial: bool, + ) -> Result<()> { + R::ignore_str( + self, + #[cfg(feature = "partial_parsing")] + allow_partial, + ) } fn decode_hex_escape(&mut self) -> Result { @@ -837,12 +947,14 @@ fn is_escape(ch: u8, including_control_characters: bool) -> bool { ch == b'"' || ch == b'\\' || (including_control_characters && ch < 0x20) } -fn next_or_eof<'de, R>(read: &mut R) -> Result +fn next_or_eof<'de, R>(read: &mut R, quote_on_eof: bool) -> Result where R: ?Sized + Read<'de>, { match tri!(read.next()) { Some(b) 
=> Ok(b), + #[cfg(feature = "partial_parsing")] + None if quote_on_eof => Ok(b'"'), None => error(read, ErrorCode::EofWhileParsingString), } } @@ -876,7 +988,7 @@ fn parse_escape<'de, R: Read<'de>>( validate: bool, scratch: &mut Vec, ) -> Result<()> { - let ch = tri!(next_or_eof(read)); + let ch = tri!(next_or_eof(read, false)); match ch { b'"' => scratch.push(b'"'), @@ -1026,7 +1138,7 @@ fn ignore_escape<'de, R>(read: &mut R) -> Result<()> where R: ?Sized + Read<'de>, { - let ch = tri!(next_or_eof(read)); + let ch = tri!(next_or_eof(read, false)); match ch { b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {} diff --git a/tests/partial.rs b/tests/partial.rs new file mode 100644 index 000000000..a44714066 --- /dev/null +++ b/tests/partial.rs @@ -0,0 +1,70 @@ +#![cfg(feature = "partial_parsing")] + +use serde::{Deserialize, Serialize}; +use serde_json::{ + from_reader, from_slice, from_str, from_value, json, to_string, to_string_pretty, to_value, + to_vec, Deserializer, Number, Value, +}; +use std::io::Cursor; + +#[test] +fn test_partial_json_object() { + let json = r#"{"foo": ["bar", "baz"], "test": "val""#; + let mut deserializer = serde_json::Deserializer::from_str(&json); + deserializer.allow_partial_object(); + let value = Value::deserialize(&mut deserializer).unwrap(); + assert_eq!( + value, + json!({ + "foo": ["bar", "baz"], + "test": "val" + }) + ) +} + +#[test] +fn test_partial_json_list() { + let json = r#"{"foo": ["bar", "baz""#; + let mut deserializer = serde_json::Deserializer::from_str(&json); + deserializer.allow_partial_object(); + deserializer.allow_partial_list(); + let value = Value::deserialize(&mut deserializer).unwrap(); + assert_eq!( + value, + json!({ + "foo": ["bar", "baz"], + }) + ) +} + +#[test] +fn test_partial_json_string() { + let json = r#"{"test": "val"#; + let mut deserializer = serde_json::Deserializer::from_str(&json); + deserializer.allow_partial_object(); + deserializer.allow_partial_string(); + let value = 
Value::deserialize(&mut deserializer).unwrap(); + assert_eq!( + value, + json!({ + "test": "val" + }) + ) +} + +#[test] +fn test_partial_json_reader() { + let cursor = Cursor::new(r#"{"foo": ["bar", "baz"], "test": "val"#); + let mut deserializer = serde_json::Deserializer::from_reader(cursor); + deserializer.allow_partial_object(); + deserializer.allow_partial_list(); + deserializer.allow_partial_string(); + let value = Value::deserialize(&mut deserializer).unwrap(); + assert_eq!( + value, + json!({ + "foo": ["bar", "baz"], + "test": "val" + }) + ) +} From 97cace0fbde8918d4b6d7b3a31a6eca1a0749e34 Mon Sep 17 00:00:00 2001 From: Martin Risell Lilja Date: Thu, 9 Oct 2025 13:14:24 +0200 Subject: [PATCH 2/2] fix warning --- src/read.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index 7851248d9..4847118c6 100644 --- a/src/read.rs +++ b/src/read.rs @@ -947,7 +947,11 @@ fn is_escape(ch: u8, including_control_characters: bool) -> bool { ch == b'"' || ch == b'\\' || (including_control_characters && ch < 0x20) } -fn next_or_eof<'de, R>(read: &mut R, quote_on_eof: bool) -> Result +fn next_or_eof<'de, R>( + read: &mut R, + #[cfg(feature = "partial_parsing")] quote_on_eof: bool, + #[cfg(not(feature = "partial_parsing"))] _quote_on_eof: bool, +) -> Result where R: ?Sized + Read<'de>, {