Skip to content

Commit d09855a

Browse files
committed
Raise an EncodingError from Utf8ValidatingReader
Do all of the plumbing necessary to return EncodingError directly from Utf8ValidatingReader, by wrapping it in an io::Error of kind io::ErrorKind::InvalidData and downcasting it back when converting to Error. The Utf8 variant of EncodingError now holds an error enum, as we cannot create instances of Utf8Error ourselves.
1 parent 0def72a commit d09855a

4 files changed

Lines changed: 93 additions & 15 deletions

File tree

src/encoding.rs

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,52 @@ pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
1919
#[cfg(feature = "encoding")]
2020
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
2121

22+
/// Describes why a byte stream failed UTF-8 validation.
///
/// [`std::str::Utf8Error`] cannot be constructed outside the standard library, so this
/// enum wraps it and adds variants that can be built directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Utf8ValidationError {
    /// A failure reported by the standard library's UTF-8 validation.
    Utf8(Utf8Error),
    /// The input contains a byte sequence that is not valid UTF-8.
    InvalidSequence {
        /// Number of bytes in the invalid sequence.
        error_len: usize,
    },
    /// The stream ended with an incomplete UTF-8 sequence.
    IncompleteSequence,
}
37+
38+
impl From<Utf8Error> for Utf8ValidationError {
39+
#[inline]
40+
fn from(e: Utf8Error) -> Self {
41+
Self::Utf8(e)
42+
}
43+
}
44+
45+
impl std::fmt::Display for Utf8ValidationError {
46+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47+
match self {
48+
Self::Utf8(e) => write!(f, "{}", e),
49+
Self::InvalidSequence { error_len } => {
50+
write!(f, "invalid UTF-8 sequence of {} bytes", error_len)
51+
}
52+
Self::IncompleteSequence => {
53+
write!(f, "incomplete UTF-8 sequence at end of stream")
54+
}
55+
}
56+
}
57+
}
58+
59+
impl std::error::Error for Utf8ValidationError {
60+
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
61+
match self {
62+
Self::Utf8(e) => Some(e),
63+
_ => None,
64+
}
65+
}
66+
}
67+
2268
/// An error when decoding or encoding
2369
///
2470
/// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`]
@@ -28,7 +74,7 @@ pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
2874
#[non_exhaustive]
2975
pub enum EncodingError {
3076
/// Input was not valid UTF-8
31-
Utf8(Utf8Error),
77+
Utf8(Utf8ValidationError),
3278
/// Input did not adhere to the given encoding
3379
#[cfg(feature = "encoding")]
3480
Other(&'static Encoding),
@@ -37,6 +83,13 @@ pub enum EncodingError {
3783
impl From<Utf8Error> for EncodingError {
3884
#[inline]
3985
fn from(e: Utf8Error) -> Self {
86+
Self::Utf8(e.into())
87+
}
88+
}
89+
90+
impl From<Utf8ValidationError> for EncodingError {
91+
#[inline]
92+
fn from(e: Utf8ValidationError) -> Self {
4093
Self::Utf8(e)
4194
}
4295
}
@@ -406,7 +459,7 @@ impl<R: Read> Read for Utf8ValidatingReader<R> {
406459
if valid_up_to == 0 {
407460
return Err(io::Error::new(
408461
io::ErrorKind::InvalidData,
409-
format!("invalid UTF-8 sequence of {} bytes", error_len),
462+
EncodingError::Utf8(Utf8ValidationError::InvalidSequence { error_len }),
410463
));
411464
}
412465
// Write valid portion before the error
@@ -445,7 +498,7 @@ impl<R: Read> Read for Utf8ValidatingReader<R> {
445498
// EOF with incomplete UTF-8 sequence
446499
return Err(io::Error::new(
447500
io::ErrorKind::InvalidData,
448-
"incomplete UTF-8 sequence at end of stream",
501+
EncodingError::Utf8(Utf8ValidationError::IncompleteSequence),
449502
));
450503
}
451504
}

src/errors.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::escape::EscapeError;
55
use crate::events::attributes::AttrError;
66
use crate::name::{NamespaceError, QName};
77
use std::fmt;
8-
use std::io::Error as IoError;
8+
use std::io::{Error as IoError, ErrorKind as IoErrorKind};
99
use std::sync::Arc;
1010

1111
/// An error returned if parsed document does not correspond to the XML grammar,
@@ -221,7 +221,14 @@ impl From<IoError> for Error {
221221
/// Creates a new `Error::Io` from the given error
222222
#[inline]
223223
fn from(error: IoError) -> Error {
224-
Self::Io(Arc::new(error))
224+
match error.kind() {
225+
IoErrorKind::InvalidData => Self::Encoding(
226+
error
227+
.downcast::<EncodingError>()
228+
.expect("Got an IoError::InvalidData, but it wasn't downcastable to EncodingError?"),
229+
),
230+
_ => Self::Io(Arc::new(error)),
231+
}
225232
}
226233
}
227234

src/events/attributes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,10 @@ impl<'a> Attribute<'a> {
151151
use std::str::from_utf8;
152152

153153
let decoded = match &self.value {
154-
Cow::Borrowed(bytes) => Cow::Borrowed(from_utf8(bytes).map_err(EncodingError::Utf8)?),
154+
Cow::Borrowed(bytes) => Cow::Borrowed(from_utf8(bytes).map_err(EncodingError::from)?),
155155
// Convert to owned, because otherwise Cow will be bound with wrong lifetime
156156
Cow::Owned(bytes) => {
157-
Cow::Owned(from_utf8(bytes).map_err(EncodingError::Utf8)?.to_owned())
157+
Cow::Owned(from_utf8(bytes).map_err(EncodingError::from)?.to_owned())
158158
}
159159
};
160160

tests/encodings.rs

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,34 +32,52 @@ mod decode {
3232
#[cfg(not(feature = "encoding"))]
3333
mod validate {
3434
use super::*;
35+
use quick_xml::encoding::{EncodingError, Utf8ValidationError};
36+
use quick_xml::errors::Error;
3537

3638
/// The validating reader must reject a UTF-16LE document with a typed
/// `InvalidSequence` encoding error.
#[test]
fn test_validation_fails_on_utf16le_input() {
    let src = include_bytes!("documents/encoding/utf16le-bom.xml").as_ref();
    let mut buf = vec![];
    let mut r = Reader::from_reader_validating(src);
    r.config_mut().trim_text(true);

    // A single read decides the outcome: success is a test failure and an error is
    // what gets inspected, so no loop is needed.
    let result = match r.read_event_into(&mut buf) {
        Ok(_) => panic!("Expected encoding error, didn't get one"),
        Err(e) => e,
    };

    // Assert that we got the specific error type
    match result {
        Error::Encoding(EncodingError::Utf8(Utf8ValidationError::InvalidSequence {
            error_len,
        })) => {
            assert_eq!(error_len, 1, "Expected 1-byte invalid sequence (0xFF)");
        }
        other => panic!("Expected EncodingError::Utf8(InvalidSequence), got: {:?}", other),
    }
}
5060

5161
/// The validating reader must reject a UTF-16BE document with a typed
/// `InvalidSequence` encoding error.
#[test]
fn test_validation_fails_on_utf16be_input() {
    let src = include_bytes!("documents/encoding/utf16be-bom.xml").as_ref();
    let mut buf = vec![];
    let mut r = Reader::from_reader_validating(src);
    r.config_mut().trim_text(true);

    // A single read decides the outcome: success is a test failure and an error is
    // what gets inspected, so no loop is needed.
    let result = match r.read_event_into(&mut buf) {
        Ok(_) => panic!("Expected encoding error, didn't get one"),
        Err(e) => e,
    };

    // Assert that we got the specific error type
    match result {
        Error::Encoding(EncodingError::Utf8(Utf8ValidationError::InvalidSequence {
            error_len,
        })) => {
            assert_eq!(error_len, 1, "Expected 1-byte invalid sequence (0xFE)");
        }
        other => panic!("Expected EncodingError::Utf8(InvalidSequence), got: {:?}", other),
    }
}
6583
}

0 commit comments

Comments
 (0)