From f8e64b1ef3526261120d47ada4f508032c4ab518 Mon Sep 17 00:00:00 2001 From: Charles Hall Date: Tue, 17 Nov 2020 21:43:50 -0800 Subject: [PATCH] add RawValue for deferring/omitting serde-ing Unfortunately this requires some unsafe, but fortunately it's only two lines and it's very easy to follow. There are some problems still though, such as not being able to deserialize to a `&RawValue` and `Box<RawValue>`'s deserialize impl doing a roundtrip through `Value`. `RawValue` could probably also be made available on alloc-only no-std platforms, but I'm leaving it at std-only for now just in case I'm wrong. That aside, it does what it's supposed to do, so that's nice. --- src/ser.rs | 235 +++++++++++++++++++++++++++++++++++++++++++++-- src/value/mod.rs | 5 + src/value/raw.rs | 140 ++++++++++++++++++++++++++++ tests/raw.rs | 27 ++++++ 4 files changed, 400 insertions(+), 7 deletions(-) create mode 100644 src/value/raw.rs create mode 100644 tests/raw.rs diff --git a/src/ser.rs b/src/ser.rs index 7016dc34..616f55d4 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -9,11 +9,13 @@ pub use crate::write::{SliceWrite, Write}; use crate::error::{Error, Result}; use half::f16; -use serde::ser::{self, Serialize}; +use serde::ser::{self, Impossible, Serialize}; #[cfg(feature = "std")] use std::io; use crate::tags::{get_tag, CBOR_NEWTYPE_NAME}; +#[cfg(feature = "std")] +use crate::value::raw::CBOR_RAW_VALUE_NAME; /// Serializes a value to a vector. 
#[cfg(any(feature = "std", feature = "alloc"))] @@ -493,8 +495,18 @@ where #[inline] fn serialize_struct(self, _name: &'static str, len: usize) -> Result> { - self.write_u64(5, len as u64)?; - Ok(StructSerializer { ser: self, idx: 0 }) + + #[cfg(feature = "std")] + let raw = _name == CBOR_RAW_VALUE_NAME; + + #[cfg(not(feature = "std"))] + let raw = false; + + if !raw { + self.write_u64(5, len as u64)?; + } + + Ok(StructSerializer { ser: self, idx: 0, raw }) } #[inline] @@ -587,6 +599,7 @@ where pub struct StructSerializer<'a, W> { ser: &'a mut Serializer, idx: u32, + raw: bool, } impl<'a, W> StructSerializer<'a, W> @@ -598,12 +611,24 @@ where where T: ?Sized + ser::Serialize, { - if self.ser.packed { - self.idx.serialize(&mut *self.ser)?; + if !self.raw { + if self.ser.packed { + self.idx.serialize(&mut *self.ser)?; + } else { + key.serialize(&mut *self.ser)?; + } + } + + if self.raw { + #[cfg(feature = "std")] + value.serialize(RawValueBytesEmitter(&mut *self.ser))?; + + #[cfg(not(feature = "std"))] + value.serialize(&mut *self.ser)?; } else { - key.serialize(&mut *self.ser)?; + value.serialize(&mut *self.ser)?; } - value.serialize(&mut *self.ser)?; + self.idx += 1; Ok(()) } @@ -741,3 +766,199 @@ where self.end_inner() } } + +#[cfg(feature = "std")] +struct RawValueBytesEmitter<'a, W: 'a + Write>(&'a mut Serializer); + +#[cfg(feature = "std")] +impl <'a, W: 'a + Write> ser::Serializer for RawValueBytesEmitter<'a, W> { + type Ok = (); + type Error = Error; + + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = Impossible<(), Error>; + type SerializeStruct = Impossible<(), Error>; + type SerializeStructVariant = Impossible<(), Error>; + + // This is the actual useful one + #[inline] + fn serialize_bytes(self, v: &[u8]) -> Result<()> { + self.0.writer.write_all(v).map_err(|e| e.into().into()) + } + + 
// None of these should ever happen + + #[inline] + fn serialize_seq(self, _: Option) -> Result { + panic!("expected RawValue (got seq)") + } + + #[inline] + fn serialize_bool(self, _: bool) -> Result<()> { + panic!("expected RawValue (got bool)") + } + + #[inline] + fn serialize_i8(self, _: i8) -> Result<()> { + panic!("expected RawValue (got i8)") + } + + #[inline] + fn serialize_i16(self, _: i16) -> Result<()> { + panic!("expected RawValue (got i16)") + } + + #[inline] + fn serialize_i32(self, _: i32) -> Result<()> { + panic!("expected RawValue (got i32)") + } + + #[inline] + fn serialize_i64(self, _: i64) -> Result<()> { + panic!("expected RawValue (got i64)") + } + + #[inline] + fn serialize_u8(self, _: u8) -> Result<()> { + panic!("expected RawValue (got u8)") + } + + #[inline] + fn serialize_u16(self, _: u16) -> Result<()> { + panic!("expected RawValue (got u16)") + } + + #[inline] + fn serialize_u32(self, _: u32) -> Result<()> { + panic!("expected RawValue (got u32)") + } + + #[inline] + fn serialize_u64(self, _: u64) -> Result<()> { + panic!("expected RawValue (got u64)") + } + + #[inline] + fn serialize_f32(self, _: f32) -> Result<()> { + panic!("expected RawValue (got f32)") + } + + #[inline] + fn serialize_f64(self, _: f64) -> Result<()> { + panic!("expected RawValue (got f64)") + } + + #[inline] + fn serialize_char(self, _: char) -> Result<()> { + panic!("expected RawValue (got char)") + } + + #[inline] + fn serialize_str(self, _: &str) -> Result<()> { + panic!("expected RawValue (got str)") + } + + #[inline] + fn serialize_none(self) -> Result<()> { + panic!("expected RawValue (got none)") + } + + #[inline] + fn serialize_some(self, _: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + panic!("expected RawValue (got some)") + } + + #[inline] + fn serialize_unit(self) -> Result<()> { + panic!("expected RawValue (got unit)") + } + + #[inline] + fn serialize_unit_struct(self, _: &'static str) -> Result<()> { + panic!("expected RawValue (got unit 
struct)") + } + + #[inline] + fn serialize_unit_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + ) -> Result<()> { + panic!("expected RawValue (got unit variant)") + } + + #[inline] + fn serialize_newtype_struct(self, _: &'static str, _: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + panic!("expected RawValue (got newtype struct)") + } + + #[inline] + fn serialize_newtype_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: &T, + ) -> Result<()> + where + T: ?Sized + Serialize, + { + panic!("expected RawValue (got newtype variant)") + } + + #[inline] + fn serialize_tuple(self, _: usize) -> Result { + panic!("expected RawValue (got tuple)") + } + + #[inline] + fn serialize_tuple_struct( + self, + _: &'static str, + _: usize, + ) -> Result { + panic!("expected RawValue (got tuple struct)") + } + + #[inline] + fn serialize_tuple_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + panic!("expected RawValue (got tuple variant)") + } + + #[inline] + fn serialize_map(self, _: Option) -> Result { + panic!("expected RawValue (got map)") + } + + #[inline] + fn serialize_struct(self, _: &'static str, _len: usize) -> Result { + panic!("expected RawValue (got struct)") + } + + #[inline] + fn serialize_struct_variant( + self, + _: &'static str, + _: u32, + _: &'static str, + _: usize, + ) -> Result { + panic!("expected RawValue (got struct variant)") + } +} diff --git a/src/value/mod.rs b/src/value/mod.rs index 7bd22553..41e61a4a 100644 --- a/src/value/mod.rs +++ b/src/value/mod.rs @@ -2,10 +2,15 @@ mod de; mod ser; +#[cfg(any(feature = "std", feature = "alloc"))] +pub(crate) mod raw; use std::cmp::{Ord, Ordering, PartialOrd}; use std::collections::BTreeMap; +#[cfg(any(feature = "std", feature = "alloc"))] +pub use raw::RawValue; + #[doc(inline)] pub use self::de::from_value; #[doc(inline)] diff --git a/src/value/raw.rs b/src/value/raw.rs new file mode 100644 index 00000000..92d56a6b --- 
/dev/null +++ b/src/value/raw.rs @@ -0,0 +1,140 @@ +use std::fmt::{self, Debug}; +use std::mem; +use crate::error::Error; +use serde::{ + de::{Deserialize, Deserializer}, + ser::{Serialize, SerializeStruct, Serializer}, +}; + +pub(crate) const CBOR_RAW_VALUE_NAME: &str = "\0raw_value"; + +/// Reference to a range of bytes encompassing a single valid CBOR value +/// +/// A `RawValue` can be used to defer parsing parts of a payload until later, +/// or to avoid parsing it at all in the case that part of the payload just +/// needs to be transferred verbatim into a different output object. +#[repr(transparent)] +pub struct RawValue { + cbor: [u8], +} + +impl RawValue { + /// Access the underlying CBOR bytes + pub fn get(&self) -> &[u8] { + &self.cbor + } + + /// Convert a `T: Serialize` into a boxed `RawValue` + pub fn from_serialize(other: &T) -> Result, Error> + where + T: Serialize, + { + let cbor = crate::ser::to_vec(other)?; + Ok(Box::::from(cbor)) + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + (**self).to_owned() + } +} + +impl ToOwned for RawValue { + type Owned = Box; + + fn to_owned(&self) -> Self::Owned { + (&self.cbor).into() + } +} + +impl<'a> From<&'a [u8]> for &'a RawValue { + /// Convert a borrowed `&[u8]` of CBOR data to a borrowed `RawValue` + /// + /// **Note:** this function does not perform any validity checks on the + /// provided input. + fn from(other: &'a [u8]) -> Self { + unsafe { &*(other as *const [u8] as *const RawValue) } + } +} + +impl From for Box +where + T: Into>, +{ + /// Convert a `&[u8]`, `Box<[u8]>`, or `Vec` to an owned `RawValue` + /// + /// **Note:** this function does not perform any validity checks on the + /// provided input. 
+ fn from(other: T) -> Self { + let boxed: Box<[u8]> = other.into(); + unsafe { mem::transmute::, Self>(boxed) } + } +} + +impl<'a> From<&'a RawValue> for Box { + fn from(other: &'a RawValue) -> Self { + Self::from(&other.cbor) + } +} + +impl Debug for RawValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut hex = String::new(); + + for byte in &self.cbor { + hex.push_str(&format!("{:02X}", byte)); + } + + f + .debug_tuple("RawValue") + .field(&format_args!("{}", hex)) + .finish() + } +} + +impl Serialize for RawValue { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct(CBOR_RAW_VALUE_NAME, 1)?; + s.serialize_field("", &RawValueNewtype(&self.cbor))?; + s.end() + } +} + +// This is necessary because serde implements `serialize_seq` handlers on `[T]`, +// and we need it to use `serialize_bytes` instead +struct RawValueNewtype<'a>(&'a [u8]); + +impl<'a> Serialize for RawValueNewtype<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(self.0) + } +} + +impl<'de> Deserialize<'de> for Box { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // TODO don't roundtrip through Value + let x = crate::value::Value::deserialize(deserializer)?; + crate::to_vec(&x).map(|x| x.into()).map_err(|_| panic!()) + } +} + +/* TODO +impl<'de: 'a, 'a> Deserialize<'de> for &'a RawValue { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + todo!() + } +} +*/ diff --git a/tests/raw.rs b/tests/raw.rs new file mode 100644 index 00000000..4264f7b2 --- /dev/null +++ b/tests/raw.rs @@ -0,0 +1,27 @@ +#[macro_use] +extern crate serde_derive; + +#[cfg(feature = "std")] +mod std_tests { + use serde_cbor::value::RawValue; + + #[derive(Serialize, Deserialize)] + #[serde(untagged)] + enum Test { + Known(u32), + Unknown(Box), + } + + #[test] + fn test() { + let test = Test::Known(1337); + let 
known_bytes = serde_cbor::to_vec(&test) + .expect("serialization failed"); + + let test = Test::Unknown(known_bytes.as_slice().into()); + let unknown_bytes = serde_cbor::to_vec(&test) + .expect("serialization failed"); + + assert_eq!(known_bytes, unknown_bytes); + } +}