From 1dd71a0de6b6c99f2c2dbbc69106bb87b1e2cdcb Mon Sep 17 00:00:00 2001 From: Farhan Syah Date: Sat, 18 Apr 2026 20:12:25 +0800 Subject: [PATCH 1/5] feat: opt-in schema evolution via `#[msgpack(default)]` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the following capabilities while preserving strict-by-default and upstream codegen for untouched structs: - `Read::skip_value()` — trait method that consumes one MessagePack value of any type; implemented on `SliceReader` and `IOReader`. Respects the existing `MAX_DEPTH` check. Independently useful for log parsers and protocol proxies. - Field-level `#[msgpack(default)]` and `#[msgpack(default = "path")]` — on read, a missing key/slot is filled with `Default::default()` or the named function. Mirrors serde's surface, pure codegen. - Implicit evolution in map mode: structs with any `#[msgpack(default)]` field switch to a tolerant decoder (read_map_len + iterate + skip unknown keys via skip_value). No new struct-level attribute. - Tolerant array decode: structs with trailing `default` fields accept shorter arrays and skip trailing extras. - Compile-error safety net: array-mode structs with a mid-struct default are rejected at derive time, pointing to the exact field. Strict structs (no `default` fields) get byte-for-byte identical codegen to 0.4.1 — check_map_len / check_array_len fast-fail preserved. All 153 existing tests pass. --- zerompk/src/read.rs | 269 ++++++++++++++++++++++++++++++++++++++ zerompk_derive/src/lib.rs | 238 ++++++++++++++++++++++++++++----- 2 files changed, 475 insertions(+), 32 deletions(-) diff --git a/zerompk/src/read.rs b/zerompk/src/read.rs index c25c2a8..7a45486 100644 --- a/zerompk/src/read.rs +++ b/zerompk/src/read.rs @@ -152,6 +152,11 @@ pub trait Read<'de> { Err(Error::MapLengthMismatch { expected, actual }) } } + + /// Consumes exactly one MessagePack value from the input, regardless of + /// its type. 
Used by `#[msgpack(allow_unknown_fields)]` map-mode decoding to + /// skip over unknown keys' values without needing to know their type. + fn skip_value(&mut self) -> Result<()>; } pub struct SliceReader<'de> { @@ -900,6 +905,143 @@ impl<'de> Read<'de> for SliceReader<'de> { _ => Err(Error::InvalidMarker(byte)), } } + + fn skip_value(&mut self) -> Result<()> { + self.increment_depth()?; + let byte = self.peek_byte()?; + match byte { + POS_FIXINT_START..=POS_FIXINT_END | NEG_FIXINT_START..=NEG_FIXINT_END => { + self.pos += 1; + } + NIL_MARKER | TRUE_MARKER | FALSE_MARKER => { + self.pos += 1; + } + UINT8_MARKER | INT8_MARKER => { + self.pos += 1; + self.take_slice(1)?; + } + UINT16_MARKER | INT16_MARKER => { + self.pos += 1; + self.take_slice(2)?; + } + UINT32_MARKER | INT32_MARKER | FLOAT32_MARKER => { + self.pos += 1; + self.take_slice(4)?; + } + UINT64_MARKER | INT64_MARKER | FLOAT64_MARKER => { + self.pos += 1; + self.take_slice(8)?; + } + FIXSTR_START..=FIXSTR_END => { + let len = (byte - FIXSTR_START) as usize; + self.pos += 1; + self.take_slice(len)?; + } + STR8_MARKER | BIN8_MARKER => { + self.pos += 1; + let len = self.take_byte()? 
as usize; + self.take_slice(len)?; + } + STR16_MARKER | BIN16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + self.take_slice(len)?; + } + STR32_MARKER | BIN32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + self.take_slice(len)?; + } + FIXARRAY_START..=FIXARRAY_END => { + let len = (byte - FIXARRAY_START) as usize; + self.pos += 1; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + FIXMAP_START..=FIXMAP_END => { + let len = (byte - FIXMAP_START) as usize; + self.pos += 1; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + FIXEXT1_MARKER => { + self.pos += 1; + self.take_slice(2)?; + } + FIXEXT2_MARKER => { + self.pos += 1; + self.take_slice(3)?; + } + FIXEXT4_MARKER => { + self.pos += 1; + self.take_slice(5)?; + } + FIXEXT8_MARKER => { + self.pos += 1; + self.take_slice(9)?; + } + FIXEXT16_MARKER => { + self.pos += 1; + self.take_slice(17)?; + } + EXT8_MARKER => { + self.pos += 1; + let len = self.take_byte()? 
as usize; + self.take_slice(len + 1)?; + } + EXT16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + self.take_slice(len + 1)?; + } + EXT32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + self.take_slice(len + 1)?; + } + _ => return Err(Error::InvalidMarker(byte)), + } + self.decrement_depth(); + Ok(()) + } } #[cfg(feature = "std")] @@ -1502,4 +1644,131 @@ impl<'de, R: std::io::Read> Read<'de> for IOReader { _ => Err(Error::InvalidMarker(byte)), } } + + fn skip_value(&mut self) -> Result<()> { + self.increment_depth()?; + let byte = self.read_byte()?; + match byte { + POS_FIXINT_START..=POS_FIXINT_END | NEG_FIXINT_START..=NEG_FIXINT_END => {} + NIL_MARKER | TRUE_MARKER | FALSE_MARKER => {} + UINT8_MARKER | INT8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + } + UINT16_MARKER | INT16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + } + UINT32_MARKER | INT32_MARKER | FLOAT32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + } + UINT64_MARKER | INT64_MARKER | FLOAT64_MARKER => { + let mut buf = [0u8; 8]; + self.read_exact(&mut buf)?; + } + FIXSTR_START..=FIXSTR_END => { + let len = (byte - FIXSTR_START) as usize; + let _ = self.read_exact_vec(len)?; + } + STR8_MARKER | BIN8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + let len = buf[0] as usize; + let _ = self.read_exact_vec(len)?; + } + STR16_MARKER | BIN16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len)?; + } + STR32_MARKER | BIN32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len)?; + } + FIXARRAY_START..=FIXARRAY_END => { + let len = (byte - FIXARRAY_START) as usize; + for _ in 0..len { + 
self.skip_value()?; + } + } + ARRAY16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + FIXMAP_START..=FIXMAP_END => { + let len = (byte - FIXMAP_START) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + FIXEXT1_MARKER => { + let _ = self.read_exact_vec(2)?; + } + FIXEXT2_MARKER => { + let _ = self.read_exact_vec(3)?; + } + FIXEXT4_MARKER => { + let _ = self.read_exact_vec(5)?; + } + FIXEXT8_MARKER => { + let _ = self.read_exact_vec(9)?; + } + FIXEXT16_MARKER => { + let _ = self.read_exact_vec(17)?; + } + EXT8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + let len = buf[0] as usize; + let _ = self.read_exact_vec(len + 1)?; + } + EXT16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len + 1)?; + } + EXT32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len + 1)?; + } + _ => return Err(Error::InvalidMarker(byte)), + } + self.decrement_depth(); + Ok(()) + } } diff --git a/zerompk_derive/src/lib.rs b/zerompk_derive/src/lib.rs index 0e5d2bd..3be84d2 100644 --- a/zerompk_derive/src/lib.rs +++ b/zerompk_derive/src/lib.rs @@ -355,12 +355,16 @@ struct FieldConfig { key: Option, ignore: bool, as_bytes: Option, + 
default: bool, + default_path: Option, } fn parse_field_config(field: &Field) -> Result { let mut key: Option = None; let mut ignore = false; let mut as_bytes: Option = None; + let mut default = false; + let mut default_path: Option = None; for attr in &field.attrs { if !attr.path().is_ident("msgpack") { @@ -403,11 +407,27 @@ fn parse_field_config(field: &Field) -> Result { } }); Ok(()) + } else if meta.path.is_ident("default") { + if default { + return Err(meta.error("duplicate `default` attribute")); + } + default = true; + if meta.input.peek(syn::Token![=]) { + let value = meta.value()?; + let lit: Lit = value.parse()?; + match lit { + Lit::Str(s) => { + default_path = Some(s.parse()?); + } + _ => return Err(meta.error("`default = ...` must be a string path")), + } + } + Ok(()) } else if meta.path.is_ident("array") || meta.path.is_ident("map") { Err(meta.error("field-level msgpack attribute does not support `array/map`")) } else { Err(meta - .error("field-level msgpack attribute supports only `key = ...`, `ignore`, or `as_bytes = true/false`")) + .error("field-level msgpack attribute supports only `key = ...`, `ignore`, `default`, or `as_bytes = true/false`")) } })?; } @@ -437,6 +457,8 @@ fn parse_field_config(field: &Field) -> Result { key, ignore, as_bytes, + default, + default_path, }) } @@ -881,6 +903,37 @@ fn expand_array_struct(data: &DataStruct) -> Result { .collect::>()?; let field_index_by_slot = build_named_array_slots(fields, &field_configs)?; + // In array mode, `#[msgpack(default)]` is only safe at trailing + // positions: anything after a defaulted slot shifts on evolution. + // Reject mid-struct defaults; user must move the field to the end + // or add `#[msgpack(map)]`. 
+ { + let mut saw_default = false; + for slot in field_index_by_slot.iter() { + match slot { + Some(i) => { + let cfg = &field_configs[*i]; + if cfg.default { + saw_default = true; + } else if saw_default { + return Err(syn::Error::new( + fields.named[*i].span(), + "`#[msgpack(default)]` in array mode must be trailing; move this field to the end or use `#[msgpack(map)]`", + )); + } + } + None => { + if saw_default { + return Err(syn::Error::new( + fields.named.span(), + "`#[msgpack(default)]` in array mode must be trailing; use `#[msgpack(map)]`", + )); + } + } + } + } + } + let array_len = field_index_by_slot.len(); let is_dense_sequential = field_index_by_slot.len() == names.len() && field_index_by_slot @@ -934,25 +987,85 @@ fn expand_array_struct(data: &DataStruct) -> Result { Ok(()) }; - let read = if is_dense_sequential { - let direct_fields: Vec<_> = names + let any_field_has_default = field_configs.iter().any(|c| c.default); + + let read = if !any_field_has_default { + // Strict path: bit-identical to upstream 0.4.1. + if is_dense_sequential { + let direct_fields: Vec<_> = names + .iter() + .zip(tys.iter()) + .zip(field_configs.iter()) + .map(|((name, ty), cfg)| { + let read_expr = build_read_expr(ty, Some(cfg)); + quote! { #name: #read_expr } + }) + .collect(); + + quote! { + reader.check_array_len(#array_len)?; + Ok(Self { #( #direct_fields ),* }) + } + } else { + quote! { + reader.check_array_len(#array_len)?; + #( #read_slots )* + Ok(Self { #( #init_fields ),* }) + } + } + } else { + // Tolerant array path: accepts arrays shorter than the full schema, + // defaulting any trailing slots whose fields are marked + // `#[msgpack(default)]`. Arrays longer than expected are also + // tolerated — trailing extra values are skipped. + let tolerant_slot_reads: Vec<_> = field_index_by_slot .iter() - .zip(tys.iter()) - .zip(field_configs.iter()) - .map(|((name, ty), cfg)| { - let read_expr = build_read_expr(ty, Some(cfg)); - quote! 
{ #name: #read_expr } + .enumerate() + .map(|(slot_idx, slot)| match slot { + Some(i) => { + let name = &names[*i]; + let ty = &tys[*i]; + let cfg = &field_configs[*i]; + let read_expr = build_read_expr(ty, Some(cfg)); + let default_expr = if cfg.default { + if let Some(path) = &cfg.default_path { + quote! { #path() } + } else { + quote! { <#ty as ::core::default::Default>::default() } + } + } else { + quote! { + return Err(::zerompk::Error::ArrayLengthMismatch { + expected: #array_len, + actual: __array_len, + }) + } + }; + quote! { + let #name: #ty = if #slot_idx < __array_len { + #read_expr + } else { + #default_expr + }; + } + } + None => quote! { + if #slot_idx < __array_len { + reader.read_nil()?; + } + }, }) .collect(); quote! { - reader.check_array_len(#array_len)?; - Ok(Self { #( #direct_fields ),* }) - } - } else { - quote! { - reader.check_array_len(#array_len)?; - #( #read_slots )* + let __array_len = reader.read_array_len()?; + #( #tolerant_slot_reads )* + // Skip any trailing values the writer emitted beyond our schema. + if __array_len > #array_len { + for _ in #array_len..__array_len { + reader.skip_value()?; + } + } Ok(Self { #( #init_fields ),* }) } }; @@ -1166,30 +1279,91 @@ fn expand_map_struct(data: &DataStruct) -> Result { Ok(()) }; - let read = quote! { - reader.check_map_len(#count)?; + let any_field_has_default = field_configs.iter().any(|c| c.default); - #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* + let read = if !any_field_has_default { + // Strict path: preserves 0.4.1 fast-fail behavior bit-for-bit. + quote! 
{ + reader.check_map_len(#count)?; - #[allow(clippy::reversed_empty_ranges)] - for _ in 0..#count { - let __key_bytes = reader.read_string_bytes()?; - let __key_bytes = __key_bytes.as_ref(); - let __key_index = (|| -> ::zerompk::Result { - #key_dispatch - })()?; + #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* - match __key_index { - #( #read_value_arms )* - _ => unreachable!(), + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..#count { + let __key_bytes = reader.read_string_bytes()?; + let __key_bytes = __key_bytes.as_ref(); + let __key_index = (|| -> ::zerompk::Result { + #key_dispatch + })()?; + + match __key_index { + #( #read_value_arms )* + _ => unreachable!(), + } } + + #( + let #names = #slots.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_lits.into()))?; + )* + + Ok(Self { #( #init_fields ),* }) } + } else { + // Tolerant path: struct opted into evolution via per-field `default`. + // Accepts maps of any length, skips unknown keys, fills missing keys + // with their declared default. + let unknown_arm = quote! { _ => { reader.skip_value()?; } }; - #( - let #names = #slots.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_lits.into()))?; - )* + let key_dispatch_tolerant = quote! { + let __matched_idx: usize = (|| -> ::zerompk::Result { + #key_dispatch + })().unwrap_or(usize::MAX); + }; - Ok(Self { #( #init_fields ),* }) + let slot_finalize: Vec<_> = (0..count) + .map(|idx| { + let name = &names[idx]; + let slot = &slots[idx]; + let key_name = &key_lits[idx]; + let ty = &tys[idx]; + let cfg = &field_configs[field_indices[idx]]; + if cfg.default { + let default_expr = if let Some(path) = &cfg.default_path { + quote! { #path() } + } else { + quote! { <#ty as ::core::default::Default>::default() } + }; + quote! { + let #name = #slot.unwrap_or_else(|| #default_expr); + } + } else { + quote! { + let #name = #slot.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_name.into()))?; + } + } + }) + .collect(); + + quote! 
{ + let __map_len = reader.read_map_len()?; + + #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* + + for _ in 0..__map_len { + let __key_bytes = reader.read_string_bytes()?; + let __key_bytes = __key_bytes.as_ref(); + #key_dispatch_tolerant + + match __matched_idx { + #( #read_value_arms )* + #unknown_arm + } + } + + #( #slot_finalize )* + + Ok(Self { #( #init_fields ),* }) + } }; Ok(ImplBody { write, read }) From 4036bb9d79a070533e5ba80e9cf680d0c3add703 Mon Sep 17 00:00:00 2001 From: Farhan Syah Date: Wed, 29 Apr 2026 11:52:46 +0800 Subject: [PATCH 2/5] refactor(derive): separate `default` and `allow_unknown_fields` into orthogonal attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, any field annotated with `#[msgpack(default)]` silently flipped the entire map-mode decoder into tolerating unknown keys — two unrelated concerns bundled into one attribute. Split them: - `#[msgpack(default)]` fills missing keys only; unknown keys still error, with the offending key surfaced in `KeyNotFound`. - `#[msgpack(allow_unknown_fields)]` (struct-level) skips unknown keys only; fields without `default` are still required. - The two attributes compose independently for full schema evolution. Tightened additional edges: - `#[msgpack(default)]` in array mode is now a compile error (arrays have no field names; silently accepting shorter arrays hides corruption). - `#[msgpack(default)]` on enum-variant fields is now rejected (was a silent no-op). - `allow_unknown_fields` on array-mode structs and on enums is rejected with a clear diagnostic. - Strict-mode codegen is unchanged byte-for-byte. Add nine integration tests in `zerompk/tests/schema_evolution.rs` covering each combination of the two attributes (the feature previously had no tests). Update README to document both attributes and correct the Security section's "always strict" claim. 
--- README.md | 55 +++++++- fuzz/Cargo.lock | 4 +- zerompk/tests/schema_evolution.rs | 196 ++++++++++++++++++++++++++++ zerompk_derive/src/lib.rs | 210 ++++++++++++++---------------- 4 files changed, 350 insertions(+), 115 deletions(-) create mode 100644 zerompk/tests/schema_evolution.rs diff --git a/README.md b/README.md index f9758f8..2932422 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,57 @@ struct Foo<'a> { } ``` +### default + +Marks a field as recoverable when its key is missing from the wire payload. On read, a missing key is filled with `Default::default()`, or with the result of a named function if `default = "path"` is supplied. Only supported on `#[msgpack(map)]` structs — array representation has no field names, so missing values cannot be detected safely. + +```rust +fn default_age() -> u32 { 18 } + +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub nickname: Option, + + #[msgpack(default = "default_age")] + pub age: u32, +} +``` + +`default` only controls missing keys. Unknown keys still cause a decode error unless `allow_unknown_fields` is also set on the struct. + +### allow_unknown_fields + +Struct-level opt-in that tells the decoder to skip keys it does not recognize instead of erroring. Only meaningful on `#[msgpack(map)]` structs. + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + pub age: u32, +} +``` + +`default` and `allow_unknown_fields` are orthogonal. Combine them for full forward/backward compatibility: + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub age: u32, +} +``` + +> [!NOTE] +> Both attributes are opt-in. Structs that use neither preserve zerompk's strict-by-default decoding behavior with byte-identical codegen. 
+ ## Design Philosophy The most popular MessagePack serializer, [rmp](https://github.com/3Hren/msgpack-rust), is highly optimized, but zerompk is designed with an even greater focus on performance. @@ -552,7 +603,9 @@ Many of these optimizations are inspired by the high-performance MessagePack ser ## Security -zerompk always requires strict type schemas for serialization/deserialization, making it almost safe against untrusted binaries. Additionally, zerompk implements measures against the following attacks: +By default, zerompk requires strict type schemas for serialization/deserialization (every declared field must appear; no unknown keys are tolerated), making it almost safe against untrusted binaries. Schema evolution is opt-in per struct via `#[msgpack(default)]` and `#[msgpack(allow_unknown_fields)]`; structs that opt in relax these checks intentionally and should be paired with application-level validation when the input is untrusted. + +Additionally, zerompk implements measures against the following attacks: - Stack overflow caused by excessive object nesting. zerompk rejects objects nested beyond `MAX_DEPTH = 500` and returns an error. - Memory consumption due to large size headers. zerompk validates header sizes before memory allocation and returns an error if the buffer is insufficient. diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 6c979ec..3410a74 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -157,14 +157,14 @@ checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "zerompk" -version = "0.3.2" +version = "0.4.2" dependencies = [ "zerompk_derive", ] [[package]] name = "zerompk_derive" -version = "0.3.2" +version = "0.4.2" dependencies = [ "proc-macro2", "quote", diff --git a/zerompk/tests/schema_evolution.rs b/zerompk/tests/schema_evolution.rs new file mode 100644 index 0000000..3a718ee --- /dev/null +++ b/zerompk/tests/schema_evolution.rs @@ -0,0 +1,196 @@ +//! 
Tests for `#[msgpack(default)]` (fill missing keys) and +//! `#[msgpack(allow_unknown_fields)]` (skip unknown keys), plus their +//! interaction. These two opt-ins are intentionally orthogonal. + +use zerompk::{FromMessagePack, ToMessagePack}; +use zerompk_derive::{FromMessagePack, ToMessagePack}; + +fn encode(value: &T) -> Vec { + zerompk::to_msgpack_vec(value).unwrap() +} + +fn decode<'a, T: FromMessagePack<'a>>(bytes: &'a [u8]) -> Result { + zerompk::from_msgpack(bytes) +} + +// --------------------------------------------------------------------------- +// V1 schema (writer side): the "old" version of a message. +// --------------------------------------------------------------------------- + +#[derive(ToMessagePack, FromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V1 { + a: i32, + b: i32, +} + +// --------------------------------------------------------------------------- +// `default` only: fill missing keys, but unknown keys must still error. +// This is the "I added a new field" evolution direction. +// --------------------------------------------------------------------------- + +#[derive(FromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V2DefaultsOnly { + a: i32, + b: i32, + #[msgpack(default)] + c: i32, +} + +#[test] +fn defaults_fill_missing_keys() { + let v1 = V1 { a: 1, b: 2 }; + let bytes = encode(&v1); + let v2: V2DefaultsOnly = decode(&bytes).unwrap(); + assert_eq!(v2, V2DefaultsOnly { a: 1, b: 2, c: 0 }); +} + +#[test] +fn defaults_alone_still_reject_unknown_keys() { + // Writer emits an extra unknown key `z`; reader has defaults but not + // `allow_unknown_fields`. Decode must fail loudly. 
+ #[derive(ToMessagePack)] + #[msgpack(map)] + struct WithExtra { + a: i32, + b: i32, + z: i32, + } + let bytes = encode(&WithExtra { a: 1, b: 2, z: 99 }); + let err = decode::(&bytes).unwrap_err(); + match err { + zerompk::Error::KeyNotFound(k) => assert_eq!(k, "z"), + other => panic!("expected KeyNotFound, got {other:?}"), + } +} + +// --------------------------------------------------------------------------- +// `allow_unknown_fields` only: skip unknown keys, but missing keys must +// still error. This is the "I removed a field" evolution direction. +// --------------------------------------------------------------------------- + +#[derive(FromMessagePack, Debug, PartialEq)] +#[msgpack(map, allow_unknown_fields)] +struct V0AllowUnknownOnly { + a: i32, +} + +#[test] +fn allow_unknown_skips_extra_keys() { + let v1 = V1 { a: 1, b: 2 }; + let bytes = encode(&v1); + let v0: V0AllowUnknownOnly = decode(&bytes).unwrap(); + assert_eq!(v0, V0AllowUnknownOnly { a: 1 }); +} + +#[test] +fn allow_unknown_alone_still_requires_all_keys() { + // Writer is missing key `a`; reader allows unknowns but `a` has no default. + #[derive(ToMessagePack)] + #[msgpack(map)] + struct OnlyB { + b: i32, + } + let bytes = encode(&OnlyB { b: 5 }); + let err = decode::(&bytes).unwrap_err(); + assert!(matches!(err, zerompk::Error::KeyNotFound(_))); +} + +// --------------------------------------------------------------------------- +// Both: full schema evolution — accept missing keys (defaulted) and skip +// unknown keys. +// --------------------------------------------------------------------------- + +#[derive(FromMessagePack, Debug, PartialEq)] +#[msgpack(map, allow_unknown_fields)] +struct VFull { + a: i32, + #[msgpack(default)] + new_field: i32, +} + +#[test] +fn both_modes_compose() { + // V1 has `b` (unknown to VFull) and lacks `new_field` (defaulted). 
+ let v1 = V1 { a: 7, b: 2 }; + let bytes = encode(&v1); + let v: VFull = decode(&bytes).unwrap(); + assert_eq!(v, VFull { a: 7, new_field: 0 }); +} + +// --------------------------------------------------------------------------- +// `default = "path"` form. +// --------------------------------------------------------------------------- + +fn forty_two() -> i32 { + 42 +} + +#[derive(FromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V2DefaultPath { + a: i32, + b: i32, + #[msgpack(default = "forty_two")] + c: i32, +} + +#[test] +fn default_path_invokes_named_function() { + let bytes = encode(&V1 { a: 1, b: 2 }); + let v: V2DefaultPath = decode(&bytes).unwrap(); + assert_eq!(v, V2DefaultPath { a: 1, b: 2, c: 42 }); +} + +// --------------------------------------------------------------------------- +// Strict-by-default: an untouched struct still rejects missing keys and +// extra keys. This guards against the strict-mode codegen regressing. +// --------------------------------------------------------------------------- + +#[test] +fn strict_default_rejects_missing_key() { + #[derive(ToMessagePack)] + #[msgpack(map)] + struct OnlyA { + a: i32, + } + let bytes = encode(&OnlyA { a: 1 }); + let err = decode::(&bytes).unwrap_err(); + // 0.4.1 strict path uses check_map_len, which surfaces a length error. + assert!(matches!(err, zerompk::Error::MapLengthMismatch { .. })); +} + +#[test] +fn strict_default_rejects_extra_key() { + #[derive(ToMessagePack)] + #[msgpack(map)] + struct WithExtra { + a: i32, + b: i32, + c: i32, + } + let bytes = encode(&WithExtra { a: 1, b: 2, c: 3 }); + let err = decode::(&bytes).unwrap_err(); + assert!(matches!(err, zerompk::Error::MapLengthMismatch { .. })); +} + +// --------------------------------------------------------------------------- +// Round-trip: writer emits all declared fields, so V2-encoded → V2-decoded +// preserves values regardless of mode. 
+// --------------------------------------------------------------------------- + +#[test] +fn round_trip_preserves_values() { + #[derive(ToMessagePack, FromMessagePack, Debug, PartialEq)] + #[msgpack(map, allow_unknown_fields)] + struct V { + a: i32, + #[msgpack(default)] + b: i32, + } + let original = V { a: 10, b: 20 }; + let bytes = encode(&original); + let decoded: V = decode(&bytes).unwrap(); + assert_eq!(original, decoded); +} diff --git a/zerompk_derive/src/lib.rs b/zerompk_derive/src/lib.rs index ecdf4bb..f0d355d 100644 --- a/zerompk_derive/src/lib.rs +++ b/zerompk_derive/src/lib.rs @@ -42,11 +42,13 @@ enum Repr { struct TypeConfig { repr: Option, c_enum: bool, + allow_unknown_fields: bool, } fn parse_type_config_from_attrs(attrs: &[syn::Attribute]) -> Result { let mut repr = None; let mut c_enum = false; + let mut allow_unknown_fields = false; for attr in attrs { if !attr.path().is_ident("msgpack") { @@ -75,13 +77,25 @@ fn parse_type_config_from_attrs(attrs: &[syn::Attribute]) -> Result } c_enum = true; Ok(()) + } else if meta.path.is_ident("allow_unknown_fields") { + if allow_unknown_fields { + return Err(meta.error("duplicate `allow_unknown_fields` attribute")); + } + allow_unknown_fields = true; + Ok(()) } else { - Err(meta.error("expected `array`, `map`, `c_enum`, or `key = ...`")) + Err(meta.error( + "expected `array`, `map`, `c_enum`, `allow_unknown_fields`, or `key = ...`", + )) } })?; } - Ok(TypeConfig { repr, c_enum }) + Ok(TypeConfig { + repr, + c_enum, + allow_unknown_fields, + }) } fn add_trait_bounds(mut generics: Generics, kind: DeriveKind) -> Generics { @@ -811,9 +825,15 @@ fn expand(input: DeriveInput, kind: DeriveKind) -> Result expand_array_struct(&data)?, - Repr::Map => expand_map_struct(&data)?, + Repr::Map => expand_map_struct(&data, type_cfg.allow_unknown_fields)?, } } Data::Enum(data) => { @@ -823,6 +843,12 @@ fn expand(input: DeriveInput, kind: DeriveKind) -> Result Result { .collect::>()?; let field_index_by_slot = 
build_named_array_slots(fields, &field_configs)?; - // In array mode, `#[msgpack(default)]` is only safe at trailing - // positions: anything after a defaulted slot shifts on evolution. - // Reject mid-struct defaults; user must move the field to the end - // or add `#[msgpack(map)]`. - { - let mut saw_default = false; - for slot in field_index_by_slot.iter() { - match slot { - Some(i) => { - let cfg = &field_configs[*i]; - if cfg.default { - saw_default = true; - } else if saw_default { - return Err(syn::Error::new( - fields.named[*i].span(), - "`#[msgpack(default)]` in array mode must be trailing; move this field to the end or use `#[msgpack(map)]`", - )); - } - } - None => { - if saw_default { - return Err(syn::Error::new( - fields.named.span(), - "`#[msgpack(default)]` in array mode must be trailing; use `#[msgpack(map)]`", - )); - } - } - } + // `#[msgpack(default)]` is only honored in map mode. Arrays have + // no field names, so silently accepting shorter/longer arrays + // hides corruption rather than evolving schema. Force the user + // to opt into map representation explicitly. + for (i, cfg) in field_configs.iter().enumerate() { + if cfg.default { + return Err(syn::Error::new( + fields.named[i].span(), + "`#[msgpack(default)]` is only supported with `#[msgpack(map)]`; array representation has no field names so missing values cannot be detected safely", + )); } } @@ -987,85 +995,25 @@ fn expand_array_struct(data: &DataStruct) -> Result { Ok(()) }; - let any_field_has_default = field_configs.iter().any(|c| c.default); - - let read = if !any_field_has_default { - // Strict path: bit-identical to upstream 0.4.1. - if is_dense_sequential { - let direct_fields: Vec<_> = names - .iter() - .zip(tys.iter()) - .zip(field_configs.iter()) - .map(|((name, ty), cfg)| { - let read_expr = build_read_expr(ty, Some(cfg)); - quote! { #name: #read_expr } - }) - .collect(); - - quote! 
{ - reader.check_array_len(#array_len)?; - Ok(Self { #( #direct_fields ),* }) - } - } else { - quote! { - reader.check_array_len(#array_len)?; - #( #read_slots )* - Ok(Self { #( #init_fields ),* }) - } - } - } else { - // Tolerant array path: accepts arrays shorter than the full schema, - // defaulting any trailing slots whose fields are marked - // `#[msgpack(default)]`. Arrays longer than expected are also - // tolerated — trailing extra values are skipped. - let tolerant_slot_reads: Vec<_> = field_index_by_slot + let read = if is_dense_sequential { + let direct_fields: Vec<_> = names .iter() - .enumerate() - .map(|(slot_idx, slot)| match slot { - Some(i) => { - let name = &names[*i]; - let ty = &tys[*i]; - let cfg = &field_configs[*i]; - let read_expr = build_read_expr(ty, Some(cfg)); - let default_expr = if cfg.default { - if let Some(path) = &cfg.default_path { - quote! { #path() } - } else { - quote! { <#ty as ::core::default::Default>::default() } - } - } else { - quote! { - return Err(::zerompk::Error::ArrayLengthMismatch { - expected: #array_len, - actual: __array_len, - }) - } - }; - quote! { - let #name: #ty = if #slot_idx < __array_len { - #read_expr - } else { - #default_expr - }; - } - } - None => quote! { - if #slot_idx < __array_len { - reader.read_nil()?; - } - }, + .zip(tys.iter()) + .zip(field_configs.iter()) + .map(|((name, ty), cfg)| { + let read_expr = build_read_expr(ty, Some(cfg)); + quote! { #name: #read_expr } }) .collect(); quote! { - let __array_len = reader.read_array_len()?; - #( #tolerant_slot_reads )* - // Skip any trailing values the writer emitted beyond our schema. - if __array_len > #array_len { - for _ in #array_len..__array_len { - reader.skip_value()?; - } - } + reader.check_array_len(#array_len)?; + Ok(Self { #( #direct_fields ),* }) + } + } else { + quote! 
{ + reader.check_array_len(#array_len)?; + #( #read_slots )* Ok(Self { #( #init_fields ),* }) } }; @@ -1196,7 +1144,7 @@ fn expand_array_struct(data: &DataStruct) -> Result { } } -fn expand_map_struct(data: &DataStruct) -> Result { +fn expand_map_struct(data: &DataStruct, allow_unknown_fields: bool) -> Result { let fields = match &data.fields { Fields::Named(fields) => fields, Fields::Unnamed(_) | Fields::Unit => { @@ -1281,8 +1229,19 @@ fn expand_map_struct(data: &DataStruct) -> Result { let any_field_has_default = field_configs.iter().any(|c| c.default); - let read = if !any_field_has_default { - // Strict path: preserves 0.4.1 fast-fail behavior bit-for-bit. + // Three decoding modes, controlled by orthogonal opt-ins: + // + // defaults unknown decoder behavior + // -------- ------------- ----------------------------------------- + // no deny (default) check_map_len(N), every key required + // yes deny read_map_len, fill missing, error on unknown + // no allow read_map_len, every key required, skip unknown + // yes allow read_map_len, fill missing, skip unknown + // + // Strict mode preserves 0.4.1 codegen byte-for-byte. The other two modes + // share one tolerant skeleton parameterized by what to do with missing + // keys (default vs error) and unknown keys (skip vs error). + let read = if !any_field_has_default && !allow_unknown_fields { quote! { '__zerompk_read_map: { reader.check_map_len(#count)?; @@ -1311,10 +1270,19 @@ fn expand_map_struct(data: &DataStruct) -> Result { } } } else { - // Tolerant path: struct opted into evolution via per-field `default`. - // Accepts maps of any length, skips unknown keys, fills missing keys - // with their declared default. - let unknown_arm = quote! { _ => { reader.skip_value()?; } }; + let unknown_arm = if allow_unknown_fields { + quote! { _ => { reader.skip_value()?; } } + } else { + // Surface the offending key so users can diagnose schema drift. + quote! 
{ + _ => { + let __key_str = ::core::str::from_utf8(__key_bytes) + .unwrap_or("") + .to_string(); + break '__zerompk_read_map Err(::zerompk::Error::KeyNotFound(__key_str)); + } + } + }; let key_dispatch_tolerant = quote! { let __matched_idx: usize = (|| -> ::zerompk::Result { @@ -1568,6 +1536,24 @@ fn build_enum_variant_payload( )> { let v_ident = &variant.ident; + // `#[msgpack(default)]` on enum-variant fields is currently a no-op in + // codegen — silently accepting it would let users write code that looks + // like it does schema evolution but doesn't. Reject loudly. + let variant_field_iter: Box> = match &variant.fields { + Fields::Named(f) => Box::new(f.named.iter()), + Fields::Unnamed(f) => Box::new(f.unnamed.iter()), + Fields::Unit => Box::new(std::iter::empty()), + }; + for field in variant_field_iter { + let fc = parse_field_config(field)?; + if fc.default { + return Err(syn::Error::new( + field.span(), + "`#[msgpack(default)]` is not supported on enum-variant fields", + )); + } + } + match &variant.fields { Fields::Unit => { if cfg.repr.is_some() { From 72734dade7035a1fa051e2129575a7e6e3c4eba8 Mon Sep 17 00:00:00 2001 From: Farhan Syah Date: Fri, 1 May 2026 14:16:50 +0800 Subject: [PATCH 3/5] fix(tests): remove duplicate macro import in schema_evolution tests The derive macros are already re-exported via the zerompk crate, making the explicit zerompk_derive import redundant and causing a CI failure on PR #37. --- zerompk/tests/schema_evolution.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zerompk/tests/schema_evolution.rs b/zerompk/tests/schema_evolution.rs index 3a718ee..2a6af0b 100644 --- a/zerompk/tests/schema_evolution.rs +++ b/zerompk/tests/schema_evolution.rs @@ -3,7 +3,6 @@ //! interaction. These two opt-ins are intentionally orthogonal. 
use zerompk::{FromMessagePack, ToMessagePack}; -use zerompk_derive::{FromMessagePack, ToMessagePack}; fn encode(value: &T) -> Vec { zerompk::to_msgpack_vec(value).unwrap() From ec299232255fd06b1f1d907d36f90b053b88b97e Mon Sep 17 00:00:00 2001 From: Farhan Syah Date: Sun, 3 May 2026 13:27:02 +0800 Subject: [PATCH 4/5] Fix schema evolution derive imports --- zerompk/tests/schema_evolution.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/zerompk/tests/schema_evolution.rs b/zerompk/tests/schema_evolution.rs index 2a6af0b..b78a509 100644 --- a/zerompk/tests/schema_evolution.rs +++ b/zerompk/tests/schema_evolution.rs @@ -3,6 +3,9 @@ //! interaction. These two opt-ins are intentionally orthogonal. use zerompk::{FromMessagePack, ToMessagePack}; +use zerompk_derive::{ + FromMessagePack as DeriveFromMessagePack, ToMessagePack as DeriveToMessagePack, +}; fn encode(value: &T) -> Vec { zerompk::to_msgpack_vec(value).unwrap() @@ -16,7 +19,7 @@ fn decode<'a, T: FromMessagePack<'a>>(bytes: &'a [u8]) -> Result i32 { 42 } -#[derive(FromMessagePack, Debug, PartialEq)] +#[derive(DeriveFromMessagePack, Debug, PartialEq)] #[msgpack(map)] struct V2DefaultPath { a: i32, @@ -149,7 +152,7 @@ fn default_path_invokes_named_function() { #[test] fn strict_default_rejects_missing_key() { - #[derive(ToMessagePack)] + #[derive(DeriveToMessagePack)] #[msgpack(map)] struct OnlyA { a: i32, @@ -162,7 +165,7 @@ fn strict_default_rejects_missing_key() { #[test] fn strict_default_rejects_extra_key() { - #[derive(ToMessagePack)] + #[derive(DeriveToMessagePack)] #[msgpack(map)] struct WithExtra { a: i32, @@ -181,7 +184,7 @@ fn strict_default_rejects_extra_key() { #[test] fn round_trip_preserves_values() { - #[derive(ToMessagePack, FromMessagePack, Debug, PartialEq)] + #[derive(DeriveToMessagePack, DeriveFromMessagePack, Debug, PartialEq)] #[msgpack(map, allow_unknown_fields)] struct V { a: i32, From 968938b4b4ec0add4d77e695e8adc15c5739f933 Mon Sep 17 00:00:00 
2001 From: nuskey8 Date: Sat, 9 May 2026 15:29:02 +0900 Subject: [PATCH 5/5] update: README_JA --- README_JA.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/README_JA.md b/README_JA.md index c5fd4b3..36d3f73 100644 --- a/README_JA.md +++ b/README_JA.md @@ -150,6 +150,59 @@ struct Foo<'a> { } ``` +### default + +デシリアライズにキーが存在しない場合に、そのフィールドをデフォルト値で置き換えます。欠損したキーは `Default::default()` で埋められるか、`default = "path"` が指定されている場合は名前付き関数の結果で埋められます。 + +これは`#[msgpack(map)]` でのみサポートされています。(配列にはフィールド名がないため、欠落した値を安全に検出できません) + +```rust +fn default_age() -> u32 { 18 } + +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub nickname: Option, + + #[msgpack(default = "default_age")] + pub age: u32, +} +``` + +`default` は欠損したキーのみに対応します。`allow_unknown_fields` が設定されていない限り、不明なキーが含まれる場合はエラーとなります。 + +### allow_unknown_fields + +デシリアライズ時に不明なキーがあった場合、エラーにする代わりにスキップするように変更します。これは`#[msgpack(map)]`でのみ有効です。 + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + pub age: u32, +} +``` + +完全な前方互換性と後方互換性を確保するには、`default`と`allow_unknown_fields`を組み合わせて使用します。 + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub age: u32, +} +``` + +> [!NOTE] +> これらの属性はオプトインです。デフォルトではzerompkは厳密なスキーマの一致を要求します。 + ## 設計哲学 最もメジャーなMessagePackシリアライザである[rmp](https://github.com/3Hren/msgpack-rust)は十分に最適化されていますが、zerompkはそれ以上にパフォーマンスに注力した設計になっています。 @@ -561,4 +614,4 @@ zerompkはシリアライズ/デシリアライズに対して常に厳格な型 ## ライセンス -このライブラリは[MIT License](LICENSE)の下で公開されています。 \ No newline at end of file +このライブラリは[MIT License](LICENSE)の下で公開されています。