diff --git a/README.md b/README.md index f9758f8..2932422 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,57 @@ struct Foo<'a> { } ``` +### default + +Marks a field as recoverable when its key is missing from the wire payload. On read, a missing key is filled with `Default::default()`, or with the result of a named function if `default = "path"` is supplied. Only supported on `#[msgpack(map)]` structs — array representation has no field names, so missing values cannot be detected safely. + +```rust +fn default_age() -> u32 { 18 } + +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub nickname: Option<String>, + + #[msgpack(default = "default_age")] + pub age: u32, +} +``` + +`default` only controls missing keys. Unknown keys still cause a decode error unless `allow_unknown_fields` is also set on the struct. + +### allow_unknown_fields + +Struct-level opt-in that tells the decoder to skip keys it does not recognize instead of erroring. Only meaningful on `#[msgpack(map)]` structs. + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + pub age: u32, +} +``` + +`default` and `allow_unknown_fields` are orthogonal. Combine them for full forward/backward compatibility: + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub age: u32, +} +``` + +> [!NOTE] +> Both attributes are opt-in. Structs that use neither preserve zerompk's strict-by-default decoding behavior with byte-identical codegen. + ## Design Philosophy The most popular MessagePack serializer, [rmp](https://github.com/3Hren/msgpack-rust), is highly optimized, but zerompk is designed with an even greater focus on performance. 
@@ -552,7 +603,9 @@ Many of these optimizations are inspired by the high-performance MessagePack ser ## Security -zerompk always requires strict type schemas for serialization/deserialization, making it almost safe against untrusted binaries. Additionally, zerompk implements measures against the following attacks: +By default, zerompk requires strict type schemas for serialization/deserialization (every declared field must appear; no unknown keys are tolerated), making it almost safe against untrusted binaries. Schema evolution is opt-in per struct via `#[msgpack(default)]` and `#[msgpack(allow_unknown_fields)]`; structs that opt in relax these checks intentionally and should be paired with application-level validation when the input is untrusted. + +Additionally, zerompk implements measures against the following attacks: - Stack overflow caused by excessive object nesting. zerompk rejects objects nested beyond `MAX_DEPTH = 500` and returns an error. - Memory consumption due to large size headers. zerompk validates header sizes before memory allocation and returns an error if the buffer is insufficient. 
diff --git a/README_JA.md b/README_JA.md index c5fd4b3..36d3f73 100644 --- a/README_JA.md +++ b/README_JA.md @@ -150,6 +150,59 @@ struct Foo<'a> { } ``` +### default + +デシリアライズ時にキーが存在しない場合に、そのフィールドをデフォルト値で置き換えます。欠損したキーは `Default::default()` で埋められるか、`default = "path"` が指定されている場合は名前付き関数の結果で埋められます。 + +これは`#[msgpack(map)]` でのみサポートされています。(配列にはフィールド名がないため、欠落した値を安全に検出できません) + +```rust +fn default_age() -> u32 { 18 } + +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub nickname: Option<String>, + + #[msgpack(default = "default_age")] + pub age: u32, +} +``` + +`default` は欠損したキーのみに対応します。`allow_unknown_fields` が設定されていない限り、不明なキーが含まれる場合はエラーとなります。 + +### allow_unknown_fields + +デシリアライズ時に不明なキーがあった場合、エラーにする代わりにスキップするように変更します。これは`#[msgpack(map)]`でのみ有効です。 + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + pub age: u32, +} +``` + +完全な前方互換性と後方互換性を確保するには、`default`と`allow_unknown_fields`を組み合わせて使用します。 + +```rust +#[derive(FromMessagePack, ToMessagePack)] +#[msgpack(map, allow_unknown_fields)] +pub struct Person { + pub name: String, + + #[msgpack(default)] + pub age: u32, +} +``` + +> [!NOTE] +> これらの属性はオプトインです。デフォルトではzerompkは厳密なスキーマの一致を要求します。 + ## 設計哲学 最もメジャーなMessagePackシリアライザである[rmp](https://github.com/3Hren/msgpack-rust)は十分に最適化されていますが、zerompkはそれ以上にパフォーマンスに注力した設計になっています。 @@ -561,4 +614,4 @@ zerompkはシリアライズ/デシリアライズに対して常に厳格な型 ## ライセンス -このライブラリは[MIT License](LICENSE)の下で公開されています。 \ No newline at end of file +このライブラリは[MIT License](LICENSE)の下で公開されています。 diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 6c979ec..3410a74 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -157,14 +157,14 @@ checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "zerompk" -version = "0.3.2" +version = "0.4.2" dependencies = [ "zerompk_derive", ] [[package]] name = "zerompk_derive" -version = "0.3.2" +version = 
"0.4.2" dependencies = [ "proc-macro2", "quote", diff --git a/zerompk/src/read.rs b/zerompk/src/read.rs index 6ad361d..f084ebb 100644 --- a/zerompk/src/read.rs +++ b/zerompk/src/read.rs @@ -156,6 +156,11 @@ pub trait Read<'de> { Err(Error::MapLengthMismatch { expected, actual }) } } + + /// Consumes exactly one MessagePack value from the input, regardless of + /// its type. Used by `#[msgpack(allow_unknown_fields)]` map-mode decoding to + /// skip over unknown keys' values without needing to know their type. + fn skip_value(&mut self) -> Result<()>; } pub struct SliceReader<'de> { @@ -866,6 +871,143 @@ impl<'de> Read<'de> for SliceReader<'de> { } } } + + fn skip_value(&mut self) -> Result<()> { + self.increment_depth()?; + let byte = self.peek_byte()?; + match byte { + POS_FIXINT_START..=POS_FIXINT_END | NEG_FIXINT_START..=NEG_FIXINT_END => { + self.pos += 1; + } + NIL_MARKER | TRUE_MARKER | FALSE_MARKER => { + self.pos += 1; + } + UINT8_MARKER | INT8_MARKER => { + self.pos += 1; + self.take_slice(1)?; + } + UINT16_MARKER | INT16_MARKER => { + self.pos += 1; + self.take_slice(2)?; + } + UINT32_MARKER | INT32_MARKER | FLOAT32_MARKER => { + self.pos += 1; + self.take_slice(4)?; + } + UINT64_MARKER | INT64_MARKER | FLOAT64_MARKER => { + self.pos += 1; + self.take_slice(8)?; + } + FIXSTR_START..=FIXSTR_END => { + let len = (byte - FIXSTR_START) as usize; + self.pos += 1; + self.take_slice(len)?; + } + STR8_MARKER | BIN8_MARKER => { + self.pos += 1; + let len = self.take_byte()? 
as usize; + self.take_slice(len)?; + } + STR16_MARKER | BIN16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + self.take_slice(len)?; + } + STR32_MARKER | BIN32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + self.take_slice(len)?; + } + FIXARRAY_START..=FIXARRAY_END => { + let len = (byte - FIXARRAY_START) as usize; + self.pos += 1; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + FIXMAP_START..=FIXMAP_END => { + let len = (byte - FIXMAP_START) as usize; + self.pos += 1; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + FIXEXT1_MARKER => { + self.pos += 1; + self.take_slice(2)?; + } + FIXEXT2_MARKER => { + self.pos += 1; + self.take_slice(3)?; + } + FIXEXT4_MARKER => { + self.pos += 1; + self.take_slice(5)?; + } + FIXEXT8_MARKER => { + self.pos += 1; + self.take_slice(9)?; + } + FIXEXT16_MARKER => { + self.pos += 1; + self.take_slice(17)?; + } + EXT8_MARKER => { + self.pos += 1; + let len = self.take_byte()? 
as usize; + self.take_slice(len + 1)?; + } + EXT16_MARKER => { + self.pos += 1; + let bytes = self.take_array::<2>()?; + let len = u16::from_be_bytes(*bytes) as usize; + self.take_slice(len + 1)?; + } + EXT32_MARKER => { + self.pos += 1; + let bytes = self.take_array::<4>()?; + let len = u32::from_be_bytes(*bytes) as usize; + self.take_slice(len + 1)?; + } + _ => return Err(Error::InvalidMarker(byte)), + } + self.decrement_depth(); + Ok(()) + } } #[cfg(feature = "std")] @@ -1473,6 +1615,133 @@ impl<'de, R: std::io::Read> Read<'de> for IOReader { _ => Err(Error::InvalidMarker(byte)), } } + + fn skip_value(&mut self) -> Result<()> { + self.increment_depth()?; + let byte = self.read_byte()?; + match byte { + POS_FIXINT_START..=POS_FIXINT_END | NEG_FIXINT_START..=NEG_FIXINT_END => {} + NIL_MARKER | TRUE_MARKER | FALSE_MARKER => {} + UINT8_MARKER | INT8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + } + UINT16_MARKER | INT16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + } + UINT32_MARKER | INT32_MARKER | FLOAT32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + } + UINT64_MARKER | INT64_MARKER | FLOAT64_MARKER => { + let mut buf = [0u8; 8]; + self.read_exact(&mut buf)?; + } + FIXSTR_START..=FIXSTR_END => { + let len = (byte - FIXSTR_START) as usize; + let _ = self.read_exact_vec(len)?; + } + STR8_MARKER | BIN8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + let len = buf[0] as usize; + let _ = self.read_exact_vec(len)?; + } + STR16_MARKER | BIN16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len)?; + } + STR32_MARKER | BIN32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len)?; + } + FIXARRAY_START..=FIXARRAY_END => { + let len = (byte - FIXARRAY_START) as usize; + for _ in 0..len { + 
self.skip_value()?; + } + } + ARRAY16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + ARRAY32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + for _ in 0..len { + self.skip_value()?; + } + } + FIXMAP_START..=FIXMAP_END => { + let len = (byte - FIXMAP_START) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + MAP32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + for _ in 0..(len * 2) { + self.skip_value()?; + } + } + FIXEXT1_MARKER => { + let _ = self.read_exact_vec(2)?; + } + FIXEXT2_MARKER => { + let _ = self.read_exact_vec(3)?; + } + FIXEXT4_MARKER => { + let _ = self.read_exact_vec(5)?; + } + FIXEXT8_MARKER => { + let _ = self.read_exact_vec(9)?; + } + FIXEXT16_MARKER => { + let _ = self.read_exact_vec(17)?; + } + EXT8_MARKER => { + let mut buf = [0u8; 1]; + self.read_exact(&mut buf)?; + let len = buf[0] as usize; + let _ = self.read_exact_vec(len + 1)?; + } + EXT16_MARKER => { + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + let len = u16::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len + 1)?; + } + EXT32_MARKER => { + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + let len = u32::from_be_bytes(buf) as usize; + let _ = self.read_exact_vec(len + 1)?; + } + _ => return Err(Error::InvalidMarker(byte)), + } + self.decrement_depth(); + Ok(()) + } } #[cfg(test)] diff --git a/zerompk/tests/schema_evolution.rs b/zerompk/tests/schema_evolution.rs new file mode 100644 index 0000000..b78a509 --- /dev/null +++ b/zerompk/tests/schema_evolution.rs @@ -0,0 +1,198 @@ +//! 
Tests for `#[msgpack(default)]` (fill missing keys) and +//! `#[msgpack(allow_unknown_fields)]` (skip unknown keys), plus their +//! interaction. These two opt-ins are intentionally orthogonal. + +use zerompk::{FromMessagePack, ToMessagePack}; +use zerompk_derive::{ + FromMessagePack as DeriveFromMessagePack, ToMessagePack as DeriveToMessagePack, +}; + +fn encode(value: &T) -> Vec { + zerompk::to_msgpack_vec(value).unwrap() +} + +fn decode<'a, T: FromMessagePack<'a>>(bytes: &'a [u8]) -> Result { + zerompk::from_msgpack(bytes) +} + +// --------------------------------------------------------------------------- +// V1 schema (writer side): the "old" version of a message. +// --------------------------------------------------------------------------- + +#[derive(DeriveToMessagePack, DeriveFromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V1 { + a: i32, + b: i32, +} + +// --------------------------------------------------------------------------- +// `default` only: fill missing keys, but unknown keys must still error. +// This is the "I added a new field" evolution direction. +// --------------------------------------------------------------------------- + +#[derive(DeriveFromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V2DefaultsOnly { + a: i32, + b: i32, + #[msgpack(default)] + c: i32, +} + +#[test] +fn defaults_fill_missing_keys() { + let v1 = V1 { a: 1, b: 2 }; + let bytes = encode(&v1); + let v2: V2DefaultsOnly = decode(&bytes).unwrap(); + assert_eq!(v2, V2DefaultsOnly { a: 1, b: 2, c: 0 }); +} + +#[test] +fn defaults_alone_still_reject_unknown_keys() { + // Writer emits an extra unknown key `z`; reader has defaults but not + // `allow_unknown_fields`. Decode must fail loudly. 
+ #[derive(DeriveToMessagePack)] + #[msgpack(map)] + struct WithExtra { + a: i32, + b: i32, + z: i32, + } + let bytes = encode(&WithExtra { a: 1, b: 2, z: 99 }); + let err = decode::(&bytes).unwrap_err(); + match err { + zerompk::Error::KeyNotFound(k) => assert_eq!(k, "z"), + other => panic!("expected KeyNotFound, got {other:?}"), + } +} + +// --------------------------------------------------------------------------- +// `allow_unknown_fields` only: skip unknown keys, but missing keys must +// still error. This is the "I removed a field" evolution direction. +// --------------------------------------------------------------------------- + +#[derive(DeriveFromMessagePack, Debug, PartialEq)] +#[msgpack(map, allow_unknown_fields)] +struct V0AllowUnknownOnly { + a: i32, +} + +#[test] +fn allow_unknown_skips_extra_keys() { + let v1 = V1 { a: 1, b: 2 }; + let bytes = encode(&v1); + let v0: V0AllowUnknownOnly = decode(&bytes).unwrap(); + assert_eq!(v0, V0AllowUnknownOnly { a: 1 }); +} + +#[test] +fn allow_unknown_alone_still_requires_all_keys() { + // Writer is missing key `a`; reader allows unknowns but `a` has no default. + #[derive(DeriveToMessagePack)] + #[msgpack(map)] + struct OnlyB { + b: i32, + } + let bytes = encode(&OnlyB { b: 5 }); + let err = decode::(&bytes).unwrap_err(); + assert!(matches!(err, zerompk::Error::KeyNotFound(_))); +} + +// --------------------------------------------------------------------------- +// Both: full schema evolution — accept missing keys (defaulted) and skip +// unknown keys. +// --------------------------------------------------------------------------- + +#[derive(DeriveFromMessagePack, Debug, PartialEq)] +#[msgpack(map, allow_unknown_fields)] +struct VFull { + a: i32, + #[msgpack(default)] + new_field: i32, +} + +#[test] +fn both_modes_compose() { + // V1 has `b` (unknown to VFull) and lacks `new_field` (defaulted). 
+ let v1 = V1 { a: 7, b: 2 }; + let bytes = encode(&v1); + let v: VFull = decode(&bytes).unwrap(); + assert_eq!(v, VFull { a: 7, new_field: 0 }); +} + +// --------------------------------------------------------------------------- +// `default = "path"` form. +// --------------------------------------------------------------------------- + +fn forty_two() -> i32 { + 42 +} + +#[derive(DeriveFromMessagePack, Debug, PartialEq)] +#[msgpack(map)] +struct V2DefaultPath { + a: i32, + b: i32, + #[msgpack(default = "forty_two")] + c: i32, +} + +#[test] +fn default_path_invokes_named_function() { + let bytes = encode(&V1 { a: 1, b: 2 }); + let v: V2DefaultPath = decode(&bytes).unwrap(); + assert_eq!(v, V2DefaultPath { a: 1, b: 2, c: 42 }); +} + +// --------------------------------------------------------------------------- +// Strict-by-default: an untouched struct still rejects missing keys and +// extra keys. This guards against the strict-mode codegen regressing. +// --------------------------------------------------------------------------- + +#[test] +fn strict_default_rejects_missing_key() { + #[derive(DeriveToMessagePack)] + #[msgpack(map)] + struct OnlyA { + a: i32, + } + let bytes = encode(&OnlyA { a: 1 }); + let err = decode::(&bytes).unwrap_err(); + // 0.4.1 strict path uses check_map_len, which surfaces a length error. + assert!(matches!(err, zerompk::Error::MapLengthMismatch { .. })); +} + +#[test] +fn strict_default_rejects_extra_key() { + #[derive(DeriveToMessagePack)] + #[msgpack(map)] + struct WithExtra { + a: i32, + b: i32, + c: i32, + } + let bytes = encode(&WithExtra { a: 1, b: 2, c: 3 }); + let err = decode::(&bytes).unwrap_err(); + assert!(matches!(err, zerompk::Error::MapLengthMismatch { .. })); +} + +// --------------------------------------------------------------------------- +// Round-trip: writer emits all declared fields, so V2-encoded → V2-decoded +// preserves values regardless of mode. 
+// --------------------------------------------------------------------------- + +#[test] +fn round_trip_preserves_values() { + #[derive(DeriveToMessagePack, DeriveFromMessagePack, Debug, PartialEq)] + #[msgpack(map, allow_unknown_fields)] + struct V { + a: i32, + #[msgpack(default)] + b: i32, + } + let original = V { a: 10, b: 20 }; + let bytes = encode(&original); + let decoded: V = decode(&bytes).unwrap(); + assert_eq!(original, decoded); +} diff --git a/zerompk_derive/src/lib.rs b/zerompk_derive/src/lib.rs index 6d84214..f0d355d 100644 --- a/zerompk_derive/src/lib.rs +++ b/zerompk_derive/src/lib.rs @@ -42,11 +42,13 @@ enum Repr { struct TypeConfig { repr: Option, c_enum: bool, + allow_unknown_fields: bool, } fn parse_type_config_from_attrs(attrs: &[syn::Attribute]) -> Result { let mut repr = None; let mut c_enum = false; + let mut allow_unknown_fields = false; for attr in attrs { if !attr.path().is_ident("msgpack") { @@ -75,13 +77,25 @@ fn parse_type_config_from_attrs(attrs: &[syn::Attribute]) -> Result } c_enum = true; Ok(()) + } else if meta.path.is_ident("allow_unknown_fields") { + if allow_unknown_fields { + return Err(meta.error("duplicate `allow_unknown_fields` attribute")); + } + allow_unknown_fields = true; + Ok(()) } else { - Err(meta.error("expected `array`, `map`, `c_enum`, or `key = ...`")) + Err(meta.error( + "expected `array`, `map`, `c_enum`, `allow_unknown_fields`, or `key = ...`", + )) } })?; } - Ok(TypeConfig { repr, c_enum }) + Ok(TypeConfig { + repr, + c_enum, + allow_unknown_fields, + }) } fn add_trait_bounds(mut generics: Generics, kind: DeriveKind) -> Generics { @@ -355,12 +369,16 @@ struct FieldConfig { key: Option, ignore: bool, as_bytes: Option, + default: bool, + default_path: Option, } fn parse_field_config(field: &Field) -> Result { let mut key: Option = None; let mut ignore = false; let mut as_bytes: Option = None; + let mut default = false; + let mut default_path: Option = None; for attr in &field.attrs { if 
!attr.path().is_ident("msgpack") { @@ -403,11 +421,27 @@ fn parse_field_config(field: &Field) -> Result { } }); Ok(()) + } else if meta.path.is_ident("default") { + if default { + return Err(meta.error("duplicate `default` attribute")); + } + default = true; + if meta.input.peek(syn::Token![=]) { + let value = meta.value()?; + let lit: Lit = value.parse()?; + match lit { + Lit::Str(s) => { + default_path = Some(s.parse()?); + } + _ => return Err(meta.error("`default = ...` must be a string path")), + } + } + Ok(()) } else if meta.path.is_ident("array") || meta.path.is_ident("map") { Err(meta.error("field-level msgpack attribute does not support `array/map`")) } else { Err(meta - .error("field-level msgpack attribute supports only `key = ...`, `ignore`, or `as_bytes = true/false`")) + .error("field-level msgpack attribute supports only `key = ...`, `ignore`, `default`, or `as_bytes = true/false`")) } })?; } @@ -437,6 +471,8 @@ fn parse_field_config(field: &Field) -> Result { key, ignore, as_bytes, + default, + default_path, }) } @@ -789,9 +825,15 @@ fn expand(input: DeriveInput, kind: DeriveKind) -> Result expand_array_struct(&data)?, - Repr::Map => expand_map_struct(&data)?, + Repr::Map => expand_map_struct(&data, type_cfg.allow_unknown_fields)?, } } Data::Enum(data) => { @@ -801,6 +843,12 @@ fn expand(input: DeriveInput, kind: DeriveKind) -> Result Result { .collect::>()?; let field_index_by_slot = build_named_array_slots(fields, &field_configs)?; + // `#[msgpack(default)]` is only honored in map mode. Arrays have + // no field names, so silently accepting shorter/longer arrays + // hides corruption rather than evolving schema. Force the user + // to opt into map representation explicitly. 
+ for (i, cfg) in field_configs.iter().enumerate() { + if cfg.default { + return Err(syn::Error::new( + fields.named[i].span(), + "`#[msgpack(default)]` is only supported with `#[msgpack(map)]`; array representation has no field names so missing values cannot be detected safely", + )); + } + } + let array_len = field_index_by_slot.len(); let is_dense_sequential = field_index_by_slot.len() == names.len() && field_index_by_slot @@ -1083,7 +1144,7 @@ fn expand_array_struct(data: &DataStruct) -> Result { } } -fn expand_map_struct(data: &DataStruct) -> Result { +fn expand_map_struct(data: &DataStruct, allow_unknown_fields: bool) -> Result { let fields = match &data.fields { Fields::Named(fields) => fields, Fields::Unnamed(_) | Fields::Unit => { @@ -1166,31 +1227,114 @@ fn expand_map_struct(data: &DataStruct) -> Result { Ok(()) }; - let read = quote! { - '__zerompk_read_map: { - reader.check_map_len(#count)?; + let any_field_has_default = field_configs.iter().any(|c| c.default); + + // Four decoding modes, controlled by two orthogonal opt-ins: + // + // defaults unknown decoder behavior + // -------- ------------- ----------------------------------------- + // no deny (default) check_map_len(N), every key required + // yes deny read_map_len, fill missing, error on unknown + // no allow read_map_len, every key required, skip unknown + // yes allow read_map_len, fill missing, skip unknown + // + // Strict mode preserves 0.4.1 codegen byte-for-byte. The other three modes + // share one tolerant skeleton parameterized by what to do with missing + // keys (default vs error) and unknown keys (skip vs error). + let read = if !any_field_has_default && !allow_unknown_fields { + quote! 
{ + '__zerompk_read_map: { + reader.check_map_len(#count)?; + + #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* + + #[allow(clippy::reversed_empty_ranges)] + for _ in 0..#count { + let __key_bytes = reader.read_string_bytes()?; + let __key_bytes = __key_bytes.as_ref(); + let __key_index = (|| -> ::zerompk::Result { + #key_dispatch + })()?; + + match __key_index { + #( #read_value_arms )* + _ => unreachable!(), + } + } - #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* + #( + let #names = #slots.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_lits.into()))?; + )* + + break '__zerompk_read_map Ok(Self { #( #init_fields ),* }); + } + } + } else { + let unknown_arm = if allow_unknown_fields { + quote! { _ => { reader.skip_value()?; } } + } else { + // Surface the offending key so users can diagnose schema drift. + quote! { + _ => { + let __key_str = ::core::str::from_utf8(__key_bytes) + .unwrap_or("") + .to_string(); + break '__zerompk_read_map Err(::zerompk::Error::KeyNotFound(__key_str)); + } + } + }; - #[allow(clippy::reversed_empty_ranges)] - for _ in 0..#count { - let __key_bytes = reader.read_string_bytes()?; - let __key_bytes = __key_bytes.as_ref(); - let __key_index = (|| -> ::zerompk::Result { + let key_dispatch_tolerant = quote! { + let __matched_idx: usize = (|| -> ::zerompk::Result { #key_dispatch - })()?; + })().unwrap_or(usize::MAX); + }; + + let slot_finalize: Vec<_> = (0..count) + .map(|idx| { + let name = &names[idx]; + let slot = &slots[idx]; + let key_name = &key_lits[idx]; + let ty = &tys[idx]; + let cfg = &field_configs[field_indices[idx]]; + if cfg.default { + let default_expr = if let Some(path) = &cfg.default_path { + quote! { #path() } + } else { + quote! { <#ty as ::core::default::Default>::default() } + }; + quote! { + let #name = #slot.unwrap_or_else(|| #default_expr); + } + } else { + quote! 
{ + let #name = #slot.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_name.into()))?; + } + } + }) + .collect(); - match __key_index { - #( #read_value_arms )* - _ => unreachable!(), + quote! { + '__zerompk_read_map: { + let __map_len = reader.read_map_len()?; + + #( let mut #slots: ::core::option::Option<#tys> = ::core::option::Option::None; )* + + for _ in 0..__map_len { + let __key_bytes = reader.read_string_bytes()?; + let __key_bytes = __key_bytes.as_ref(); + #key_dispatch_tolerant + + match __matched_idx { + #( #read_value_arms )* + #unknown_arm + } } - } - #( - let #names = #slots.ok_or_else(|| ::zerompk::Error::KeyNotFound(#key_lits.into()))?; - )* + #( #slot_finalize )* - break '__zerompk_read_map Ok(Self { #( #init_fields ),* }); + break '__zerompk_read_map Ok(Self { #( #init_fields ),* }); + } } }; @@ -1392,6 +1536,24 @@ fn build_enum_variant_payload( )> { let v_ident = &variant.ident; + // `#[msgpack(default)]` on enum-variant fields is currently a no-op in + // codegen — silently accepting it would let users write code that looks + // like it does schema evolution but doesn't. Reject loudly. + let variant_field_iter: Box> = match &variant.fields { + Fields::Named(f) => Box::new(f.named.iter()), + Fields::Unnamed(f) => Box::new(f.unnamed.iter()), + Fields::Unit => Box::new(std::iter::empty()), + }; + for field in variant_field_iter { + let fc = parse_field_config(field)?; + if fc.default { + return Err(syn::Error::new( + field.span(), + "`#[msgpack(default)]` is not supported on enum-variant fields", + )); + } + } + match &variant.fields { Fields::Unit => { if cfg.repr.is_some() {