From da2897fb5f6cb7ea55b14dd426fa19ec7c991a7c Mon Sep 17 00:00:00 2001
From: Chao Liu <cbmixx@gmail.com>
Date: Thu, 11 Jun 2026 17:56:52 +0800
Subject: [PATCH] fix(parquet_derive): support raw identifiers as column names

The ParquetRecordReader and ParquetRecordWriter derive macros
stringified struct field identifiers together with their r# prefix,
so a field declared as r#type was looked up (reader) and written to
the schema (writer) as a column literally named "r#type" instead of
"type". This made it impossible to read or write parquet columns
whose names are Rust keywords.

Unraw the identifier wherever it is used as a column name, while
keeping the raw identifier for field access in the generated code.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 parquet_derive/src/lib.rs           | 18 +++++++++--
 parquet_derive/src/parquet_field.rs | 25 ++++++++++++++-
 parquet_derive_test/src/lib.rs      | 49 +++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/parquet_derive/src/lib.rs b/parquet_derive/src/lib.rs
index 1aaa1abfd2a3..a959507d905c 100644
--- a/parquet_derive/src/lib.rs
+++ b/parquet_derive/src/lib.rs
@@ -34,7 +34,7 @@ extern crate quote;
 
 extern crate parquet;
 
-use ::syn::{Data, DataStruct, DeriveInput, parse_macro_input};
+use ::syn::{Data, DataStruct, DeriveInput, ext::IdentExt, parse_macro_input};
 
 mod parquet_field;
 
@@ -234,6 +234,18 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke
 
     let field_infos: Vec<_> = fields.iter().map(parquet_field::Field::from).collect();
     let field_names: Vec<_> = fields.iter().map(|f| f.ident.clone()).collect();
+    // unraw the identifiers, so raw identifiers like `r#type` are looked
+    // up by their column name `type` in the parquet file
+    let field_names_str: Vec<_> = fields
+        .iter()
+        .map(|f| {
+            f.ident
+                .as_ref()
+                .expect("Only structs with named fields are currently supported")
+                .unraw()
+                .to_string()
+        })
+        .collect();
     let reader_snippets: Vec<proc_macro2::TokenStream> =
         field_infos.iter().map(|x| x.reader_snippet()).collect();
 
@@ -270,10 +282,10 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke
 
         #(
           {
-              let idx: usize = match name_to_index.get(stringify!(#field_names)) {
+              let idx: usize = match name_to_index.get(#field_names_str) {
                 Some(&col_idx) => col_idx,
                 None => {
-                  let error_msg = format!("column name '{}' is not found in parquet file!", stringify!(#field_names));
+                  let error_msg = format!("column name '{}' is not found in parquet file!", #field_names_str);
                   return Err(::parquet::errors::ParquetError::General(error_msg));
                 }
               };
diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs
index 17b8d8543725..e332ea21aa4a 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use syn::ext::IdentExt;
+
 #[derive(Debug, PartialEq)]
 pub struct Field {
     ident: syn::Ident,
@@ -293,7 +295,9 @@ impl Field {
         // TODO: Support group types
         // TODO: Add length if dealing with fixedlenbinary
 
-        let field_name = &self.ident.to_string();
+        // unraw the identifier, so a raw identifier like `r#type`
+        // becomes a column named `type` in the parquet schema
+        let field_name = self.ident.unraw().to_string();
         let physical_type = match self.ty.physical_type() {
             parquet::basic::Type::BOOLEAN => quote! {
                 ::parquet::basic::Type::BOOLEAN
@@ -880,6 +884,25 @@ mod test {
         )
     }
 
+    #[test]
+    fn test_parquet_type_with_raw_identifier() {
+        let snippet: proc_macro2::TokenStream = quote! {
+          struct ABoringStruct {
+            r#type: i32,
+          }
+        };
+
+        let fields = extract_fields(snippet);
+        let r#type = Field::from(&fields[0]);
+
+        // must be `type`, not `r#type`; spaces are stripped as token spacing is unstable
+        let snippet = r#type.parquet_type().to_string().replace(' ', "");
+        assert!(
+            snippet.contains("primitive_type_builder(\"type\""),
+            "{snippet}"
+        );
+    }
+
     #[test]
     fn test_optional_to_writer_snippet() {
         let struct_def: proc_macro2::TokenStream = quote! {
diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index fe96fa0e6122..e8462874f360 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -109,6 +109,15 @@ struct APrunedRecord {
     pub isize: isize,
 }
 
+// `r#type` is declared as a raw identifier because `type` is a Rust
+// keyword; the derives must map it to a parquet column named `type`,
+// without the `r#` prefix (the name other tools would have written)
+#[derive(PartialEq, ParquetRecordWriter, ParquetRecordReader, Debug)]
+struct ARecordWithRawIdentifiers {
+    pub r#type: i32,
+    pub count: i32,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -356,6 +365,46 @@ mod tests {
         assert_eq!(drs[0].isize, out[0].isize);
     }
 
+    #[test]
+    fn test_parquet_derive_raw_identifiers() {
+        let file = get_temp_file("test_parquet_derive_raw_identifiers", &[]);
+        let drs = vec![ARecordWithRawIdentifiers {
+            r#type: 456,
+            count: 123,
+        }];
+
+        let generated_schema = drs.as_slice().schema().unwrap();
+
+        // the derived schema names the raw identifier `r#type` as `type`
+        // (no `r#` prefix) and leaves the plain identifier `count` unchanged
+        assert_eq!(
+            vec!["type", "count"],
+            generated_schema
+                .get_fields()
+                .iter()
+                .map(|field| field.name())
+                .collect::<Vec<_>>()
+        );
+        // write the single record to the temp file as one row group
+        let props = Default::default();
+        let mut writer =
+            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
+
+        let mut row_group = writer.next_row_group().unwrap();
+        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
+        row_group.close().unwrap();
+        writer.close().unwrap();
+        // read row group 0 back into records of the same struct
+        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
+        let reader = SerializedFileReader::new(file).unwrap();
+        let mut out: Vec<ARecordWithRawIdentifiers> = Vec::new();
+
+        let mut row_group = reader.get_row_group(0).unwrap();
+        out.read_from_row_group(&mut *row_group, 1).unwrap();
+
+        assert_eq!(drs, out);
+    }
+
     #[test]
     fn test_aliased_result() {
         // Issue 7547, Where aliasing the `Result` led to