diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs index 0983147132a2..8e8c0ea9cf98 100644 --- a/parquet-variant-compute/src/from_json.rs +++ b/parquet-variant-compute/src/from_json.rs @@ -84,8 +84,8 @@ mod test { let array_ref: ArrayRef = Arc::new(input); let variant_array = json_to_variant(&array_ref).unwrap(); - let metadata_array = variant_array.metadata_field(); - let value_array = variant_array.value_field().expect("value field"); + let metadata_array = variant_array.metadata_column(); + let value_array = variant_array.value_column().expect("value column"); // Compare row 0 assert!(!variant_array.is_null(0)); @@ -133,8 +133,8 @@ mod test { let array_ref: ArrayRef = Arc::new(input); let variant_array = json_to_variant(&array_ref).unwrap(); - let metadata_array = variant_array.metadata_field(); - let value_array = variant_array.value_field().expect("value field"); + let metadata_array = variant_array.metadata_column(); + let value_array = variant_array.value_column().expect("value column"); // Compare row 0 assert!(!variant_array.is_null(0)); @@ -182,8 +182,8 @@ mod test { let array_ref: ArrayRef = Arc::new(input); let variant_array = json_to_variant(&array_ref).unwrap(); - let metadata_array = variant_array.metadata_field(); - let value_array = variant_array.value_field().expect("value field"); + let metadata_array = variant_array.metadata_column(); + let value_array = variant_array.value_column().expect("value column"); // Compare row 0 assert!(!variant_array.is_null(0)); diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index 440f4b716521..c06caea945fd 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -76,13 +76,13 @@ pub(crate) fn shred_variant_with_options( as_type: &DataType, cast_options: &CastOptions, ) -> Result { - if array.typed_value_field().is_some() { + if array.typed_value_column().is_some() { return Err(ArrowError::InvalidArgumentError( "Input is already shredded".to_string(), )); } - if array.value_field().is_none() { + if array.value_column().is_none() { // all-null case -- nothing to do. return Ok(array.clone()); }; @@ -102,7 +102,7 @@ pub(crate) fn shred_variant_with_options( } let (value, typed_value, nulls) = builder.finish()?; Ok(VariantArray::from_parts( - array.metadata_field().clone(), + array.metadata_column().clone(), Some(Arc::new(value)), Some(typed_value), nulls, @@ -855,7 +855,7 @@ mod tests { fn downcast_list_like_array( array: &VariantArray, ) -> &dyn TestListLikeArray { - let typed_value = array.typed_value_field().unwrap(); + let typed_value = array.typed_value_column().unwrap(); if let Some(list) = typed_value.as_any().downcast_ref::>() { list } else if let Some(list_view) = typed_value @@ -880,8 +880,8 @@ mod tests { ) { assert_eq!(array.len(), expected_len); - let fallback_value = array.value_field().unwrap(); - let fallback_metadata = array.metadata_field(); + let fallback_value = array.value_column().unwrap(); + let fallback_metadata = array.metadata_column(); let array = downcast_list_like_array::(array); assert_eq!( @@ -978,7 +978,7 @@ mod tests { // Validate the shredded primitive values for list elements let element_array = ShreddedVariantFieldArray::try_new(array.values().as_ref()).unwrap(); let element_values = element_array - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::>() @@ -995,7 +995,7 @@ mod tests { } // Validate fallback variants for list elements that could not be shredded - let element_fallbacks = element_array.value_field().unwrap(); + let element_fallbacks = element_array.value_column().unwrap(); assert_eq!(element_fallbacks.len(), expected_fallbacks.len()); for (idx, expected_fallback) in expected_fallbacks.iter().enumerate() { match expected_fallback { @@ -1123,8 +1123,8 @@ mod tests { typed_struct.column_by_name(field_name).unwrap(), ) .unwrap(); - assert!(field.value_field().unwrap().is_null(0)); - assert!(field.typed_value_field().unwrap().is_null(0)); + assert!(field.value_column().unwrap().is_null(0)); + assert!(field.typed_value_column().unwrap().is_null(0)); } } } @@ -1134,8 +1134,8 @@ mod tests { // Create a VariantArray that already has typed_value_field // First create a valid VariantArray, then extract its parts to construct a shredded one let temp_array = VariantArray::from_iter(vec![Some(Variant::from("test"))]); - let metadata = temp_array.metadata_field().clone(); - let value = temp_array.value_field().unwrap().clone(); + let metadata = temp_array.metadata_column().clone(); + let value = temp_array.value_column().unwrap().clone(); let typed_value = Arc::new(Int64Array::from(vec![42])) as ArrayRef; let shredded_array = @@ -1156,8 +1156,8 @@ mod tests { let result = shred_variant(&all_null_array, &DataType::Int64).unwrap(); // Should return array with no value/typed_value fields - assert!(result.value_field().is_none()); - assert!(result.typed_value_field().is_none()); + assert!(result.value_column().is_none()); + assert!(result.typed_value_column().is_none()); } #[test] @@ -1205,7 +1205,7 @@ mod tests { // probe the downcasted typed_value array to make sure uuids are shredded correctly let uuids = variant_array - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1242,10 +1242,10 @@ mod tests { let result = shred_variant(&input, &DataType::Int64).unwrap(); // Verify structure - let metadata_field = result.metadata_field(); - let value_field = result.value_field().unwrap(); + let metadata_field = result.metadata_column(); + let value_field = result.value_column().unwrap(); let typed_value_field = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1304,7 +1304,7 @@ mod tests { // Test Int32 target let result_int32 = shred_variant(&input, &DataType::Int32).unwrap(); let typed_value_int32 = result_int32 - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1316,7 +1316,7 @@ mod tests { // Test Float64 target let result_float64 = shred_variant(&input, &DataType::Float64).unwrap(); let typed_value_float64 = result_float64 - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1337,10 +1337,10 @@ mod tests { ]); let result = shred_variant(&input, &DataType::LargeUtf8).unwrap(); - let metadata = result.metadata_field(); - let value = result.value_field().unwrap(); + let metadata = result.metadata_column(); + let value = result.value_column().unwrap(); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1393,10 +1393,10 @@ mod tests { ]); let result = shred_variant(&input, &DataType::LargeBinary).unwrap(); - let metadata = result.metadata_field(); - let value = result.value_field().unwrap(); + let metadata = result.metadata_column(); + let value = result.value_column().unwrap(); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1644,23 +1644,23 @@ mod tests { // The first row should be shredded, so the `value` field should be null and the // `typed_value` field should contain the list assert!(result.is_valid(0)); - assert!(result.value_field().unwrap().is_null(0)); - assert!(result.typed_value_field().unwrap().is_valid(0)); + assert!(result.value_column().unwrap().is_null(0)); + assert!(result.typed_value_column().unwrap().is_valid(0)); // The second row should not be shredded because the provided schema for shredding did not // match. Hence, the `value` field should contain the raw value and the `typed_value` field // should be null. assert!(result.is_valid(1)); - assert!(result.value_field().unwrap().is_valid(1)); - assert!(result.typed_value_field().unwrap().is_null(1)); + assert!(result.value_column().unwrap().is_valid(1)); + assert!(result.typed_value_column().unwrap().is_null(1)); // The third row should be shredded, so the `value` field should be null and the // `typed_value` field should contain the list assert!(result.is_valid(2)); - assert!(result.value_field().unwrap().is_null(2)); - assert!(result.typed_value_field().unwrap().is_valid(2)); + assert!(result.value_column().unwrap().is_null(2)); + assert!(result.typed_value_column().unwrap().is_valid(2)); - let typed_value = result.typed_value_field().unwrap(); + let typed_value = result.typed_value_column().unwrap(); let fixed_size_list = typed_value .as_any() .downcast_ref::() @@ -1742,7 +1742,7 @@ mod tests { assert_eq!(result.len(), 4); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1760,12 +1760,12 @@ mod tests { ShreddedVariantFieldArray::try_new(typed_value.values().as_ref()).unwrap(); assert_eq!(outer_elements.len(), 6); let outer_values = outer_elements - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() .unwrap(); - let outer_fallbacks = outer_elements.value_field().unwrap(); + let outer_fallbacks = outer_elements.value_column().unwrap(); let outer_metadata = Arc::new(BinaryViewArray::from_iter_values(std::iter::repeat_n( EMPTY_VARIANT_METADATA_BYTES, @@ -1846,7 +1846,7 @@ mod tests { // Validate nested struct fields for each element let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1855,7 +1855,7 @@ mod tests { ShreddedVariantFieldArray::try_new(typed_value.values().as_ref()).unwrap(); assert_eq!(element_array.len(), 2); let element_objects = element_array - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1865,9 +1865,9 @@ mod tests { let id_field = ShreddedVariantFieldArray::try_new(element_objects.column_by_name("id").unwrap()) .unwrap(); - let id_values = id_field.value_field().unwrap(); + let id_values = id_field.value_column().unwrap(); let id_typed_values = id_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1889,9 +1889,9 @@ mod tests { let name_field = ShreddedVariantFieldArray::try_new(element_objects.column_by_name("name").unwrap()) .unwrap(); - let name_values = name_field.value_field().unwrap(); + let name_values = name_field.value_column().unwrap(); let name_typed_values = name_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1949,14 +1949,14 @@ mod tests { let result = shred_variant(&input, &target_schema).unwrap(); // Verify structure - assert!(result.value_field().is_some()); - assert!(result.typed_value_field().is_some()); + assert!(result.value_column().is_some()); + assert!(result.typed_value_column().is_some()); assert_eq!(result.len(), 9); - let metadata = result.metadata_field(); - let value = result.value_field().unwrap(); + let metadata = result.metadata_column(); + let value = result.value_column().unwrap(); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -1969,16 +1969,16 @@ mod tests { let age_field = ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap(); - let score_value = score_field.value_field().unwrap(); + let score_value = score_field.value_column().unwrap(); let score_typed_value = score_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() .unwrap(); - let age_value = age_field.value_field().unwrap(); + let age_value = age_field.value_column().unwrap(); let age_typed_value = age_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2279,9 +2279,9 @@ mod tests { assert_eq!(result.len(), 5); // Access base value/typed_value columns - let value_field = result.value_field().unwrap(); + let value_field = result.value_column().unwrap(); let typed_struct = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2293,7 +2293,7 @@ mod tests { assert!(value_field.is_null(2)); assert!(value_field.is_valid(3)); assert_eq!( - variant_from_arrays_at(result.metadata_field(), value_field, 3).unwrap(), + variant_from_arrays_at(result.metadata_column(), value_field, 3).unwrap(), Variant::from("not an object") ); assert!(value_field.is_null(4)); @@ -2315,8 +2315,8 @@ mod tests { EMPTY_VARIANT_METADATA_BYTES, scores_field.len(), ))), - Some(scores_field.value_field().unwrap().clone()), - Some(scores_field.typed_value_field().unwrap().clone()), + Some(scores_field.value_column().unwrap().clone()), + Some(scores_field.typed_value_column().unwrap().clone()), None, ), scores_field.len(), @@ -2344,7 +2344,7 @@ mod tests { .with_path("id", &DataType::Int32)? .build(); let result1 = shred_variant(&input, &schema1).unwrap(); - let value_field1 = result1.value_field().unwrap(); + let value_field1 = result1.value_column().unwrap(); assert!(!value_field1.is_null(0)); // should contain {"age": 25, "score": 95.5} // Test with schema containing id and age fields @@ -2353,7 +2353,7 @@ mod tests { .with_path("age", &DataType::Int64)? .build(); let result2 = shred_variant(&input, &schema2).unwrap(); - let value_field2 = result2.value_field().unwrap(); + let value_field2 = result2.value_column().unwrap(); assert!(!value_field2.is_null(0)); // should contain {"score": 95.5} // Test with schema containing all fields @@ -2363,7 +2363,7 @@ mod tests { .with_path("score", &DataType::Float64)? .build(); let result3 = shred_variant(&input, &schema3).unwrap(); - let value_field3 = result3.value_field().unwrap(); + let value_field3 = result3.value_column().unwrap(); assert!(value_field3.is_null(0)); // fully shredded, no remaining fields Ok(()) @@ -2410,14 +2410,14 @@ mod tests { let result = shred_variant(&input, &target_schema).unwrap(); - assert!(result.value_field().is_some()); - assert!(result.typed_value_field().is_some()); + assert!(result.value_column().is_some()); + assert!(result.typed_value_column().is_some()); assert_eq!(result.len(), 6); - let metadata = result.metadata_field(); - let value = result.value_field().unwrap(); + let metadata = result.metadata_column(); + let value = result.value_column().unwrap(); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2430,16 +2430,16 @@ mod tests { ShreddedVariantFieldArray::try_new(typed_value.column_by_name("session_id").unwrap()) .unwrap(); - let id_value = id_field.value_field().unwrap(); + let id_value = id_field.value_column().unwrap(); let id_typed_value = id_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() .unwrap(); - let session_id_value = session_id_field.value_field().unwrap(); + let session_id_value = session_id_field.value_column().unwrap(); let session_id_typed_value = session_id_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2543,21 +2543,27 @@ mod tests { assert!(inner_struct.column_by_name("typed_value").is_some()); // Test metadata preservation - assert_eq!(result.metadata_field().len(), input.metadata_field().len()); + assert_eq!( + result.metadata_column().len(), + input.metadata_column().len() + ); // The metadata should be the same reference (cheap clone) // Note: BinaryViewArray doesn't have a .values() method, so we compare the arrays directly - assert_eq!(result.metadata_field().len(), input.metadata_field().len()); + assert_eq!( + result.metadata_column().len(), + input.metadata_column().len() + ); // Test output structure correctness assert_eq!(result.len(), input.len()); - assert!(result.value_field().is_some()); - assert!(result.typed_value_field().is_some()); + assert!(result.value_column().is_some()); + assert!(result.typed_value_column().is_some()); // For primitive shredding, verify that value and typed_value are never both non-null // (This rule applies to primitives; for objects, both can be non-null for partial shredding) - let value_field = result.value_field().unwrap(); + let value_field = result.value_column().unwrap(); let typed_value_field = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2718,10 +2724,10 @@ mod tests { ); assert_eq!(result.len(), 3); - assert!(result.typed_value_field().is_some()); + assert!(result.typed_value_column().is_some()); let typed_value = result - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() @@ -2735,13 +2741,13 @@ mod tests { .unwrap(); let time_typed = time_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() .unwrap(); let hostname_typed = hostname_field - .typed_value_field() + .typed_value_column() .unwrap() .as_any() .downcast_ref::() diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs index f4bfa73bded1..30f861ce1e94 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -59,7 +59,7 @@ use uuid::Uuid; /// - If unsupported data types are encountered in typed_value columns pub fn unshred_variant(array: &VariantArray) -> Result { // Check if already unshredded (optimization for common case) - if array.typed_value_field().is_none() && array.value_field().is_some() { + if array.typed_value_column().is_none() && array.value_column().is_some() { return Ok(array.clone()); } @@ -69,7 +69,7 @@ pub fn unshred_variant(array: &VariantArray) -> Result { let mut row_builder = UnshredVariantRowBuilder::try_new_opt(array.inner())? .unwrap_or_else(UnshredVariantRowBuilder::null); - let metadata = array.metadata_field(); + let metadata = array.metadata_column(); let mut value_builder = VariantValueArrayBuilder::new(array.len()); for i in 0..array.len() { if array.is_null(i) { diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index a6ee281002cc..ea5c46e2f76b 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -396,7 +396,7 @@ impl VariantArray { /// Note: Does not do deep validation of the [`Variant`], so it is up to the /// caller to ensure that the metadata and value were constructed correctly. pub fn try_value(&self, index: usize) -> Result> { - match (self.typed_value_field(), self.value_field()) { + match (self.typed_value_column(), self.value_column()) { // Always prefer typed_value, if available (Some(typed_value), value) if typed_value.is_valid(index) => { typed_value_to_variant(typed_value, value, index) @@ -420,19 +420,36 @@ impl VariantArray { } } - /// Return a reference to the metadata field of the [`StructArray`] - pub fn metadata_field(&self) -> &ArrayRef { + /// Return a reference to the `metadata` column of the [`StructArray`] + pub fn metadata_column(&self) -> &ArrayRef { &self.metadata } - /// Return a reference to the value field of the `StructArray` - pub fn value_field(&self) -> Option<&ArrayRef> { - self.shredding_state.value_field() + /// Return a reference to the `value` column of the [`StructArray`], if present + pub fn value_column(&self) -> Option<&ArrayRef> { + self.shredding_state.value_column() } - /// Return a reference to the typed_value field of the `StructArray`, if present - pub fn typed_value_field(&self) -> Option<&ArrayRef> { - self.shredding_state.typed_value_field() + /// Return a reference to the `typed_value` column of the [`StructArray`], if present + pub fn typed_value_column(&self) -> Option<&ArrayRef> { + self.shredding_state.typed_value_column() + } + + /// Return the [`FieldRef`] of the `metadata` column of the [`StructArray`] + pub fn metadata_field(&self) -> &FieldRef { + self.inner + .field_by_name("metadata") + .expect("VariantArray always has a metadata field") + } + + /// Return the [`FieldRef`] of the `value` column of the [`StructArray`], if present + pub fn value_field(&self) -> Option<&FieldRef> { + self.inner.field_by_name("value") + } + + /// Return the [`FieldRef`] of the `typed_value` column of the [`StructArray`], if present + pub fn typed_value_field(&self) -> Option<&FieldRef> { + self.inner.field_by_name("typed_value") } /// Return a field to represent this VariantArray in a `Schema` with @@ -688,14 +705,24 @@ impl ShreddedVariantFieldArray { &self.shredding_state } - /// Return a reference to the value field of the `StructArray` - pub fn value_field(&self) -> Option<&ArrayRef> { - self.shredding_state.value_field() + /// Return a reference to the `value` column of the [`StructArray`], if present + pub fn value_column(&self) -> Option<&ArrayRef> { + self.shredding_state.value_column() } - /// Return a reference to the typed_value field of the `StructArray`, if present - pub fn typed_value_field(&self) -> Option<&ArrayRef> { - self.shredding_state.typed_value_field() + /// Return a reference to the `typed_value` column of the [`StructArray`], if present + pub fn typed_value_column(&self) -> Option<&ArrayRef> { + self.shredding_state.typed_value_column() + } + + /// Return the [`FieldRef`] of the `value` column of the [`StructArray`], if present + pub fn value_field(&self) -> Option<&FieldRef> { + self.inner.field_by_name("value") + } + + /// Return the [`FieldRef`] of the `typed_value` column of the [`StructArray`], if present + pub fn typed_value_field(&self) -> Option<&FieldRef> { + self.inner.field_by_name("typed_value") } /// Returns a reference to the underlying [`StructArray`]. @@ -833,13 +860,13 @@ impl ShreddingState { Self { value, typed_value } } - /// Return a reference to the value field, if present - pub fn value_field(&self) -> Option<&ArrayRef> { + /// Return a reference to the `value` column, if present + pub fn value_column(&self) -> Option<&ArrayRef> { self.value.as_ref() } - /// Return a reference to the typed_value field, if present - pub fn typed_value_field(&self) -> Option<&ArrayRef> { + /// Return a reference to the `typed_value` column, if present + pub fn typed_value_column(&self) -> Option<&ArrayRef> { self.typed_value.as_ref() } @@ -1216,11 +1243,12 @@ mod test { use super::*; use arrow::array::{ - BinaryArray, BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, - Int64Array, LargeBinaryArray, LargeListArray, LargeListViewArray, ListArray, ListViewArray, - Time64MicrosecondArray, + BinaryArray, BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, + FixedSizeBinaryArray, Int32Array, Int64Array, LargeBinaryArray, LargeListArray, + LargeListViewArray, ListArray, ListViewArray, Time64MicrosecondArray, }; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; + use arrow_schema::extension::Uuid; use arrow_schema::{Field, Fields}; use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString}; @@ -1484,12 +1512,48 @@ mod test { let input = make_variant_struct_with_typed_value(typed_value.clone()); let variant_array = VariantArray::try_new(&input).unwrap(); assert_eq!( - variant_array.typed_value_field().unwrap().data_type(), + variant_array.typed_value_column().unwrap().data_type(), typed_value.data_type(), ); } } + #[test] + fn field_apis_return_inner_struct_fields() { + let typed_value = make_int32_array(); + let input = make_variant_struct_with_typed_value(typed_value.clone()); + let variant_array = VariantArray::try_new(&input).unwrap(); + + assert_eq!(variant_array.metadata_field().name(), "metadata"); + assert!(variant_array.value_field().is_none()); + + let typed_value_field = variant_array.typed_value_field().unwrap(); + assert_eq!(typed_value_field.name(), "typed_value"); + assert_eq!(typed_value_field.data_type(), typed_value.data_type()); + assert_eq!( + typed_value_field.data_type(), + variant_array.typed_value_column().unwrap().data_type(), + ); + } + + #[test] + fn field_apis_preserve_extension_type() { + // Built directly, not via `from_parts`, which would drop the extension type. + let metadata = Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]])); + let typed_value = + Arc::new(FixedSizeBinaryArray::try_from_iter(std::iter::once([0u8; 16])).unwrap()); + let fields = Fields::from(vec![ + Field::new("metadata", DataType::BinaryView, false), + Field::new("typed_value", DataType::FixedSizeBinary(16), true) + .with_extension_type(Uuid), + ]); + let struct_array = StructArray::new(fields, vec![metadata, typed_value], None); + + let variant_array = VariantArray::try_new(&struct_array).unwrap(); + let typed_value_field = variant_array.typed_value_field().unwrap(); + assert!(typed_value_field.has_valid_extension_type::()); + } + #[test] fn test_variant_array_iterable() { let mut b = VariantArrayBuilder::new(6); diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs index e669277f1d06..4c7340a0fba4 100644 --- a/parquet-variant-compute/src/variant_array_builder.rs +++ b/parquet-variant-compute/src/variant_array_builder.rs @@ -492,8 +492,8 @@ mod test { assert_eq!(variant_array.value(1), Variant::from(42i32)); // the metadata and value fields of non shredded variants should not be null - assert!(variant_array.metadata_field().nulls().is_none()); - assert!(variant_array.value_field().unwrap().nulls().is_none()); + assert!(variant_array.metadata_column().nulls().is_none()); + assert!(variant_array.value_column().unwrap().nulls().is_none()); let DataType::Struct(fields) = variant_array.data_type() else { panic!("Expected VariantArray to have Struct data type"); }; @@ -638,7 +638,7 @@ mod test { .finish(); let array2 = VariantArray::from_parts( - array.metadata_field().clone(), + array.metadata_column().clone(), Some(Arc::new(value_builder.build().unwrap())), None, None, diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index c3e915993533..0caef50759b7 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -67,8 +67,8 @@ fn take_list_like_index_as_shredding_state( }; let shredding_state = ShreddingState::try_from(struct_array)?; - let value_array = shredding_state.value_field(); - let typed_array = shredding_state.typed_value_field(); + let value_array = shredding_state.value_column(); + let typed_array = shredding_state.typed_value_column(); // If list elements have neither typed nor fallback value, this path step is missing. if value_array.is_none() && typed_array.is_none() { @@ -117,12 +117,12 @@ pub(crate) fn follow_shredded_path_element( ) -> Result { // If the requested path element is not present in `typed_value`, and `value` is missing, then // we know it does not exist; it, and all paths under it, are all-NULL. - let missing_path_step = || match shredding_state.value_field() { + let missing_path_step = || match shredding_state.value_column() { Some(_) => ShreddedPathStep::NotShredded, None => ShreddedPathStep::Missing, }; - let Some(typed_value) = shredding_state.typed_value_field() else { + let Some(typed_value) = shredding_state.typed_value_column() else { return Ok(missing_path_step()); }; @@ -197,7 +197,7 @@ fn shredded_get_path( |value: Option, typed_value: Option, accumulated_nulls: Option| { - let metadata = input.metadata_field().clone(); + let metadata = input.metadata_column().clone(); VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls) }; @@ -206,7 +206,7 @@ fn shredded_get_path( |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| { let as_type = as_field.map(|f| f.data_type()); let mut builder = make_variant_to_arrow_row_builder( - target.metadata_field(), + target.metadata_column(), path, as_type, cast_options, @@ -240,7 +240,7 @@ fn shredded_get_path( match follow_shredded_path_element(&shredding_state, path_element, cast_options)? { ShreddedPathStep::Success(state) => { // Union nulls from the typed_value we just accessed - if let Some(typed_value) = shredding_state.typed_value_field() { + if let Some(typed_value) = shredding_state.typed_value_column() { accumulated_nulls = NullBuffer::union(accumulated_nulls.as_ref(), typed_value.nulls()); } @@ -258,7 +258,7 @@ fn shredded_get_path( } ShreddedPathStep::NotShredded => { let target = make_target_variant( - shredding_state.value_field().cloned(), + shredding_state.value_column().cloned(), None, accumulated_nulls, ); @@ -269,8 +269,8 @@ fn shredded_get_path( // Path exhausted! Create a new `VariantArray` for the location we landed on. let target = make_target_variant( - shredding_state.value_field().cloned(), - shredding_state.typed_value_field().cloned(), + shredding_state.value_column().cloned(), + shredding_state.typed_value_column().cloned(), accumulated_nulls, ); @@ -294,7 +294,7 @@ fn shredded_get_path( // For shredded/partially-shredded targets (`typed_value` present), recurse into each field // separately to take advantage of deeper shredding in child fields. if let DataType::Struct(fields) = as_field.data_type() { - if target.typed_value_field().is_none() { + if target.typed_value_column().is_none() { return shred_basic_variant(target, VariantPath::default(), Some(as_field)); } @@ -328,11 +328,11 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti if matches!(as_field.data_type(), DataType::Struct(_)) { return None; } - let typed_value = variant_array.typed_value_field()?; + let typed_value = variant_array.typed_value_column()?; if typed_value.data_type() == as_field.data_type() && variant_array - .value_field() + .value_column() .is_none_or(|v| v.null_count() == v.len()) { // Here we need to gate against the case where the `typed_value` is null but data is in the `value` column. @@ -4356,7 +4356,7 @@ mod test { let variant_array = perfectly_shredded_int32_variant_array(); let variant_array_ref = VariantArray::try_new(&variant_array).unwrap(); - let typed_value_arc = variant_array_ref.typed_value_field().unwrap().clone(); + let typed_value_arc = variant_array_ref.typed_value_column().unwrap().clone(); let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));