Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ debug = "full"
inherits = "release"

[profile.bench]
codegen-units = 16
codegen-units = 1
debug = "full"
lto = false

Expand Down
8 changes: 8 additions & 0 deletions encodings/parquet-variant/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ use vortex_array::arrays::StructArray;
use vortex_array::arrays::VariantArray;
use vortex_array::arrays::list::ListArrayExt;
use vortex_array::arrays::struct_::StructArrayExt;
#[expect(
deprecated,
reason = "TODO(aduffy): figure out what to do with Parquet Variant"
)]
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::FromArrowArray;
use vortex_array::arrow::to_arrow_null_buffer;
Expand Down Expand Up @@ -331,6 +335,10 @@ pub trait ParquetVariantArrayExt: TypedArrayRef<ParquetVariant> {
}

/// Converts this storage array to Arrow's canonical Parquet Variant extension storage.
#[expect(
deprecated,
reason = "TODO(aduffy): figure out what to do with Parquet Variant"
)]
fn to_arrow(&self, ctx: &mut ExecutionCtx) -> VortexResult<ArrowVariantArray> {
let metadata = self.metadata_array();
let len = metadata.len();
Expand Down
4 changes: 4 additions & 0 deletions encodings/parquet-variant/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ fn to_parquet_variant_path(path: &VariantPath) -> VortexResult<PqVariantPath<'st
.map(PqVariantPath::new)
}

#[expect(
deprecated,
reason = "TODO(aduffy): figure out what to do with Parquet Variant"
)]
fn to_arrow_as_type(dtype: Option<&DType>) -> VortexResult<Option<FieldRef>> {
match dtype {
Some(dtype) if !dtype.is_variant() => Ok(Some(Arc::new(Field::new(
Expand Down
13 changes: 9 additions & 4 deletions encodings/pco/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use vortex_array::LEGACY_SESSION;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::assert_arrays_eq;
use vortex_array::assert_nth_scalar;
use vortex_array::dtype::DType;
Expand Down Expand Up @@ -213,9 +213,14 @@ fn test_serde() -> VortexResult<()> {
&ReadContext::new(context.to_ids()),
&SESSION,
)?;
let data_type = data.dtype().to_arrow_dtype()?;
let pco_arrow = pco.execute_arrow(Some(&data_type), &mut ctx)?;
let decoded_arrow = decoded.execute_arrow(Some(&data_type), &mut ctx)?;
let data_type = LEGACY_SESSION.arrow().to_arrow_field("", data.dtype())?;
let pco_arrow = LEGACY_SESSION
.arrow()
.execute_arrow(pco, Some(&data_type), &mut ctx)?;
let decoded_arrow =
LEGACY_SESSION
.arrow()
.execute_arrow(decoded, Some(&data_type), &mut ctx)?;
assert!(pco_arrow == decoded_arrow);
Ok(())
}
7 changes: 5 additions & 2 deletions encodings/runend/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod tests {
use vortex_array::VortexSessionExecute as _;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::primitive::PrimitiveArrayExt;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::arrow::FromArrowArray;
use vortex_array::assert_arrays_eq;
use vortex_array::dtype::DType;
Expand Down Expand Up @@ -301,7 +301,10 @@ mod tests {
}

fn execute(array: ArrayRef, dt: &DataType) -> VortexResult<arrow_array::ArrayRef> {
array.execute_arrow(Some(dt), &mut SESSION.create_execution_ctx())
let field = Field::new("", dt.clone(), true);
SESSION
.arrow()
.execute_arrow(array, Some(&field), &mut SESSION.create_execution_ctx())
}

#[test]
Expand Down
57 changes: 37 additions & 20 deletions encodings/sparse/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ mod test {
use vortex_array::arrays::VarBinArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::arrays::listview::ListViewArrayExt;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::assert_arrays_eq;
use vortex_array::dtype::DType;
use vortex_array::dtype::DecimalDType;
Expand Down Expand Up @@ -845,23 +845,33 @@ mod test {
let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
let sparse_struct = Sparse::try_new(indices, patch_values, len, fill_scalar).unwrap();

let expected = DecimalArray::new(
buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
decimal_dtype,
// NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
)
.into_array()
.execute_arrow(None, &mut ctx)
.unwrap();
let expected = LEGACY_SESSION
.arrow()
.execute_arrow(
DecimalArray::new(
buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
decimal_dtype,
// NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
)
.into_array(),
None,
&mut ctx,
)
.unwrap();

let actual = sparse_struct
.as_array()
.clone()
.execute::<DecimalArray>(&mut ctx)
.unwrap()
.into_array()
.execute_arrow(None, &mut ctx)
let actual = LEGACY_SESSION
.arrow()
.execute_arrow(
sparse_struct
.as_array()
.clone()
.execute::<DecimalArray>(&mut ctx)
.unwrap()
.into_array(),
None,
&mut ctx,
)
.unwrap();

assert_eq!(expected.data_type(), actual.data_type());
Expand Down Expand Up @@ -1544,9 +1554,16 @@ mod test {
assert_arrays_eq!(&actual, &expected);

// Note that the preferred arrow list representation is `List` (not `ListView`).
let arrow_dtype = expected.dtype().to_arrow_dtype()?;
let actual = actual.execute_arrow(Some(&arrow_dtype), &mut ctx)?;
let expected = expected.execute_arrow(Some(&arrow_dtype), &mut ctx)?;
let arrow_dtype = LEGACY_SESSION
.arrow()
.to_arrow_field("", expected.dtype())?;
let actual = LEGACY_SESSION
.arrow()
.execute_arrow(actual, Some(&arrow_dtype), &mut ctx)?;
let expected =
LEGACY_SESSION
.arrow()
.execute_arrow(expected, Some(&arrow_dtype), &mut ctx)?;

assert_eq!(actual.data_type(), expected.data_type());
Ok(())
Expand Down
6 changes: 5 additions & 1 deletion vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ arrow-buffer = { workspace = true }
arrow-cast = { workspace = true }
arrow-data = { workspace = true }
arrow-ord = { workspace = true }
arrow-schema = { workspace = true }
arrow-schema = { workspace = true, features = ["canonical_extension_types"] }
arrow-select = { workspace = true }
arrow-string = { workspace = true }
async-lock = { workspace = true }
Expand Down Expand Up @@ -206,3 +206,7 @@ harness = false
[[bench]]
name = "slice_dict_primitive"
harness = false

[[bench]]
name = "to_arrow"
harness = false
130 changes: 130 additions & 0 deletions vortex-array/benches/to_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![expect(clippy::unwrap_used)]

use std::sync::Arc;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::LEGACY_SESSION;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ListArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
#[expect(
deprecated,
reason = "benchmark comparing deprecated method with new one"
)]
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::dtype::DType;
use vortex_array::dtype::DecimalDType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::dtype::StructFields;

// Entry point of the benchmark binary: delegates to divan, which drives the
// `#[divan::bench]` functions defined in this file.
fn main() {
    divan::main();
}

// Builds the struct dtype used as input for the dtype-conversion benchmarks:
// a non-nullable struct with three nullable fields (`primitive`, `list`, `decimal`).
fn schema() -> DType {
    // Nullable f32 column.
    let primitive_dtype = DType::Primitive(PType::F32, Nullability::Nullable);

    // Nullable list whose elements are non-nullable binary values.
    let element_dtype = Arc::new(DType::Binary(Nullability::NonNullable));
    let list_dtype = DType::List(element_dtype, Nullability::Nullable);

    // Nullable decimal with precision 19 and scale 10.
    let decimal_dtype = DType::Decimal(DecimalDType::new(19, 10), Nullability::Nullable);

    let struct_fields = StructFields::from_iter([
        ("primitive", primitive_dtype),
        ("list", list_dtype),
        ("decimal", decimal_dtype),
    ]);
    DType::Struct(struct_fields, Nullability::NonNullable)
}

// Builds the 1024-row struct array used as input for the array-export benchmarks.
// It has three children: `primitive` (i16 values), `list` (three short strings per
// row) and `decimal` (i64-backed decimals).
fn array() -> ArrayRef {
    let primitive_child = PrimitiveArray::from_iter(0i16..1024).into_array();

    // Every row holds the same three-element list of strings.
    let list_rows: Vec<_> = (0..1024).map(|_| vec!["a", "b", "c"]).collect();
    let list_child = ListArray::from_iter_slow::<u32, _>(
        list_rows,
        Arc::new(DType::Utf8(Nullability::NonNullable)),
    )
    .unwrap()
    .into_array();

    let decimal_child =
        DecimalArray::from_iter(0i64..1024, DecimalDType::new(19, 2)).into_array();

    let struct_array = StructArray::from_fields(&[
        ("primitive", primitive_child),
        ("list", list_child),
        ("decimal", decimal_child),
    ])
    .unwrap();
    struct_array.into_array()
}

// Baseline: converts the benchmark schema to an Arrow data type through the
// deprecated `to_arrow_dtype` method. A fresh dtype is generated for each call via
// `with_inputs`.
#[divan::bench]
fn to_arrow_dtype(bencher: Bencher) {
    let with_schema = bencher.with_inputs(schema);
    with_schema.bench_values(|struct_dtype| {
        #[expect(deprecated, reason = "benchmarking deprecated code path")]
        struct_dtype.to_arrow_dtype().unwrap()
    });
}

// Converts the benchmark schema to an Arrow field through the session's Arrow
// support (`LEGACY_SESSION.arrow()`), for comparison with `to_arrow_dtype`.
// The function is named after the code path it measures, hence the lint allowance.
#[allow(non_snake_case)]
#[divan::bench]
fn ArrowExportVTable_to_arrow_field(bencher: Bencher) {
    // Warm the ArrowSession with one untimed conversion before measuring.
    let warm_up = LEGACY_SESSION.arrow().to_arrow_field("", &schema());
    drop(warm_up.unwrap());

    // The session lookup stays inside the closure so it is part of every measured call.
    bencher.with_inputs(schema).bench_values(|struct_dtype| {
        LEGACY_SESSION
            .arrow()
            .to_arrow_field("", &struct_dtype)
            .unwrap()
    });
}

// Baseline: exports the benchmark array to Arrow through the deprecated
// `ArrowArrayExecutor::execute_arrow` extension method, letting the target Arrow
// type be chosen by the callee (`None`).
#[divan::bench]
fn to_arrow_array(bencher: Bencher) {
    // Each call gets its own array and execution context, built by `with_inputs`.
    let fresh_input = || (array(), LEGACY_SESSION.create_execution_ctx());

    bencher
        .with_inputs(fresh_input)
        .bench_values(|(input, mut exec_ctx)| {
            #[expect(deprecated, reason = "benchmarking deprecated code path")]
            input.execute_arrow(None, &mut exec_ctx).unwrap()
        });
}

// Exports the benchmark array to Arrow through the session's Arrow support
// (`LEGACY_SESSION.arrow()`), for comparison with `to_arrow_array`.
// The function is named after the code path it measures, hence the lint allowance.
#[allow(non_snake_case)]
#[divan::bench]
fn ArrowExportVTable_execute_arrow(bencher: Bencher) {
    // Warm the ArrowSession with one untimed export before measuring.
    // The result is unwrapped (as in the `to_arrow_field` warm-up) so a failing
    // export panics here, at the warm-up, instead of being silently discarded.
    drop(
        LEGACY_SESSION
            .arrow()
            .execute_arrow(array(), None, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap(),
    );

    // Each call gets its own array and execution context, built by `with_inputs`.
    // The session lookup stays inside the closure so it is part of every measured call.
    bencher
        .with_inputs(|| (array(), LEGACY_SESSION.create_execution_ctx()))
        .bench_values(|(array, mut ctx)| {
            LEGACY_SESSION
                .arrow()
                .execute_arrow(array, None, &mut ctx)
                .unwrap()
        })
}
Loading
Loading