The IPC writer benchmark test data do not currently include any dictionary-encoded columns:
|
fn create_batch(num_rows: usize, allow_nulls: bool) -> RecordBatch { |
|
let schema = Arc::new(Schema::new(vec![ |
|
Field::new("c0", DataType::Int32, true), |
|
Field::new("c1", DataType::Utf8, true), |
|
Field::new("c2", DataType::Date32, true), |
|
Field::new("c3", DataType::Decimal128(11, 2), true), |
|
])); |
|
let mut a = Int32Builder::new(); |
|
let mut b = StringBuilder::new(); |
|
let mut c = Date32Builder::new(); |
|
let mut d = Decimal128Builder::new() |
|
.with_precision_and_scale(11, 2) |
|
.unwrap(); |
|
for i in 0..num_rows { |
|
a.append_value(i as i32); |
|
c.append_value(i as i32); |
|
d.append_value((i * 1000000) as i128); |
|
if allow_nulls && i % 10 == 0 { |
|
b.append_null(); |
|
} else { |
|
b.append_value(format!("this is string number {i}")); |
|
} |
|
} |
|
let a = a.finish(); |
|
let b = b.finish(); |
|
let c = c.finish(); |
|
let d = d.finish(); |
|
RecordBatch::try_new( |
|
schema.clone(), |
|
vec![Arc::new(a), Arc::new(b), Arc::new(c), Arc::new(d)], |
|
) |
|
.unwrap() |
|
} |
|
|
Dictionaries have a lot of special handling in the IPC writer code, which we want to optimize, so it makes sense to add a benchmark focused on dictionary-encoded columns so that we can measure any performance impact.
Relevant thread from a recent PR: #10044 (comment)
CC: @alamb @Rich-T-kid
The IPC writer benchmark test data do not currently include any dictionary-encoded columns:
arrow-rs/arrow-ipc/benches/ipc_writer.rs
Lines 74 to 107 in cecbc72
Dictionaries have a lot of special handling in the IPC writer code, which we want to optimize, so it makes sense to add a benchmark focused on dictionary-encoded columns so that we can measure any performance impact.
Relevant thread from a recent PR: #10044 (comment)
CC: @alamb @Rich-T-kid