From efa87e3b16a61d860ce9ca07cbda5e7237833b9d Mon Sep 17 00:00:00 2001 From: yew1eb Date: Fri, 21 Nov 2025 17:27:49 +0800 Subject: [PATCH 01/10] [AURON #1637] Enforce Clippy lint and replace unwrap with expect --- .github/workflows/tpcds-reusable.yml | 5 + Cargo.toml | 3 + dev/mvn-build-helper/build-native.sh | 3 + native-engine/auron-serde/build.rs | 5 +- native-engine/auron-serde/src/from_proto.rs | 36 ++--- native-engine/auron/src/alloc.rs | 2 +- native-engine/auron/src/exec.rs | 4 +- native-engine/auron/src/http/mod.rs | 4 +- native-engine/auron/src/lib.rs | 2 +- .../datafusion-ext-commons/src/arrow/cast.rs | 9 +- .../src/arrow/eq_comparator.rs | 133 ++++++++++-------- .../src/arrow/selection.rs | 6 +- .../src/io/batch_serde.rs | 16 +-- .../datafusion-ext-commons/src/spark_hash.rs | 18 ++- .../src/bloom_filter_might_contain.rs | 2 +- native-engine/datafusion-ext-exprs/src/lib.rs | 4 +- .../src/string_contains.rs | 3 +- .../src/string_ends_with.rs | 2 +- .../src/string_starts_with.rs | 2 +- .../src/spark_check_overflow.rs | 2 +- .../src/spark_get_json_object.rs | 18 +-- .../src/spark_make_decimal.rs | 2 +- .../src/spark_strings.rs | 4 +- .../src/spark_unscaled_value.rs | 2 +- .../datafusion-ext-plans/src/agg/agg_ctx.rs | 4 +- .../datafusion-ext-plans/src/agg/agg_table.rs | 4 +- .../src/agg/bloom_filter.rs | 2 +- .../datafusion-ext-plans/src/agg/collect.rs | 20 ++- .../src/agg/spark_udaf_wrapper.rs | 4 +- .../datafusion-ext-plans/src/agg_exec.rs | 18 +-- .../src/common/cached_exprs_evaluator.rs | 2 +- .../src/common/column_pruning.rs | 4 +- .../src/common/execution_context.rs | 2 +- .../src/common/row_null_checker.rs | 9 +- .../datafusion-ext-plans/src/expand_exec.rs | 125 ++++++++-------- .../datafusion-ext-plans/src/generate_exec.rs | 2 +- .../src/joins/join_hash_map.rs | 4 +- .../datafusion-ext-plans/src/joins/test.rs | 45 +++--- .../datafusion-ext-plans/src/limit_exec.rs | 16 +-- .../datafusion-ext-plans/src/parquet_exec.rs | 2 +- .../src/parquet_sink_exec.rs | 2 +- .../src/shuffle/buffered_data.rs | 6 +- .../src/shuffle/single_repartitioner.rs | 2 +- .../datafusion-ext-plans/src/sort_exec.rs | 22 +-- .../src/window/window_context.rs | 4 +- 45 files changed, 311 insertions(+), 275 deletions(-) diff --git a/.github/workflows/tpcds-reusable.yml b/.github/workflows/tpcds-reusable.yml index fc039fdaa..5981fa709 100644 --- a/.github/workflows/tpcds-reusable.yml +++ b/.github/workflows/tpcds-reusable.yml @@ -151,6 +151,11 @@ jobs: components: cargo rustfmt + clippy + + - name: Cargo clippy + run: | + cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used - name: Cargo test run: | diff --git a/Cargo.toml b/Cargo.toml index baf0915b3..b123f60d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,9 @@ members = [ "native-engine/auron-memmgr", ] +[workspace.lints.clippy] +unwrap_used = "deny" + [profile.release] opt-level = 3 lto = true diff --git a/dev/mvn-build-helper/build-native.sh b/dev/mvn-build-helper/build-native.sh index f97eddb17..6fa4f860b 100755 --- a/dev/mvn-build-helper/build-native.sh +++ b/dev/mvn-build-helper/build-native.sh @@ -84,6 +84,9 @@ if [ ! -f "$cache_libpath" ] || [ "$new_checksum" != "$old_checksum" ]; then echo "Running cargo fmt..." cargo fmt --all -q -- 2>&1 + echo "Running cargo clippy..." + cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used 2>&1 + echo "Building native with [$profile] profile..." 
cargo build --profile="$profile" $features_arg --verbose --locked --frozen 2>&1 diff --git a/native-engine/auron-serde/build.rs b/native-engine/auron-serde/build.rs index afcf85157..fa7ae5221 100644 --- a/native-engine/auron-serde/build.rs +++ b/native-engine/auron-serde/build.rs @@ -40,10 +40,11 @@ fn main() -> Result<(), String> { } } if let Some(path) = protoc_file { - eprintln!("Using protoc executable: {:?}", path); + eprintln!("Using protoc executable: {}", path.display()); prost_build.protoc_executable(path); } prost_build .compile_protos(&["proto/auron.proto"], &["proto"]) - .map_err(|e| format!("protobuf compilation failed: {}", e)) + .map_err(|e| format!("protobuf compilation failed: {e}") + ) } diff --git a/native-engine/auron-serde/src/from_proto.rs b/native-engine/auron-serde/src/from_proto.rs index 31082e8ba..3eb22f36b 100644 --- a/native-engine/auron-serde/src/from_proto.rs +++ b/native-engine/auron-serde/src/from_proto.rs @@ -151,7 +151,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { Ok(Arc::new(FilterExec::try_new(predicates, input)?)) } PhysicalPlanType::ParquetScan(scan) => { - let conf: FileScanConfig = scan.base_conf.as_ref().unwrap().try_into()?; + let conf: FileScanConfig = scan.base_conf.as_ref().expect("base_conf").try_into()?; let predicate = scan .pruning_predicates .iter() @@ -168,7 +168,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { ))) } PhysicalPlanType::OrcScan(scan) => { - let conf: FileScanConfig = scan.base_conf.as_ref().unwrap().try_into()?; + let conf: FileScanConfig = scan.base_conf.as_ref().expect("base_conf").try_into()?; let predicate = scan .pruning_predicates .iter() @@ -193,9 +193,9 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = - try_parse_physical_expr(&col.left.as_ref().unwrap(), &left.schema())?; + try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?; let right_key = - try_parse_physical_expr(&col.right.as_ref().unwrap(), &right.schema())?; + try_parse_physical_expr(&col.right.as_ref().expect("right"), &right.schema())?; Ok((left_key, right_key)) }) .collect::>()?; @@ -230,9 +230,9 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = - try_parse_physical_expr(&col.left.as_ref().unwrap(), &left.schema())?; + try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?; let right_key = - try_parse_physical_expr(&col.right.as_ref().unwrap(), &right.schema())?; + try_parse_physical_expr(&col.right.as_ref().expect("right"), &right.schema())?; Ok((left_key, right_key)) }) .collect::>()?; @@ -270,7 +270,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { Ok(Arc::new(ShuffleWriterExec::try_new( input, - output_partitioning.unwrap(), + output_partitioning.expect("partitioning"), shuffle_writer.output_data_file.clone(), shuffle_writer.output_index_file.clone(), )?)) @@ -285,7 +285,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { )?; Ok(Arc::new(RssShuffleWriterExec::try_new( input, - output_partitioning.unwrap(), + output_partitioning.expect("partitioning"), rss_shuffle_writer.rss_partition_writer_resource_id.clone(), )?)) } @@ -340,9 +340,9 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = - try_parse_physical_expr(&col.left.as_ref().unwrap(), &left.schema())?; + try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?; let right_key = - try_parse_physical_expr(&col.right.as_ref().unwrap(), &right.schema())?; + try_parse_physical_expr(&col.right.as_ref().expect("right"), 
&right.schema())?; Ok((left_key, right_key)) }) .collect::>()?; @@ -471,7 +471,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { let agg = match AggFunction::from(agg_function) { AggFunction::Udaf => { - let udaf = agg_node.udaf.as_ref().unwrap(); + let udaf = agg_node.udaf.as_ref().expect("udaf"); let serialized = udaf.serialized.clone(); create_udaf_agg(serialized, return_type, agg_children_exprs)? } @@ -696,7 +696,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { children, )?, GenerateFunction::Udtf => { - let udtf = pb_generator.udtf.as_ref().unwrap(); + let udtf = pb_generator.udtf.as_ref().expect("udtf"); let serialized = udtf.serialized.clone(); let return_schema = Arc::new(convert_required!(udtf.return_schema)?); create_udtf_generator(serialized, return_schema, children)? @@ -1136,13 +1136,13 @@ pub fn parse_protobuf_partitioning( .collect::, _>>()?; Ok(Some(Partitioning::HashPartitioning( expr, - hash_part.partition_count.try_into().unwrap(), + hash_part.partition_count.try_into().expect("partition_count"), ))) } RepartitionType::RoundRobinRepartition(round_robin_part) => { Ok(Some(Partitioning::RoundRobinPartitioning( - round_robin_part.partition_count.try_into().unwrap(), + round_robin_part.partition_count.try_into().expect("partition_count"), ))) } @@ -1150,7 +1150,7 @@ pub fn parse_protobuf_partitioning( if range_part.partition_count == 1 { Ok(Some(Partitioning::SinglePartitioning())) } else { - let sort = range_part.sort_expr.clone().unwrap(); + let sort = range_part.sort_expr.clone().expect("sort_expr"); let exprs = try_parse_physical_sort_expr(&input, &sort).unwrap_or_else(|e| { panic!("Failed to parse physical sort expressions: {}", e); }); @@ -1187,7 +1187,7 @@ pub fn parse_protobuf_partitioning( let bound_rows = sort_row_converter.lock().convert_columns(&bound_cols)?; Ok(Some(Partitioning::RangePartitioning( exprs, - range_part.partition_count.try_into().unwrap(), + range_part.partition_count.try_into().expect("partition_count"), Arc::new(bound_rows), ))) } @@ -1250,12 +1250,12 @@ impl From<&protobuf::ColumnStats> for ColumnStatistics { max_value: cs .max_value .as_ref() - .map(|m| Precision::Exact(m.try_into().unwrap())) + .map(|m| Precision::Exact(m.try_into().expect("max_value"))) .unwrap_or(Precision::Absent), min_value: cs .min_value .as_ref() - .map(|m| Precision::Exact(m.try_into().unwrap())) + .map(|m| Precision::Exact(m.try_into().expect("min_value"))) .unwrap_or(Precision::Absent), sum_value: Precision::Absent, distinct_count: Precision::Exact(cs.distinct_count as usize), diff --git a/native-engine/auron/src/alloc.rs b/native-engine/auron/src/alloc.rs index a65e7431e..75ce62b49 100644 --- a/native-engine/auron/src/alloc.rs +++ b/native-engine/auron/src/alloc.rs @@ -57,7 +57,7 @@ impl DebugAlloc { } fn update(&self) { - let _lock = self.mutex.lock().unwrap(); + let _lock = self.mutex.lock().expect("lock"); let current = self.current.load(SeqCst); let last_updated = self.last_updated.load(SeqCst); let delta = (current as isize - last_updated as isize).abs(); diff --git a/native-engine/auron/src/exec.rs b/native-engine/auron/src/exec.rs index de8d372a3..01a0b329e 100644 --- a/native-engine/auron/src/exec.rs +++ b/native-engine/auron/src/exec.rs @@ -62,7 +62,7 @@ pub extern "system" fn Java_org_apache_auron_jni_JniBridge_callNative( INIT.get_or_try_init(|| { // logging is not initialized at this moment eprintln!("------ initializing auron native environment ------"); - let log_level = env.get_string(log_level).map(|s| String::from(s)).unwrap(); + let 
log_level = env.get_string(log_level).map(|s| String::from(s)).expect("log_level"); eprintln!("initializing logging with level: {}", log_level); init_logging(log_level.as_str()); @@ -103,7 +103,7 @@ pub extern "system" fn Java_org_apache_auron_jni_JniBridge_callNative( // create execution runtime let runtime = Box::new(NativeExecutionRuntime::start( native_wrapper, - SESSION.get().unwrap().task_ctx(), + SESSION.get().expect("session").task_ctx(), )?); // returns runtime raw pointer diff --git a/native-engine/auron/src/http/mod.rs b/native-engine/auron/src/http/mod.rs index 86ef14322..e7f301ffc 100644 --- a/native-engine/auron/src/http/mod.rs +++ b/native-engine/auron/src/http/mod.rs @@ -66,7 +66,7 @@ impl HTTPServer for DefaultHTTPServer { fn start(&self) { if let Some(port) = find_available_port() { let mut app = Route::new(); - let handlers = self.handlers.lock().unwrap(); + let handlers = self.handlers.lock().expect("lock"); for handler in handlers.iter() { app = app.at(handler.get_route_path(), handler.get_route_method()); } @@ -83,7 +83,7 @@ impl HTTPServer for DefaultHTTPServer { } fn register_handler(&self, handler: Box) { - let mut handlers = self.handlers.lock().unwrap(); + let mut handlers = self.handlers.lock().expect("lock"); handlers.push(handler); } } diff --git a/native-engine/auron/src/lib.rs b/native-engine/auron/src/lib.rs index 57492593e..46cc82adc 100644 --- a/native-engine/auron/src/lib.rs +++ b/native-engine/auron/src/lib.rs @@ -55,7 +55,7 @@ fn handle_unwinded(err: Box) { } fn handle_unwinded_scope(scope: impl FnOnce() -> Result) -> T { - match std::panic::catch_unwind(AssertUnwindSafe(|| scope().unwrap())) { + match std::panic::catch_unwind(AssertUnwindSafe(|| scope().expect("scope"))) { Ok(v) => v, Err(err) => { handle_unwinded(err); diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index afa759fe0..2d8ff7572 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -166,7 +166,7 @@ pub fn cast_impl( col = struct_.column_by_name(adjust.as_str()); } if col.is_some() { - cast_impl(col.unwrap(), field.data_type(), match_struct_fields) + cast_impl(col.expect("missing column"), field.data_type(), match_struct_fields) } else { null_column_name.push(field.name().clone()); Ok(new_null_array(field.data_type(), struct_.len())) @@ -227,7 +227,8 @@ pub fn cast_impl( } fn to_plain_string_array(array: &dyn Array) -> ArrayRef { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_any().downcast_ref::() + .expect("Expected a StringArray"); let mut converted_values: Vec> = Vec::with_capacity(array.len()); for v in array.iter() { match v { @@ -305,7 +306,7 @@ fn to_integer(input: &str) } let separator = b'.'; - let radix = T::from_usize(10).unwrap(); + let radix = T::from_usize(10).expect("from_usize(10)"); let stop_value = T::min_value() / radix; let mut result = T::zero(); @@ -333,7 +334,7 @@ fn to_integer(input: &str) return None; } - result = result * radix - T::from_u8(digit).unwrap(); + result = result * radix - T::from_u8(digit).expect("digit 0..=9"); // Since the previous result is less than or equal to stopValue(Long.MIN_VALUE / // radix), we can just use `result > 0` to check overflow. If result // overflows, we should stop. 
diff --git a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs index 51c05b7f5..367c6a32c 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs @@ -275,8 +275,8 @@ fn eq_fixed_list( ignores_null, )?; - let l_size = left.value_length().to_usize().unwrap(); - let r_size = right.value_length().to_usize().unwrap(); + let l_size = left.value_length().to_usize().expect("left len to_usize"); + let r_size = right.value_length().to_usize().expect("right len to_usize"); let size_eq = l_size == r_size; let f = eq_impl(left, right, ignores_null, move |i, j| { @@ -390,78 +390,85 @@ pub mod tests { use super::*; #[test] - fn test_fixed_size_binary() { + fn test_fixed_size_binary() -> Result<()> { let items = vec![vec![1u8], vec![2u8]]; - let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); + let array = FixedSizeBinaryArray::try_from_iter(items.into_iter())?; - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); + Ok(()) } #[test] - fn test_fixed_size_binary_fixed_size_binary() { + fn test_fixed_size_binary_fixed_size_binary() -> Result<()> { let items = vec![vec![1u8]]; - let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); + let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter())?; let items = vec![vec![2u8]]; - let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap(); + let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter())?; - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); + Ok(()) } #[test] - fn test_i32() { + fn test_i32() -> Result<()> { let array = Int32Array::from(vec![1, 2]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, (eq)(0, 1)); + Ok(()) } #[test] - fn test_i32_i32() { + fn test_i32_i32() -> Result<()> { let array1 = Int32Array::from(vec![1]); let array2 = Int32Array::from(vec![2]); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); + Ok(()) } #[test] - fn test_f64() { + fn test_f64() -> Result<()> { let array = Float64Array::from(vec![1.0, 2.0]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); + Ok(()) } #[test] - fn test_f64_nan() { + fn test_f64_nan() -> Result<()> { let array = Float64Array::from(vec![1.0, f64::NAN]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(true, eq(0, 0)); assert_eq!(false, eq(0, 1)); assert_eq!(false, eq(1, 1)); // NaN != NaN + Ok(()) } #[test] - fn test_f64_zeros() { + fn test_f64_zeros() -> Result<()> { let array = Float64Array::from(vec![-0.0, 0.0]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(true, eq(0, 1)); // -0.0 == 0.0 assert_eq!(true, eq(1, 0)); + Ok(()) } #[test] - fn test_interval_day_time() { + fn test_interval_day_time() -> Result<()> { let array = IntervalDayTimeArray::from(vec![ // 0 days, 1 second IntervalDayTimeType::make_value(0, 1000), @@ 
-471,7 +478,7 @@ pub mod tests { IntervalDayTimeType::make_value(0, 90_000_000), ]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); assert_eq!(false, eq(1, 0)); @@ -481,10 +488,11 @@ pub mod tests { // values not field by field assert_eq!(false, eq(1, 2)); assert_eq!(false, eq(2, 1)); + Ok(()) } #[test] - fn test_interval_year_month() { + fn test_interval_year_month() -> Result<()> { let array = IntervalYearMonthArray::from(vec![ // 1 year, 0 months IntervalYearMonthType::make_value(1, 0), @@ -494,7 +502,7 @@ pub mod tests { IntervalYearMonthType::make_value(1, 1), ]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); assert_eq!(false, eq(1, 0)); @@ -502,10 +510,11 @@ pub mod tests { // the underlying representation is months, so both quantities are the same assert_eq!(true, eq(1, 2)); assert_eq!(true, eq(2, 1)); + Ok(()) } #[test] - fn test_interval_month_day_nano() { + fn test_interval_month_day_nano() -> Result<()> { let array = IntervalMonthDayNanoArray::from(vec![ // 100 days IntervalMonthDayNanoType::make_value(0, 100, 0), @@ -515,7 +524,7 @@ pub mod tests { IntervalMonthDayNanoType::make_value(0, 100, 2), ]); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); assert_eq!(false, eq(1, 0)); @@ -525,23 +534,24 @@ pub mod tests { // values not field by field assert_eq!(false, eq(1, 2)); assert_eq!(false, eq(2, 1)); + Ok(()) } #[test] - fn test_decimal() { + fn test_decimal() -> Result<()> { let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)] .into_iter() .collect::() - .with_precision_and_scale(23, 6) - .unwrap(); + .with_precision_and_scale(23, 6)?; - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(1, 0)); assert_eq!(false, eq(0, 2)); + Ok(()) } #[test] - fn test_decimali256() { + fn test_decimali256() -> Result<()> { let array = vec![ Some(i256::from_i128(5_i128)), Some(i256::from_i128(2_i128)), @@ -549,42 +559,44 @@ pub mod tests { ] .into_iter() .collect::() - .with_precision_and_scale(53, 6) - .unwrap(); + .with_precision_and_scale(53, 6)?; - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(1, 0)); assert_eq!(false, eq(0, 2)); + Ok(()) } #[test] - fn test_dict() { + fn test_dict() -> Result<()> { let data = vec!["a", "b", "c", "a", "a", "c", "c"]; let array = data.into_iter().collect::>(); - let eq = make_eq_comparator(&array, &array, false).unwrap(); + let eq = make_eq_comparator(&array, &array, false)?; assert_eq!(false, eq(0, 1)); assert_eq!(true, eq(3, 4)); assert_eq!(false, eq(2, 3)); + Ok(()) } #[test] - fn test_multiple_dict() { + fn test_multiple_dict() -> Result<()> { let d1 = vec!["a", "b", "c", "d"]; let a1 = d1.into_iter().collect::>(); let d2 = vec!["e", "f", "g", "a"]; let a2 = d2.into_iter().collect::>(); - let eq = make_eq_comparator(&a1, &a2, false).unwrap(); + let eq = make_eq_comparator(&a1, &a2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(true, eq(0, 3)); assert_eq!(false, eq(1, 3)); + Ok(()) } #[test] - fn test_primitive_dict() { + fn test_primitive_dict() -> Result<()> { let values = Int32Array::from(vec![1_i32, 0, 2, 5]); let keys = Int8Array::from_iter_values([0, 0, 1, 3]); let array1 = 
DictionaryArray::new(keys, Arc::new(values)); @@ -593,36 +605,38 @@ pub mod tests { let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } #[test] - fn test_float_dict() { + fn test_float_dict() -> Result<()> { let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]); let keys = Int8Array::from_iter_values([0, 0, 1, 3]); - let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap(); + let array1 = DictionaryArray::try_new(keys, Arc::new(values))?; let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]); let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } #[test] - fn test_timestamp_dict() { + fn test_timestamp_dict() -> Result<()> { let values = TimestampSecondArray::from(vec![1, 0, 2, 5]); let keys = Int8Array::from_iter_values([0, 0, 1, 3]); let array1 = DictionaryArray::new(keys, Arc::new(values)); @@ -631,17 +645,18 @@ pub mod tests { let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } #[test] - fn test_duration_dict() { + fn test_duration_dict() -> Result<()> { let values = DurationSecondArray::from(vec![1, 0, 2, 5]); let keys = Int8Array::from_iter_values([0, 0, 1, 3]); let array1 = DictionaryArray::new(keys, Arc::new(values)); @@ -650,17 +665,18 @@ pub mod tests { let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } #[test] - fn test_decimal_dict() { + fn test_decimal_dict() -> Result<()> { let values = Decimal128Array::from(vec![1, 0, 2, 5]); let keys = Int8Array::from_iter_values([0, 0, 1, 3]); let array1 = DictionaryArray::new(keys, Arc::new(values)); @@ -669,17 +685,18 @@ pub mod tests { let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } #[test] - fn test_decimal256_dict() { + fn test_decimal256_dict() -> Result<()> { let values = Decimal256Array::from(vec![ i256::from_i128(1), i256::from_i128(0), @@ -698,23 +715,25 @@ pub mod tests { let keys = Int8Array::from_iter_values([0, 1, 1, 3]); let array2 = 
DictionaryArray::new(keys, Arc::new(values)); - let eq = make_eq_comparator(&array1, &array2, false).unwrap(); + let eq = make_eq_comparator(&array1, &array2, false)?; assert_eq!(false, eq(0, 0)); assert_eq!(false, eq(0, 3)); assert_eq!(true, eq(3, 3)); assert_eq!(false, eq(3, 1)); assert_eq!(false, eq(3, 2)); + Ok(()) } - fn test_bytes_impl<T: ByteArrayType>() { + fn test_bytes_impl<T: ByteArrayType>() -> Result<()> { let offsets = OffsetBuffer::from_lengths([3, 3, 1]); let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None); - let eq = make_eq_comparator(&a, &a, false).unwrap(); + let eq = make_eq_comparator(&a, &a, false)?; assert_eq!(false, eq(0, 1)); assert_eq!(false, eq(0, 2)); assert_eq!(true, eq(1, 1)); + Ok(()) } #[test] @@ -726,7 +745,7 @@ pub mod tests { } #[test] - fn test_lists() { + fn test_lists() -> Result<()> { let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new())); a.extend([ Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]), @@ -755,7 +774,7 @@ ]); let b = b.finish(); - let eq = make_eq_comparator(&a, &b, false).unwrap(); + let eq = make_eq_comparator(&a, &b, false)?; assert_eq!(eq(0, 0), false); // lists contains null never equal assert_eq!(eq(0, 1), false); assert_eq!(eq(0, 2), false); @@ -763,10 +782,11 @@ assert_eq!(eq(1, 3), false); assert_eq!(eq(2, 0), false); assert_eq!(eq(4, 4), true); + Ok(()) } #[test] - fn test_struct() { + fn test_struct() -> Result<()> { let fields = Fields::from(vec![ Field::new("a", DataType::Int32, true), Field::new_list("b", Field::new("item", DataType::Int32, true), true), @@ -789,7 +809,7 @@ let values = vec![Arc::new(a) as _, Arc::new(b) as _]; let s2 = StructArray::new(fields.clone(), values, None); - let eq = make_eq_comparator(&s1, &s2, false).unwrap(); + let eq = make_eq_comparator(&s1, &s2, false)?; assert_eq!(eq(0, 1), false); // (1, [1, 2]) eq (2, None) assert_eq!(eq(0, 0), false); // (1, [1, 2]) eq (None, None) assert_eq!(eq(1, 1), false); // (2, [None]) eq (2, None) @@ -797,5 +817,6 @@ assert_eq!(eq(3, 0), false); // None eq (None, []) assert_eq!(eq(2, 0), false); // (None, None) eq (None, None) assert_eq!(eq(3, 0), false); // None eq (None, None) + Ok(()) } } diff --git a/native-engine/datafusion-ext-commons/src/arrow/selection.rs b/native-engine/datafusion-ext-commons/src/arrow/selection.rs index bf67bc28d..8afff282e 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/selection.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/selection.rs @@ -248,7 +248,7 @@ pub fn create_array_interleaver( let interleaver = Interleave::new( values .iter() - .map(|v| downcast_any!(v, PrimitiveArray<$t>).unwrap().clone()) + .map(|v| downcast_any!(v, PrimitiveArray<$t>).expect("Expected a PrimitiveArray").clone()) .collect::<Vec<_>>(), ); let dt = $dt.clone(); @@ -266,7 +266,7 @@ pub fn create_array_interleaver( DataType::Utf8 => { let interleaver = Interleave::new(values .iter() - .map(|v| downcast_any!(v, StringArray).unwrap().clone()) + .map(|v| downcast_any!(v, StringArray).expect("Expected a StringArray").clone()) .collect::<Vec<_>>(), ); return Ok(Box::new(move |indices| if with_prefetching { @@ -278,7 +278,7 @@ pub fn create_array_interleaver( DataType::Binary => { let interleaver = Interleave::new(values .iter() - .map(|v| downcast_any!(v, BinaryArray).unwrap().clone()) + .map(|v| downcast_any!(v, BinaryArray).expect("Expected a BinaryArray").clone()) .collect::<Vec<_>>(), ); return Ok(Box::new(move |indices| if with_prefetching { diff --git
a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs index bf740b86a..c4489338a 100644 --- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs @@ -48,7 +48,7 @@ impl TransposeOpt { DataType::Null => 0, DataType::Boolean => 0, dt if dt.primitive_width() == Some(1) => 0, - dt if dt.primitive_width() >= Some(2) => dt.primitive_width().unwrap(), + dt if dt.primitive_width() >= Some(2) => dt.primitive_width().expect("width"), DataType::Utf8 | DataType::Binary => 4, DataType::List(f) | DataType::Map(f, _) => { Self::data_type_bytes_width(f.data_type()).max(4) @@ -390,7 +390,7 @@ fn read_list_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let values_len = offsets.last().cloned().unwrap() as usize; + let values_len = offsets.last().cloned().expect("offsets is non-empty") as usize; let offsets_buffer: Buffer = Buffer::from_vec(offsets); let values = read_array( input, @@ -431,8 +431,8 @@ fn write_map_array( let value_offsets = array.value_offsets(); write_offsets(output, value_offsets, transpose_opt)?; - let first_offset = value_offsets.first().cloned().unwrap() as usize; - let entries_len = value_offsets.last().cloned().unwrap() as usize - first_offset; + let first_offset = value_offsets.first().cloned().expect("value_offsets is non-empty") as usize; + let entries_len = value_offsets.last().cloned().expect("value_offsets is non-empty") as usize - first_offset; let keys = array.keys().slice(first_offset, entries_len); let values = array.values().slice(first_offset, entries_len); @@ -458,7 +458,7 @@ fn read_map_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let entries_len = offsets.last().cloned().unwrap() as usize; + let entries_len = offsets.last().cloned().expect("offsets is non-empty") as usize; let offsets_buffer = Buffer::from_vec(offsets); // build inner struct @@ -612,8 +612,8 @@ fn write_bytes_array<T: ByteArrayType, W: Write>( let value_offsets = array.value_offsets(); write_offsets(output, value_offsets, transpose_opt)?; - let first_offset = value_offsets.first().cloned().unwrap() as usize; - let last_offset = value_offsets.last().cloned().unwrap() as usize; + let first_offset = value_offsets.first().cloned().expect("value_offsets is non-empty") as usize; + let last_offset = value_offsets.last().cloned().expect("value_offsets is non-empty") as usize; output.write_all(&array.value_data()[first_offset..last_offset])?; Ok(()) } @@ -632,7 +632,7 @@ fn read_bytes_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let values_len = offsets.last().cloned().unwrap() as usize; + let values_len = offsets.last().cloned().expect("offsets is non-empty") as usize; let offsets_buffer = Buffer::from_vec(offsets); let data_buffer = Buffer::from_vec(read_bytes_slice(input, values_len)?.into()); diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index 323f2ac86..a68d84ed7 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -121,7 +121,8 @@ fn hash_array( match array.data_type() { DataType::Null => {} DataType::Boolean => { - let array = array.as_any().downcast_ref::<BooleanArray>().unwrap(); + let array = array.as_any().downcast_ref::<BooleanArray>() .expect("Expected a BooleanArray"); if array.null_count() == 0 { for (i, hash) in hashes_buffer.iter_mut().enumerate() { *hash = h( @@
-217,7 +218,8 @@ fn create_hashes_dictionary( hashes_buffer: &mut [T], h: impl Fn(&[u8], T) -> T + Copy, ) { - let dict_array = array.as_any().downcast_ref::>().unwrap(); + let dict_array = array.as_any().downcast_ref::>() + .expect("Expected a DictionaryArray"); // Hash each dictionary value once, and then use that computed // hash for each key value to avoid a potentially expensive @@ -264,7 +266,8 @@ fn hash_one( match col.data_type() { DataType::Null => {} DataType::Boolean => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::() + .expect("Expected a BooleanArray"); *hash = h( (if array.value(idx) { 1u32 } else { 0u32 }) .to_le_bytes() @@ -324,14 +327,16 @@ fn hash_one( hash_one_decimal!(Decimal128Array, col, hash, idx, h); } DataType::List(..) => { - let list_array = col.as_any().downcast_ref::().unwrap(); + let list_array = col.as_any().downcast_ref::() + .expect("Expected a ListArray"); let value_array = list_array.value(idx); for i in 0..value_array.len() { hash_one(&value_array, i, hash, h); } } DataType::Map(..) => { - let map_array = col.as_any().downcast_ref::().unwrap(); + let map_array = col.as_any().downcast_ref::() + .expect("Expected a MapArray"); let kv_array = map_array.value(idx); let key_array = kv_array.column(0); let value_array = kv_array.column(1); @@ -341,7 +346,8 @@ fn hash_one( } } DataType::Struct(_) => { - let struct_array = col.as_any().downcast_ref::().unwrap(); + let struct_array = col.as_any().downcast_ref::() + .expect("Expected a StructArray"); for col in struct_array.columns() { hash_one(col, idx, hash, h); } diff --git a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs index 86af57376..f8c37f2c5 100644 --- a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs +++ b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs @@ -137,7 +137,7 @@ impl PhysicalExpr for BloomFilterMightContainExpr { if bloom_filter.is_none() { return Ok(ColumnarValue::Scalar(ScalarValue::from(false))); } - let bloom_filter = bloom_filter.as_ref().as_ref().unwrap(); + let bloom_filter = bloom_filter.as_ref().as_ref().expect("non-null bloom_filter"); // process with bloom filter let value = self.value_expr.evaluate(batch)?; diff --git a/native-engine/datafusion-ext-exprs/src/lib.rs b/native-engine/datafusion-ext-exprs/src/lib.rs index eb729e2dd..4c3a790c6 100644 --- a/native-engine/datafusion-ext-exprs/src/lib.rs +++ b/native-engine/datafusion-ext-exprs/src/lib.rs @@ -31,10 +31,10 @@ pub mod string_starts_with; fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { if any.is::() { - any.downcast_ref::().unwrap().as_any() + any.downcast_ref::().expect("Expected a PhysicalExpr").as_any() } else if any.is::>() { any.downcast_ref::>() - .unwrap() + .expect("Expected a PhysicalExpr") .as_any() } else { any diff --git a/native-engine/datafusion-ext-exprs/src/string_contains.rs b/native-engine/datafusion-ext-exprs/src/string_contains.rs index 029041d65..1826e8526 100644 --- a/native-engine/datafusion-ext-exprs/src/string_contains.rs +++ b/native-engine/datafusion-ext-exprs/src/string_contains.rs @@ -25,6 +25,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use arrow::array::DictionaryArray; use datafusion::{ common::{Result, ScalarValue}, logical_expr::ColumnarValue, @@ -83,7 +84,7 @@ impl PhysicalExpr for StringContainsExpr { match expr { ColumnarValue::Array(array) => { - let string_array = 
array.as_any().downcast_ref::<StringArray>().unwrap(); + let string_array = array.as_any().downcast_ref::<StringArray>().expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.contains(&self.infix)), diff --git a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs index 1432a24cb..dd5b2dc21 100644 --- a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs @@ -83,7 +83,7 @@ impl PhysicalExpr for StringEndsWithExpr { match expr { ColumnarValue::Array(array) => { - let string_array = array.as_any().downcast_ref::<StringArray>().unwrap(); + let string_array = array.as_any().downcast_ref::<StringArray>().expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.ends_with(&self.suffix)), ))); diff --git a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs index 4a47fcb22..4dd073f53 100644 --- a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs @@ -83,7 +83,7 @@ impl PhysicalExpr for StringStartsWithExpr { match expr { ColumnarValue::Array(array) => { - let string_array = array.as_any().downcast_ref::<StringArray>().unwrap(); + let string_array = array.as_any().downcast_ref::<StringArray>().expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.starts_with(&self.prefix)), ))); diff --git a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs index a72384603..7fbdd95a7 100644 --- a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs +++ b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs @@ -55,7 +55,7 @@ pub fn spark_check_overflow(args: &[ColumnarValue]) -> Result<ColumnarValue> { _ => ColumnarValue::Scalar(ScalarValue::Decimal128(None, to_precision, to_scale)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap(); + let array = array.as_any().downcast_ref::<Decimal128Array>().expect("Expected a Decimal128Array"); let mut output = Decimal128Builder::with_capacity(array.len()); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs index 9a8f490a9..c1aaf91a5 100644 --- a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs +++ b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs @@ -42,7 +42,7 @@ pub fn spark_get_json_object(args: &[ColumnarValue]) -> Result<ColumnarValue> { let json_strings = json_string_array .as_any() .downcast_ref::<StringArray>() - .unwrap(); + .expect("Expected a StringArray"); let path_string = match &args[1] { ColumnarValue::Scalar(ScalarValue::Utf8(str)) => match str { Some(path) => path, @@ -87,7 +87,7 @@ pub fn spark_parse_json(args: &[ColumnarValue]) -> Result<ColumnarValue> { let json_strings = json_string_array .as_any() .downcast_ref::<StringArray>() - .unwrap(); + .expect("Expected a StringArray"); let fallback_enabled = conf::PARSE_JSON_ERROR_FALLBACK.value().unwrap_or(false); let json_values: Vec>> = json_strings @@ -121,7 +121,7 @@ pub fn spark_parse_json(args: &[ColumnarValue]) -> Result<ColumnarValue> { pub fn spark_get_parsed_json_object(args:
&[ColumnarValue]) -> Result { let json_array = match &args[0] { - ColumnarValue::Array(array) => array.as_any().downcast_ref::().unwrap(), + ColumnarValue::Array(array) => array.as_any().downcast_ref::().expect("Expected a UserDefinedArray"), ColumnarValue::Scalar(_) => unreachable!(), }; @@ -153,7 +153,7 @@ pub fn spark_get_parsed_json_object(args: &[ColumnarValue]) -> Result Option> { - let json_value = value.downcast_ref::().unwrap(); + let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); match json_value { ParsedJsonValue::SerdeJson(v) => evaluator .evaluate_with_value_serde_json(v) @@ -166,7 +166,7 @@ pub fn spark_get_parsed_json_object(args: &[ColumnarValue]) -> Result { - fallback_results_iter.next().unwrap().map(Cow::from) + fallback_results_iter.next().expect("next").map(Cow::from) } } }) @@ -186,7 +186,7 @@ pub fn spark_get_parsed_json_simple_field( let output = StringArray::from_iter(json_array.iter().map(|value| { value.as_ref().and_then(|value| { - let json_value = value.downcast_ref::().unwrap(); + let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); match json_value { ParsedJsonValue::SerdeJson(v) => v .as_object() @@ -199,7 +199,7 @@ pub fn spark_get_parsed_json_simple_field( .and_then(|v| sonic_value_to_string(v).unwrap_or_default()) .map(Cow::from), ParsedJsonValue::Fallback(_) => { - fallback_results_iter.next().unwrap().map(Cow::from) + fallback_results_iter.next().expect("next").map(Cow::from) } } }) @@ -214,7 +214,7 @@ fn parse_fallback(json_path: &str, json_array: &UserDefinedArray) -> Result Option<&str> { - let json_value = value.downcast_ref::().unwrap(); + let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); if let ParsedJsonValue::Fallback(json) = json_value { return Some(json.as_ref()); } @@ -544,7 +544,7 @@ impl HiveGetJsonObjectMatcher { .flat_map(|r| { // keep consistent with hive UDFJson let iter: Box> = match r { - v if v.is_array() => Box::new(v.into_array().unwrap().into_iter()), + v if v.is_array() => Box::new(v.into_array().expect("array").into_iter()), other => Box::new(std::iter::once(other)), }; iter diff --git a/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs b/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs index 3d221bb2b..93de8d83f 100644 --- a/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs +++ b/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs @@ -45,7 +45,7 @@ pub fn spark_make_decimal(args: &[ColumnarValue]) -> Result { _ => ColumnarValue::Scalar(ScalarValue::Decimal128(None, precision, scale)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_any().downcast_ref::().expect("Expected a Int64Array"); let mut output = Decimal128Builder::with_capacity(array.len()); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-functions/src/spark_strings.rs b/native-engine/datafusion-ext-functions/src/spark_strings.rs index 83f526f90..94b2ef7c5 100644 --- a/native-engine/datafusion-ext-functions/src/spark_strings.rs +++ b/native-engine/datafusion-ext-functions/src/spark_strings.rs @@ -143,7 +143,7 @@ pub fn string_concat(args: &[ColumnarValue]) -> Result { } ColumnarValue::Array(v) => { if v.is_valid(index) { - let v = as_string_array(v).unwrap(); + let v = as_string_array(v).expect("string array"); owned_string.push_str(v.value(index)); } else { is_not_null = false; @@ -293,7 +293,7 @@ pub fn string_concat_ws(args: &[ColumnarValue]) 
-> Result { } Arg::List(list) => { if list.is_valid(i) { - let strings = as_string_array(list.values()).unwrap(); + let strings = as_string_array(list.values()).expect("string array"); let offsets = list.value_offsets(); let l = offsets[i] as usize; let r = offsets[i + 1] as usize; diff --git a/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs b/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs index 1ae2b0635..926d51923 100644 --- a/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs +++ b/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs @@ -31,7 +31,7 @@ pub fn spark_unscaled_value(args: &[ColumnarValue]) -> Result { _ => ColumnarValue::Scalar(ScalarValue::Int64(None)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_any().downcast_ref::().expect("Expected a Decimal128Array"); let mut output = Int64Builder::new(); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs index aa1ee0019..9d15f3692 100644 --- a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs +++ b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs @@ -277,12 +277,12 @@ impl AggContext { let mut merging_acc_table = self.create_acc_table(0); if self.need_partial_merge { - let partial_merged_array = as_binary_array(batch.columns().last().unwrap())?; + let partial_merged_array = as_binary_array(batch.columns().last().expect("last column"))?; let array = partial_merged_array .iter() .skip(batch_start_idx) .take(batch_end_idx - batch_start_idx) - .map(|bytes| bytes.unwrap()) + .map(|bytes| bytes.expect("bytes")) .collect::>(); let mut cursors = array .iter() diff --git a/native-engine/datafusion-ext-plans/src/agg/agg_table.rs b/native-engine/datafusion-ext-plans/src/agg/agg_table.rs index 2e4333912..d1c26e072 100644 --- a/native-engine/datafusion-ext-plans/src/agg/agg_table.rs +++ b/native-engine/datafusion-ext-plans/src/agg/agg_table.rs @@ -821,8 +821,8 @@ impl<'a> RecordsSpillCursor<'a> { )?; // load next bucket head - self.cur_bucket_idx = read_len(&mut self.input).unwrap(); - self.cur_bucket_count = read_len(&mut self.input).unwrap(); + self.cur_bucket_idx = read_len(&mut self.input).expect("read_len"); + self.cur_bucket_count = read_len(&mut self.input).expect("read_len"); Ok((acc_table, keys)) } } diff --git a/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs b/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs index 7c6f1da33..b939c0796 100644 --- a/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs +++ b/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs @@ -127,7 +127,7 @@ impl Agg for AggBloomFilter { self.num_bits, )); } - bf.as_mut().unwrap() + bf.as_mut().expect("bf") } _ => return df_unimplemented_err!("AggBloomFilter only supports one bloom filter"), }; diff --git a/native-engine/datafusion-ext-plans/src/agg/collect.rs b/native-engine/datafusion-ext-plans/src/agg/collect.rs index 6ddeed832..bce7264cb 100644 --- a/native-engine/datafusion-ext-plans/src/agg/collect.rs +++ b/native-engine/datafusion-ext-plans/src/agg/collect.rs @@ -442,7 +442,7 @@ impl AccList { } pub fn append(&mut self, value: &ScalarValue, nullable: bool) { - write_scalar(&value, nullable, &mut self.raw).unwrap(); + write_scalar(&value, nullable, &mut self.raw).expect("write_scalar"); } pub fn merge(&mut self, other: &mut Self) { @@ -456,7 +456,7 @@ impl AccList { fn next(&mut 
self) -> Option { if self.0.position() < self.0.get_ref().len() as u64 { - return Some(read_scalar(&mut self.0, &self.1, self.2).unwrap()); + return Some(read_scalar(&mut self.0, &self.1, self.2).expect("read_scalar")); } None } @@ -534,7 +534,7 @@ impl AccSet { pub fn append(&mut self, value: &ScalarValue, nullable: bool) { let old_raw_len = self.list.raw.len(); - write_scalar(value, nullable, &mut self.list.raw).unwrap(); + write_scalar(value, nullable, &mut self.list.raw).expect("write_scalar"); self.append_raw_inline(old_raw_len); } @@ -651,7 +651,7 @@ fn acc_hash(value: impl AsRef<[u8]>) -> u64 { mod tests { use arrow::datatypes::DataType; use auron_memmgr::spill::Spill; - use datafusion::common::ScalarValue; + use datafusion::common::{DataFusionError, ScalarValue}; use super::*; @@ -713,7 +713,7 @@ mod tests { } #[test] - fn test_acc_set_spill() { + fn test_acc_set_spill() -> Result<()> { let mut acc_col = AccSetColumn::empty(DataType::Int32); acc_col.resize(3); acc_col.append_item(1, &ScalarValue::Int32(Some(1))); @@ -726,18 +726,16 @@ mod tests { let mut spill: Box = Box::new(vec![]); let mut spill_writer = spill.get_compressed_writer(); - acc_col - .spill(IdxSelection::Range(0, 3), &mut spill_writer) - .unwrap(); - spill_writer.finish().unwrap(); + acc_col.spill(IdxSelection::Range(0, 3), &mut spill_writer)?; + spill_writer.finish()?; let mut acc_col_unspill = AccSetColumn::empty(DataType::Int32); acc_col_unspill - .unspill(3, &mut spill.get_compressed_reader()) - .unwrap(); + .unspill(3, &mut spill.get_compressed_reader())?; assert_eq!(acc_col.take_values(0), acc_col_unspill.take_values(0)); assert_eq!(acc_col.take_values(1), acc_col_unspill.take_values(1)); assert_eq!(acc_col.take_values(2), acc_col_unspill.take_values(2)); + Ok(()) } } diff --git a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs index deba5d344..c29b58f13 100644 --- a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs +++ b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs @@ -227,13 +227,13 @@ impl Agg for SparkUDAFWrapper { } fn create_acc_column(&self, num_rows: usize) -> AccColumnRef { - let jcontext = self.jcontext().unwrap(); + let jcontext = self.jcontext().expect("jcontext"); let rows = jni_call!(SparkUDAFWrapperContext(jcontext.as_obj()).initialize( num_rows as i32, )-> JObject) .unwrap(); - let jcontext = self.jcontext().unwrap(); + let jcontext = self.jcontext().expect("jcontext"); let obj = jni_new_global_ref!(rows.as_obj()).unwrap(); Box::new(AccUDAFBufferRowsColumn { obj, jcontext }) } diff --git a/native-engine/datafusion-ext-plans/src/agg_exec.rs b/native-engine/datafusion-ext-plans/src/agg_exec.rs index a77babd04..c99ad2623 100644 --- a/native-engine/datafusion-ext-plans/src/agg_exec.rs +++ b/native-engine/datafusion-ext-plans/src/agg_exec.rs @@ -418,6 +418,7 @@ mod test { datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; + use auron_memmgr::MemManager; use datafusion::{ assert_batches_sorted_eq, @@ -447,7 +448,7 @@ mod test { f: (&str, &Vec), g: (&str, &Vec), h: (&str, &Vec), - ) -> RecordBatch { + ) -> Result { let schema = Schema::new(vec![ Field::new(a.0, DataType::Int32, false), Field::new(b.0, DataType::Int32, false), @@ -459,7 +460,7 @@ mod test { Field::new(h.0, DataType::Int32, false), ]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![ Arc::new(Int32Array::from(a.1.clone())), @@ -471,8 +472,8 @@ mod test { 
Arc::new(Int32Array::from(g.1.clone())), Arc::new(Int32Array::from(h.1.clone())), ], - ) - .unwrap() + )?; + Ok(batch) } fn build_table( @@ -484,10 +485,11 @@ mod test { f: (&str, &Vec), g: (&str, &Vec), h: (&str, &Vec), - ) -> Arc { - let batch = build_table_i32(a, b, c, d, e, f, g, h); + ) -> Result> { + let batch = build_table_i32(a, b, c, d, e, f, g, h)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + let exec = TestMemoryExec::try_new(&[vec![batch]], schema, None)?; + Ok(Arc::new(exec)) } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] @@ -503,7 +505,7 @@ mod test { ("f", &vec![0, 1, 2, 3, 4, 5, 6]), ("g", &vec![6, 3, 6, 3, 1, 5, 4]), ("h", &vec![6, 3, 6, 3, 1, 5, 4]), - ); + )?; let agg_expr_sum = create_agg( AggFunction::Sum, diff --git a/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs b/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs index 1c7ec79fe..38c4cff34 100644 --- a/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs +++ b/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs @@ -479,7 +479,7 @@ fn prune_expr_cols(expr: &PhysicalExprRef) -> (PhysicalExprRef, Vec) { Ok(Transformed::yes(expr)) } }) - .unwrap() + .expect("transform") .data; let mapped_cols: Vec = used_cols diff --git a/native-engine/datafusion-ext-plans/src/common/column_pruning.rs b/native-engine/datafusion-ext-plans/src/common/column_pruning.rs index 267ffa190..fa29324ba 100644 --- a/native-engine/datafusion-ext-plans/src/common/column_pruning.rs +++ b/native-engine/datafusion-ext-plans/src/common/column_pruning.rs @@ -114,7 +114,7 @@ pub fn extend_projection_by_expr( )) }) .map(|r| r.data) - .unwrap() + .expect("transform") } pub fn map_columns(expr: &PhysicalExprRef, mapping: &HashMap) -> PhysicalExprRef { @@ -130,5 +130,5 @@ pub fn map_columns(expr: &PhysicalExprRef, mapping: &HashMap) -> P )) }) .map(|r| r.data) - .unwrap() + .expect("transform") } diff --git a/native-engine/datafusion-ext-plans/src/common/execution_context.rs b/native-engine/datafusion-ext-plans/src/common/execution_context.rs index 15b6e69d0..4dd4e3193 100644 --- a/native-engine/datafusion-ext-plans/src/common/execution_context.rs +++ b/native-engine/datafusion-ext-plans/src/common/execution_context.rs @@ -724,7 +724,7 @@ impl WrappedSender { send_time.inspect(|send_time| { exclude_time .as_ref() - .unwrap() + .expect("exclude_time") .sub_duration(send_time.elapsed()); }); } diff --git a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs index 83582068c..5ff21c30b 100644 --- a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs +++ b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs @@ -76,7 +76,7 @@ impl RowNullChecker { }, DataType::Boolean => FieldConfig::new_boolean(sort_options), dt if dt.is_primitive() => { - FieldConfig::new_primitive(sort_options, 1 + dt.primitive_width().unwrap()) + FieldConfig::new_primitive(sort_options, 1 + dt.primitive_width().expect("primitive_width")) } // DataType::Int8 => FieldConfig::new_primitive(sort_options, 2), // 1 byte null flag + // // 1 byte value @@ -445,6 +445,7 @@ impl FieldConfig { #[cfg(test)] mod tests { + use std::error::Error; use std::sync::Arc; use arrow::{ @@ -539,7 +540,7 @@ mod tests { } #[test] - fn test_roundtrip_with_record_batch() { + fn test_roundtrip_with_record_batch() -> Result<(), Box> { // Create a schema 
with multiple data types let schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, false), @@ -570,8 +571,7 @@ mod tests { Arc::new(name_array), Arc::new(active_array), ], - ) - .unwrap(); + )?; // Create RowNullChecker let checker = RowNullChecker::new( @@ -618,6 +618,7 @@ mod tests { // Verify that row count matches RecordBatch assert_eq!(record_batch.num_rows(), 4); assert_eq!(record_batch.num_columns(), 3); + Ok(()) } #[test] diff --git a/native-engine/datafusion-ext-plans/src/expand_exec.rs b/native-engine/datafusion-ext-plans/src/expand_exec.rs index 9030e3cdf..7a4c06109 100644 --- a/native-engine/datafusion-ext-plans/src/expand_exec.rs +++ b/native-engine/datafusion-ext-plans/src/expand_exec.rs @@ -208,125 +208,123 @@ mod test { use crate::expand_exec::ExpandExec; // build i32 table - fn build_table_i32(a: (&str, &Vec<i32>)) -> RecordBatch { + fn build_table_i32(a: (&str, &Vec<i32>)) -> Result<RecordBatch> { let schema = Schema::new(vec![Field::new(a.0, DataType::Int32, false)]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![Arc::new(Int32Array::from(a.1.clone()))], - ) - .unwrap() + )?; + Ok(batch) } - fn build_table_int(a: (&str, &Vec<i32>)) -> Arc<dyn ExecutionPlan> { - let batch = build_table_i32(a); + fn build_table_int(a: (&str, &Vec<i32>)) -> Result<Arc<dyn ExecutionPlan>> { + let batch = build_table_i32(a)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } // build f32 table - fn build_table_f32(a: (&str, &Vec<f32>)) -> RecordBatch { + fn build_table_f32(a: (&str, &Vec<f32>)) -> Result<RecordBatch> { let schema = Schema::new(vec![Field::new(a.0, DataType::Float32, false)]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![Arc::new(Float32Array::from(a.1.clone()))], - ) - .unwrap() + )?; + Ok(batch) } - fn build_table_float(a: (&str, &Vec<f32>)) -> Arc<dyn ExecutionPlan> { - let batch = build_table_f32(a); + fn build_table_float(a: (&str, &Vec<f32>)) -> Result<Arc<dyn ExecutionPlan>> { + let batch = build_table_f32(a)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } // build str table - fn build_table_str(a: (&str, &Vec<String>)) -> RecordBatch { + fn build_table_str(a: (&str, &Vec<String>)) -> Result<RecordBatch> { let schema = Schema::new(vec![Field::new(a.0, DataType::Utf8, false)]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![Arc::new(StringArray::from(a.1.clone()))], - ) - .unwrap() + )?; + Ok(batch) } - fn build_table_string(a: (&str, &Vec<String>)) -> Arc<dyn ExecutionPlan> { - let batch = build_table_str(a); + fn build_table_string(a: (&str, &Vec<String>)) -> Result<Arc<dyn ExecutionPlan>> { + let batch = build_table_str(a)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } // build boolean table - fn build_table_bool(a: (&str, &Vec<bool>)) -> RecordBatch { + fn build_table_bool(a: (&str, &Vec<bool>)) -> Result<RecordBatch> { let schema = Schema::new(vec![Field::new(a.0, DataType::Boolean, false)]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![Arc::new(BooleanArray::from(a.1.clone()))], - ) - .unwrap() + )?; + Ok(batch) } - fn build_table_boolean(a: (&str, &Vec<bool>)) -> Arc<dyn ExecutionPlan> { - let batch = build_table_bool(a); + fn build_table_boolean(a: (&str, &Vec<bool>)) -> Result<Arc<dyn ExecutionPlan>> { + let batch = build_table_bool(a)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } #[tokio::test] async fn test_expand_exec_i32() -> Result<()> { MemManager::init(10000); - let input = build_table_int(("a", &vec![-1, -2, 0, 3])); + let input = build_table_int(("a", &vec![-1, -2, 0, 3]))?; let schema = Schema::new(vec![Field::new("test_i32", DataType::Int32, false)]); let projections = vec![ vec![ binary( - col("test_i32", &schema).unwrap(), + col("test_i32", &schema)?, Operator::Multiply, lit(ScalarValue::from(2)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_i32", &schema).unwrap(), + col("test_i32", &schema)?, Operator::Plus, lit(ScalarValue::from(100)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_i32", &schema).unwrap(), + col("test_i32", &schema)?, Operator::Divide, lit(ScalarValue::from(-2)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_i32", &schema).unwrap(), + col("test_i32", &schema)?, Operator::Modulo, lit(ScalarValue::from(2)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_i32", &schema).unwrap(), + col("test_i32", &schema)?, Operator::BitwiseShiftLeft, lit(ScalarValue::from(1)), &schema, - ) - .unwrap(), + )?, ], ]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let output = expand_exec.execute(0, task_ctx).unwrap(); + let output = expand_exec.execute(0, task_ctx)?; let batches = common::collect(output).await?; let expected = vec![ "+-----+", "| a |", "+-----+", "| -2 |", "| -4 |", "| 0 |", "| 6 |", "| 99 |", @@ -350,45 +348,45 @@ async fn test_expand_exec_f32() -> Result<()> { MemManager::init(10000); - let input = build_table_float(("a", &vec![-1.2, -2.3, 0.0, 3.4])); + let input = build_table_float(("a", &vec![-1.2, -2.3, 0.0, 3.4]))?; let schema = Schema::new(vec![Field::new("test_f32", DataType::Float32, false)]); let projections = vec![ vec![ binary( - col("test_f32", &schema).unwrap(), + col("test_f32", &schema)?, Operator::Multiply, lit(ScalarValue::from(2.1_f32)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_f32", &schema).unwrap(), + col("test_f32", &schema)?, Operator::Plus, lit(ScalarValue::from(100_f32)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_f32", &schema).unwrap(), + col("test_f32", &schema)?, Operator::Divide, lit(ScalarValue::from(-2_f32)), &schema, - ) - .unwrap(), + )?, ], vec![ binary( - col("test_f32", &schema).unwrap(), + col("test_f32", &schema)?, Operator::Modulo, lit(ScalarValue::from(-2_f32)), &schema, - ) - .unwrap(), + )?, ], ]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let output = expand_exec.execute(0, task_ctx).unwrap(); + let output = expand_exec.execute(0, task_ctx)?; let batches = common::collect(output).await?; let expected = vec![ "+-------------+", @@ -437,24 +435,23 @@ "rust".to_string(), "!".to_string(), ], - )); + ))?; let schema = Schema::new(vec![Field::new("test_str", DataType::Utf8, false)]); let projections = vec![vec![ binary( - col("test_str", &schema).unwrap(), + col("test_str", &schema)?, Operator::StringConcat, - lit(Some("app").unwrap()), + lit("app"), &schema, - ) - .unwrap(), + )?, ]]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let output = expand_exec.execute(0, task_ctx).unwrap(); + let output = expand_exec.execute(0, task_ctx)?; let batches =
common::collect(output).await?;
 let expected = vec![
 "+----------+",
@@ -475,27 +472,27 @@ mod test {
 async fn test_expand_exec_bool() -> Result<()> {
 MemManager::init(10000);

- let input = build_table_boolean(("a", &vec![true, false, true, false]));
+ let input = build_table_boolean(("a", &vec![true, false, true, false]))?;
 let schema = Schema::new(vec![Field::new("test_bool", DataType::Boolean, false)]);

 let projections = vec![
 vec![
 binary(
- col("test_bool", &schema).unwrap(),
+ col("test_bool", &schema)?,
 Operator::And,
 lit(ScalarValue::Boolean(Some(true))),
 &schema,
- )
- .unwrap(),
+ )?,
 ],
 vec![
 binary(
- col("test_bool", &schema).unwrap(),
+ col("test_bool", &schema)?,
 Operator::Or,
 lit(ScalarValue::Boolean(Some(true))),
 &schema,
- )
- .unwrap(),
+ )?,
 ],
 ];

@@ -503,7 +500,7 @@ mod test {
 let session_ctx = SessionContext::new();
 let task_ctx = session_ctx.task_ctx();

- let output = expand_exec.execute(0, task_ctx).unwrap();
+ let output = expand_exec.execute(0, task_ctx)?;
 let batches = common::collect(output).await?;
 let expected = vec![
 "+-------+",
diff --git a/native-engine/datafusion-ext-plans/src/generate_exec.rs b/native-engine/datafusion-ext-plans/src/generate_exec.rs
index 34dcb4ffa..2faf09b8e 100644
--- a/native-engine/datafusion-ext-plans/src/generate_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/generate_exec.rs
@@ -106,7 +106,7 @@ impl GenerateExec {
 self.generator_output_schema.clone(),
 outer,
 )
- .unwrap()
+ .expect("try_new")
 }
 }

diff --git a/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs b/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
index 49be1d4e5..43383353a 100644
--- a/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
+++ b/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
@@ -146,8 +146,8 @@ impl Table {
 match len {
 0 => unreachable!(),
 1 => {
- let single = mapped_indices.pop().unwrap();
- let _len = mapped_indices.pop().unwrap();
+ let single = mapped_indices.pop().expect("pop");
+ let _len = mapped_indices.pop().expect("pop");
 MapValue::new_single(single)
 }
 _ => MapValue::new_range(start),
diff --git a/native-engine/datafusion-ext-plans/src/joins/test.rs b/native-engine/datafusion-ext-plans/src/joins/test.rs
index 9125ed53e..894256fb4 100644
--- a/native-engine/datafusion-ext-plans/src/joins/test.rs
+++ b/native-engine/datafusion-ext-plans/src/joins/test.rs
@@ -29,12 +29,11 @@ mod tests {
 use datafusion::{
 assert_batches_sorted_eq,
 common::JoinSide,
 error::Result,
 physical_expr::expressions::Column,
 physical_plan::{ExecutionPlan, common, joins::utils::*, test::TestMemoryExec},
 prelude::SessionContext,
 };
-
+ use datafusion::common::DataFusionError;
 use crate::{
 broadcast_join_build_hash_map_exec::BroadcastJoinBuildHashMapExec,
 broadcast_join_exec::BroadcastJoinExec,
@@ -59,44 +58,45 @@ mod tests {
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> RecordBatch {
+ ) -> Result<RecordBatch> {
 let schema = Schema::new(vec![
 Field::new(a.0, DataType::Int32, false),
 Field::new(b.0, DataType::Int32, false),
 Field::new(c.0, DataType::Int32, false),
 ]);
- RecordBatch::try_new(
+ let batch = RecordBatch::try_new(
 Arc::new(schema),
 vec![
 Arc::new(Int32Array::from(a.1.clone())),
 Arc::new(Int32Array::from(b.1.clone())),
 Arc::new(Int32Array::from(c.1.clone())),
 ],
- )
- .unwrap()
+ )?;
+ Ok(batch)
 }

 fn build_table(
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> Arc<dyn ExecutionPlan> {
- let batch = build_table_i32(a, b, c);
+ ) -> Result<Arc<dyn ExecutionPlan>> {
+ let batch = build_table_i32(a, b, c)?;
 let schema = batch.schema();
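Note: every helper conversion in these test modules follows the same shape: the helper returns Result<...> instead of panicking, and the #[tokio::test] caller, which already returns Result<()>, forwards errors with `?`. A minimal self-contained sketch of that shape (names are illustrative, not part of this patch):

    use std::sync::Arc;

    use arrow::array::Int32Array;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;
    use datafusion::error::Result;

    // Fallible helper: construction errors propagate instead of panicking.
    fn build_single_column(name: &str, values: &[i32]) -> Result<RecordBatch> {
        let schema = Schema::new(vec![Field::new(name, DataType::Int32, false)]);
        let batch = RecordBatch::try_new(
            Arc::new(schema),
            vec![Arc::new(Int32Array::from(values.to_vec()))],
        )?; // ArrowError converts into DataFusionError via From
        Ok(batch)
    }

    #[tokio::test]
    async fn uses_fallible_helper() -> Result<()> {
        let batch = build_single_column("a", &[1, 2, 3])?;
        assert_eq!(batch.num_rows(), 3);
        Ok(())
    }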
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?))
 }

- fn build_table_from_batches(batches: Vec<RecordBatch>) -> Arc<dyn ExecutionPlan> {
- let schema = batches.first().unwrap().schema();
- Arc::new(TestMemoryExec::try_new(&[batches], schema, None).unwrap())
+ fn build_table_from_batches(batches: Vec<RecordBatch>) -> Result<Arc<dyn ExecutionPlan>> {
+ let schema = batches
+ .first()
+ .ok_or_else(|| DataFusionError::Internal("empty batches".into()))?
+ .schema();
+ Ok(Arc::new(TestMemoryExec::try_new(&[batches], schema, None)?))
 }

 fn build_date_table(
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> Arc<dyn ExecutionPlan> {
+ ) -> Result<Arc<dyn ExecutionPlan>> {
 let schema = Schema::new(vec![
 Field::new(a.0, DataType::Date32, false),
 Field::new(b.0, DataType::Date32, false),
@@ -110,18 +110,17 @@ mod tests {
 Arc::new(Date32Array::from(b.1.clone())),
 Arc::new(Date32Array::from(c.1.clone())),
 ],
- )
- .unwrap();
+ )?;

 let schema = batch.schema();
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?))
 }

 fn build_date64_table(
 a: (&str, &Vec<i64>),
 b: (&str, &Vec<i64>),
 c: (&str, &Vec<i64>),
- ) -> Arc<dyn ExecutionPlan> {
+ ) -> Result<Arc<dyn ExecutionPlan>> {
 let schema = Schema::new(vec![
 Field::new(a.0, DataType::Date64, false),
 Field::new(b.0, DataType::Date64, false),
@@ -135,11 +134,10 @@ mod tests {
 Arc::new(Date64Array::from(b.1.clone())),
 Arc::new(Date64Array::from(c.1.clone())),
 ],
- )
- .unwrap();
+ )?;

 let schema = batch.schema();
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?))
 }

 /// returns a table with 3 columns of i32 in memory
 fn build_table_i32_nullable(
 a: (&str, &Vec<Option<i32>>),
 b: (&str, &Vec<Option<i32>>),
 c: (&str, &Vec<Option<i32>>),
- ) -> Arc<dyn ExecutionPlan> {
+ ) -> Result<Arc<dyn ExecutionPlan>> {
 let schema = Arc::new(Schema::new(vec![
 Field::new(a.0, DataType::Int32, true),
 Field::new(b.0, DataType::Int32, true),
@@ -160,9 +158,8 @@ mod tests {
 Arc::new(Int32Array::from(b.1.clone())),
 Arc::new(Int32Array::from(c.1.clone())),
 ],
- )
- .unwrap();
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ )?;
+ Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?))
 }

 fn build_join_schema_for_test(
diff --git a/native-engine/datafusion-ext-plans/src/limit_exec.rs b/native-engine/datafusion-ext-plans/src/limit_exec.rs
index 1b9f5892e..977e0db9f 100644
--- a/native-engine/datafusion-ext-plans/src/limit_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/limit_exec.rs
@@ -167,32 +167,32 @@ mod test {
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> RecordBatch {
+ ) -> Result<RecordBatch> {
 let schema = Schema::new(vec![
 Field::new(a.0, DataType::Int32, false),
 Field::new(b.0, DataType::Int32, false),
 Field::new(c.0, DataType::Int32, false),
 ]);
- RecordBatch::try_new(
+ let batch = RecordBatch::try_new(
 Arc::new(schema),
 vec![
 Arc::new(Int32Array::from(a.1.clone())),
 Arc::new(Int32Array::from(b.1.clone())),
 Arc::new(Int32Array::from(c.1.clone())),
 ],
- )
- .unwrap()
+ )?;
+ Ok(batch)
 }

 fn build_table(
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> Arc<dyn ExecutionPlan> {
- let batch = build_table_i32(a, b, c);
+ ) -> Result<Arc<dyn ExecutionPlan>> {
+ let batch = build_table_i32(a, b, c)?;
 let schema = batch.schema();
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?))
 }

 #[tokio::test]
@@ -206,7 +206,7 @@ mod test {
 let limit_exec = LimitExec::new(input, 2_u64);
 let session_ctx =
SessionContext::new(); let task_ctx = session_ctx.task_ctx(); - let output = limit_exec.execute(0, task_ctx).unwrap(); + let output = limit_exec.execute(0, task_ctx)?; let batches = common::collect(output).await?; let row_count = limit_exec.statistics()?.num_rows; diff --git a/native-engine/datafusion-ext-plans/src/parquet_exec.rs b/native-engine/datafusion-ext-plans/src/parquet_exec.rs index c80b86b21..f8c8a2b0c 100644 --- a/native-engine/datafusion-ext-plans/src/parquet_exec.rs +++ b/native-engine/datafusion-ext-plans/src/parquet_exec.rs @@ -332,7 +332,7 @@ impl AsyncFileReader for ParquetFileReaderRef { continue; } - let last_merged_range = merged_ranges.last_mut().unwrap(); + let last_merged_range = merged_ranges.last_mut().expect("last_mut"); if range.start <= last_merged_range.end + max_over_read_size as u64 { last_merged_range.end = range.end.max(last_merged_range.end); } else { diff --git a/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs b/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs index a6102516b..c7d2cd81d 100644 --- a/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs +++ b/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs @@ -291,7 +291,7 @@ fn execute_parquet_sink( tokio::task::spawn_blocking(move || { let mut part_writer = part_writer.lock(); - let w = part_writer.as_mut().unwrap(); + let w = part_writer.as_mut().expect("part_writer"); w.write(&sub_batch) }) .await diff --git a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs index 6b88ba8fd..58a60df25 100644 --- a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs +++ b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs @@ -311,7 +311,7 @@ fn sort_batches_by_partition_id( part_ids } Partitioning::RangePartitioning(sort_expr, _, bounds) => { - evaluate_range_partition_ids(&batch, sort_expr, bounds).unwrap() + evaluate_range_partition_ids(&batch, sort_expr, bounds).expect("eval_part_ids") } _ => unreachable!("unsupported partitioning: {:?}", partitioning), }; @@ -449,7 +449,7 @@ mod test { .collect::>>()?, )?)); - let rows: Rows = sort_row_converter.lock().convert_columns(&bounds).unwrap(); + let rows: Rows = sort_row_converter.lock().convert_columns(&bounds)?; let partition_num = rows.num_rows() + 1; let range_repartitioning = @@ -511,7 +511,7 @@ mod test { .collect::>>()?, )?)); - let rows: Rows = sort_row_converter.lock().convert_columns(&bounds).unwrap(); + let rows: Rows = sort_row_converter.lock().convert_columns(&bounds)?; let partition_num = rows.num_rows() + 1; let range_repartitioning = diff --git a/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs b/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs index d5f4f31dc..787c23394 100644 --- a/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs +++ b/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs @@ -57,7 +57,7 @@ impl SingleShuffleRepartitioner { .wrap_writer(open_shuffle_file(&self.output_data_file)?), )); } - Ok(output_data.as_mut().unwrap()) + Ok(output_data.as_mut().expect("output_data")) } } diff --git a/native-engine/datafusion-ext-plans/src/sort_exec.rs b/native-engine/datafusion-ext-plans/src/sort_exec.rs index 24d0beb6b..1dd166788 100644 --- a/native-engine/datafusion-ext-plans/src/sort_exec.rs +++ b/native-engine/datafusion-ext-plans/src/sort_exec.rs @@ -460,7 +460,7 @@ impl SortedBlock for InMemSortedBlock { if let 
Some(batch) = batch { self.mem_used -= batch.get_batch_mem_size(); self.mem_used -= self.sorted_keys[0].mem_size(); - self.sorted_keys.pop_front().unwrap(); + self.sorted_keys.pop_front().expect("pop_front"); self.cur_row_idx = usize::MAX; Ok(Some(batch)) } else { @@ -1049,7 +1049,7 @@ fn create_zero_column_batch(num_rows: usize) -> RecordBatch { vec![], &RecordBatchOptions::new().with_row_count(Some(num_rows)), ) - .unwrap() + .expect("record_batch") } struct PruneSortKeysFromBatch { @@ -1293,7 +1293,7 @@ impl KeyCollector for SqueezeKeyCollector { fn add_key(&mut self, key: &[u8]) { self.sorted_key_writer .write_key(key, &mut self.store) - .unwrap(); + .expect("write_key"); } fn freeze(&mut self) { @@ -1425,32 +1425,32 @@ mod test { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> RecordBatch { + ) -> Result { let schema = Schema::new(vec![ Field::new(a.0, DataType::Int32, false), Field::new(b.0, DataType::Int32, false), Field::new(c.0, DataType::Int32, false), ]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![ Arc::new(Int32Array::from(a.1.clone())), Arc::new(Int32Array::from(b.1.clone())), Arc::new(Int32Array::from(c.1.clone())), ], - ) - .unwrap() + )?; + Ok(batch) } fn build_table( a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Arc { - let batch = build_table_i32(a, b, c); + ) -> Result> { + let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) + Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } #[tokio::test] @@ -1462,7 +1462,7 @@ mod test { ("a", &vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0]), ("b", &vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("c", &vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]), - ); + )?; let sort_exprs = vec![PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(), diff --git a/native-engine/datafusion-ext-plans/src/window/window_context.rs b/native-engine/datafusion-ext-plans/src/window/window_context.rs index 182b462f8..411ee92de 100644 --- a/native-engine/datafusion-ext-plans/src/window/window_context.rs +++ b/native-engine/datafusion-ext-plans/src/window/window_context.rs @@ -137,7 +137,7 @@ impl WindowContext { Ok(self .partition_row_converter .lock() - .unwrap() + .expect("lock") .convert_columns( &self .partition_spec @@ -151,7 +151,7 @@ impl WindowContext { } pub fn get_order_rows(&self, batch: &RecordBatch) -> Result { - Ok(self.order_row_converter.lock().unwrap().convert_columns( + Ok(self.order_row_converter.lock().expect("lock").convert_columns( &self .order_spec .iter() From 7cc45369ed6a11445dc76167cb679f565286927e Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 25 Nov 2025 20:59:56 +0800 Subject: [PATCH 02/10] up --- .../auron-jni-bridge/src/jni_bridge.rs | 12 ++-- native-engine/auron-jni-bridge/src/lib.rs | 2 +- native-engine/auron/src/http/mod.rs | 2 +- native-engine/auron/src/http/pprof.rs | 2 +- .../datafusion-ext-commons/src/arrow/cast.rs | 38 +++++------ .../src/io/batch_serde.rs | 60 ++++++++--------- .../src/io/ipc_compression.rs | 4 +- .../src/io/scalar_serde.rs | 8 +-- .../datafusion-ext-commons/src/spark_hash.rs | 29 ++++---- .../datafusion-ext-exprs/src/cast.rs | 20 +++--- .../datafusion-ext-exprs/src/get_map_value.rs | 8 +-- .../src/string_contains.rs | 6 +- .../src/string_ends_with.rs | 6 +- .../src/string_starts_with.rs | 6 +- .../src/spark_crypto.rs | 2 +- .../src/spark_dates.rs | 25 ++++--- .../src/spark_get_json_object.rs | 66 +++++++++---------- 
.../src/spark_null_if.rs | 2 +-
 .../src/agg/spark_udaf_wrapper.rs | 4 +-
 .../src/common/row_null_checker.rs | 24 +++----
 .../src/shuffle/buffered_data.rs | 8 +--
 .../datafusion-ext-plans/src/sort_exec.rs | 2 +-
 .../datafusion-ext-plans/src/window_exec.rs | 4 +-
 23 files changed, 175 insertions(+), 165 deletions(-)

diff --git a/native-engine/auron-jni-bridge/src/jni_bridge.rs b/native-engine/auron-jni-bridge/src/jni_bridge.rs
index 90614cc23..dac7025c8 100644
--- a/native-engine/auron-jni-bridge/src/jni_bridge.rs
+++ b/native-engine/auron-jni-bridge/src/jni_bridge.rs
@@ -87,9 +87,9 @@ macro_rules! jni_map_error_with_env {
 match $result {
 Ok(result) => $crate::jni_bridge::datafusion::error::Result::Ok(result),
 Err($crate::jni_bridge::jni::errors::Error::JavaException) => {
- let ex = $env.exception_occurred().unwrap();
- $env.exception_describe().unwrap();
- $env.exception_clear().unwrap();
+ let ex = $env.exception_occurred().expect("exception_occurred failed");
+ $env.exception_describe().expect("exception_describe failed");
+ $env.exception_clear().expect("exception_clear failed");
 let message_obj = $env
 .call_method_unchecked(
 ex,
@@ -102,13 +102,13 @@ macro_rules! jni_map_error_with_env {
 .clone(),
 &[],
 )
- .unwrap()
+ .expect("getMessage call failed")
 .l()
- .unwrap();
+ .expect("getMessage did not return an object");
 let message = $env
 .get_string(message_obj.into())
 .map(|s| String::from(s))
- .unwrap();
+ .expect("failed to read exception message");

 Err(
 $crate::jni_bridge::datafusion::error::DataFusionError::External(
diff --git a/native-engine/auron-jni-bridge/src/lib.rs b/native-engine/auron-jni-bridge/src/lib.rs
index bee861f65..6bb3b9866 100644
--- a/native-engine/auron-jni-bridge/src/lib.rs
+++ b/native-engine/auron-jni-bridge/src/lib.rs
@@ -34,7 +34,7 @@ pub fn ensure_jni_bridge_inited() -> Result<()> {
 pub fn is_task_running() -> bool {
 fn is_task_running_impl() -> Result<bool> {
- if !jni_call_static!(JniBridge.isTaskRunning() -> bool).unwrap() {
+ if !jni_call_static!(JniBridge.isTaskRunning() -> bool)? {
 jni_exception_clear!()?;
 return Ok(false);
 }
diff --git a/native-engine/auron/src/http/mod.rs b/native-engine/auron/src/http/mod.rs
index e7f301ffc..b13d2b62a 100644
--- a/native-engine/auron/src/http/mod.rs
+++ b/native-engine/auron/src/http/mod.rs
@@ -50,7 +50,7 @@ impl DefaultHTTPServer {
 .worker_threads(1)
 .enable_io()
 .build()
- .unwrap(),
+ .expect("failed to build tokio runtime"),
 handlers: Mutex::new(vec![]),
 }
 }
diff --git a/native-engine/auron/src/http/pprof.rs b/native-engine/auron/src/http/pprof.rs
index 81cd9c11d..7635026a2 100644
--- a/native-engine/auron/src/http/pprof.rs
+++ b/native-engine/auron/src/http/pprof.rs
@@ -34,7 +34,7 @@ impl Default for PProfRequest {
 fn default() -> Self {
 PProfRequest {
 seconds: 5,
- frequency: NonZeroI32::new(100).unwrap(),
+ frequency: NonZeroI32::new(100).expect("100 is non-zero"),
 }
 }
 }
diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
index 2d8ff7572..5da1cd5aa 100644
--- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs
+++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
@@ -253,7 +253,7 @@ fn try_cast_string_array_to_integer(array: &dyn Array, cast_type: &DataType) ->
 macro_rules! cast {
 ($target_type:ident) => {{
 type B = paste::paste!
{[<$target_type Builder>]}; - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.as_any().downcast_ref::().expect("Excepted a StringArray"); let mut builder = B::new(); for v in array.iter() { @@ -434,7 +434,7 @@ mod test { fn test_boolean_to_string() { let bool_array: ArrayRef = Arc::new(BooleanArray::from_iter(vec![None, Some(true), Some(false)])); - let casted = cast(&bool_array, &DataType::Utf8).unwrap(); + let casted = cast(&bool_array, &DataType::Utf8)?; assert_eq!( as_string_array(&casted), &StringArray::from_iter(vec![None, Some("true"), Some("false")]) @@ -453,9 +453,9 @@ mod test { Some(f64::NEG_INFINITY), Some(f64::NAN), ])); - let casted = cast(&f64_array, &DataType::Int32).unwrap(); + let casted = cast(&f64_array, &DataType::Int32)?; assert_eq!( - as_int32_array(&casted).unwrap(), + as_int32_array(&casted)?, &Int32Array::from_iter(vec![ None, Some(123), @@ -478,9 +478,9 @@ mod test { Some(i32::MAX), Some(i32::MIN), ])); - let casted = cast(&i32_array, &DataType::Float64).unwrap(); + let casted = cast(&i32_array, &DataType::Float64)?; assert_eq!( - as_float64_array(&casted).unwrap(), + as_float64_array(&casted)?, &Float64Array::from_iter(vec![ None, Some(123.0), @@ -500,9 +500,9 @@ mod test { Some(i32::MAX), Some(i32::MIN), ])); - let casted = cast(&i32_array, &DataType::Decimal128(38, 18)).unwrap(); + let casted = cast(&i32_array, &DataType::Decimal128(38, 18))?; assert_eq!( - as_decimal128_array(&casted).unwrap(), + as_decimal128_array(&casted)?, &Decimal128Array::from_iter(vec![ None, Some(123000000000000000000), @@ -511,7 +511,7 @@ mod test { Some(i32::MIN as i128 * 1000000000000000000), ]) .with_precision_and_scale(38, 18) - .unwrap() + ? ); } @@ -528,9 +528,9 @@ mod test { Some("123456789012345.678901234567890"), Some("-123456789012345.678901234567890"), ])); - let casted = cast(&string_array, &DataType::Decimal128(38, 18)).unwrap(); + let casted = cast(&string_array, &DataType::Decimal128(38, 18))?; assert_eq!( - as_decimal128_array(&casted).unwrap(), + as_decimal128_array(&casted)?, &Decimal128Array::from_iter(vec![ None, Some(10000000000), @@ -543,7 +543,7 @@ mod test { Some(-123456789012345678901234567890000i128), ]) .with_precision_and_scale(38, 18) - .unwrap() + ? ); } @@ -559,11 +559,11 @@ mod test { Some(i32::MIN as i128 * 1000000000000000000), ]) .with_precision_and_scale(38, 18) - .unwrap(), + ?, ); - let casted = cast(&decimal_array, &DataType::Utf8).unwrap(); + let casted = cast(&decimal_array, &DataType::Utf8)?; assert_eq!( - casted.as_any().downcast_ref::().unwrap(), + casted.as_any().downcast_ref::()?, &StringArray::from_iter(vec![ None, Some("123.000000000000000000"), @@ -586,9 +586,9 @@ mod test { Some("-123456789012345"), Some("999999999999999999999999999999999"), ])); - let casted = cast(&string_array, &DataType::Int64).unwrap(); + let casted = cast(&string_array, &DataType::Int64)?; assert_eq!( - casted.as_any().downcast_ref::().unwrap(), + casted.as_any().downcast_ref::()?, &Int64Array::from_iter(vec![ None, Some(123), @@ -615,10 +615,10 @@ mod test { Some("9999-99"), Some("99999-01"), ])); - let casted = cast(&string_array, &DataType::Date32).unwrap(); + let casted = cast(&string_array, &DataType::Date32)?; assert_eq!( arrow::compute::cast(&casted, &DataType::Utf8) - .unwrap() + ? 
.as_string(), &StringArray::from_iter(vec![ None, diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs index c4489338a..ab82c54b4 100644 --- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs @@ -684,28 +684,28 @@ mod test { ("u64", array2, true), ("bool", array3, true), ]) - .unwrap(); + ?; // test read after write let mut buf = vec![]; - write_batch(batch.num_rows(), batch.columns(), &mut buf).unwrap(); + write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch ); // test read after write sliced let sliced = batch.slice(1, 2); let mut buf = vec![]; - write_batch(sliced.num_rows(), sliced.columns(), &mut buf).unwrap(); + write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced ); } @@ -724,7 +724,7 @@ mod test { ("list1", list_array.clone(), true), ("list2", list_array.clone(), true), ]) - .unwrap(); + ?; assert_batches_eq!( vec![ @@ -742,10 +742,10 @@ mod test { // test read after write let mut buf = vec![]; - write_batch(batch.num_rows(), batch.columns(), &mut buf).unwrap(); + write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_batches_eq!( vec![ "+-----------+-----------+", @@ -757,16 +757,16 @@ mod test { "| [6, 7] | [6, 7] |", "+-----------+-----------+", ], - &[recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap()] + &[recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?] ); // test read after write sliced let sliced = batch.slice(1, 2); let mut buf = vec![]; - write_batch(sliced.num_rows(), sliced.columns(), &mut buf).unwrap(); + write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_batches_eq!( vec![ "+----------+----------+", @@ -776,7 +776,7 @@ mod test { "| [3, , 5] | [3, , 5] |", "+----------+----------+", ], - &[recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema()).unwrap()] + &[recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?] 
); } @@ -797,35 +797,35 @@ mod test { ]), &[0, 3, 6, 8], // [00,11,22], [33,44,55], [66,77] ) - .unwrap(), + ?, ); let batch = RecordBatch::try_from_iter_with_nullable(vec![ ("map1", map_array.clone(), true), ("map2", map_array.clone(), true), ]) - .unwrap(); + ?; // test read after write let mut buf = vec![]; - write_batch(batch.num_rows(), batch.columns(), &mut buf).unwrap(); + write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch ); // test read after write sliced let sliced = batch.slice(1, 2); let mut buf = vec![]; - write_batch(sliced.num_rows(), sliced.columns(), &mut buf).unwrap(); + write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, sliced ); } @@ -837,35 +837,35 @@ mod test { let c3: ArrayRef = Arc::new(BooleanArray::from(vec![None, None, None, Some(true)])); let c4: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None, Some(31)])); let struct_array: ArrayRef = Arc::new( - StructArray::try_from(vec![("c1", c1), ("c2", c2), ("c3", c3), ("c4", c4)]).unwrap(), + StructArray::try_from(vec![("c1", c1), ("c2", c2), ("c3", c3), ("c4", c4)])?, ); let batch = RecordBatch::try_from_iter_with_nullable(vec![ ("struct1", struct_array.clone(), true), ("struct2", struct_array.clone(), true), ]) - .unwrap(); + ?; // test read after write let mut buf = vec![]; - write_batch(batch.num_rows(), batch.columns(), &mut buf).unwrap(); + write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch ); // test read after write sliced let sliced = batch.slice(1, 2); let mut buf = vec![]; - write_batch(sliced.num_rows(), sliced.columns(), &mut buf).unwrap(); + write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + read_batch(&mut cursor, &batch.schema())??; assert_eq!( - recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema()).unwrap(), + recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced ); } diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs index d3ea0b293..c74fc931a 100644 --- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs +++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs @@ -341,10 +341,10 @@ mod tests { writer.finish_current_buf()?; let mut reader = IpcCompressionReader::new(Cursor::new(buf)); - let (num_rows1, arrays1) = 
reader.read_batch(&schema)?.unwrap();
+ let (num_rows1, arrays1) = reader.read_batch(&schema)??;
 assert_eq!(num_rows1, 2);
 assert_eq!(arrays1, &[test_array1]);
- let (num_rows2, arrays2) = reader.read_batch(&schema)?.unwrap();
+ let (num_rows2, arrays2) = reader.read_batch(&schema)??;
 assert_eq!(num_rows2, 2);
 assert_eq!(arrays2, &[test_array2]);
 assert!(reader.read_batch(&schema)?.is_none());
diff --git a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs
index bbbd2a1e1..4902ee19d 100644
--- a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs
+++ b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs
@@ -36,13 +36,13 @@ pub fn write_scalar(value: &ScalarValue, nullable: bool, output: &mut
 ($v:expr) => {{
 if nullable {
 if let Some(v) = $v {
 write_u8(1, output)?;
 output.write_all(&v.to_ne_bytes())?;
 } else {
 write_u8(0, output)?;
 }
 } else {
- output.write_all(&$v.unwrap().to_ne_bytes())?;
+ output.write_all(&$v.expect("non-nullable scalar must not be null").to_ne_bytes())?;
 }
 }};
 }
diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs
index a68d84ed7..cd84c148b 100644
--- a/native-engine/datafusion-ext-commons/src/spark_hash.rs
+++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs
@@ -102,7 +102,7 @@ fn hash_array(
 macro_rules! hash_array_decimal {
 ($array_type:ident, $column:ident, $hashes:ident, $h:expr) => {
- let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+ let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast");

 if array.null_count() == 0 {
 for (i, hash) in $hashes.iter_mut().enumerate() {
@@ -240,7 +240,7 @@ fn hash_one(
 ) {
 macro_rules! hash_one_primitive {
 ($array_type:ident, $column:ident, $ty:ident, $hash:ident, $idx:ident, $h:expr) => {
- let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+ let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast");
 *$hash = $h(
 (array.value($idx as usize) as $ty).to_le_bytes().as_ref(),
 *$hash,
@@ -250,14 +250,14 @@ fn hash_one(
 macro_rules! hash_one_binary {
 ($array_type:ident, $column:ident, $hash:ident, $idx:ident, $h:expr) => {
- let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+ let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast");
 *$hash = $h(&array.value($idx as usize).as_ref(), *$hash);
 };
 }

 macro_rules!
hash_one_decimal { ($array_type:ident, $column:ident, $hash:ident, $idx:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); *$hash = $h(array.value($idx as usize).to_le_bytes().as_ref(), *$hash); }; } @@ -488,7 +488,7 @@ mod tests { } #[test] - fn test_list_array() { + fn test_list_array() -> Result<()> { // Create inner array data: [1, 2, 3, 4, 5, 6] let value_data = ArrayData::builder(DataType::Int32) .len(6) @@ -496,7 +496,7 @@ mod tests { &[1i32, 2, 3, 4, 5, 6].to_byte_slice(), )) .build() - .unwrap(); + ?; // Create offset array to define list boundaries: [[1, 2], [3, 4, 5], [6]] let list_data_type = DataType::new_list(DataType::Int32, false); @@ -505,7 +505,7 @@ mod tests { .add_buffer(Buffer::from_slice_ref(&[0i32, 2, 5, 6].to_byte_slice())) .add_child_data(value_data) .build() - .unwrap(); + ?; let list_array = ListArray::from(list_data); let array_ref = Arc::new(list_array) as ArrayRef; @@ -513,6 +513,7 @@ mod tests { // Test Murmur3 hash let hashes = create_murmur3_hashes(3, &[array_ref.clone()], 42); assert_eq!(hashes, vec![-222940379, -374492525, -331964951]); + Ok(()) } #[test] @@ -524,7 +525,7 @@ mod tests { &[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice(), )) .build() - .unwrap(); + ?; let value_data = ArrayData::builder(DataType::UInt32) .len(8) .add_buffer(Buffer::from_slice_ref( @@ -532,7 +533,7 @@ mod tests { )) .null_bit_buffer(Some(Buffer::from(&[0b11010110]))) .build() - .unwrap(); + ?; // Construct a buffer for value offsets, for the nested array: // [[0, 1, 2], [3, 4, 5], [6, 7]] @@ -559,7 +560,7 @@ mod tests { .add_buffer(entry_offsets) .add_child_data(entry_struct.into_data()) .build() - .unwrap(); + ?; let map_array = MapArray::from(map_data); assert_eq!(&value_data, &map_array.values().to_data()); @@ -585,7 +586,7 @@ mod tests { unsafe { map_array.value_unchecked(0) } .as_any() .downcast_ref::() - .unwrap() + ? ); for i in 0..3 { assert!(map_array.is_valid(i)); @@ -599,7 +600,7 @@ mod tests { .add_buffer(map_array.to_data().buffers()[0].clone()) .add_child_data(map_array.to_data().child_data()[0].clone()) .build() - .unwrap(); + ?; let map_array = MapArray::from(map_data); assert_eq!(&value_data, &map_array.values().to_data()); @@ -619,14 +620,14 @@ mod tests { .value(0) .as_any() .downcast_ref::() - .unwrap() + ? ); assert_eq!( &struct_array, unsafe { map_array.value_unchecked(0) } .as_any() .downcast_ref::() - .unwrap() + ? 
); } } diff --git a/native-engine/datafusion-ext-exprs/src/cast.rs b/native-engine/datafusion-ext-exprs/src/cast.rs index 12f6c5402..0ec1be8a3 100644 --- a/native-engine/datafusion-ext-exprs/src/cast.rs +++ b/native-engine/datafusion-ext-exprs/src/cast.rs @@ -102,6 +102,7 @@ impl PhysicalExpr for TryCastExpr { #[cfg(test)] mod test { + use std::error::Error; use std::sync::Arc; use arrow::{ @@ -114,7 +115,7 @@ mod test { use crate::cast::TryCastExpr; #[test] - fn test_ok_1() { + fn test_ok_1() -> Result<(), Box> { // input: Array // cast Float32 into Int32 let float_arr: ArrayRef = Arc::new(Float32Array::from(vec![ @@ -138,7 +139,7 @@ mod test { let cast_type = DataType::Int32; let expr = Arc::new(TryCastExpr::new( - phys_expr::col("col", &batch.schema()).unwrap(), + phys_expr::col("col", &batch.schema())?, cast_type, )); @@ -146,7 +147,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(Int32Array::from(vec![ Some(7), @@ -157,10 +158,11 @@ mod test { None, ])); assert_eq!(&ret, &expected); + Ok(()) } #[test] - fn test_ok_2() { + fn test_ok_2() -> Result<(), Box> { // input: Array // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ @@ -179,7 +181,7 @@ mod test { let cast_type = DataType::Float32; let expr = Arc::new(TryCastExpr::new( - phys_expr::col("col", &batch.schema()).unwrap(), + phys_expr::col("col", &batch.schema())?, cast_type, )); @@ -187,7 +189,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(Float32Array::from(vec![ Some(123.0), @@ -197,10 +199,11 @@ mod test { None, ])); assert_eq!(&ret, &expected); + Ok(()) } #[test] - fn test_ok_3() { + fn test_ok_3() -> Result<(), Box> { // input: Scalar // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ @@ -224,7 +227,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(Float32Array::from(vec![ Some(123.4), @@ -234,5 +237,6 @@ mod test { Some(123.4), ])); assert_eq!(&ret, &expected); + Ok(()) } } diff --git a/native-engine/datafusion-ext-exprs/src/get_map_value.rs b/native-engine/datafusion-ext-exprs/src/get_map_value.rs index 892fbdba3..564835981 100644 --- a/native-engine/datafusion-ext-exprs/src/get_map_value.rs +++ b/native-engine/datafusion-ext-exprs/src/get_map_value.rs @@ -181,7 +181,7 @@ mod test { &[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice(), )) .build() - .unwrap(); + ?; let value_data = ArrayData::builder(DataType::UInt32) .len(8) .add_buffer(Buffer::from_slice_ref( @@ -189,7 +189,7 @@ mod test { )) .null_bit_buffer(Some(Buffer::from_slice_ref(&[0b11010110]))) .build() - .unwrap(); + ?; let entry_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8].to_byte_slice()); @@ -215,7 +215,7 @@ mod test { .add_buffer(entry_offsets) .add_child_data(entry_struct.into_data()) .build() - .unwrap(); + ?; let map_array: ArrayRef = Arc::new(MapArray::from(map_data)); let input_batch = RecordBatch::try_from_iter_with_nullable(vec![("test col", map_array, true)])?; @@ -264,7 +264,7 @@ mod test { let map_array: ArrayRef = Arc::new( MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets) - .unwrap(), + ?, ); let input_batch = RecordBatch::try_from_iter_with_nullable(vec![("test col", map_array, true)])?; diff --git a/native-engine/datafusion-ext-exprs/src/string_contains.rs 
b/native-engine/datafusion-ext-exprs/src/string_contains.rs index 1826e8526..b3447f5ae 100644 --- a/native-engine/datafusion-ext-exprs/src/string_contains.rs +++ b/native-engine/datafusion-ext-exprs/src/string_contains.rs @@ -149,14 +149,14 @@ mod test { // test: col1 like 'ba%' let pattern = "ba".to_string(); let expr = Arc::new(StringContainsExpr::new( - phys_expr::col("col1", &batch.schema()).unwrap(), + phys_expr::col("col1", &batch.schema())?, pattern, )); let ret = expr .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ @@ -194,7 +194,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ diff --git a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs index dd5b2dc21..6f0fb6eea 100644 --- a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs @@ -149,14 +149,14 @@ mod test { // test: col2 like '%rr' let pattern = "rr".to_string(); let expr = Arc::new(StringEndsWithExpr::new( - phys_expr::col("col2", &batch.schema()).unwrap(), + phys_expr::col("col2", &batch.schema())?, pattern, )); let ret = expr .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ @@ -197,7 +197,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ diff --git a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs index 4dd073f53..45141a9a9 100644 --- a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs @@ -145,14 +145,14 @@ mod test { let pattern = "ra".to_string(); let expr = Arc::new(StringStartsWithExpr::new( - phys_expr::col("col3", &batch.schema()).unwrap(), + phys_expr::col("col3", &batch.schema())?, pattern, )); let ret = expr .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ None, @@ -185,7 +185,7 @@ mod test { .evaluate(&batch) .expect("Error evaluating expr") .into_array(batch.num_rows()) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ Some(true), diff --git a/native-engine/datafusion-ext-functions/src/spark_crypto.rs b/native-engine/datafusion-ext-functions/src/spark_crypto.rs index fe6dd1842..188a9bd9a 100644 --- a/native-engine/datafusion-ext-functions/src/spark_crypto.rs +++ b/native-engine/datafusion-ext-functions/src/spark_crypto.rs @@ -99,7 +99,7 @@ fn hex_encode>(data: T) -> String { let mut s = String::with_capacity(data.as_ref().len() * 2); for b in data.as_ref() { // Writing to a string never errors, so we can unwrap here. 
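Note: the `expect` introduced below replaces an unwrap on `fmt::Write`; formatting into a String has no failure path, so the call documents an invariant rather than a reachable error. A standalone sketch of the same idiom (illustrative only, not taken from this diff):

    use std::fmt::Write;

    // Hex-encode a byte slice; writing into a String cannot fail,
    // so the expect message records the invariant being relied on.
    fn hex_encode(data: &[u8]) -> String {
        let mut s = String::with_capacity(data.len() * 2);
        for b in data {
            write!(&mut s, "{b:02x}").expect("writing to a String never fails");
        }
        s
    }

    fn main() {
        assert_eq!(hex_encode(&[0x0f, 0xa0]), "0fa0");
    }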
- write!(&mut s, "{b:02x}").unwrap(); + write!(&mut s, "{b:02x}").expect("data"); } s } diff --git a/native-engine/datafusion-ext-functions/src/spark_dates.rs b/native-engine/datafusion-ext-functions/src/spark_dates.rs index f712f9989..5037b8326 100644 --- a/native-engine/datafusion-ext-functions/src/spark_dates.rs +++ b/native-engine/datafusion-ext-functions/src/spark_dates.rs @@ -207,7 +207,7 @@ mod tests { use super::*; #[test] - fn test_spark_year() { + fn test_spark_year() -> Result<()> { let input = Arc::new(Date32Array::from(vec![ Some(0), Some(1000), @@ -222,25 +222,27 @@ mod tests { None, ])); assert_eq!( - &spark_year(&args).unwrap().into_array(1).unwrap(), + &spark_year(&args)?.into_array(1)?, &expected_ret ); + Ok(()) } #[test] - fn test_spark_month() { + fn test_spark_month() -> Result<()> { let input = Arc::new(Date32Array::from(vec![Some(0), Some(35), Some(65), None])); let args = vec![ColumnarValue::Array(input)]; let expected_ret: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3), None])); assert_eq!( - &spark_month(&args).unwrap().into_array(1).unwrap(), + &spark_month(&args)?.into_array(1)?, &expected_ret ); + Ok(()) } #[test] - fn test_spark_day() { + fn test_spark_day() -> Result<()> { let input = Arc::new(Date32Array::from(vec![ Some(0), Some(10), @@ -259,13 +261,14 @@ mod tests { None, ])); assert_eq!( - &spark_day(&args).unwrap().into_array(1).unwrap(), + &spark_day(&args)?.into_array(1)?, &expected_ret ); + Ok(()) } #[test] - fn test_spark_quarter_basic() { + fn test_spark_quarter_basic() -> Result<()> { // Date32 days relative to 1970-01-01: // 0 -> 1970-01-01 (Q1) // 40 -> ~1970-02-10 (Q1) @@ -290,19 +293,21 @@ mod tests { None, ])); - let out = spark_quarter(&args).unwrap().into_array(1).unwrap(); + let out = spark_quarter(&args)?.into_array(1)?; assert_eq!(&out, &expected); + Ok(()) } #[test] - fn test_spark_quarter_null_only() { + fn test_spark_quarter_null_only() -> Result<()> { // Ensure NULL propagation let input = Arc::new(Date32Array::from(vec![None, None])); let args = vec![ColumnarValue::Array(input)]; let expected: ArrayRef = Arc::new(Int32Array::from(vec![None, None])); - let out = spark_quarter(&args).unwrap().into_array(1).unwrap(); + let out = spark_quarter(&args)?.into_array(1)?; assert_eq!(&out, &expected); + Ok(()) } #[inline] diff --git a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs index c1aaf91a5..1090bf40a 100644 --- a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs +++ b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs @@ -613,108 +613,108 @@ mod test { let path = "$.owner"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("amy".to_owned()) ); let path = "$. owner"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("amy".to_owned()) ); let path = "$.store.bicycle.price"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("19.95".to_owned()) ); let path = "$. store. bicycle. price"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("19.95".to_owned()) ); let path = "$.store.fruit[0]"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$. 
store. fruit[0]"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$.store.fruit[1].weight"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("9".to_owned()) ); let path = "$.store.fruit[*]"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$. store. fruit[*]"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$.store.fruit.[1].type"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("pear".to_owned()) ); let path = "$. store. fruit. [1]. type"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, Some("pear".to_owned()) ); let path = "$.non_exist_key"; assert_eq!( HiveGetJsonObjectEvaluator::try_new(path) - .unwrap() + ? .evaluate(input) - .unwrap(), + ?, None ); Ok(()) @@ -748,36 +748,36 @@ mod test { } }"#; let input_array = Arc::new(StringArray::from(vec![input])); - let parsed = spark_parse_json(&[ColumnarValue::Array(input_array)]).unwrap(); + let parsed = spark_parse_json(&[ColumnarValue::Array(input_array)])?; let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = r.as_string::().iter().next()?; assert_eq!(v, Some(r#"["浦东","西直门"]"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.NOT_EXISTED")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = r.as_string::().iter().next()?; assert_eq!(v, None); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.name")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); - assert!(v.unwrap().contains("Asher")); + let v = r.as_string::().iter().next()?; + assert!(v?.contains("Asher")); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.city")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = r.as_string::().iter().next()?; assert_eq!(v, Some(r#"["1.234",1.234]"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[0]")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = r.as_string::().iter().next()?; assert_eq!(v, Some(r#"{"city":"1.234","county":"浦东"}"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[].county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = r.as_string::().iter().next()?; assert_eq!(v, Some(r#"["浦东","西直门"]"#)); Ok(()) } @@ -814,7 +814,7 @@ mod test { let path = ColumnarValue::Scalar(ScalarValue::from("$.i1.j2")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().unwrap(); + let v = 
r.as_string::().iter().next()?; // NOTE: // standard jsonpath should output [[200,300],[400, 500],null,"other"] diff --git a/native-engine/datafusion-ext-functions/src/spark_null_if.rs b/native-engine/datafusion-ext-functions/src/spark_null_if.rs index 41af8e478..6270efc61 100644 --- a/native-engine/datafusion-ext-functions/src/spark_null_if.rs +++ b/native-engine/datafusion-ext-functions/src/spark_null_if.rs @@ -161,7 +161,7 @@ mod test { let expected = Decimal128Array::from(vec![Some(1230427389124691)]) .with_precision_and_scale(20, 2) - .unwrap(); + ?; let expected: ArrayRef = Arc::new(expected); assert_eq!(&result, &expected); diff --git a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs index c29b58f13..59f8540b0 100644 --- a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs +++ b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs @@ -231,10 +231,10 @@ impl Agg for SparkUDAFWrapper { let rows = jni_call!(SparkUDAFWrapperContext(jcontext.as_obj()).initialize( num_rows as i32, )-> JObject) - .unwrap(); + .expect("num_rows"); let jcontext = self.jcontext().expect("jcontext"); - let obj = jni_new_global_ref!(rows.as_obj()).unwrap(); + let obj = jni_new_global_ref!(rows.as_obj()).expect("rows"); Box::new(AccUDAFBufferRowsColumn { obj, jcontext }) } diff --git a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs index 5ff21c30b..1e34f34ee 100644 --- a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs +++ b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs @@ -709,7 +709,7 @@ mod tests { let columns: Vec = vec![Arc::new(id_array), Arc::new(name_array)]; // Create RecordBatch - let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + let batch = RecordBatch::try_new(schema.clone(), columns)?; // Create sort fields for RowConverter let sort_fields: Vec = schema @@ -721,8 +721,8 @@ mod tests { .collect(); // Convert RecordBatch to Rows - let converter = RowConverter::new(sort_fields.clone()).unwrap(); - let rows = converter.convert_columns(&batch.columns()).unwrap(); + let converter = RowConverter::new(sort_fields.clone())?; + let rows = converter.convert_columns(&batch.columns())?; // Create field configs for RowNullChecker let field_configs: Vec<(DataType, SortOptions)> = schema @@ -761,7 +761,7 @@ mod tests { let id_array = Int32Array::from(Vec::>::new()); let columns: Vec = vec![Arc::new(id_array)]; - let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + let batch = RecordBatch::try_new(schema.clone(), columns)?; let sort_fields: Vec = schema .fields() @@ -771,8 +771,8 @@ mod tests { }) .collect(); - let converter = RowConverter::new(sort_fields.clone()).unwrap(); - let rows = converter.convert_columns(&batch.columns()).unwrap(); + let converter = RowConverter::new(sort_fields.clone())?; + let rows = converter.convert_columns(&batch.columns())?; let null_buffer = checker.has_nulls(&rows); assert_eq!(null_buffer.len(), 0); @@ -785,7 +785,7 @@ mod tests { let id_array = Int32Array::from(vec![None, None, None]); let columns: Vec = vec![Arc::new(id_array)]; - let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + let batch = RecordBatch::try_new(schema.clone(), columns)?; let sort_fields: Vec = schema .fields() @@ -795,8 +795,8 @@ mod tests { }) .collect(); - let converter = RowConverter::new(sort_fields.clone()).unwrap(); - let 
rows = converter.convert_columns(&batch.columns()).unwrap();
+ let converter = RowConverter::new(sort_fields.clone())?;
+ let rows = converter.convert_columns(&batch.columns())?;

 let field_configs: Vec<(DataType, SortOptions)> = schema
 .fields()
@@ -825,7 +825,7 @@ mod tests {
 let id_array = Int32Array::from(vec![Some(1), Some(2), Some(3)]);
 let name_array = StringArray::from(vec![Some("Alice"), Some("Bob"), Some("Charlie")]);
 let columns: Vec<ArrayRef> = vec![Arc::new(id_array), Arc::new(name_array)];
- let batch = RecordBatch::try_new(schema.clone(), columns).unwrap();
+ let batch = RecordBatch::try_new(schema.clone(), columns)?;

 let sort_fields: Vec<SortField> = schema
 .fields()
@@ -835,8 +835,8 @@ mod tests {
 })
 .collect();

- let converter = RowConverter::new(sort_fields.clone()).unwrap();
- let rows = converter.convert_columns(&batch.columns()).unwrap();
+ let converter = RowConverter::new(sort_fields.clone())?;
+ let rows = converter.convert_columns(&batch.columns())?;

 let field_configs: Vec<(DataType, SortOptions)> = schema
 .fields()
diff --git a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs
index 58a60df25..ee5fc3c5f 100644
--- a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs
+++ b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs
@@ -373,22 +373,22 @@ mod test {
 a: (&str, &Vec<i32>),
 b: (&str, &Vec<i32>),
 c: (&str, &Vec<i32>),
- ) -> RecordBatch {
+ ) -> Result<RecordBatch> {
 let schema = Schema::new(vec![
 Field::new(a.0, DataType::Int32, false),
 Field::new(b.0, DataType::Int32, false),
 Field::new(c.0, DataType::Int32, false),
 ]);
- RecordBatch::try_new(
+ let batch = RecordBatch::try_new(
 Arc::new(schema),
 vec![
 Arc::new(Int32Array::from(a.1.clone())),
 Arc::new(Int32Array::from(b.1.clone())),
 Arc::new(Int32Array::from(c.1.clone())),
 ],
- )
- .unwrap()
+ )?;
+ Ok(batch)
 }

 #[tokio::test]
diff --git a/native-engine/datafusion-ext-plans/src/sort_exec.rs b/native-engine/datafusion-ext-plans/src/sort_exec.rs
index 1dd166788..e6ace53df 100644
--- a/native-engine/datafusion-ext-plans/src/sort_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/sort_exec.rs
@@ -1592,7 +1592,7 @@ mod fuzztest {
 None,
 )?);
 let sort = Arc::new(datafusion::physical_plan::sorts::sort::SortExec::new(
- LexOrdering::new(sort_exprs.iter().cloned()).unwrap(),
+ LexOrdering::new(sort_exprs.iter().cloned()).expect("sort_exprs"),
 input,
 ));
 let output = datafusion::physical_plan::collect(sort.clone(), task_ctx.clone()).await?;
diff --git a/native-engine/datafusion-ext-plans/src/window_exec.rs b/native-engine/datafusion-ext-plans/src/window_exec.rs
index 1ba6f5355..fe3b307a3 100644
--- a/native-engine/datafusion-ext-plans/src/window_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/window_exec.rs
@@ -302,7 +302,7 @@ mod test {
 Arc::new(Int32Array::from(c.1.clone())),
 ],
 )
- .unwrap()
+ .expect("record_batch")
 }

 fn build_table(
@@ -312,7 +312,7 @@ mod test {
 ) -> Arc<dyn ExecutionPlan> {
 let batch = build_table_i32(a, b, c);
 let schema = batch.schema();
- Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).unwrap())
+ Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None).expect("try_new"))
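Note: unlike the other test modules in this series, the window_exec helpers above keep infallible signatures, so there is no Result for `?` to propagate into; `expect` with a descriptive message is the replacement clippy::unwrap_used points toward. A minimal sketch of the distinction (illustrative, not from this patch):

    // `?` only works where the enclosing function returns Result:
    fn fallible() -> Result<i32, std::num::ParseIntError> {
        let n: i32 = "42".parse()?;
        Ok(n)
    }

    // With an infallible signature, state the invariant instead:
    fn infallible() -> i32 {
        "42".parse().expect("literal is a valid i32")
    }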
} #[tokio::test] From 416257ce15a1e803b701d97c63cf5a42c57baa59 Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 25 Nov 2025 21:47:38 +0800 Subject: [PATCH 03/10] up --- .../datafusion-ext-commons/src/arrow/cast.rs | 30 ++++++++++++------ .../src/io/batch_serde.rs | 28 ++++++++++------- .../src/io/ipc_compression.rs | 4 +-- .../datafusion-ext-commons/src/spark_hash.rs | 11 ++++--- .../datafusion-ext-exprs/src/cast.rs | 2 +- .../src/string_contains.rs | 7 +++-- .../src/string_ends_with.rs | 6 ++-- .../src/string_starts_with.rs | 7 +++-- .../src/spark_check_overflow.rs | 2 +- .../src/spark_get_json_object.rs | 31 +++++++++++++------ .../datafusion-ext-plans/src/joins/test.rs | 10 +++--- 11 files changed, 85 insertions(+), 53 deletions(-) diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index 5da1cd5aa..004674ffb 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -429,9 +429,11 @@ mod test { use datafusion::common::cast::{as_decimal128_array, as_float64_array, as_int32_array}; use super::*; + use std::result::Result; + use std::error::Error; #[test] - fn test_boolean_to_string() { + fn test_boolean_to_string() -> Result<(), Box> { let bool_array: ArrayRef = Arc::new(BooleanArray::from_iter(vec![None, Some(true), Some(false)])); let casted = cast(&bool_array, &DataType::Utf8)?; @@ -439,10 +441,11 @@ mod test { as_string_array(&casted), &StringArray::from_iter(vec![None, Some("true"), Some("false")]) ); + Ok(()) } #[test] - fn test_float_to_int() { + fn test_float_to_int() -> Result<(), Box> { let f64_array: ArrayRef = Arc::new(Float64Array::from_iter(vec![ None, Some(123.456), @@ -467,10 +470,11 @@ mod test { Some(0), ]) ); + Ok(()) } #[test] - fn test_int_to_float() { + fn test_int_to_float() -> Result<(), Box> { let i32_array: ArrayRef = Arc::new(Int32Array::from_iter(vec![ None, Some(123), @@ -489,10 +493,11 @@ mod test { Some(i32::MIN as f64), ]) ); + Ok(()) } #[test] - fn test_int_to_decimal() { + fn test_int_to_decimal() -> Result<(), Box> { let i32_array: ArrayRef = Arc::new(Int32Array::from_iter(vec![ None, Some(123), @@ -513,10 +518,11 @@ mod test { .with_precision_and_scale(38, 18) ? ); + Ok(()) } #[test] - fn test_string_to_decimal() { + fn test_string_to_decimal() -> Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![ None, Some("1e-8"), @@ -545,10 +551,11 @@ mod test { .with_precision_and_scale(38, 18) ? 
);
+ Ok(())
 }

 #[test]
- fn test_decimal_to_string() {
+ fn test_decimal_to_string() -> Result<(), Box<dyn Error>> {
 let decimal_array: ArrayRef = Arc::new(
 Decimal128Array::from_iter(vec![
 None,
@@ -563,7 +570,7 @@ mod test {
 );
 let casted = cast(&decimal_array, &DataType::Utf8)?;
 assert_eq!(
- casted.as_any().downcast_ref::<StringArray>()?,
+ casted.as_any().downcast_ref::<StringArray>().ok_or("StringArray")?,
 &StringArray::from_iter(vec![
 None,
 Some("123.000000000000000000"),
@@ -573,10 +580,11 @@ mod test {
 Some("-2147483648.000000000000000000"),
 ])
 );
+ Ok(())
 }

 #[test]
- fn test_string_to_bigint() {
+ fn test_string_to_bigint() -> Result<(), Box<dyn Error>> {
 let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![
 None,
 Some("123"),
@@ -588,7 +596,7 @@ mod test {
 ]));
 let casted = cast(&string_array, &DataType::Int64)?;
 assert_eq!(
- casted.as_any().downcast_ref::<Int64Array>()?,
+ casted.as_any().downcast_ref::<Int64Array>().ok_or("Int64Array")?,
 &Int64Array::from_iter(vec![
 None,
 Some(123),
@@ -599,10 +607,11 @@ mod test {
 None,
 ])
 );
+ Ok(())
 }

 #[test]
- fn test_string_to_date() {
+ fn test_string_to_date() -> Result<(), Box<dyn Error>> {
 let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![
 None,
 Some("2001-02-03"),
@@ -633,5 +642,6 @@ mod test {
 None,
 ])
 );
+ Ok(())
 }
 }
diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
index ab82c54b4..cec9652ae 100644
--- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
+++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
@@ -660,7 +660,7 @@ mod test {
 };

 #[test]
- fn test_write_and_read_batch() {
+ fn test_write_and_read_batch() -> std::result::Result<(), Box<dyn std::error::Error>> {
 let array1: ArrayRef = Arc::new(StringArray::from_iter([
 Some("20220101".to_owned()),
 Some("20220102你好🍹".to_owned()),
@@ -691,7 +691,7 @@ mod test {
 write_batch(batch.num_rows(), batch.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
 batch
@@ -703,15 +703,16 @@ mod test {
 write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
 sliced
 );
+ Ok(())
 }

 #[test]
- fn test_write_and_read_batch_for_list() {
+ fn test_write_and_read_batch_for_list() -> std::result::Result<(), Box<dyn std::error::Error>> {
 let data = vec![
 Some(vec![Some(0), Some(1), Some(2)]),
 None,
@@ -745,7 +746,7 @@ mod test {
 write_batch(batch.num_rows(), batch.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_batches_eq!(
 vec![
 "+-----------+-----------+",
@@ -766,7 +767,7 @@ mod test {
 write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_batches_eq!(
 vec![
 "+----------+----------+",
@@ -778,10 +779,11 @@ mod test {
 ],
 &[recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?]
 );
+ Ok(())
 }

 #[test]
- fn test_write_and_read_batch_for_map() {
+ fn test_write_and_read_batch_for_map() -> std::result::Result<(), Box<dyn std::error::Error>> {
 let map_array: ArrayRef = Arc::new(
 MapArray::new_from_strings(
 ["00", "11", "22", "33", "44", "55", "66", "77"].into_iter(),
@@ -811,7 +813,7 @@ mod test {
 write_batch(batch.num_rows(), batch.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
 batch
@@ -823,15 +825,16 @@ mod test {
 write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?,
 sliced
 );
+ Ok(())
 }

 #[test]
- fn test_write_and_read_batch_for_struct() {
+ fn test_write_and_read_batch_for_struct() -> std::result::Result<(), Box<dyn std::error::Error>> {
 let c1: ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true]));
 let c2: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
 let c3: ArrayRef = Arc::new(BooleanArray::from(vec![None, None, None, Some(true)]));
@@ -851,7 +854,7 @@ mod test {
 write_batch(batch.num_rows(), batch.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
 batch
@@ -863,10 +866,11 @@ mod test {
 write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?;
 let mut cursor = Cursor::new(buf);
 let (decoded_num_rows, decoded_cols) =
- read_batch(&mut cursor, &batch.schema())??;
+ read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty");
 assert_eq!(
 recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
 sliced
 );
+ Ok(())
 }
 }
diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs
index c74fc931a..eff7669d2 100644
--- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs
+++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs
@@ -341,10 +341,10 @@ mod tests {
 writer.finish_current_buf()?;

 let mut reader = IpcCompressionReader::new(Cursor::new(buf));
- let (num_rows1, arrays1) = reader.read_batch(&schema)??;
+ let (num_rows1, arrays1) = reader.read_batch(&schema).expect("read_batch").expect("non-empty");
 assert_eq!(num_rows1, 2);
 assert_eq!(arrays1, &[test_array1]);
- let (num_rows2, arrays2) = reader.read_batch(&schema)??;
+ let (num_rows2, arrays2) = reader.read_batch(&schema).expect("read_batch").expect("non-empty");
 assert_eq!(num_rows2, 2);
 assert_eq!(arrays2, &[test_array2]);
 assert!(reader.read_batch(&schema)?.is_none());
diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs
index cd84c148b..43aff709f 100644
--- a/native-engine/datafusion-ext-commons/src/spark_hash.rs
+++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs
@@ -488,7 +488,7 @@ mod tests {
 }

 #[test]
- fn test_list_array() -> Result<()> {
+ fn test_list_array() -> Result<(,
sliced.schema())?] ); + Ok(()) } #[test] - fn test_write_and_read_batch_for_map() { + fn test_write_and_read_batch_for_map() -> std::result::Result<(), Box> { let map_array: ArrayRef = Arc::new( MapArray::new_from_strings( ["00", "11", "22", "33", "44", "55", "66", "77"].into_iter(), @@ -811,7 +813,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())??; + read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -823,15 +825,16 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())??; + read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, sliced ); + Ok(()) } #[test] - fn test_write_and_read_batch_for_struct() { + fn test_write_and_read_batch_for_struct() -> std::result::Result<(), Box> { let c1: ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true])); let c2: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); let c3: ArrayRef = Arc::new(BooleanArray::from(vec![None, None, None, Some(true)])); @@ -851,7 +854,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())??; + read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -863,10 +866,11 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())??; + read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced ); + Ok(()) } } diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs index c74fc931a..eff7669d2 100644 --- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs +++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs @@ -341,10 +341,10 @@ mod tests { writer.finish_current_buf()?; let mut reader = IpcCompressionReader::new(Cursor::new(buf)); - let (num_rows1, arrays1) = reader.read_batch(&schema)??; + let (num_rows1, arrays1) = reader.read_batch(&schema).expect("read_batch").expect("non-empty"); assert_eq!(num_rows1, 2); assert_eq!(arrays1, &[test_array1]); - let (num_rows2, arrays2) = reader.read_batch(&schema)??; + let (num_rows2, arrays2) = reader.read_batch(&schema).expect("read_batch").expect("non-empty"); assert_eq!(num_rows2, 2); assert_eq!(arrays2, &[test_array2]); assert!(reader.read_batch(&schema)?.is_none()); diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index cd84c148b..43aff709f 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -488,7 +488,7 @@ mod tests { } #[test] - fn test_list_array() -> Result<()> { + fn test_list_array() -> Result<(), 
Box> { // Create inner array data: [1, 2, 3, 4, 5, 6] let value_data = ArrayData::builder(DataType::Int32) .len(6) @@ -517,7 +517,7 @@ mod tests { } #[test] - fn test_map_array() { + fn test_map_array() -> Result<(), Box> { // Construct key and values let key_data = ArrayData::builder(DataType::Int32) .len(8) @@ -586,7 +586,7 @@ mod tests { unsafe { map_array.value_unchecked(0) } .as_any() .downcast_ref::() - ? + .expect("Expected a StructArray") ); for i in 0..3 { assert!(map_array.is_valid(i)); @@ -620,14 +620,15 @@ mod tests { .value(0) .as_any() .downcast_ref::() - ? + .expect("Expected a StructArray") ); assert_eq!( &struct_array, unsafe { map_array.value_unchecked(0) } .as_any() .downcast_ref::() - ? + .expect("Expected a StructArray") ); + Ok(()) } } diff --git a/native-engine/datafusion-ext-exprs/src/cast.rs b/native-engine/datafusion-ext-exprs/src/cast.rs index 0ec1be8a3..c257b2f72 100644 --- a/native-engine/datafusion-ext-exprs/src/cast.rs +++ b/native-engine/datafusion-ext-exprs/src/cast.rs @@ -102,7 +102,7 @@ impl PhysicalExpr for TryCastExpr { #[cfg(test)] mod test { - use std::error::Error; + use std::sync::Arc; use arrow::{ diff --git a/native-engine/datafusion-ext-exprs/src/string_contains.rs b/native-engine/datafusion-ext-exprs/src/string_contains.rs index b3447f5ae..2b5127804 100644 --- a/native-engine/datafusion-ext-exprs/src/string_contains.rs +++ b/native-engine/datafusion-ext-exprs/src/string_contains.rs @@ -25,7 +25,6 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use arrow::array::DictionaryArray; use datafusion::{ common::{Result, ScalarValue}, logical_expr::ColumnarValue, @@ -129,7 +128,7 @@ mod test { use crate::string_contains::StringContainsExpr; #[test] - fn test_ok() { + fn test_ok() -> std::result::Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("abrr".to_string()), @@ -167,10 +166,11 @@ mod test { None, ])); assert_eq!(&ret, &expected); + Ok(()) } #[test] - fn test_scalar_string() { + fn test_scalar_string() -> std::result::Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("abrr".to_string()), @@ -205,5 +205,6 @@ mod test { Some(true), ])); assert_eq!(&ret, &expected); + Ok(()) } } diff --git a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs index 6f0fb6eea..9b65d1054 100644 --- a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs @@ -131,7 +131,7 @@ mod test { use crate::string_ends_with::StringEndsWithExpr; #[test] - fn test_array() { + fn test_array() -> std::result::Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("abrrbrr".to_string()), Some("rrjndebcsabdji".to_string()), @@ -167,10 +167,11 @@ mod test { Some(false), ])); assert_eq!(&ret, &expected); + Ok(()) } #[test] - fn test_scalar_string() { + fn test_scalar_string() -> std::result::Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("Hello, Rust".to_string()), @@ -208,5 +209,6 @@ mod test { Some(false), ])); assert_eq!(&ret, &expected); + Ok(()) } } diff --git a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs index 45141a9a9..62de41b01 100644 --- 
a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs @@ -118,6 +118,7 @@ impl PhysicalExpr for StringStartsWithExpr { #[cfg(test)] mod test { + use std::sync::Arc; use arrow::{ @@ -130,7 +131,7 @@ mod test { use crate::string_starts_with::StringStartsWithExpr; #[test] - fn test_ok() { + fn test_ok() -> std::result::Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ None, Some("rabaok".to_string()), @@ -163,10 +164,11 @@ mod test { // None ])); assert_eq!(&ret, &expected); + Ok(()) } #[test] - fn test_scalar_string() { + fn test_scalar_string() -> std::result::Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("Hello, Rust".to_string()), Some("Hello, He".to_string()), @@ -195,5 +197,6 @@ mod test { Some(true), ])); assert_eq!(&ret, &expected); + Ok(()) } } diff --git a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs index 7fbdd95a7..c97676e28 100644 --- a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs +++ b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs @@ -55,7 +55,7 @@ pub fn spark_check_overflow(args: &[ColumnarValue]) -> Result { _ => ColumnarValue::Scalar(ScalarValue::Decimal128(None, to_precision, to_scale)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::().expect("Expected a Decimal128Array");; + let array = array.as_any().downcast_ref::().expect("Expected a Decimal128Array"); let mut output = Decimal128Builder::with_capacity(array.len()); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs index 1090bf40a..8b2e2a1a6 100644 --- a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs +++ b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs @@ -42,7 +42,7 @@ pub fn spark_get_json_object(args: &[ColumnarValue]) -> Result { let json_strings = json_string_array .as_any() .downcast_ref::() - .expect("Expected a StringArray");; + .expect("Expected a StringArray"); let path_string = match &args[1] { ColumnarValue::Scalar(ScalarValue::Utf8(str)) => match str { Some(path) => path, @@ -87,7 +87,7 @@ pub fn spark_parse_json(args: &[ColumnarValue]) -> Result { let json_strings = json_string_array .as_any() .downcast_ref::() - .expect("Expected a StringArray");; + .expect("Expected a StringArray"); let fallback_enabled = conf::PARSE_JSON_ERROR_FALLBACK.value().unwrap_or(false); let json_values: Vec>> = json_strings @@ -266,6 +266,17 @@ enum HiveGetJsonObjectError { InvalidInput, } +impl std::fmt::Display for HiveGetJsonObjectError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + HiveGetJsonObjectError::InvalidJsonPath => "InvalidJsonPath", + HiveGetJsonObjectError::InvalidInput => "InvalidInput", + }) + } +} + +impl std::error::Error for HiveGetJsonObjectError {} + struct HiveGetJsonObjectEvaluator { matchers: Vec, } @@ -752,32 +763,32 @@ mod test { let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); assert_eq!(v, Some(r#"["浦东","西直门"]"#)); let path = 
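// Editor's note (illustrative, not part of the patch): `.iter().next()` yields
// an `Option`, and `?` on an `Option` only compiles inside a function that
// itself returns `Option`. In these `Result`-returning tests the outer layer
// is therefore unwrapped with `expect` and the inner nullability converted
// with `ok_or`, e.g.:
//
//     let v = r.as_string::<i32>().iter().next().expect("empty result array");
//     let s = v.ok_or("unexpected null entry")?;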
ColumnarValue::Scalar(ScalarValue::from("$.message.location.NOT_EXISTED")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); assert_eq!(v, None); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.name")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; - assert!(v?.contains("Asher")); + let v = r.as_string::().iter().next().expect("v").ok_or("v")?; + assert!(v.contains("Asher")); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.city")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); assert_eq!(v, Some(r#"["1.234",1.234]"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[0]")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); assert_eq!(v, Some(r#"{"city":"1.234","county":"浦东"}"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[].county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); assert_eq!(v, Some(r#"["浦东","西直门"]"#)); Ok(()) } @@ -814,7 +825,7 @@ mod test { let path = ColumnarValue::Scalar(ScalarValue::from("$.i1.j2")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next()?; + let v = r.as_string::().iter().next().expect("v"); // NOTE: // standard jsonpath should output [[200,300],[400, 500],null,"other"] diff --git a/native-engine/datafusion-ext-plans/src/joins/test.rs b/native-engine/datafusion-ext-plans/src/joins/test.rs index 894256fb4..5d44ed205 100644 --- a/native-engine/datafusion-ext-plans/src/joins/test.rs +++ b/native-engine/datafusion-ext-plans/src/joins/test.rs @@ -28,7 +28,7 @@ mod tests { use auron_memmgr::MemManager; use datafusion::{ assert_batches_sorted_eq, - common::JoinSide, + common::{JoinSide, Result}, physical_expr::expressions::Column, physical_plan::{ExecutionPlan, common, joins::utils::*, test::TestMemoryExec}, prelude::SessionContext, @@ -80,13 +80,13 @@ mod tests { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result> { + ) -> Result, Box> { let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) } - fn build_table_from_batches(batches: Vec) -> Result> { + fn build_table_from_batches(batches: Vec) -> Result, Box> { let first = batches.into_iter().next().ok_or_else(|| DataFusionError::Internal("empty batches".into()))?; let schema = first.schema(); Ok(Arc::new(TestMemoryExec::try_new(&[batches], schema, None)?)) @@ -96,7 +96,7 @@ mod tests { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result> { + ) -> Result, Box> { let schema = Schema::new(vec![ Field::new(a.0, DataType::Date32, false), Field::new(b.0, DataType::Date32, false), @@ -120,7 +120,7 @@ mod tests { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result> { + ) -> Result, Box> { let schema = Schema::new(vec![ Field::new(a.0, DataType::Date64, false), Field::new(b.0, DataType::Date64, false), From 59c8a21d64bd7e9b22b56ef11b9031682d5f1518 Mon Sep 17 00:00:00 2001 From: 
yew1eb
Date: Tue, 25 Nov 2025 23:09:49 +0800
Subject: [PATCH 04/10] Apply cargo fmt and remove unused imports

---
 native-engine/auron-serde/build.rs            |   3 +-
 native-engine/auron-serde/src/from_proto.rs   |  57 +++--
 native-engine/auron/src/exec.rs               |   5 +-
 native-engine/auron/src/http/mod.rs           |   3 +-
 .../datafusion-ext-commons/src/arrow/cast.rs  |  44 ++--
 .../src/arrow/eq_comparator.rs                |  44 ++--
 .../src/arrow/selection.rs                    |   6 +-
 .../src/io/batch_serde.rs                     | 128 ++++++-----
 .../src/io/ipc_compression.rs                 |  10 +-
 .../datafusion-ext-commons/src/spark_hash.rs  |  62 ++++--
 .../src/bloom_filter_might_contain.rs         |   5 +-
 .../datafusion-ext-exprs/src/cast.rs          |  17 +-
 .../datafusion-ext-exprs/src/get_map_value.rs |  18 +-
 native-engine/datafusion-ext-exprs/src/lib.rs |   4 +-
 .../src/string_contains.rs                    |  11 +-
 .../src/string_ends_with.rs                   |  11 +-
 .../src/string_starts_with.rs                 |  13 +-
 .../src/spark_check_overflow.rs               |   5 +-
 .../src/spark_dates.rs                        |  19 +-
 .../src/spark_get_json_object.rs              |  81 +++----
 .../src/spark_make_decimal.rs                 |   5 +-
 .../src/spark_null_if.rs                      |   5 +-
 .../src/spark_unscaled_value.rs               |   5 +-
 .../datafusion-ext-plans/src/agg/agg_ctx.rs   |   3 +-
 .../datafusion-ext-plans/src/agg/collect.rs   |   5 +-
 .../src/agg/spark_udaf_wrapper.rs             |   2 +-
 .../datafusion-ext-plans/src/agg_exec.rs      |   1 -
 .../src/common/row_null_checker.rs            |  22 +-
 .../datafusion-ext-plans/src/expand_exec.rs   | 201 ++++++++----------
 .../datafusion-ext-plans/src/joins/test.rs    | 127 ++++++-----
 .../datafusion-ext-plans/src/limit_exec.rs    |   8 +-
 .../src/shuffle/buffered_data.rs              |   6 +-
 .../datafusion-ext-plans/src/sort_exec.rs     |   8 +-
 .../src/window/window_context.rs              |  26 ++-
 .../datafusion-ext-plans/src/window_exec.rs   |  25 ++-
 35 files changed, 539 insertions(+), 456 deletions(-)

diff --git a/native-engine/auron-serde/build.rs b/native-engine/auron-serde/build.rs
index fa7ae5221..b3f59f275 100644
--- a/native-engine/auron-serde/build.rs
+++ b/native-engine/auron-serde/build.rs
@@ -45,6 +45,5 @@ fn main() -> Result<(), String> {
     }
     prost_build
         .compile_protos(&["proto/auron.proto"], &["proto"])
-        .map_err(|e| format!("protobuf compilation failed: {e}")
-        )
+        .map_err(|e| format!("protobuf compilation failed: {e}"))
 }

diff --git a/native-engine/auron-serde/src/from_proto.rs b/native-engine/auron-serde/src/from_proto.rs
index 3eb22f36b..442223d4f 100644
--- a/native-engine/auron-serde/src/from_proto.rs
+++ b/native-engine/auron-serde/src/from_proto.rs
@@ -151,7 +151,8 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
             Ok(Arc::new(FilterExec::try_new(predicates, input)?))
         }
         PhysicalPlanType::ParquetScan(scan) => {
-            let conf: FileScanConfig = scan.base_conf.as_ref().expect("base_conf").try_into()?;
+            let conf: FileScanConfig =
+                scan.base_conf.as_ref().expect("base_conf").try_into()?;
             let predicate = scan
                 .pruning_predicates
                 .iter()
@@ -168,7 +169,8 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
             )))
         }
         PhysicalPlanType::OrcScan(scan) => {
-            let conf: FileScanConfig = scan.base_conf.as_ref().expect("base_conf").try_into()?;
+            let conf: FileScanConfig =
+                scan.base_conf.as_ref().expect("base_conf").try_into()?;
             let predicate = scan
                 .pruning_predicates
                 .iter()
@@ -192,10 +194,14 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
             .on
             .iter()
             .map(|col| {
-                let left_key =
-                    try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?;
-                let right_key =
-                    try_parse_physical_expr(&col.right.as_ref().expect("right"), &right.schema())?;
+                let left_key = try_parse_physical_expr(
+                    &col.left.as_ref().expect("left"),
+                    &left.schema(),
+                )?;
+                let right_key = try_parse_physical_expr(
+
&col.right.as_ref().expect("right"), + &right.schema(), + )?; Ok((left_key, right_key)) }) .collect::>()?; @@ -229,10 +235,14 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .on .iter() .map(|col| { - let left_key = - try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?; - let right_key = - try_parse_physical_expr(&col.right.as_ref().expect("right"), &right.schema())?; + let left_key = try_parse_physical_expr( + &col.left.as_ref().expect("left"), + &left.schema(), + )?; + let right_key = try_parse_physical_expr( + &col.right.as_ref().expect("right"), + &right.schema(), + )?; Ok((left_key, right_key)) }) .collect::>()?; @@ -339,10 +349,14 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .on .iter() .map(|col| { - let left_key = - try_parse_physical_expr(&col.left.as_ref().expect("left"), &left.schema())?; - let right_key = - try_parse_physical_expr(&col.right.as_ref().expect("right"), &right.schema())?; + let left_key = try_parse_physical_expr( + &col.left.as_ref().expect("left"), + &left.schema(), + )?; + let right_key = try_parse_physical_expr( + &col.right.as_ref().expect("right"), + &right.schema(), + )?; Ok((left_key, right_key)) }) .collect::>()?; @@ -1136,13 +1150,19 @@ pub fn parse_protobuf_partitioning( .collect::, _>>()?; Ok(Some(Partitioning::HashPartitioning( expr, - hash_part.partition_count.try_into().expect("partition_count"), + hash_part + .partition_count + .try_into() + .expect("partition_count"), ))) } RepartitionType::RoundRobinRepartition(round_robin_part) => { Ok(Some(Partitioning::RoundRobinPartitioning( - round_robin_part.partition_count.try_into().expect("partition_count"), + round_robin_part + .partition_count + .try_into() + .expect("partition_count"), ))) } @@ -1187,7 +1207,10 @@ pub fn parse_protobuf_partitioning( let bound_rows = sort_row_converter.lock().convert_columns(&bound_cols)?; Ok(Some(Partitioning::RangePartitioning( exprs, - range_part.partition_count.try_into().expect("partition_count"), + range_part + .partition_count + .try_into() + .expect("partition_count"), Arc::new(bound_rows), ))) } diff --git a/native-engine/auron/src/exec.rs b/native-engine/auron/src/exec.rs index 01a0b329e..dcf43ffcd 100644 --- a/native-engine/auron/src/exec.rs +++ b/native-engine/auron/src/exec.rs @@ -62,7 +62,10 @@ pub extern "system" fn Java_org_apache_auron_jni_JniBridge_callNative( INIT.get_or_try_init(|| { // logging is not initialized at this moment eprintln!("------ initializing auron native environment ------"); - let log_level = env.get_string(log_level).map(|s| String::from(s)).expect("log_level"); + let log_level = env + .get_string(log_level) + .map(|s| String::from(s)) + .expect("log_level"); eprintln!("initializing logging with level: {}", log_level); init_logging(log_level.as_str()); diff --git a/native-engine/auron/src/http/mod.rs b/native-engine/auron/src/http/mod.rs index b13d2b62a..5fb47ebee 100644 --- a/native-engine/auron/src/http/mod.rs +++ b/native-engine/auron/src/http/mod.rs @@ -49,8 +49,7 @@ impl DefaultHTTPServer { runtime: tokio::runtime::Builder::new_multi_thread() .worker_threads(1) .enable_io() - .build() - ?, + .build()?, handlers: Mutex::new(vec![]), } } diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index 004674ffb..7d11e948e 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -166,7 +166,11 @@ pub fn cast_impl( col = 
struct_.column_by_name(adjust.as_str());
         }
         if col.is_some() {
-            cast_impl(col.expect("missing column"), field.data_type(), match_struct_fields)
+            cast_impl(
+                col.expect("missing column"),
+                field.data_type(),
+                match_struct_fields,
+            )
         } else {
             null_column_name.push(field.name().clone());
             Ok(new_null_array(field.data_type(), struct_.len()))
@@ -227,7 +231,9 @@ pub fn cast_impl(
 }

 fn to_plain_string_array(array: &dyn Array) -> ArrayRef {
-    let array = array.as_any().downcast_ref::<StringArray>()
+    let array = array
+        .as_any()
+        .downcast_ref::<StringArray>()
         .expect("Expected a StringArray");
     let mut converted_values: Vec<Option<String>> = Vec::with_capacity(array.len());
     for v in array.iter() {
@@ -253,7 +259,10 @@ fn try_cast_string_array_to_integer(array: &dyn Array, cast_type: &DataType) ->
     macro_rules! cast {
         ($target_type:ident) => {{
             type B = paste::paste! {[<$target_type Builder>]};
-            let array = array.as_any().downcast_ref::<StringArray>().expect("Expected a StringArray");
+            let array = array
+                .as_any()
+                .downcast_ref::<StringArray>()
+                .expect("Expected a StringArray");

             let mut builder = B::new();
             for v in array.iter() {
@@ -426,14 +435,14 @@ fn to_date(s: &str) -> Option {

 #[cfg(test)]
 mod test {
+    use std::{error::Error, result::Result};
+
     use datafusion::common::cast::{as_decimal128_array, as_float64_array, as_int32_array};

     use super::*;
-    use std::result::Result;
-    use std::error::Error;

     #[test]
-    fn test_boolean_to_string() -> Result<(), Box<dyn Error>> {
+    fn test_boolean_to_string() -> Result<(), Box<dyn Error>> {
         let bool_array: ArrayRef =
             Arc::new(BooleanArray::from_iter(vec![None, Some(true), Some(false)]));
         let casted = cast(&bool_array, &DataType::Utf8)?;
@@ -515,8 +524,7 @@ mod test {
             Some(i32::MAX as i128 * 1000000000000000000),
             Some(i32::MIN as i128 * 1000000000000000000),
         ])
-        .with_precision_and_scale(38, 18)
-        ?
+        .with_precision_and_scale(38, 18)?
         );
         Ok(())
     }
@@ -548,8 +556,7 @@ mod test {
             Some(123456789012345678901234567890000i128),
             Some(-123456789012345678901234567890000i128),
         ])
-        .with_precision_and_scale(38, 18)
-        ?
+        .with_precision_and_scale(38, 18)?
         );
         Ok(())
     }
@@ -565,12 +572,14 @@ mod test {
             Some(i32::MAX as i128 * 1000000000000000000),
             Some(i32::MIN as i128 * 1000000000000000000),
         ])
-        .with_precision_and_scale(38, 18)
-        ?,
+        .with_precision_and_scale(38, 18)?,
         );
         let casted = cast(&decimal_array, &DataType::Utf8)?;
         assert_eq!(
-            casted.as_any().downcast_ref::<StringArray>().ok_or("StringArray")?,
+            casted
+                .as_any()
+                .downcast_ref::<StringArray>()
+                .ok_or("StringArray")?,
             &StringArray::from_iter(vec![
                 None,
                 Some("123.000000000000000000"),
@@ -596,7 +605,10 @@ mod test {
         ]));
         let casted = cast(&string_array, &DataType::Int64)?;
         assert_eq!(
-            casted.as_any().downcast_ref::<Int64Array>().ok_or("Int64Array")?,
+            casted
+                .as_any()
+                .downcast_ref::<Int64Array>()
+                .ok_or("Int64Array")?,
             &Int64Array::from_iter(vec![
                 None,
                 Some(123),
@@ -626,9 +638,7 @@ mod test {
         ]));
         let casted = cast(&string_array, &DataType::Date32)?;
         assert_eq!(
-            arrow::compute::cast(&casted, &DataType::Utf8)
-            ?
- .as_string(), + arrow::compute::cast(&casted, &DataType::Utf8)?.as_string(), &StringArray::from_iter(vec![ None, Some("2001-02-03"), diff --git a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs index 367c6a32c..0cbc07401 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs @@ -401,7 +401,7 @@ pub mod tests { } #[test] - fn test_fixed_size_binary_fixed_size_binary() -> Result<()> { + fn test_fixed_size_binary_fixed_size_binary() -> Result<()> { let items = vec![vec![1u8]]; let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter())?; let items = vec![vec![2u8]]; @@ -414,7 +414,7 @@ pub mod tests { } #[test] - fn test_i32() -> Result<()> { + fn test_i32() -> Result<()> { let array = Int32Array::from(vec![1, 2]); let eq = make_eq_comparator(&array, &array, false)?; @@ -424,7 +424,7 @@ pub mod tests { } #[test] - fn test_i32_i32() -> Result<()> { + fn test_i32_i32() -> Result<()> { let array1 = Int32Array::from(vec![1]); let array2 = Int32Array::from(vec![2]); @@ -435,7 +435,7 @@ pub mod tests { } #[test] - fn test_f64() -> Result<()> { + fn test_f64() -> Result<()> { let array = Float64Array::from(vec![1.0, 2.0]); let eq = make_eq_comparator(&array, &array, false)?; @@ -445,7 +445,7 @@ pub mod tests { } #[test] - fn test_f64_nan() -> Result<()> { + fn test_f64_nan() -> Result<()> { let array = Float64Array::from(vec![1.0, f64::NAN]); let eq = make_eq_comparator(&array, &array, false)?; @@ -457,7 +457,7 @@ pub mod tests { } #[test] - fn test_f64_zeros() -> Result<()> { + fn test_f64_zeros() -> Result<()> { let array = Float64Array::from(vec![-0.0, 0.0]); let eq = make_eq_comparator(&array, &array, false)?; @@ -468,7 +468,7 @@ pub mod tests { } #[test] - fn test_interval_day_time() -> Result<()> { + fn test_interval_day_time() -> Result<()> { let array = IntervalDayTimeArray::from(vec![ // 0 days, 1 second IntervalDayTimeType::make_value(0, 1000), @@ -492,7 +492,7 @@ pub mod tests { } #[test] - fn test_interval_year_month() -> Result<()> { + fn test_interval_year_month() -> Result<()> { let array = IntervalYearMonthArray::from(vec![ // 1 year, 0 months IntervalYearMonthType::make_value(1, 0), @@ -514,7 +514,7 @@ pub mod tests { } #[test] - fn test_interval_month_day_nano() -> Result<()> { + fn test_interval_month_day_nano() -> Result<()> { let array = IntervalMonthDayNanoArray::from(vec![ // 100 days IntervalMonthDayNanoType::make_value(0, 100, 0), @@ -538,7 +538,7 @@ pub mod tests { } #[test] - fn test_decimal() -> Result<()> { + fn test_decimal() -> Result<()> { let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)] .into_iter() .collect::() @@ -551,7 +551,7 @@ pub mod tests { } #[test] - fn test_decimali256() -> Result<()> { + fn test_decimali256() -> Result<()> { let array = vec![ Some(i256::from_i128(5_i128)), Some(i256::from_i128(2_i128)), @@ -568,7 +568,7 @@ pub mod tests { } #[test] - fn test_dict() -> Result<()> { + fn test_dict() -> Result<()> { let data = vec!["a", "b", "c", "a", "a", "c", "c"]; let array = data.into_iter().collect::>(); @@ -581,7 +581,7 @@ pub mod tests { } #[test] - fn test_multiple_dict() -> Result<()> { + fn test_multiple_dict() -> Result<()> { let d1 = vec!["a", "b", "c", "d"]; let a1 = d1.into_iter().collect::>(); let d2 = vec!["e", "f", "g", "a"]; @@ -596,7 +596,7 @@ pub mod tests { } #[test] - fn test_primitive_dict() -> Result<()> { + fn test_primitive_dict() -> 
Result<()> {
         let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
         let array1 = DictionaryArray::new(keys, Arc::new(values));
@@ -616,7 +616,7 @@ pub mod tests {
     }

     #[test]
-    fn test_float_dict() -> Result<()> {
+    fn test_float_dict() -> Result<()> {
         let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
         let array1 = DictionaryArray::try_new(keys, Arc::new(values))?;
@@ -636,7 +636,7 @@ pub mod tests {
     }

     #[test]
-    fn test_timestamp_dict() -> Result<()> {
+    fn test_timestamp_dict() -> Result<()> {
         let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
         let array1 = DictionaryArray::new(keys, Arc::new(values));
@@ -656,7 +656,7 @@ pub mod tests {
     }

     #[test]
-    fn test_duration_dict() -> Result<()> {
+    fn test_duration_dict() -> Result<()> {
         let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
         let array1 = DictionaryArray::new(keys, Arc::new(values));
@@ -676,7 +676,7 @@ pub mod tests {
     }

     #[test]
-    fn test_decimal_dict() -> Result<()> {
+    fn test_decimal_dict() -> Result<()> {
         let values = Decimal128Array::from(vec![1, 0, 2, 5]);
         let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
         let array1 = DictionaryArray::new(keys, Arc::new(values));
@@ -696,7 +696,7 @@ pub mod tests {
     }

     #[test]
-    fn test_decimal256_dict() -> Result<()> {
+    fn test_decimal256_dict() -> Result<()> {
         let values = Decimal256Array::from(vec![
             i256::from_i128(1),
             i256::from_i128(0),
@@ -725,7 +725,7 @@ pub mod tests {
         Ok(())
     }

-    fn test_bytes_impl() -> Result<()> {
+    fn test_bytes_impl() -> Result<()> {
         let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
         let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
         let eq = make_eq_comparator(&a, &a, false)?;
@@ -745,7 +745,7 @@ pub mod tests {
     }

     #[test]
-    fn test_lists() -> Result<()> {
+    fn test_lists() -> Result<()> {
         let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
         a.extend([
             Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
@@ -786,7 +786,7 @@ pub mod tests {
     }

     #[test]
-    fn test_struct() -> Result<()> {
+    fn test_struct() -> Result<()> {
         let fields = Fields::from(vec![
             Field::new("a", DataType::Int32, true),
             Field::new_list("b", Field::new("item", DataType::Int32, true), true),

diff --git a/native-engine/datafusion-ext-commons/src/arrow/selection.rs b/native-engine/datafusion-ext-commons/src/arrow/selection.rs
index 8afff282e..0669323f9 100644
--- a/native-engine/datafusion-ext-commons/src/arrow/selection.rs
+++ b/native-engine/datafusion-ext-commons/src/arrow/selection.rs
@@ -248,7 +248,11 @@ pub fn create_array_interleaver(
                 let interleaver = Interleave::new(
                     values
                         .iter()
-                        .map(|v| downcast_any!(v, PrimitiveArray<$t>).expect("Expected a PrimitiveArray").clone())
+                        .map(|v| {
+                            downcast_any!(v, PrimitiveArray<$t>)
+                                .expect("Expected a PrimitiveArray")
+                                .clone()
+                        })
                         .collect::<Vec<_>>(),
                 );
                 let dt = $dt.clone();

diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
index cec9652ae..1aa4536ee 100644
--- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
+++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
@@ -431,8 +431,15 @@ fn write_map_array(
     let value_offsets = array.value_offsets();
     write_offsets(output, value_offsets, transpose_opt)?;
-    let first_offset =
value_offsets.first().cloned().expect("value_offsets is non-empty") as usize; - let entries_len = value_offsets.last().cloned().expect("value_offsets is non-empty") as usize - first_offset; + let first_offset = value_offsets + .first() + .cloned() + .expect("value_offsets is non-empty") as usize; + let entries_len = value_offsets + .last() + .cloned() + .expect("value_offsets is non-empty") as usize + - first_offset; let keys = array.keys().slice(first_offset, entries_len); let values = array.values().slice(first_offset, entries_len); @@ -612,8 +619,14 @@ fn write_bytes_array, W: Write>( let value_offsets = array.value_offsets(); write_offsets(output, value_offsets, transpose_opt)?; - let first_offset = value_offsets.first().cloned().expect("value_offsets is non-empty")as usize; - let last_offset = value_offsets.last().cloned().expect("value_offsets is non-empty") as usize; + let first_offset = value_offsets + .first() + .cloned() + .expect("value_offsets is non-empty") as usize; + let last_offset = value_offsets + .last() + .cloned() + .expect("value_offsets is non-empty") as usize; output.write_all(&array.value_data()[first_offset..last_offset])?; Ok(()) } @@ -683,15 +696,15 @@ mod test { ("str", array1, true), ("u64", array2, true), ("bool", array3, true), - ]) - ?; + ])?; // test read after write let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).unwrap().unwrap(); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -702,8 +715,9 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced @@ -724,8 +738,7 @@ mod test { let batch = RecordBatch::try_from_iter_with_nullable(vec![ ("list1", list_array.clone(), true), ("list2", list_array.clone(), true), - ]) - ?; + ])?; assert_batches_eq!( vec![ @@ -745,8 +758,9 @@ mod test { let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_batches_eq!( vec![ "+-----------+-----------+", @@ -758,7 +772,11 @@ mod test { "| [6, 7] | [6, 7] |", "+-----------+-----------+", ], - &[recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?] + &[recover_named_batch( + decoded_num_rows, + &decoded_cols, + batch.schema() + )?] 
); // test read after write sliced @@ -766,8 +784,9 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_batches_eq!( vec![ "+----------+----------+", @@ -777,43 +796,44 @@ mod test { "| [3, , 5] | [3, , 5] |", "+----------+----------+", ], - &[recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?] + &[recover_named_batch( + decoded_num_rows, + &decoded_cols, + sliced.schema() + )?] ); Ok(()) } #[test] fn test_write_and_read_batch_for_map() -> std::result::Result<(), Box> { - let map_array: ArrayRef = Arc::new( - MapArray::new_from_strings( - ["00", "11", "22", "33", "44", "55", "66", "77"].into_iter(), - &StringArray::from(vec![ - Some("aa"), - None, - Some("cc"), - Some("dd"), - Some("ee"), - Some("ff"), - Some("gg"), - Some("hh"), - ]), - &[0, 3, 6, 8], // [00,11,22], [33,44,55], [66,77] - ) - ?, - ); + let map_array: ArrayRef = Arc::new(MapArray::new_from_strings( + ["00", "11", "22", "33", "44", "55", "66", "77"].into_iter(), + &StringArray::from(vec![ + Some("aa"), + None, + Some("cc"), + Some("dd"), + Some("ee"), + Some("ff"), + Some("gg"), + Some("hh"), + ]), + &[0, 3, 6, 8], // [00,11,22], [33,44,55], [66,77] + )?); let batch = RecordBatch::try_from_iter_with_nullable(vec![ ("map1", map_array.clone(), true), ("map2", map_array.clone(), true), - ]) - ?; + ])?; // test read after write let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -824,8 +844,9 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, sliced @@ -834,27 +855,31 @@ mod test { } #[test] - fn test_write_and_read_batch_for_struct() -> std::result::Result<(), Box> { + fn test_write_and_read_batch_for_struct() -> std::result::Result<(), Box> + { let c1: ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true])); let c2: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); let c3: ArrayRef = Arc::new(BooleanArray::from(vec![None, None, None, Some(true)])); let c4: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None, Some(31)])); - let struct_array: ArrayRef = Arc::new( - StructArray::try_from(vec![("c1", c1), ("c2", c2), ("c3", c3), ("c4", c4)])?, - ); + let struct_array: ArrayRef = Arc::new(StructArray::try_from(vec![ + ("c1", c1), + ("c2", c2), + ("c3", c3), + ("c4", c4), + ])?); let batch = RecordBatch::try_from_iter_with_nullable(vec![ ("struct1", struct_array.clone(), true), ("struct2", struct_array.clone(), true), - ]) - 
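// Editor's note (signature inferred from usage in these tests, not stated in
// the patch): `read_batch` appears to return a nested `Result<Option<...>>`,
// where `None` marks end of stream; compare `reader.read_batch(&schema)?.is_none()`
// in the ipc_compression tests above. That is why the old `read_batch(..)??`
// becomes two chained calls:
//
//     let (num_rows, cols) = read_batch(&mut cursor, &schema)
//         .expect("read_batch")   // Err: decode or I/O failure
//         .expect("non-empty");   // None: stream ended unexpectedly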
?; + ])?; // test read after write let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -865,8 +890,9 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema()).expect("read_batch").expect("non-empty"); + let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) + .expect("read_batch") + .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs index eff7669d2..5d643af41 100644 --- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs +++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs @@ -341,10 +341,16 @@ mod tests { writer.finish_current_buf()?; let mut reader = IpcCompressionReader::new(Cursor::new(buf)); - let (num_rows1, arrays1) = reader.read_batch(&schema).expect("read_batch").expect("non-empty"); + let (num_rows1, arrays1) = reader + .read_batch(&schema) + .expect("read_batch") + .expect("non-empty"); assert_eq!(num_rows1, 2); assert_eq!(arrays1, &[test_array1]); - let (num_rows2, arrays2) = reader.read_batch(&schema).expect("read_batch").expect("non-empty"); + let (num_rows2, arrays2) = reader + .read_batch(&schema) + .expect("read_batch") + .expect("non-empty"); assert_eq!(num_rows2, 2); assert_eq!(arrays2, &[test_array2]); assert!(reader.read_batch(&schema)?.is_none()); diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index 43aff709f..c8198813b 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -102,7 +102,10 @@ fn hash_array( macro_rules! hash_array_decimal { ($array_type:ident, $column:ident, $hashes:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); if array.null_count() == 0 { for (i, hash) in $hashes.iter_mut().enumerate() { @@ -121,7 +124,9 @@ fn hash_array( match array.data_type() { DataType::Null => {} DataType::Boolean => { - let array = array.as_any().downcast_ref::() + let array = array + .as_any() + .downcast_ref::() .expect("Expected a BooleanArray"); if array.null_count() == 0 { for (i, hash) in hashes_buffer.iter_mut().enumerate() { @@ -218,7 +223,9 @@ fn create_hashes_dictionary( hashes_buffer: &mut [T], h: impl Fn(&[u8], T) -> T + Copy, ) { - let dict_array = array.as_any().downcast_ref::>() + let dict_array = array + .as_any() + .downcast_ref::>() .expect("Expected a DictionaryArray"); // Hash each dictionary value once, and then use that computed @@ -240,7 +247,10 @@ fn hash_one( ) { macro_rules! 
hash_one_primitive { ($array_type:ident, $column:ident, $ty:ident, $hash:ident, $idx:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); *$hash = $h( (array.value($idx as usize) as $ty).to_le_bytes().as_ref(), *$hash, @@ -250,14 +260,20 @@ fn hash_one( macro_rules! hash_one_binary { ($array_type:ident, $column:ident, $hash:ident, $idx:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); *$hash = $h(&array.value($idx as usize).as_ref(), *$hash); }; } macro_rules! hash_one_decimal { ($array_type:ident, $column:ident, $hash:ident, $idx:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); *$hash = $h(array.value($idx as usize).to_le_bytes().as_ref(), *$hash); }; } @@ -266,7 +282,9 @@ fn hash_one( match col.data_type() { DataType::Null => {} DataType::Boolean => { - let array = col.as_any().downcast_ref::() + let array = col + .as_any() + .downcast_ref::() .expect("Expected a BooleanArray"); *hash = h( (if array.value(idx) { 1u32 } else { 0u32 }) @@ -327,7 +345,9 @@ fn hash_one( hash_one_decimal!(Decimal128Array, col, hash, idx, h); } DataType::List(..) => { - let list_array = col.as_any().downcast_ref::() + let list_array = col + .as_any() + .downcast_ref::() .expect("Expected a ListArray"); let value_array = list_array.value(idx); for i in 0..value_array.len() { @@ -335,7 +355,9 @@ fn hash_one( } } DataType::Map(..) => { - let map_array = col.as_any().downcast_ref::() + let map_array = col + .as_any() + .downcast_ref::() .expect("Expected a MapArray"); let kv_array = map_array.value(idx); let key_array = kv_array.column(0); @@ -346,7 +368,9 @@ fn hash_one( } } DataType::Struct(_) => { - let struct_array = col.as_any().downcast_ref::() + let struct_array = col + .as_any() + .downcast_ref::() .expect("Expected a StructArray"); for col in struct_array.columns() { hash_one(col, idx, hash, h); @@ -495,8 +519,7 @@ mod tests { .add_buffer(Buffer::from_slice_ref( &[1i32, 2, 3, 4, 5, 6].to_byte_slice(), )) - .build() - ?; + .build()?; // Create offset array to define list boundaries: [[1, 2], [3, 4, 5], [6]] let list_data_type = DataType::new_list(DataType::Int32, false); @@ -504,8 +527,7 @@ mod tests { .len(3) .add_buffer(Buffer::from_slice_ref(&[0i32, 2, 5, 6].to_byte_slice())) .add_child_data(value_data) - .build() - ?; + .build()?; let list_array = ListArray::from(list_data); let array_ref = Arc::new(list_array) as ArrayRef; @@ -524,16 +546,14 @@ mod tests { .add_buffer(Buffer::from_slice_ref( &[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice(), )) - .build() - ?; + .build()?; let value_data = ArrayData::builder(DataType::UInt32) .len(8) .add_buffer(Buffer::from_slice_ref( &[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(), )) .null_bit_buffer(Some(Buffer::from(&[0b11010110]))) - .build() - ?; + .build()?; // Construct a buffer for value offsets, for the nested array: // [[0, 1, 2], [3, 4, 5], [6, 7]] @@ -559,8 +579,7 @@ mod tests { .len(3) .add_buffer(entry_offsets) .add_child_data(entry_struct.into_data()) - .build() - ?; + .build()?; let map_array = MapArray::from(map_data); assert_eq!(&value_data, &map_array.values().to_data()); @@ -599,8 +618,7 @@ mod tests { .offset(1) 
.add_buffer(map_array.to_data().buffers()[0].clone()) .add_child_data(map_array.to_data().child_data()[0].clone()) - .build() - ?; + .build()?; let map_array = MapArray::from(map_data); assert_eq!(&value_data, &map_array.values().to_data()); diff --git a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs index f8c37f2c5..652551eab 100644 --- a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs +++ b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs @@ -137,7 +137,10 @@ impl PhysicalExpr for BloomFilterMightContainExpr { if bloom_filter.is_none() { return Ok(ColumnarValue::Scalar(ScalarValue::from(false))); } - let bloom_filter = bloom_filter.as_ref().as_ref().expect("non-null bloom_filter"); + let bloom_filter = bloom_filter + .as_ref() + .as_ref() + .expect("non-null bloom_filter"); // process with bloom filter let value = self.value_expr.evaluate(batch)?; diff --git a/native-engine/datafusion-ext-exprs/src/cast.rs b/native-engine/datafusion-ext-exprs/src/cast.rs index c257b2f72..fcf8d6416 100644 --- a/native-engine/datafusion-ext-exprs/src/cast.rs +++ b/native-engine/datafusion-ext-exprs/src/cast.rs @@ -102,7 +102,7 @@ impl PhysicalExpr for TryCastExpr { #[cfg(test)] mod test { - + use std::sync::Arc; use arrow::{ @@ -115,7 +115,7 @@ mod test { use crate::cast::TryCastExpr; #[test] - fn test_ok_1() -> Result<(), Box> { + fn test_ok_1() -> Result<(), Box> { // input: Array // cast Float32 into Int32 let float_arr: ArrayRef = Arc::new(Float32Array::from(vec![ @@ -146,8 +146,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; let expected: ArrayRef = Arc::new(Int32Array::from(vec![ Some(7), @@ -162,7 +161,7 @@ mod test { } #[test] - fn test_ok_2() -> Result<(), Box> { + fn test_ok_2() -> Result<(), Box> { // input: Array // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ @@ -188,8 +187,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; let expected: ArrayRef = Arc::new(Float32Array::from(vec![ Some(123.0), @@ -203,7 +201,7 @@ mod test { } #[test] - fn test_ok_3() -> Result<(), Box> { + fn test_ok_3() -> Result<(), Box> { // input: Scalar // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ @@ -226,8 +224,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; let expected: ArrayRef = Arc::new(Float32Array::from(vec![ Some(123.4), diff --git a/native-engine/datafusion-ext-exprs/src/get_map_value.rs b/native-engine/datafusion-ext-exprs/src/get_map_value.rs index 564835981..e2a968e0d 100644 --- a/native-engine/datafusion-ext-exprs/src/get_map_value.rs +++ b/native-engine/datafusion-ext-exprs/src/get_map_value.rs @@ -180,16 +180,14 @@ mod test { .add_buffer(Buffer::from_slice_ref( &[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice(), )) - .build() - ?; + .build()?; let value_data = ArrayData::builder(DataType::UInt32) .len(8) .add_buffer(Buffer::from_slice_ref( &[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(), )) .null_bit_buffer(Some(Buffer::from_slice_ref(&[0b11010110]))) - .build() - ?; + .build()?; let entry_offsets = Buffer::from_slice_ref(&[0, 3, 6, 8].to_byte_slice()); @@ -214,8 +212,7 @@ mod test { .len(3) 
.add_buffer(entry_offsets) .add_child_data(entry_struct.into_data()) - .build() - ?; + .build()?; let map_array: ArrayRef = Arc::new(MapArray::from(map_data)); let input_batch = RecordBatch::try_from_iter_with_nullable(vec![("test col", map_array, true)])?; @@ -262,10 +259,11 @@ mod test { // [[a, b, c], [d, e, f], [g, h]] let entry_offsets = [0, 3, 6, 8]; - let map_array: ArrayRef = Arc::new( - MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets) - ?, - ); + let map_array: ArrayRef = Arc::new(MapArray::new_from_strings( + keys.clone().into_iter(), + &values_data, + &entry_offsets, + )?); let input_batch = RecordBatch::try_from_iter_with_nullable(vec![("test col", map_array, true)])?; let get_indexed = Arc::new(GetMapValueExpr::new( diff --git a/native-engine/datafusion-ext-exprs/src/lib.rs b/native-engine/datafusion-ext-exprs/src/lib.rs index 4c3a790c6..3a685a41f 100644 --- a/native-engine/datafusion-ext-exprs/src/lib.rs +++ b/native-engine/datafusion-ext-exprs/src/lib.rs @@ -31,7 +31,9 @@ pub mod string_starts_with; fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { if any.is::() { - any.downcast_ref::().expect("Expected a PhysicalExpr").as_any() + any.downcast_ref::() + .expect("Expected a PhysicalExpr") + .as_any() } else if any.is::>() { any.downcast_ref::>() .expect("Expected a PhysicalExpr") diff --git a/native-engine/datafusion-ext-exprs/src/string_contains.rs b/native-engine/datafusion-ext-exprs/src/string_contains.rs index 2b5127804..21e6995bf 100644 --- a/native-engine/datafusion-ext-exprs/src/string_contains.rs +++ b/native-engine/datafusion-ext-exprs/src/string_contains.rs @@ -83,7 +83,10 @@ impl PhysicalExpr for StringContainsExpr { match expr { ColumnarValue::Array(array) => { - let string_array = array.as_any().downcast_ref::().expect("Expected a StringArray"); + let string_array = array + .as_any() + .downcast_ref::() + .expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.contains(&self.infix)), @@ -154,8 +157,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ @@ -193,8 +195,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ diff --git a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs index 9b65d1054..a4ec448ae 100644 --- a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs @@ -83,7 +83,10 @@ impl PhysicalExpr for StringEndsWithExpr { match expr { ColumnarValue::Array(array) => { - let string_array = array.as_any().downcast_ref::().expect("Expected a StringArray"); + let string_array = array + .as_any() + .downcast_ref::() + .expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.ends_with(&self.suffix)), ))); @@ -155,8 +158,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ @@ 
-197,8 +199,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; // verify result let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ diff --git a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs index 62de41b01..4be5216c4 100644 --- a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs @@ -83,7 +83,10 @@ impl PhysicalExpr for StringStartsWithExpr { match expr { ColumnarValue::Array(array) => { - let string_array = array.as_any().downcast_ref::().expect("Expected a StringArray"); + let string_array = array + .as_any() + .downcast_ref::() + .expect("Expected a StringArray"); let ret_array = Arc::new(BooleanArray::from_iter(string_array.iter().map( |maybe_string| maybe_string.map(|string| string.starts_with(&self.prefix)), ))); @@ -118,7 +121,7 @@ impl PhysicalExpr for StringStartsWithExpr { #[cfg(test)] mod test { - + use std::sync::Arc; use arrow::{ @@ -152,8 +155,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ None, @@ -186,8 +188,7 @@ mod test { let ret = expr .evaluate(&batch) .expect("Error evaluating expr") - .into_array(batch.num_rows()) - ?; + .into_array(batch.num_rows())?; let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ Some(true), diff --git a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs index c97676e28..5a67a8444 100644 --- a/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs +++ b/native-engine/datafusion-ext-functions/src/spark_check_overflow.rs @@ -55,7 +55,10 @@ pub fn spark_check_overflow(args: &[ColumnarValue]) -> Result { _ => ColumnarValue::Scalar(ScalarValue::Decimal128(None, to_precision, to_scale)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::().expect("Expected a Decimal128Array"); + let array = array + .as_any() + .downcast_ref::() + .expect("Expected a Decimal128Array"); let mut output = Decimal128Builder::with_capacity(array.len()); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-functions/src/spark_dates.rs b/native-engine/datafusion-ext-functions/src/spark_dates.rs index 5037b8326..3ea5e23f6 100644 --- a/native-engine/datafusion-ext-functions/src/spark_dates.rs +++ b/native-engine/datafusion-ext-functions/src/spark_dates.rs @@ -221,10 +221,7 @@ mod tests { Some(1975), None, ])); - assert_eq!( - &spark_year(&args)?.into_array(1)?, - &expected_ret - ); + assert_eq!(&spark_year(&args)?.into_array(1)?, &expected_ret); Ok(()) } @@ -234,15 +231,12 @@ mod tests { let args = vec![ColumnarValue::Array(input)]; let expected_ret: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3), None])); - assert_eq!( - &spark_month(&args)?.into_array(1)?, - &expected_ret - ); + assert_eq!(&spark_month(&args)?.into_array(1)?, &expected_ret); Ok(()) } #[test] - fn test_spark_day() -> Result<()> { + fn test_spark_day() -> Result<()> { let input = Arc::new(Date32Array::from(vec![ Some(0), Some(10), @@ -260,15 +254,12 @@ mod tests { Some(10), None, ])); - assert_eq!( - &spark_day(&args)?.into_array(1)?, - &expected_ret - ); + assert_eq!(&spark_day(&args)?.into_array(1)?, &expected_ret); 
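        // Editor's note (standard Arrow semantics, not stated in the patch):
        // Date32 values are days since the Unix epoch, which is why these
        // fixtures map small integers onto 1970-era dates (day 40 is
        // 1970-02-10). A quick check, assuming the chrono crate is available:
        //
        //     use chrono::{Days, NaiveDate};
        //     let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).expect("valid date");
        //     assert_eq!(
        //         epoch + Days::new(40),
        //         NaiveDate::from_ymd_opt(1970, 2, 10).expect("valid date")
        //     );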
Ok(()) } #[test] - fn test_spark_quarter_basic() -> Result<()> { + fn test_spark_quarter_basic() -> Result<()> { // Date32 days relative to 1970-01-01: // 0 -> 1970-01-01 (Q1) // 40 -> ~1970-02-10 (Q1) diff --git a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs index 8b2e2a1a6..c9d9b6bb8 100644 --- a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs +++ b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs @@ -121,7 +121,10 @@ pub fn spark_parse_json(args: &[ColumnarValue]) -> Result { pub fn spark_get_parsed_json_object(args: &[ColumnarValue]) -> Result { let json_array = match &args[0] { - ColumnarValue::Array(array) => array.as_any().downcast_ref::().expect("Expected a UserDefinedArray"), + ColumnarValue::Array(array) => array + .as_any() + .downcast_ref::() + .expect("Expected a UserDefinedArray"), ColumnarValue::Scalar(_) => unreachable!(), }; @@ -153,7 +156,9 @@ pub fn spark_get_parsed_json_object(args: &[ColumnarValue]) -> Result Option> { - let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); + let json_value = value + .downcast_ref::() + .expect("Expected a ParsedJsonValue"); match json_value { ParsedJsonValue::SerdeJson(v) => evaluator .evaluate_with_value_serde_json(v) @@ -186,7 +191,9 @@ pub fn spark_get_parsed_json_simple_field( let output = StringArray::from_iter(json_array.iter().map(|value| { value.as_ref().and_then(|value| { - let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); + let json_value = value + .downcast_ref::() + .expect("Expected a ParsedJsonValue"); match json_value { ParsedJsonValue::SerdeJson(v) => v .as_object() @@ -214,7 +221,9 @@ fn parse_fallback(json_path: &str, json_array: &UserDefinedArray) -> Result Option<&str> { - let json_value = value.downcast_ref::().expect("Expected a ParsedJsonValue"); + let json_value = value + .downcast_ref::() + .expect("Expected a ParsedJsonValue"); if let ParsedJsonValue::Fallback(json) = json_value { return Some(json.as_ref()); } @@ -555,7 +564,9 @@ impl HiveGetJsonObjectMatcher { .flat_map(|r| { // keep consistent with hive UDFJson let iter: Box> = match r { - v if v.is_array() => Box::new(v.into_array().expect("array").into_iter()), + v if v.is_array() => { + Box::new(v.into_array().expect("array").into_iter()) + } other => Box::new(std::iter::once(other)), }; iter @@ -623,109 +634,73 @@ mod test { let path = "$.owner"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("amy".to_owned()) ); let path = "$. owner"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("amy".to_owned()) ); let path = "$.store.bicycle.price"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("19.95".to_owned()) ); let path = "$. store. bicycle. price"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("19.95".to_owned()) ); let path = "$.store.fruit[0]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$. store. 
fruit[0]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$.store.fruit[1].weight"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("9".to_owned()) ); let path = "$.store.fruit[*]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$. store. fruit[*]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$.store.fruit.[1].type"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("pear".to_owned()) ); let path = "$. store. fruit. [1]. type"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, Some("pear".to_owned()) ); let path = "$.non_exist_key"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path) - ? - .evaluate(input) - ?, + HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, None ); Ok(()) diff --git a/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs b/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs index 93de8d83f..ad22e6d0a 100644 --- a/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs +++ b/native-engine/datafusion-ext-functions/src/spark_make_decimal.rs @@ -45,7 +45,10 @@ pub fn spark_make_decimal(args: &[ColumnarValue]) -> Result { _ => ColumnarValue::Scalar(ScalarValue::Decimal128(None, precision, scale)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::().expect("Expected a Int64Array"); + let array = array + .as_any() + .downcast_ref::() + .expect("Expected a Int64Array"); let mut output = Decimal128Builder::with_capacity(array.len()); for v in array.into_iter() { diff --git a/native-engine/datafusion-ext-functions/src/spark_null_if.rs b/native-engine/datafusion-ext-functions/src/spark_null_if.rs index 6270efc61..79cf375cd 100644 --- a/native-engine/datafusion-ext-functions/src/spark_null_if.rs +++ b/native-engine/datafusion-ext-functions/src/spark_null_if.rs @@ -159,9 +159,8 @@ mod test { ))])? 
.into_array(1)?; - let expected = Decimal128Array::from(vec![Some(1230427389124691)]) - .with_precision_and_scale(20, 2) - ?; + let expected = + Decimal128Array::from(vec![Some(1230427389124691)]).with_precision_and_scale(20, 2)?; let expected: ArrayRef = Arc::new(expected); assert_eq!(&result, &expected);
diff --git a/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs b/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs index 926d51923..30a4b8582 100644 --- a/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs +++ b/native-engine/datafusion-ext-functions/src/spark_unscaled_value.rs
@@ -31,7 +31,10 @@ pub fn spark_unscaled_value(args: &[ColumnarValue]) -> Result<ColumnarValue> { _ => ColumnarValue::Scalar(ScalarValue::Int64(None)), }, ColumnarValue::Array(array) => { - let array = array.as_any().downcast_ref::<Decimal128Array>().expect("Expected a Decimal128Array"); + let array = array + .as_any() + .downcast_ref::<Decimal128Array>() + .expect("Expected a Decimal128Array"); let mut output = Int64Builder::new(); for v in array.into_iter() {
diff --git a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs index 9d15f3692..8c3ccdb9b 100644 --- a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs +++ b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs
@@ -277,7 +277,8 @@ impl AggContext { let mut merging_acc_table = self.create_acc_table(0); if self.need_partial_merge { - let partial_merged_array = as_binary_array(batch.columns().last().expect("last column"))?; + let partial_merged_array = + as_binary_array(batch.columns().last().expect("last column"))?; let array = partial_merged_array .iter() .skip(batch_start_idx)
diff --git a/native-engine/datafusion-ext-plans/src/agg/collect.rs b/native-engine/datafusion-ext-plans/src/agg/collect.rs index bce7264cb..e03725196 100644 --- a/native-engine/datafusion-ext-plans/src/agg/collect.rs +++ b/native-engine/datafusion-ext-plans/src/agg/collect.rs
@@ -651,7 +651,7 @@ fn acc_hash(value: impl AsRef<[u8]>) -> u64 { mod tests { use arrow::datatypes::DataType; use auron_memmgr::spill::Spill; - use datafusion::common::{DataFusionError, ScalarValue}; + use datafusion::common::ScalarValue; use super::*;
@@ -730,8 +730,7 @@ mod tests { spill_writer.finish()?; let mut acc_col_unspill = AccSetColumn::empty(DataType::Int32); - acc_col_unspill - .unspill(3, &mut spill.get_compressed_reader())?; + acc_col_unspill.unspill(3, &mut spill.get_compressed_reader())?; assert_eq!(acc_col.take_values(0), acc_col_unspill.take_values(0)); assert_eq!(acc_col.take_values(1), acc_col_unspill.take_values(1));
diff --git a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs index 59f8540b0..cc747d79c 100644 --- a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs +++ b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs
@@ -231,7 +231,7 @@ impl Agg for SparkUDAFWrapper { let rows = jni_call!(SparkUDAFWrapperContext(jcontext.as_obj()).initialize( num_rows as i32, )-> JObject) - .expect("num_rows"); + .expect("num_rows"); let jcontext = self.jcontext().expect("jcontext"); let obj = jni_new_global_ref!(rows.as_obj()).expect("rows");
diff --git a/native-engine/datafusion-ext-plans/src/agg_exec.rs b/native-engine/datafusion-ext-plans/src/agg_exec.rs index c99ad2623..23ceacbf9 100644 --- a/native-engine/datafusion-ext-plans/src/agg_exec.rs +++ b/native-engine/datafusion-ext-plans/src/agg_exec.rs @@ -418,7 +418,6 @@ mod
test { datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; - use auron_memmgr::MemManager; use datafusion::{ assert_batches_sorted_eq,
diff --git a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs index 1e34f34ee..73e04353e 100644 --- a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs +++ b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs
@@ -75,9 +75,10 @@ impl RowNullChecker { encoded_length: 0, }, DataType::Boolean => FieldConfig::new_boolean(sort_options), - dt if dt.is_primitive() => { - FieldConfig::new_primitive(sort_options, 1 + dt.primitive_width().expect("primitive_width")) - } + dt if dt.is_primitive() => FieldConfig::new_primitive( + sort_options, + 1 + dt.primitive_width().expect("primitive_width"), + ), // DataType::Int8 => FieldConfig::new_primitive(sort_options, 2), // 1 byte null flag + // // 1 byte value // DataType::Int16 => FieldConfig::new_primitive(sort_options, 3), /* 1 byte null flag +
@@ -445,8 +446,7 @@ impl FieldConfig { #[cfg(test)] mod tests { - use std::error::Error; - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray},
@@ -692,7 +692,7 @@ mod tests { } #[test] - fn test_has_nulls_with_rows() { + fn test_has_nulls_with_rows() -> Result<(), Box<dyn Error>> { use arrow::{array::ArrayRef, row::RowConverter}; // Create a schema
@@ -747,10 +747,11 @@ assert_eq!(null_buffer.is_valid(1), false); // Has null in name assert_eq!(null_buffer.is_valid(2), false); // Has null in id assert_eq!(null_buffer.is_valid(3), true); // No nulls + Ok(()) } #[test] - fn test_has_nulls_empty_rows() { + fn test_has_nulls_empty_rows() -> Result<(), Box<dyn Error>> { // Test with empty rows let field_configs = vec![(DataType::Int32, SortOptions::default())]; let checker = RowNullChecker::new(&field_configs);
@@ -776,10 +777,11 @@ let null_buffer = checker.has_nulls(&rows); assert_eq!(null_buffer.len(), 0); + Ok(()) } #[test] - fn test_has_nulls_all_nulls() { + fn test_has_nulls_all_nulls() -> Result<(), Box<dyn Error>> { // Test with all rows containing nulls let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, true)]));
@@ -812,10 +814,11 @@ for i in 0..3 { assert_eq!(null_buffer.is_valid(i), false); } + Ok(()) } #[test] - fn test_has_nulls_no_nulls() { + fn test_has_nulls_no_nulls() -> Result<(), Box<dyn Error>> { // Test with no nulls in any row let schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, true),
@@ -852,5 +855,6 @@ for i in 0..3 { assert_eq!(null_buffer.is_valid(i), true); } + Ok(()) } }
diff --git a/native-engine/datafusion-ext-plans/src/expand_exec.rs b/native-engine/datafusion-ext-plans/src/expand_exec.rs index 7a4c06109..5485e926f 100644 --- a/native-engine/datafusion-ext-plans/src/expand_exec.rs +++ b/native-engine/datafusion-ext-plans/src/expand_exec.rs
@@ -221,7 +221,11 @@ mod test { fn build_table_int(a: (&str, &Vec<i32>)) -> Result<Arc<dyn ExecutionPlan>> { let batch = build_table_i32(a)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } // build f32 table
@@ -238,7 +242,11 @@ fn build_table_float(a: (&str, &Vec<f32>)) -> Result<Arc<dyn ExecutionPlan>> { let batch = build_table_f32(a)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) +
Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } // build str table
@@ -255,7 +263,11 @@ fn build_table_string(a: (&str, &Vec<String>)) -> Result<Arc<dyn ExecutionPlan>> { let batch = build_table_str(a)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } // build boolean table
@@ -272,7 +284,11 @@ fn build_table_boolean(a: (&str, &Vec<bool>)) -> Result<Arc<dyn ExecutionPlan>> { let batch = build_table_bool(a)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } #[tokio::test]
@@ -283,49 +299,36 @@ let schema = Schema::new(vec![Field::new("test_i32", DataType::Int32, false)]); let projections = vec![ - vec![ - binary( - col("test_i32", &schema)?, - Operator::Multiply, - lit(ScalarValue::from(2)), - &schema, - )?, - ], - vec![ - binary( - col("test_i32", &schema)?, - Operator::Plus, - lit(ScalarValue::from(100)), - &schema, - )?, - ], - vec![ - binary( - col("test_i32", &schema)?, - Operator::Divide, - lit(ScalarValue::from(-2)), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_i32", &schema)?, - Operator::Modulo, - lit(ScalarValue::from(2)), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_i32", &schema)?, - Operator::BitwiseShiftLeft, - lit(ScalarValue::from(1)), - &schema, - ) - ?, - ], + vec![binary( + col("test_i32", &schema)?, + Operator::Multiply, + lit(ScalarValue::from(2)), + &schema, + )?], + vec![binary( + col("test_i32", &schema)?, + Operator::Plus, + lit(ScalarValue::from(100)), + &schema, + )?], + vec![binary( + col("test_i32", &schema)?, + Operator::Divide, + lit(ScalarValue::from(-2)), + &schema, + )?], + vec![binary( + col("test_i32", &schema)?, + Operator::Modulo, + lit(ScalarValue::from(2)), + &schema, + )?], + vec![binary( + col("test_i32", &schema)?, + Operator::BitwiseShiftLeft, + lit(ScalarValue::from(1)), + &schema, + )?], ]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?;
@@ -352,42 +355,30 @@ let schema = Schema::new(vec![Field::new("test_f32", DataType::Float32, false)]); let projections = vec![ - vec![ - binary( - col("test_f32", &schema)?, - Operator::Multiply, - lit(ScalarValue::from(2.1_f32)), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_f32", &schema)?, - Operator::Plus, - lit(ScalarValue::from(100_f32)), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_f32", &schema)?, - Operator::Divide, - lit(ScalarValue::from(-2_f32)), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_f32", &schema)?, - Operator::Modulo, - lit(ScalarValue::from(-2_f32)), - &schema, - ) - ?, - ], + vec![binary( + col("test_f32", &schema)?, + Operator::Multiply, + lit(ScalarValue::from(2.1_f32)), + &schema, + )?], + vec![binary( + col("test_f32", &schema)?, + Operator::Plus, + lit(ScalarValue::from(100_f32)), + &schema, + )?], + vec![binary( + col("test_f32", &schema)?, + Operator::Divide, + lit(ScalarValue::from(-2_f32)), + &schema, + )?], + vec![binary( + col("test_f32", &schema)?, + Operator::Modulo, + lit(ScalarValue::from(-2_f32)), + &schema, + )?], ]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?;
@@ -438,14 +429,12 @@ ))?; let schema = Schema::new(vec![Field::new("test_str", DataType::Utf8, false)]); - let projections = vec![vec![ - binary( - col("test_str", &schema)?, - Operator::StringConcat, -
lit(Some("app").expect("app")), - &schema, - )?, - ]]; + let projections = vec![vec![binary( + col("test_str", &schema)?, + Operator::StringConcat, + lit(Some("app").expect("app")), + &schema, + )?]]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?; @@ -476,24 +465,18 @@ mod test { let schema = Schema::new(vec![Field::new("test_bool", DataType::Boolean, false)]); let projections = vec![ - vec![ - binary( - col("test_bool", &schema)?, - Operator::And, - lit(ScalarValue::Boolean(Some(true))), - &schema, - ) - ?, - ], - vec![ - binary( - col("test_bool", &schema)?, - Operator::Or, - lit(ScalarValue::Boolean(Some(true))), - &schema, - ) - ?, - ], + vec![binary( + col("test_bool", &schema)?, + Operator::And, + lit(ScalarValue::Boolean(Some(true))), + &schema, + )?], + vec![binary( + col("test_bool", &schema)?, + Operator::Or, + lit(ScalarValue::Boolean(Some(true))), + &schema, + )?], ]; let expand_exec = ExpandExec::try_new(input.schema(), projections, input)?; diff --git a/native-engine/datafusion-ext-plans/src/joins/test.rs b/native-engine/datafusion-ext-plans/src/joins/test.rs index 5d44ed205..90282bb41 100644 --- a/native-engine/datafusion-ext-plans/src/joins/test.rs +++ b/native-engine/datafusion-ext-plans/src/joins/test.rs @@ -33,7 +33,7 @@ mod tests { physical_plan::{ExecutionPlan, common, joins::utils::*, test::TestMemoryExec}, prelude::SessionContext, }; - use datafusion::common::DataFusionError; + use crate::{ broadcast_join_build_hash_map_exec::BroadcastJoinBuildHashMapExec, broadcast_join_exec::BroadcastJoinExec, @@ -80,23 +80,26 @@ mod tests { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result, Box> { + ) -> Result> { let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } - fn build_table_from_batches(batches: Vec) -> Result, Box> { - let first = batches.into_iter().next().ok_or_else(|| DataFusionError::Internal("empty batches".into()))?; - let schema = first.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[batches], schema, None)?)) + fn build_table_from_batches(batches: Vec) -> Arc { + let schema = batches.first().expect("first").schema(); + Arc::new(TestMemoryExec::try_new(&[batches], schema, None).expect("memory_exec")) } fn build_date_table( a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result, Box> { + ) -> Result> { let schema = Schema::new(vec![ Field::new(a.0, DataType::Date32, false), Field::new(b.0, DataType::Date32, false), @@ -113,14 +116,18 @@ mod tests { )?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } fn build_date64_table( a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Result, Box> { + ) -> Result> { let schema = Schema::new(vec![ Field::new(a.0, DataType::Date64, false), Field::new(b.0, DataType::Date64, false), @@ -137,7 +144,11 @@ mod tests { )?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } /// returns a table with 3 columns of i32 in memory @@ -159,7 +170,11 @@ mod tests { Arc::new(Int32Array::from(c.1.clone())), ], )?; - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)); + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + 
None, + )?)) } fn build_date64_table( a: (&str, &Vec<i64>), b: (&str, &Vec<i64>), c: (&str, &Vec<i64>), - ) -> Result<Arc<dyn ExecutionPlan>, Box<dyn Error>> { + ) -> Result<Arc<dyn ExecutionPlan>> { let schema = Schema::new(vec![ Field::new(a.0, DataType::Date64, false), Field::new(b.0, DataType::Date64, false), Field::new(c.0, DataType::Date64, false), ]); let batch = RecordBatch::try_new( Arc::new(schema), vec![ Arc::new(Date64Array::from(a.1.clone())), Arc::new(Date64Array::from(b.1.clone())), Arc::new(Date64Array::from(c.1.clone())), ], )?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } /// returns a table with 3 columns of i32 in memory
@@ -159,7 +170,11 @@ Arc::new(Int32Array::from(c.1.clone())), ], )?; - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)); + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } fn build_join_schema_for_test(
@@ -276,12 +291,12 @@ ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 5]), // this has a repetition ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![4, 5, 6]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?),
@@ -311,12 +326,12 @@ ("a1", &vec![1, 2, 2]), ("b2", &vec![1, 2, 2]), ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a1", &vec![1, 2, 3]), ("b2", &vec![1, 2, 2]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![ ( Arc::new(Column::new_with_schema("a1", &left.schema())?),
@@ -351,12 +366,12 @@ ("a1", &vec![1, 1, 2]), ("b2", &vec![1, 1, 2]), ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a1", &vec![1, 1, 3]), ("b2", &vec![1, 1, 2]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![ ( Arc::new(Column::new_with_schema("a1", &left.schema())?),
@@ -392,12 +407,12 @@ ("a1", &vec![Some(1), Some(1), Some(2), Some(2)]), ("b2", &vec![None, Some(1), Some(2), Some(2)]), // null in key field ("c1", &vec![Some(1), None, Some(8), Some(9)]), // null in non-key field - ); + )?; let right = build_table_i32_nullable( ("a1", &vec![Some(1), Some(1), Some(2), Some(3)]), ("b2", &vec![None, Some(1), Some(2), Some(2)]), ("c2", &vec![Some(10), Some(70), Some(80), Some(90)]), - ); + )?; let on: JoinOn = vec![ ( Arc::new(Column::new_with_schema("a1", &left.schema())?),
@@ -432,12 +447,12 @@ ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), // 7 does not exist on the right ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![4, 5, 6]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b1", &right.schema())?),
@@ -466,12 +481,12 @@ ("a1", &vec![1, 2, 3]), ("b1", &vec![4, 5, 7]), ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![4, 5, 6]), // 6 does not exist on the left ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b1", &right.schema())?),
@@ -500,12 +515,12 @@ ("a1", &vec![1, 2, 2, 3]), ("b1", &vec![4, 5, 5, 7]), // 7 does not exist on the right ("c1", &vec![7, 8, 80, 9]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 20, 30]), ("b2", &vec![4, 5, 5, 6]), ("c2", &vec![70, 80, 800, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b2", &right.schema())?),
@@ -537,12 +552,12 @@ ("a1", &vec![1, 2, 2, 3, 5]), ("b1", &vec![4, 5, 5, 7, 7]), // 7 does not exist on the right ("c1", &vec![7, 8, 8, 9, 11]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![4, 5, 6]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b1", &right.schema())?),
@@ -570,12 +585,12 @@ ("a1", &vec![1, 2, 2, 3]), ("b1", &vec![4, 5, 5, 7]), // 7 does not exist on the right ("c1", &vec![7, 8, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![4, 5, 6]), // 5 is double on the right ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1",
&left.schema())?), Arc::new(Column::new_with_schema("b1", &right.schema())?), @@ -604,12 +619,12 @@ mod tests { ("a", &vec![1, 2, 3]), ("b", &vec![4, 5, 7]), ("c", &vec![7, 8, 9]), - ); + )?; let right = build_table( ("a", &vec![10, 20, 30]), ("b", &vec![1, 2, 7]), ("c", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( // join on a=b so there are duplicate column names on unjoined columns Arc::new(Column::new_with_schema("a", &left.schema())?), @@ -638,12 +653,12 @@ mod tests { ("a1", &vec![1, 2, 3]), ("b1", &vec![19107, 19108, 19108]), // this has a repetition ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_date_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![19107, 19108, 19109]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), @@ -675,12 +690,12 @@ mod tests { ("b1", &vec![1650703441000, 1650903441000, 1650903441000]), /* this has a * repetition */ ("c1", &vec![7, 8, 9]), - ); + )?; let right = build_date64_table( ("a2", &vec![10, 20, 30]), ("b1", &vec![1650703441000, 1650503441000, 1650903441000]), ("c2", &vec![70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), @@ -711,12 +726,12 @@ mod tests { ("a1", &vec![0, 1, 2, 3, 4, 5]), ("b1", &vec![3, 4, 5, 6, 6, 7]), ("c1", &vec![4, 5, 6, 7, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![0, 10, 20, 30, 40]), ("b2", &vec![2, 4, 6, 6, 8]), ("c2", &vec![50, 60, 70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b2", &right.schema())?), @@ -749,12 +764,12 @@ mod tests { ("a1", &vec![0, 1, 2, 3]), ("b1", &vec![3, 4, 5, 7]), ("c1", &vec![6, 7, 8, 9]), - ); + )?; let right = build_table( ("a2", &vec![0, 10, 20, 30]), ("b2", &vec![2, 4, 5, 6]), ("c2", &vec![60, 70, 80, 90]), - ); + )?; let on: JoinOn = vec![( Arc::new(Column::new_with_schema("b1", &left.schema())?), Arc::new(Column::new_with_schema("b2", &right.schema())?), @@ -783,22 +798,22 @@ mod tests { ("a1", &vec![0, 1, 2]), ("b1", &vec![3, 4, 5]), ("c1", &vec![4, 5, 6]), - ); + )?; let left_batch_2 = build_table_i32( ("a1", &vec![3, 4, 5, 6]), ("b1", &vec![6, 6, 7, 9]), ("c1", &vec![7, 8, 9, 9]), - ); + )?; let right_batch_1 = build_table_i32( ("a2", &vec![0, 10, 20]), ("b2", &vec![2, 4, 6]), ("c2", &vec![50, 60, 70]), - ); + )?; let right_batch_2 = build_table_i32( ("a2", &vec![30, 40]), ("b2", &vec![6, 8]), ("c2", &vec![80, 90]), - ); + )?; let left = build_table_from_batches(vec![left_batch_1, left_batch_2]); let right = build_table_from_batches(vec![right_batch_1, right_batch_2]); let on: JoinOn = vec![( @@ -834,22 +849,22 @@ mod tests { ("a2", &vec![0, 1, 2]), ("b2", &vec![3, 4, 5]), ("c2", &vec![4, 5, 6]), - ); + )?; let right_batch_2 = build_table_i32( ("a2", &vec![3, 4, 5, 6]), ("b2", &vec![6, 6, 7, 9]), ("c2", &vec![7, 8, 9, 9]), - ); + )?; let left_batch_1 = build_table_i32( ("a1", &vec![0, 10, 20]), ("b1", &vec![2, 4, 6]), ("c1", &vec![50, 60, 70]), - ); + )?; let left_batch_2 = build_table_i32( ("a1", &vec![30, 40]), ("b1", &vec![6, 8]), ("c1", &vec![80, 90]), - ); + )?; let left = build_table_from_batches(vec![left_batch_1, left_batch_2]); let right = build_table_from_batches(vec![right_batch_1, right_batch_2]); let on: JoinOn = vec![( @@ -885,22 +900,22 @@ mod tests { ("a1", &vec![0, 1, 2]), ("b1", &vec![3, 4, 5]), ("c1", &vec![4, 5, 6]), - ); + )?; let left_batch_2 = build_table_i32( ("a1", &vec![3, 4, 5, 6]), ("b1", &vec![6, 6, 7, 9]), ("c1", 
&vec![7, 8, 9, 9]), - ); + )?; let right_batch_1 = build_table_i32( ("a2", &vec![0, 10, 20]), ("b2", &vec![2, 4, 6]), ("c2", &vec![50, 60, 70]), - ); + )?; let right_batch_2 = build_table_i32( ("a2", &vec![30, 40]), ("b2", &vec![6, 8]), ("c2", &vec![80, 90]), - ); + )?; let left = build_table_from_batches(vec![left_batch_1, left_batch_2]); let right = build_table_from_batches(vec![right_batch_1, right_batch_2]); let on: JoinOn = vec![(
@@ -938,22 +953,22 @@ ("a1", &vec![0, 1, 2]), ("b1", &vec![3, 4, 5]), ("c1", &vec![4, 5, 6]), - ); + )?; let left_batch_2 = build_table_i32( ("a1", &vec![3, 4, 5, 6]), ("b1", &vec![6, 6, 7, 9]), ("c1", &vec![7, 8, 9, 9]), - ); + )?; let right_batch_1 = build_table_i32( ("a2", &vec![0, 10, 20]), ("b2", &vec![2, 4, 6]), ("c2", &vec![50, 60, 70]), - ); + )?; let right_batch_2 = build_table_i32( ("a2", &vec![30, 40]), ("b2", &vec![6, 8]), ("c2", &vec![80, 90]), - ); + )?; let left = build_table_from_batches(vec![left_batch_1, left_batch_2]); let right = build_table_from_batches(vec![right_batch_1, right_batch_2]); let on: JoinOn = vec![(
diff --git a/native-engine/datafusion-ext-plans/src/limit_exec.rs b/native-engine/datafusion-ext-plans/src/limit_exec.rs index 977e0db9f..dd1e40d99 100644 --- a/native-engine/datafusion-ext-plans/src/limit_exec.rs +++ b/native-engine/datafusion-ext-plans/src/limit_exec.rs
@@ -192,7 +192,11 @@ mod test { ) -> Result<Arc<dyn ExecutionPlan>> { let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } #[tokio::test]
@@ -202,7 +206,7 @@ ("a", &vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0]), ("b", &vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("c", &vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]), - ); + )?; let limit_exec = LimitExec::new(input, 2_u64); let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx();
diff --git a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs index ee5fc3c5f..95c8ac4ef 100644 --- a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs +++ b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs
@@ -397,7 +397,7 @@ mod test { ("a", &vec![19, 18, 17, 16, 15, 14, 13, 12, 11, 10]), ("b", &vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("c", &vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]), - ); + )?; let round_robin_partitioning = Partitioning::RoundRobinPartitioning(4); let (_parts, sorted_batch) =
@@ -429,7 +429,7 @@ ("a", &vec![19, 18, 17, 16, 15, 14, 13, 12, 11, 10]), ("b", &vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("c", &vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]), - ); + )?; let sort_exprs = vec![PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)), options: SortOptions::default(),
@@ -483,7 +483,7 @@ ("a", &vec![19, 18, 17, 16, 15, 14, 13, 12, 11, 10]), ("b", &vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("c", &vec![5, 6, 7, 8, 9, 0, 1, 2, 3, 4]), - ); + )?; let sort_exprs = vec![ PhysicalSortExpr { expr: Arc::new(Column::new("a", 0)),
diff --git a/native-engine/datafusion-ext-plans/src/sort_exec.rs b/native-engine/datafusion-ext-plans/src/sort_exec.rs index e6ace53df..92c836aba 100644 --- a/native-engine/datafusion-ext-plans/src/sort_exec.rs +++ b/native-engine/datafusion-ext-plans/src/sort_exec.rs
@@ -1049,7 +1049,7 @@ fn create_zero_column_batch(num_rows: usize) -> RecordBatch { vec![], &RecordBatchOptions::new().with_row_count(Some(num_rows)), ) -
.expect("record_batch") + .expect("record_batch") } struct PruneSortKeysFromBatch { @@ -1450,7 +1450,11 @@ mod test { ) -> Result> { let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); - Ok(Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?)) + Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } #[tokio::test] diff --git a/native-engine/datafusion-ext-plans/src/window/window_context.rs b/native-engine/datafusion-ext-plans/src/window/window_context.rs index 411ee92de..37e423747 100644 --- a/native-engine/datafusion-ext-plans/src/window/window_context.rs +++ b/native-engine/datafusion-ext-plans/src/window/window_context.rs @@ -151,16 +151,20 @@ impl WindowContext { } pub fn get_order_rows(&self, batch: &RecordBatch) -> Result { - Ok(self.order_row_converter.lock().expect("lock").convert_columns( - &self - .order_spec - .iter() - .map(|expr: &PhysicalSortExpr| { - expr.expr - .evaluate(batch) - .and_then(|v| v.into_array(batch.num_rows())) - }) - .collect::>>()?, - )?) + Ok(self + .order_row_converter + .lock() + .expect("lock") + .convert_columns( + &self + .order_spec + .iter() + .map(|expr: &PhysicalSortExpr| { + expr.expr + .evaluate(batch) + .and_then(|v| v.into_array(batch.num_rows())) + }) + .collect::>>()?, + )?) } } diff --git a/native-engine/datafusion-ext-plans/src/window_exec.rs b/native-engine/datafusion-ext-plans/src/window_exec.rs index fe3b307a3..5bb698eec 100644 --- a/native-engine/datafusion-ext-plans/src/window_exec.rs +++ b/native-engine/datafusion-ext-plans/src/window_exec.rs @@ -272,6 +272,7 @@ mod test { use arrow::{array::*, datatypes::*, record_batch::RecordBatch}; use datafusion::{ assert_batches_eq, + common::Result, physical_expr::{PhysicalSortExpr, expressions::Column}, physical_plan::{ExecutionPlan, test::TestMemoryExec}, prelude::SessionContext, @@ -287,32 +288,36 @@ mod test { a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> RecordBatch { + ) -> Result { let schema = Schema::new(vec![ Field::new(a.0, DataType::Int32, false), Field::new(b.0, DataType::Int32, false), Field::new(c.0, DataType::Int32, false), ]); - RecordBatch::try_new( + let batch = RecordBatch::try_new( Arc::new(schema), vec![ Arc::new(Int32Array::from(a.1.clone())), Arc::new(Int32Array::from(b.1.clone())), Arc::new(Int32Array::from(c.1.clone())), ], - ) - ? + )?; + Ok(batch) } fn build_table( a: (&str, &Vec), b: (&str, &Vec), c: (&str, &Vec), - ) -> Arc { - let batch = build_table_i32(a, b, c); + ) -> Result> { + let batch = build_table_i32(a, b, c)?; let schema = batch.schema(); - Arc::new(TestMemoryExec::try_new(&[vec![batch]], schema, None)?) 
+ Ok(Arc::new(TestMemoryExec::try_new( + &[vec![batch]], + schema, + None, + )?)) } #[tokio::test]
@@ -325,7 +330,7 @@ ("a1", &vec![1, 1, 1, 1, 2, 3, 3]), ("b1", &vec![1, 2, 2, 3, 4, 1, 1]), ("c1", &vec![0, 0, 0, 0, 0, 0, 0]), - ); + )?; let window_exprs = vec![ WindowExpr::new( WindowFunction::RankLike(WindowRankType::RowNumber),
@@ -385,7 +390,7 @@ ("a1", &vec![1, 3, 3, 1, 1, 1, 2]), ("b1", &vec![1, 1, 1, 2, 2, 3, 4]), ("c1", &vec![0, 0, 0, 0, 0, 0, 0]), - ); + )?; let window_exprs = vec![ WindowExpr::new( WindowFunction::RankLike(WindowRankType::RowNumber),
@@ -452,7 +457,7 @@ ("a1", &vec![1, 1, 1, 1, 2, 3, 3]), ("b1", &vec![1, 2, 2, 3, 4, 1, 1]), ("c1", &vec![0, 0, 0, 0, 0, 0, 0]), - ); + )?; let window_exprs = vec![WindowExpr::new( WindowFunction::RankLike(WindowRankType::RowNumber), vec![],
From cfafb023056c1610b438441b89ba2de7ad7ad2eb Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 25 Nov 2025 23:21:39 +0800 Subject: [PATCH 05/10] fixup --- native-engine/auron/src/http/mod.rs | 3 ++- native-engine/auron/src/http/pprof.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/native-engine/auron/src/http/mod.rs index 5fb47ebee..0fa27cae9 100644 --- a/native-engine/auron/src/http/mod.rs +++ b/native-engine/auron/src/http/mod.rs
@@ -49,7 +49,8 @@ impl DefaultHTTPServer { runtime: tokio::runtime::Builder::new_multi_thread() .worker_threads(1) .enable_io() - .build()?, + .build() + .expect("runtime"), handlers: Mutex::new(vec![]), } }
diff --git a/native-engine/auron/src/http/pprof.rs index 7635026a2..66ab76aa8 100644 --- a/native-engine/auron/src/http/pprof.rs +++ b/native-engine/auron/src/http/pprof.rs
@@ -34,7 +34,7 @@ impl Default for PProfRequest { fn default() -> Self { PProfRequest { seconds: 5, - frequency: NonZeroI32::new(100)?, + frequency: NonZeroI32::new(100).expect("nonzero"), } } }
From d420efca0031580f72fb09567c5171fb20e0cb68 Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 25 Nov 2025 23:51:19 +0800 Subject: [PATCH 06/10] fixup --- .github/workflows/tpcds-reusable.yml | 1 + Cargo.toml | 1 + dev/mvn-build-helper/build-native.sh | 3 ++- native-engine/auron-jni-bridge/src/jni_bridge.rs | 12 ++++++------ .../datafusion-ext-commons/src/io/scalar_serde.rs | 8 ++++---- .../datafusion-ext-commons/src/spark_hash.rs | 4 ++-- .../datafusion-ext-functions/src/spark_null_if.rs | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/tpcds-reusable.yml index 5981fa709..7cba480eb 100644 --- a/.github/workflows/tpcds-reusable.yml +++ b/.github/workflows/tpcds-reusable.yml
@@ -155,6 +155,7 @@ jobs: - name: Cargo clippy run: | + # Eliminate unwrap first, then switch to -D warnings to enforce all lints. cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used - name: Cargo test
diff --git a/Cargo.toml index b123f60d8..1f2567dd9 100644 --- a/Cargo.toml +++ b/Cargo.toml
@@ -29,6 +29,7 @@ members = [ [workspace.lints.clippy] unwrap_used = "deny" +panic = "deny" [profile.release] opt-level = 3
diff --git a/dev/mvn-build-helper/build-native.sh index 6fa4f860b..690d36d26 100755 --- a/dev/mvn-build-helper/build-native.sh +++ b/dev/mvn-build-helper/build-native.sh
@@ -85,7 +85,8 @@ cargo fmt --all -q -- 2>&1 echo "Running cargo clippy..."
- cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used 2>&1 + # Eliminate unwrap first, then switch to -D warnings to enforce all lints. + cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used 2>&1 echo "Building native with [$profile] profile..." cargo build --profile="$profile" $features_arg --verbose --locked --frozen 2>&1 diff --git a/native-engine/auron-jni-bridge/src/jni_bridge.rs b/native-engine/auron-jni-bridge/src/jni_bridge.rs index dac7025c8..75169bc15 100644 --- a/native-engine/auron-jni-bridge/src/jni_bridge.rs +++ b/native-engine/auron-jni-bridge/src/jni_bridge.rs @@ -87,9 +87,9 @@ macro_rules! jni_map_error_with_env { match $result { Ok(result) => $crate::jni_bridge::datafusion::error::Result::Ok(result), Err($crate::jni_bridge::jni::errors::Error::JavaException) => { - let ex = $env.exception_occurred().expect("exception"); - $env.exception_describe().expect("exception"); - $env.exception_clear().expect("exception"); + let ex = $env.exception_occurred().expect("ok"); + $env.exception_describe().expect("ok"); + $env.exception_clear().expect("ok"); let message_obj = $env .call_method_unchecked( ex, @@ -102,13 +102,13 @@ macro_rules! jni_map_error_with_env { .clone(), &[], ) - .expect("exception") + .expect("ok") .l() - .expect("exception"); + .expect("ok"); let message = $env .get_string(message_obj.into()) .map(|s| String::from(s)) - .expect("exception"); + .expect("ok"); Err( $crate::jni_bridge::datafusion::error::DataFusionError::External( diff --git a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs index 4902ee19d..87e897f70 100644 --- a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs @@ -36,13 +36,13 @@ pub fn write_scalar(value: &ScalarValue, nullable: bool, output: &mut ($v:expr) => {{ if nullable { if let Some(v) = $v { - write_u8(1, output).unwrap(); - output.write_all(&v.to_ne_bytes()).unwrap(); + write_u8(1, output).expect("ok"); + output.write_all(&v.to_ne_bytes()).expect("ok"); } else { - write_u8(0, output).unwrap(); + write_u8(0, output).expect("ok"); } } else { - output.write_all(&$v.unwrap().to_ne_bytes()).unwrap(); + output.write_all(&$v.expect("ok").to_ne_bytes()).expect("ok"); } }}; } diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index c8198813b..18d05b583 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -66,7 +66,7 @@ fn hash_array( macro_rules! hash_array { ($array_type:ident, $column:ident, $hashes:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); if array.null_count() == 0 { for (i, hash) in $hashes.iter_mut().enumerate() { *hash = $h(&array.value(i).as_ref(), *hash); @@ -83,7 +83,7 @@ fn hash_array( macro_rules! 
hash_array_primitive { ($array_type:ident, $column:ident, $ty:ident, $hashes:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); + let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); let values = array.values(); if array.null_count() == 0 { diff --git a/native-engine/datafusion-ext-functions/src/spark_null_if.rs b/native-engine/datafusion-ext-functions/src/spark_null_if.rs index 79cf375cd..bbca3f3a9 100644 --- a/native-engine/datafusion-ext-functions/src/spark_null_if.rs +++ b/native-engine/datafusion-ext-functions/src/spark_null_if.rs @@ -93,7 +93,7 @@ pub fn spark_null_if_zero(args: &[ColumnarValue]) -> Result { macro_rules! handle_decimal { ($dt:ident, $precision:expr, $scale:expr) => {{ type T = paste::paste! {arrow::datatypes::[<$dt Type>]}; - let array = array.as_any().downcast_ref::>().unwrap(); + let array = array.as_any().downcast_ref::>().expect("Expected a PrimitiveArray"); let _0 = ::Native::from_le_bytes([0; T::BYTE_LENGTH]); let filtered = array.iter().map(|v| v.filter(|v| *v != _0)); Arc::new( From 4052162d6cf20bb2f0b7f7a572b60a92d49ebea3 Mon Sep 17 00:00:00 2001 From: yew1eb Date: Wed, 26 Nov 2025 00:08:23 +0800 Subject: [PATCH 07/10] fixup --- .github/workflows/tpcds-reusable.yml | 2 +- dev/mvn-build-helper/build-native.sh | 2 +- native-engine/auron-serde/build.rs | 4 ++-- native-engine/auron-serde/src/from_proto.rs | 4 ++-- .../datafusion-ext-commons/src/arrow/cast.rs | 2 +- .../datafusion-ext-commons/src/io/batch_serde.rs | 16 ++++++++-------- .../src/io/ipc_compression.rs | 10 ++-------- .../src/io/scalar_serde.rs | 8 ++++---- 8 files changed, 21 insertions(+), 27 deletions(-) diff --git a/.github/workflows/tpcds-reusable.yml b/.github/workflows/tpcds-reusable.yml index 7cba480eb..7cbb18241 100644 --- a/.github/workflows/tpcds-reusable.yml +++ b/.github/workflows/tpcds-reusable.yml @@ -155,7 +155,7 @@ jobs: - name: Cargo clippy run: | -# Eliminate unwrap first, then switch to -D warnings to enforce all lints. +# First eliminate unwrap; then enable -D warnings to enforce all lints. cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used - name: Cargo test diff --git a/dev/mvn-build-helper/build-native.sh b/dev/mvn-build-helper/build-native.sh index 690d36d26..d2b16aab4 100755 --- a/dev/mvn-build-helper/build-native.sh +++ b/dev/mvn-build-helper/build-native.sh @@ -85,7 +85,7 @@ if [ ! -f "$cache_libpath" ] || [ "$new_checksum" != "$old_checksum" ]; then cargo fmt --all -q -- 2>&1 echo "Running cargo clippy..." - # Eliminate unwrap first, then switch to -D warnings to enforce all lints. + # First eliminate unwrap; then enable -D warnings to enforce all lints. cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used 2>&1 echo "Building native with [$profile] profile..." 
diff --git a/native-engine/auron-serde/build.rs index b3f59f275..afcf85157 100644 --- a/native-engine/auron-serde/build.rs +++ b/native-engine/auron-serde/build.rs
@@ -40,10 +40,10 @@ fn main() -> Result<(), String> { } } if let Some(path) = protoc_file { - eprintln!("Using protoc executable: {}", path.display()); + eprintln!("Using protoc executable: {:?}", path); prost_build.protoc_executable(path); } prost_build .compile_protos(&["proto/auron.proto"], &["proto"]) - .map_err(|e| format!("protobuf compilation failed: {e}")) + .map_err(|e| format!("protobuf compilation failed: {}", e)) }
diff --git a/native-engine/auron-serde/src/from_proto.rs index 442223d4f..d5cb2ee20 100644 --- a/native-engine/auron-serde/src/from_proto.rs +++ b/native-engine/auron-serde/src/from_proto.rs
@@ -280,7 +280,7 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode { Ok(Arc::new(ShuffleWriterExec::try_new( input, - output_partitioning.expect("partitioning"), + output_partitioning.expect("output_partitioning"), shuffle_writer.output_data_file.clone(), shuffle_writer.output_index_file.clone(), )?))
@@ -295,7 +295,7 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode { )?; Ok(Arc::new(RssShuffleWriterExec::try_new( input, - output_partitioning.expect("partitioning"), + output_partitioning.expect("output_partitioning"), rss_shuffle_writer.rss_partition_writer_resource_id.clone(), )?)) }
diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs index 7d11e948e..f3f565cdd 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs
@@ -167,7 +167,7 @@ pub fn cast_impl( } if col.is_some() { cast_impl( - col.expect("missing column"), + col.expect("col"), field.data_type(), match_struct_fields, )
diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs index 1aa4536ee..190955e8d 100644 --- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs
@@ -703,7 +703,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
@@ -716,7 +716,7 @@ write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?,
@@ -759,7 +756,7 @@ write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_batches_eq!( vec![ "+-----------+-----------+",
@@ -785,7 +781,7 @@ write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ?
.expect("non-empty"); assert_batches_eq!( vec![ @@ -832,7 +832,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, @@ -845,7 +845,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, @@ -878,7 +878,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, @@ -891,7 +891,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - .expect("read_batch") + ? .expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs index 5d643af41..df3acdec6 100644 --- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs +++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs @@ -341,16 +341,10 @@ mod tests { writer.finish_current_buf()?; let mut reader = IpcCompressionReader::new(Cursor::new(buf)); - let (num_rows1, arrays1) = reader - .read_batch(&schema) - .expect("read_batch") - .expect("non-empty"); + let (num_rows1, arrays1) = reader.read_batch(&schema)?.expect("non-empty"); assert_eq!(num_rows1, 2); assert_eq!(arrays1, &[test_array1]); - let (num_rows2, arrays2) = reader - .read_batch(&schema) - .expect("read_batch") - .expect("non-empty"); + let (num_rows2, arrays2) = reader.read_batch(&schema)?.expect("non-empty"); assert_eq!(num_rows2, 2); assert_eq!(arrays2, &[test_array2]); assert!(reader.read_batch(&schema)?.is_none()); diff --git a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs index 87e897f70..ce28e2bf6 100644 --- a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs @@ -36,13 +36,13 @@ pub fn write_scalar(value: &ScalarValue, nullable: bool, output: &mut ($v:expr) => {{ if nullable { if let Some(v) = $v { - write_u8(1, output).expect("ok"); - output.write_all(&v.to_ne_bytes()).expect("ok"); + write_u8(1, output)?; + output.write_all(&v.to_ne_bytes())?; } else { - write_u8(0, output).expect("ok"); + write_u8(0, output)?; } } else { - output.write_all(&$v.expect("ok").to_ne_bytes()).expect("ok"); + output.write_all(&$v.expect("ok").to_ne_bytes())?; } }}; } From b582ab066bcae16082aca4cad932d9c5f3163eb6 Mon Sep 17 00:00:00 2001 From: yew1eb Date: Wed, 26 Nov 2025 00:21:35 +0800 Subject: [PATCH 08/10] format code style --- .../datafusion-ext-commons/src/arrow/cast.rs | 6 +-- .../src/io/batch_serde.rs | 40 ++++++++----------- .../datafusion-ext-commons/src/spark_hash.rs | 10 ++++- .../src/spark_null_if.rs | 5 ++- 4 files 
changed, 29 insertions(+), 32 deletions(-) diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index f3f565cdd..6882f9e83 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -166,11 +166,7 @@ pub fn cast_impl( col = struct_.column_by_name(adjust.as_str()); } if col.is_some() { - cast_impl( - col.expect("col"), - field.data_type(), - match_struct_fields, - ) + cast_impl(col.expect("col"), field.data_type(), match_struct_fields) } else { null_column_name.push(field.name().clone()); Ok(new_null_array(field.data_type(), struct_.len())) diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs index 190955e8d..fec747fd7 100644 --- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs @@ -702,9 +702,8 @@ mod test { let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -715,9 +714,8 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced @@ -758,9 +756,8 @@ mod test { let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_batches_eq!( vec![ "+-----------+-----------+", @@ -784,9 +781,8 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_batches_eq!( vec![ "+----------+----------+", @@ -831,9 +827,8 @@ mod test { let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -844,9 +839,8 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? 
- .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, sliced @@ -877,9 +871,8 @@ mod test { let mut buf = vec![]; write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -890,9 +883,8 @@ mod test { let mut buf = vec![]; write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); - let (decoded_num_rows, decoded_cols) = read_batch(&mut cursor, &batch.schema()) - ? - .expect("non-empty"); + let (decoded_num_rows, decoded_cols) = + read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index 18d05b583..830233d33 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -66,7 +66,10 @@ fn hash_array( macro_rules! hash_array { ($array_type:ident, $column:ident, $hashes:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); if array.null_count() == 0 { for (i, hash) in $hashes.iter_mut().enumerate() { *hash = $h(&array.value(i).as_ref(), *hash); @@ -83,7 +86,10 @@ fn hash_array( macro_rules! hash_array_primitive { ($array_type:ident, $column:ident, $ty:ident, $hashes:ident, $h:expr) => { - let array = $column.as_any().downcast_ref::<$array_type>().expect("downcast"); + let array = $column + .as_any() + .downcast_ref::<$array_type>() + .expect("downcast"); let values = array.values(); if array.null_count() == 0 { diff --git a/native-engine/datafusion-ext-functions/src/spark_null_if.rs b/native-engine/datafusion-ext-functions/src/spark_null_if.rs index bbca3f3a9..b8a999f70 100644 --- a/native-engine/datafusion-ext-functions/src/spark_null_if.rs +++ b/native-engine/datafusion-ext-functions/src/spark_null_if.rs @@ -93,7 +93,10 @@ pub fn spark_null_if_zero(args: &[ColumnarValue]) -> Result { macro_rules! handle_decimal { ($dt:ident, $precision:expr, $scale:expr) => {{ type T = paste::paste! 
{arrow::datatypes::[<$dt Type>]}; - let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().expect("Expected a PrimitiveArray"); + let array = array + .as_any() + .downcast_ref::<PrimitiveArray<T>>() + .expect("Expected a PrimitiveArray"); let _0 = ::Native::from_le_bytes([0; T::BYTE_LENGTH]); let filtered = array.iter().map(|v| v.filter(|v| *v != _0)); Arc::new(
From 2250843df55db0df7ae0c1f1b867b14f05bfe502 Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 2 Dec 2025 01:12:25 +0800 Subject: [PATCH 09/10] up --- .github/workflows/tpcds-reusable.yml | 2 +- Cargo.lock | 1 + dev/mvn-build-helper/build-native.sh | 2 +- .../auron-jni-bridge/src/jni_bridge.rs | 16 ++- native-engine/auron-jni-bridge/src/lib.rs | 4 +- native-engine/auron-serde/src/from_proto.rs | 55 ++++--- native-engine/auron/Cargo.toml | 1 + native-engine/auron/src/alloc.rs | 9 +- native-engine/auron/src/exec.rs | 7 +- native-engine/auron/src/http/mod.rs | 9 +- native-engine/auron/src/http/pprof.rs | 2 +- native-engine/auron/src/lib.rs | 2 +- .../datafusion-ext-commons/src/arrow/cast.rs | 44 +++---- .../src/arrow/eq_comparator.rs | 10 +- .../src/io/batch_serde.rs | 45 +++---- .../src/io/ipc_compression.rs | 4 +- .../datafusion-ext-commons/src/spark_hash.rs | 18 +-- .../src/bloom_filter_might_contain.rs | 2 +- .../datafusion-ext-exprs/src/cast.rs | 9 +- .../src/string_contains.rs | 6 +- .../src/string_ends_with.rs | 7 +- .../src/string_starts_with.rs | 7 +- .../src/spark_crypto.rs | 2 +- .../src/spark_get_json_object.rs | 134 +++++++++++++----- .../src/spark_strings.rs | 5 +- .../datafusion-ext-plans/src/agg/agg_ctx.rs | 2 +- .../datafusion-ext-plans/src/agg/agg_table.rs | 4 +- .../src/agg/bloom_filter.rs | 2 +- .../datafusion-ext-plans/src/agg/collect.rs | 8 +- .../src/agg/spark_udaf_wrapper.rs | 8 +- .../src/common/cached_exprs_evaluator.rs | 2 +- .../src/common/column_pruning.rs | 4 +- .../src/common/execution_context.rs | 2 +- .../src/common/row_null_checker.rs | 4 +- .../datafusion-ext-plans/src/generate_exec.rs | 2 +- .../src/joins/join_hash_map.rs | 4 +- .../datafusion-ext-plans/src/parquet_exec.rs | 2 +- .../src/parquet_sink_exec.rs | 2 +- .../src/shuffle/buffered_data.rs | 3 +- .../src/shuffle/single_repartitioner.rs | 2 +- .../datafusion-ext-plans/src/sort_exec.rs | 8 +- .../src/window/window_context.rs | 4 +- 42 files changed, 281 insertions(+), 185 deletions(-)
diff --git a/.github/workflows/tpcds-reusable.yml index 7cbb18241..91f12fc05 100644 --- a/.github/workflows/tpcds-reusable.yml +++ b/.github/workflows/tpcds-reusable.yml
@@ -155,7 +155,7 @@ jobs: - name: Cargo clippy run: | - # First eliminate unwrap; then enable -D warnings to enforce all lints. + # First eliminate unwrap; then enable -D warnings to enforce all default lints. cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used - name: Cargo test
diff --git a/Cargo.lock index b915a5f51..75b25c4e7 100644 --- a/Cargo.lock +++ b/Cargo.lock
@@ -360,6 +360,7 @@ dependencies = [ "log", "once_cell", "panic-message", + "parking_lot", "poem", "pprof", "prost 0.14.1",
diff --git a/dev/mvn-build-helper/build-native.sh index d2b16aab4..c378f775a 100755 --- a/dev/mvn-build-helper/build-native.sh +++ b/dev/mvn-build-helper/build-native.sh
@@ -85,7 +85,7 @@ cargo fmt --all -q -- 2>&1 echo "Running cargo clippy..." - # First eliminate unwrap; then enable -D warnings to enforce all lints.
+ # First eliminate unwrap; then enable -D warnings to enforce all default lints. cargo clippy --all-targets --workspace -- -A warnings -A clippy::all -D clippy::unwrap_used 2>&1 echo "Building native with [$profile] profile..."
diff --git a/native-engine/auron-jni-bridge/src/jni_bridge.rs index 75169bc15..4305d33c1 100644 --- a/native-engine/auron-jni-bridge/src/jni_bridge.rs +++ b/native-engine/auron-jni-bridge/src/jni_bridge.rs
@@ -87,9 +87,13 @@ macro_rules! jni_map_error_with_env { match $result { Ok(result) => $crate::jni_bridge::datafusion::error::Result::Ok(result), Err($crate::jni_bridge::jni::errors::Error::JavaException) => { - let ex = $env.exception_occurred().expect("ok"); - $env.exception_describe().expect("ok"); - $env.exception_clear().expect("ok"); + let ex = $env + .exception_occurred() + .expect("failed to obtain pending Java exception object"); + $env.exception_describe() + .expect("failed to print Java exception to stderr"); + $env.exception_clear() + .expect("failed to clear pending Java exception"); let message_obj = $env .call_method_unchecked( ex,
@@ -102,13 +106,13 @@ macro_rules! jni_map_error_with_env { .clone(), &[], ) - .expect("ok") + .expect("call Java Throwable.toString() failed") .l() - .expect("ok"); + .expect("expected object return from Throwable.toString()"); let message = $env .get_string(message_obj.into()) .map(|s| String::from(s)) - .expect("ok"); + .expect("failed to read Throwable.toString() result as Java string"); Err( $crate::jni_bridge::datafusion::error::DataFusionError::External(
diff --git a/native-engine/auron-jni-bridge/src/lib.rs index 6bb3b9866..7bdb77a0e 100644 --- a/native-engine/auron-jni-bridge/src/lib.rs +++ b/native-engine/auron-jni-bridge/src/lib.rs
@@ -34,7 +34,9 @@ pub fn ensure_jni_bridge_inited() -> Result<()> { pub fn is_task_running() -> bool { fn is_task_running_impl() -> Result<bool> { - if !jni_call_static!(JniBridge.isTaskRunning() -> bool)?
{ + if !jni_call_static!(JniBridge.isTaskRunning() -> bool) + .expect("calling JniBridge.isTaskRunning() error") + { jni_exception_clear!()?; return Ok(false); } diff --git a/native-engine/auron-serde/src/from_proto.rs b/native-engine/auron-serde/src/from_proto.rs index d5cb2ee20..804e72d89 100644 --- a/native-engine/auron-serde/src/from_proto.rs +++ b/native-engine/auron-serde/src/from_proto.rs @@ -151,8 +151,11 @@ impl TryInto> for &protobuf::PhysicalPlanNode { Ok(Arc::new(FilterExec::try_new(predicates, input)?)) } PhysicalPlanType::ParquetScan(scan) => { - let conf: FileScanConfig = - scan.base_conf.as_ref().expect("base_conf").try_into()?; + let conf: FileScanConfig = scan + .base_conf + .as_ref() + .expect("base_conf must be set for ParquetScan") + .try_into()?; let predicate = scan .pruning_predicates .iter() @@ -169,8 +172,11 @@ impl TryInto> for &protobuf::PhysicalPlanNode { ))) } PhysicalPlanType::OrcScan(scan) => { - let conf: FileScanConfig = - scan.base_conf.as_ref().expect("base_conf").try_into()?; + let conf: FileScanConfig = scan + .base_conf + .as_ref() + .expect("base_conf must be set for OrcScan") + .try_into()?; let predicate = scan .pruning_predicates .iter() @@ -195,11 +201,15 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = try_parse_physical_expr( - &col.left.as_ref().expect("left"), + &col.left + .as_ref() + .expect("hash join: left join key must be present"), &left.schema(), )?; let right_key = try_parse_physical_expr( - &col.right.as_ref().expect("right"), + &col.right + .as_ref() + .expect("hash join: right join key must be present"), &right.schema(), )?; Ok((left_key, right_key)) @@ -236,11 +246,15 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = try_parse_physical_expr( - &col.left.as_ref().expect("left"), + &col.left + .as_ref() + .expect("sort-merge join: left join key must be present"), &left.schema(), )?; let right_key = try_parse_physical_expr( - &col.right.as_ref().expect("right"), + &col.right + .as_ref() + .expect("sort-merge join: right join key must be present"), &right.schema(), )?; Ok((left_key, right_key)) @@ -280,7 +294,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { Ok(Arc::new(ShuffleWriterExec::try_new( input, - output_partitioning.expect("output_partitioning"), + output_partitioning.expect("shuffle writer: output_partitioning must be set"), shuffle_writer.output_data_file.clone(), shuffle_writer.output_index_file.clone(), )?)) @@ -295,7 +309,8 @@ impl TryInto> for &protobuf::PhysicalPlanNode { )?; Ok(Arc::new(RssShuffleWriterExec::try_new( input, - output_partitioning.expect("output_partitioning"), + output_partitioning + .expect("rss shuffle writer: output_partitioning must be set"), rss_shuffle_writer.rss_partition_writer_resource_id.clone(), )?)) } @@ -350,11 +365,15 @@ impl TryInto> for &protobuf::PhysicalPlanNode { .iter() .map(|col| { let left_key = try_parse_physical_expr( - &col.left.as_ref().expect("left"), + &col.left + .as_ref() + .expect("broadcast join: left join key must be present"), &left.schema(), )?; let right_key = try_parse_physical_expr( - &col.right.as_ref().expect("right"), + &col.right + .as_ref() + .expect("broadcast join: right join key must be present"), &right.schema(), )?; Ok((left_key, right_key)) @@ -485,7 +504,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { let agg = match AggFunction::from(agg_function) { AggFunction::Udaf => { - let udaf = agg_node.udaf.as_ref().expect("udaf"); + let udaf = 
agg_node.udaf.as_ref().expect("udaf missing"); let serialized = udaf.serialized.clone(); create_udaf_agg(serialized, return_type, agg_children_exprs)? } @@ -710,7 +729,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { children, )?, GenerateFunction::Udtf => { - let udtf = pb_generator.udtf.as_ref().expect("udtf"); + let udtf = pb_generator.udtf.as_ref().expect("udtf missing"); let serialized = udtf.serialized.clone(); let return_schema = Arc::new(convert_required!(udtf.return_schema)?); create_udtf_generator(serialized, return_schema, children)? @@ -1153,7 +1172,7 @@ pub fn parse_protobuf_partitioning( hash_part .partition_count .try_into() - .expect("partition_count"), + .expect("hash repartition: invalid partition_count"), ))) } @@ -1162,7 +1181,7 @@ pub fn parse_protobuf_partitioning( round_robin_part .partition_count .try_into() - .expect("partition_count"), + .expect("round-robin repartition: invalid partition_count"), ))) } @@ -1170,7 +1189,7 @@ pub fn parse_protobuf_partitioning( if range_part.partition_count == 1 { Ok(Some(Partitioning::SinglePartitioning())) } else { - let sort = range_part.sort_expr.clone().expect("sort_expr"); + let sort = range_part.sort_expr.clone().expect("sort_expr missing"); let exprs = try_parse_physical_sort_expr(&input, &sort).unwrap_or_else(|e| { panic!("Failed to parse physical sort expressions: {}", e); }); @@ -1210,7 +1229,7 @@ pub fn parse_protobuf_partitioning( range_part .partition_count .try_into() - .expect("partition_count"), + .expect("range partition: invalid partition_count"), Arc::new(bound_rows), ))) } diff --git a/native-engine/auron/Cargo.toml b/native-engine/auron/Cargo.toml index 9e3f1353c..a8d338210 100644 --- a/native-engine/auron/Cargo.toml +++ b/native-engine/auron/Cargo.toml @@ -47,6 +47,7 @@ panic-message = { workspace = true } prost = { workspace = true } tokio = { workspace = true } chrono = { workspace = true } +parking_lot = { workspace = true } [dependencies.tikv-jemalloc-ctl] version = "0.6.1" diff --git a/native-engine/auron/src/alloc.rs b/native-engine/auron/src/alloc.rs index 75ce62b49..9d9767212 100644 --- a/native-engine/auron/src/alloc.rs +++ b/native-engine/auron/src/alloc.rs @@ -20,12 +20,11 @@ use std::{ alloc::{GlobalAlloc, Layout}, - sync::{ - Mutex, - atomic::{AtomicUsize, Ordering::SeqCst}, - }, + sync::atomic::{AtomicUsize, Ordering::SeqCst}, }; +use parking_lot::Mutex; + #[cfg(any(feature = "jemalloc", feature = "jemalloc-pprof"))] #[cfg(not(windows))] #[cfg_attr(not(windows), global_allocator)] @@ -57,7 +56,7 @@ impl DebugAlloc { } fn update(&self) { - let _lock = self.mutex.lock().expect("lock"); + let _lock = self.mutex.lock(); let current = self.current.load(SeqCst); let last_updated = self.last_updated.load(SeqCst); let delta = (current as isize - last_updated as isize).abs(); diff --git a/native-engine/auron/src/exec.rs b/native-engine/auron/src/exec.rs index dcf43ffcd..af0db9cf0 100644 --- a/native-engine/auron/src/exec.rs +++ b/native-engine/auron/src/exec.rs @@ -65,7 +65,7 @@ pub extern "system" fn Java_org_apache_auron_jni_JniBridge_callNative( let log_level = env .get_string(log_level) .map(|s| String::from(s)) - .expect("log_level"); + .expect("init: failed to read log_level from env"); eprintln!("initializing logging with level: {}", log_level); init_logging(log_level.as_str()); @@ -106,7 +106,10 @@ pub extern "system" fn Java_org_apache_auron_jni_JniBridge_callNative( // create execution runtime let runtime = Box::new(NativeExecutionRuntime::start( native_wrapper, - 
SESSION.get().expect("session").task_ctx(), + SESSION + .get() + .expect("session must be initialized") + .task_ctx(), )?); // returns runtime raw pointer diff --git a/native-engine/auron/src/http/mod.rs b/native-engine/auron/src/http/mod.rs index 0fa27cae9..1c51d02b1 100644 --- a/native-engine/auron/src/http/mod.rs +++ b/native-engine/auron/src/http/mod.rs @@ -18,9 +18,8 @@ mod memory_profiling; #[cfg(feature = "jemalloc-pprof")] mod pprof; -use std::sync::Mutex; - use once_cell::sync::OnceCell; +use parking_lot::Mutex; use poem::{Route, RouteMethod, Server, listener::TcpListener}; pub static HTTP_SERVICE: OnceCell = OnceCell::new(); @@ -50,7 +49,7 @@ impl DefaultHTTPServer { .worker_threads(1) .enable_io() .build() - .expect("runtime"), + .expect("fast fail: error initializing tokio runtime"), handlers: Mutex::new(vec![]), } } @@ -66,7 +65,7 @@ impl HTTPServer for DefaultHTTPServer { fn start(&self) { if let Some(port) = find_available_port() { let mut app = Route::new(); - let handlers = self.handlers.lock().expect("lock"); + let handlers = self.handlers.lock(); for handler in handlers.iter() { app = app.at(handler.get_route_path(), handler.get_route_method()); } @@ -83,7 +82,7 @@ impl HTTPServer for DefaultHTTPServer { } fn register_handler(&self, handler: Box) { - let mut handlers = self.handlers.lock().expect("lock"); + let mut handlers = self.handlers.lock(); handlers.push(handler); } } diff --git a/native-engine/auron/src/http/pprof.rs b/native-engine/auron/src/http/pprof.rs index 66ab76aa8..a7a0b3b0a 100644 --- a/native-engine/auron/src/http/pprof.rs +++ b/native-engine/auron/src/http/pprof.rs @@ -34,7 +34,7 @@ impl Default for PProfRequest { fn default() -> Self { PProfRequest { seconds: 5, - frequency: NonZeroI32::new(100).expect("nonzero"), + frequency: NonZeroI32::new(100).expect("non-zero frequency"), } } } diff --git a/native-engine/auron/src/lib.rs b/native-engine/auron/src/lib.rs index 46cc82adc..2dfb336e0 100644 --- a/native-engine/auron/src/lib.rs +++ b/native-engine/auron/src/lib.rs @@ -55,7 +55,7 @@ fn handle_unwinded(err: Box) { } fn handle_unwinded_scope(scope: impl FnOnce() -> Result) -> T { - match std::panic::catch_unwind(AssertUnwindSafe(|| scope().expect("scope"))) { + match std::panic::catch_unwind(AssertUnwindSafe(|| scope().expect("scope failed"))) { Ok(v) => v, Err(err) => { handle_unwinded(err); diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index 6882f9e83..293f76be1 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -166,7 +166,11 @@ pub fn cast_impl( col = struct_.column_by_name(adjust.as_str()); } if col.is_some() { - cast_impl(col.expect("col"), field.data_type(), match_struct_fields) + cast_impl( + col.expect("column missing"), + field.data_type(), + match_struct_fields, + ) } else { null_column_name.push(field.name().clone()); Ok(new_null_array(field.data_type(), struct_.len())) @@ -255,13 +259,10 @@ fn try_cast_string_array_to_integer(array: &dyn Array, cast_type: &DataType) -> macro_rules! cast { ($target_type:ident) => {{ type B = paste::paste! 
{[<$target_type Builder>]};
-        let array = array
-            .as_any()
-            .downcast_ref::()
-            .expect("Excepted a StringArray");
+        let string_array = as_string_array(array);
         let mut builder = B::new();
-        for v in array.iter() {
+        for v in string_array.iter() {
             match v {
                 Some(s) => builder.append_option(to_integer(s)),
                 None => builder.append_null(),
@@ -311,7 +312,7 @@ fn to_integer(input: &str)
     }

     let separator = b'.';
-    let radix = T::from_usize(10).expect("from_usize(10)");
+    let radix = T::from_usize(10).expect("from_usize(10) failed");
     let stop_value = T::min_value() / radix;
     let mut result = T::zero();
@@ -339,7 +340,7 @@ fn to_integer(input: &str)
         return None;
     }

-    result = result * radix - T::from_u8(digit).expect("digit 0..=9");
+    result = result * radix - T::from_u8(digit).expect("digit must be in 0..=9");
     // Since the previous result is less than or equal to stopValue(Long.MIN_VALUE /
     // radix), we can just use `result > 0` to check overflow. If result
     // overflows, we should stop.
@@ -431,14 +432,15 @@ fn to_date(s: &str) -> Option {

 #[cfg(test)]
 mod test {
-    use std::{error::Error, result::Result};
-
-    use datafusion::common::cast::{as_decimal128_array, as_float64_array, as_int32_array};
+    use datafusion::common::{
+        Result,
+        cast::{as_decimal128_array, as_float64_array, as_int32_array},
+    };

     use super::*;

     #[test]
-    fn test_boolean_to_string() -> Result<(), Box> {
+    fn test_boolean_to_string() -> Result<()> {
         let bool_array: ArrayRef =
             Arc::new(BooleanArray::from_iter(vec![None, Some(true), Some(false)]));
         let casted = cast(&bool_array, &DataType::Utf8)?;
@@ -450,7 +452,7 @@ mod test {
     }

     #[test]
-    fn test_float_to_int() -> Result<(), Box> {
+    fn test_float_to_int() -> Result<()> {
         let f64_array: ArrayRef = Arc::new(Float64Array::from_iter(vec![
             None,
             Some(123.456),
@@ -479,7 +481,7 @@ mod test {
     }

     #[test]
-    fn test_int_to_float() -> Result<(), Box> {
+    fn test_int_to_float() -> Result<()> {
         let i32_array: ArrayRef = Arc::new(Int32Array::from_iter(vec![
             None,
             Some(123),
@@ -502,7 +504,7 @@ mod test {
     }

     #[test]
-    fn test_int_to_decimal() -> Result<(), Box> {
+    fn test_int_to_decimal() -> Result<()> {
         let i32_array: ArrayRef = Arc::new(Int32Array::from_iter(vec![
             None,
             Some(123),
@@ -526,7 +528,7 @@ mod test {
     }

     #[test]
-    fn test_string_to_decimal() -> Result<(), Box> {
+    fn test_string_to_decimal() -> Result<()> {
         let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![
             None,
             Some("1e-8"),
@@ -558,7 +560,7 @@ mod test {
     }

     #[test]
-    fn test_decimal_to_string() -> Result<(), Box> {
+    fn test_decimal_to_string() -> Result<()> {
         let decimal_array: ArrayRef = Arc::new(
             Decimal128Array::from_iter(vec![
                 None,
@@ -575,7 +577,7 @@ mod test {
             casted
                 .as_any()
                 .downcast_ref::()
-                .ok_or("StringArray")?,
+                .expect("Expected a StringArray"),
             &StringArray::from_iter(vec![
                 None,
                 Some("123.000000000000000000"),
@@ -589,7 +591,7 @@ mod test {
     }

     #[test]
-    fn test_string_to_bigint() -> Result<(), Box> {
+    fn test_string_to_bigint() -> Result<()> {
         let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![
             None,
             Some("123"),
@@ -604,7 +606,7 @@ mod test {
             casted
                 .as_any()
                 .downcast_ref::()
-                .ok_or("Int64Array")?,
+                .expect("Expected an Int64Array"),
             &Int64Array::from_iter(vec![
                 None,
                 Some(123),
@@ -619,7 +621,7 @@ mod test {
     }

     #[test]
-    fn test_string_to_date() -> Result<(), Box> {
+    fn test_string_to_date() -> Result<()> {
         let string_array: ArrayRef = Arc::new(StringArray::from_iter(vec![
             None,
             Some("2001-02-03"),
diff --git a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs 
b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs index 0cbc07401..1cccb9e7c 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/eq_comparator.rs @@ -275,8 +275,14 @@ fn eq_fixed_list( ignores_null, )?; - let l_size = left.value_length().to_usize().expect("left len to_usize"); - let r_size = right.value_length().to_usize().expect("right len to_usize"); + let l_size = left + .value_length() + .to_usize() + .expect("left length to_usize failed"); + let r_size = right + .value_length() + .to_usize() + .expect("right length to_usize failed"); let size_eq = l_size == r_size; let f = eq_impl(left, right, ignores_null, move |i, j| { diff --git a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs index fec747fd7..bbed9b365 100644 --- a/native-engine/datafusion-ext-commons/src/io/batch_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/batch_serde.rs @@ -48,7 +48,9 @@ impl TransposeOpt { DataType::Null => 0, DataType::Boolean => 0, dt if dt.primitive_width() == Some(1) => 0, - dt if dt.primitive_width() >= Some(2) => dt.primitive_width().expect("width"), + dt if dt.primitive_width() >= Some(2) => { + dt.primitive_width().expect("primitive_width must be Some") + } DataType::Utf8 | DataType::Binary => 4, DataType::List(f) | DataType::Map(f, _) => { Self::data_type_bytes_width(f.data_type()).max(4) @@ -390,7 +392,7 @@ fn read_list_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let values_len = offsets.last().cloned().expect("offsets is non-empty") as usize; + let values_len = offsets.last().cloned().expect("offsets must be non-empty") as usize; let offsets_buffer: Buffer = Buffer::from_vec(offsets); let values = read_array( input, @@ -434,11 +436,11 @@ fn write_map_array( let first_offset = value_offsets .first() .cloned() - .expect("value_offsets is non-empty") as usize; + .expect("value_offsets must be non-empty") as usize; let entries_len = value_offsets .last() .cloned() - .expect("value_offsets is non-empty") as usize + .expect("value_offsets must be non-empty") as usize - first_offset; let keys = array.keys().slice(first_offset, entries_len); let values = array.values().slice(first_offset, entries_len); @@ -465,7 +467,7 @@ fn read_map_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let entries_len = offsets.last().cloned().expect("offsets is non-empty") as usize; + let entries_len = offsets.last().cloned().expect("offsets must be non-empty") as usize; let offsets_buffer = Buffer::from_vec(offsets); // build inner struct @@ -622,11 +624,11 @@ fn write_bytes_array, W: Write>( let first_offset = value_offsets .first() .cloned() - .expect("value_offsets is non-empty") as usize; + .expect("value_offsets must be non-empty") as usize; let last_offset = value_offsets .last() .cloned() - .expect("value_offsets is non-empty") as usize; + .expect("value_offsets must be non-empty") as usize; output.write_all(&array.value_data()[first_offset..last_offset])?; Ok(()) } @@ -645,7 +647,7 @@ fn read_bytes_array( }; let offsets = read_offsets(input, num_rows, transpose_opt)?; - let values_len = offsets.last().cloned().expect("offsets is non-empty") as usize; + let values_len = offsets.last().cloned().expect("offsets must be non-empty") as usize; let offsets_buffer = Buffer::from_vec(offsets); let data_buffer = Buffer::from_vec(read_bytes_slice(input, values_len)?.into()); @@ -665,7 +667,7 @@ mod 
test { use std::{io::Cursor, sync::Arc}; use arrow::{array::*, datatypes::*, record_batch::RecordBatch}; - use datafusion::assert_batches_eq; + use datafusion::{assert_batches_eq, common::Result}; use crate::io::{ batch_serde::{read_batch, write_batch}, @@ -673,7 +675,7 @@ mod test { }; #[test] - fn test_write_and_read_batch() -> std::result::Result<(), Box> { + fn test_write_and_read_batch() -> Result<()> { let array1: ArrayRef = Arc::new(StringArray::from_iter([ Some("20220101".to_owned()), Some("20220102你好🍹".to_owned()), @@ -703,7 +705,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -715,7 +717,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced @@ -724,7 +726,7 @@ mod test { } #[test] - fn test_write_and_read_batch_for_list() -> std::result::Result<(), Box> { + fn test_write_and_read_batch_for_list() -> Result<()> { let data = vec![ Some(vec![Some(0), Some(1), Some(2)]), None, @@ -757,7 +759,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_batches_eq!( vec![ "+-----------+-----------+", @@ -782,7 +784,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_batches_eq!( vec![ "+----------+----------+", @@ -802,7 +804,7 @@ mod test { } #[test] - fn test_write_and_read_batch_for_map() -> std::result::Result<(), Box> { + fn test_write_and_read_batch_for_map() -> Result<()> { let map_array: ArrayRef = Arc::new(MapArray::new_from_strings( ["00", "11", "22", "33", "44", "55", "66", "77"].into_iter(), &StringArray::from(vec![ @@ -828,7 +830,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -840,7 +842,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, sliced.schema())?, sliced @@ -849,8 +851,7 @@ mod test { } #[test] - fn test_write_and_read_batch_for_struct() -> std::result::Result<(), Box> - { + fn test_write_and_read_batch_for_struct() -> Result<()> { let c1: 
ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true])); let c2: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); let c3: ArrayRef = Arc::new(BooleanArray::from(vec![None, None, None, Some(true)])); @@ -872,7 +873,7 @@ mod test { write_batch(batch.num_rows(), batch.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, batch @@ -884,7 +885,7 @@ mod test { write_batch(sliced.num_rows(), sliced.columns(), &mut buf)?; let mut cursor = Cursor::new(buf); let (decoded_num_rows, decoded_cols) = - read_batch(&mut cursor, &batch.schema())?.expect("non-empty"); + read_batch(&mut cursor, &batch.schema())?.expect("non-empty batch"); assert_eq!( recover_named_batch(decoded_num_rows, &decoded_cols, batch.schema())?, sliced diff --git a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs index df3acdec6..451a4cd8f 100644 --- a/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs +++ b/native-engine/datafusion-ext-commons/src/io/ipc_compression.rs @@ -341,10 +341,10 @@ mod tests { writer.finish_current_buf()?; let mut reader = IpcCompressionReader::new(Cursor::new(buf)); - let (num_rows1, arrays1) = reader.read_batch(&schema)?.expect("non-empty"); + let (num_rows1, arrays1) = reader.read_batch(&schema)?.expect("non-empty batch"); assert_eq!(num_rows1, 2); assert_eq!(arrays1, &[test_array1]); - let (num_rows2, arrays2) = reader.read_batch(&schema)?.expect("non-empty"); + let (num_rows2, arrays2) = reader.read_batch(&schema)?.expect("non-empty batch"); assert_eq!(num_rows2, 2); assert_eq!(arrays2, &[test_array2]); assert!(reader.read_batch(&schema)?.is_none()); diff --git a/native-engine/datafusion-ext-commons/src/spark_hash.rs b/native-engine/datafusion-ext-commons/src/spark_hash.rs index 830233d33..66be98486 100644 --- a/native-engine/datafusion-ext-commons/src/spark_hash.rs +++ b/native-engine/datafusion-ext-commons/src/spark_hash.rs @@ -69,7 +69,7 @@ fn hash_array( let array = $column .as_any() .downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); if array.null_count() == 0 { for (i, hash) in $hashes.iter_mut().enumerate() { *hash = $h(&array.value(i).as_ref(), *hash); @@ -89,7 +89,7 @@ fn hash_array( let array = $column .as_any() .downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); let values = array.values(); if array.null_count() == 0 { @@ -111,7 +111,7 @@ fn hash_array( let array = $column .as_any() .downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); if array.null_count() == 0 { for (i, hash) in $hashes.iter_mut().enumerate() { @@ -256,7 +256,7 @@ fn hash_one( let array = $column .as_any() .downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); *$hash = $h( (array.value($idx as usize) as $ty).to_le_bytes().as_ref(), *$hash, @@ -269,7 +269,7 @@ fn hash_one( let array = $column .as_any() .downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); *$hash = $h(&array.value($idx as usize).as_ref(), *$hash); }; } @@ -279,7 +279,7 @@ fn hash_one( let array = $column .as_any() 
.downcast_ref::<$array_type>() - .expect("downcast"); + .expect("downcast to expected array type failed"); *$hash = $h(array.value($idx as usize).to_le_bytes().as_ref(), *$hash); }; } @@ -389,7 +389,7 @@ fn hash_one( #[cfg(test)] mod tests { - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ @@ -518,7 +518,7 @@ mod tests { } #[test] - fn test_list_array() -> Result<(), Box> { + fn test_list_array() -> Result<(), Box> { // Create inner array data: [1, 2, 3, 4, 5, 6] let value_data = ArrayData::builder(DataType::Int32) .len(6) @@ -545,7 +545,7 @@ mod tests { } #[test] - fn test_map_array() -> Result<(), Box> { + fn test_map_array() -> Result<(), Box> { // Construct key and values let key_data = ArrayData::builder(DataType::Int32) .len(8) diff --git a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs index 652551eab..663de05a6 100644 --- a/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs +++ b/native-engine/datafusion-ext-exprs/src/bloom_filter_might_contain.rs @@ -140,7 +140,7 @@ impl PhysicalExpr for BloomFilterMightContainExpr { let bloom_filter = bloom_filter .as_ref() .as_ref() - .expect("non-null bloom_filter"); + .expect("bloom_filter must be present"); // process with bloom filter let value = self.value_expr.evaluate(batch)?; diff --git a/native-engine/datafusion-ext-exprs/src/cast.rs b/native-engine/datafusion-ext-exprs/src/cast.rs index fcf8d6416..aaa859e00 100644 --- a/native-engine/datafusion-ext-exprs/src/cast.rs +++ b/native-engine/datafusion-ext-exprs/src/cast.rs @@ -102,8 +102,7 @@ impl PhysicalExpr for TryCastExpr { #[cfg(test)] mod test { - - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ArrayRef, Float32Array, Int32Array, StringArray}, @@ -115,7 +114,7 @@ mod test { use crate::cast::TryCastExpr; #[test] - fn test_ok_1() -> Result<(), Box> { + fn test_ok_1() -> Result<(), Box> { // input: Array // cast Float32 into Int32 let float_arr: ArrayRef = Arc::new(Float32Array::from(vec![ @@ -161,7 +160,7 @@ mod test { } #[test] - fn test_ok_2() -> Result<(), Box> { + fn test_ok_2() -> Result<(), Box> { // input: Array // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ @@ -201,7 +200,7 @@ mod test { } #[test] - fn test_ok_3() -> Result<(), Box> { + fn test_ok_3() -> Result<(), Box> { // input: Scalar // cast Utf8 into Float32 let string_arr: ArrayRef = Arc::new(StringArray::from(vec![ diff --git a/native-engine/datafusion-ext-exprs/src/string_contains.rs b/native-engine/datafusion-ext-exprs/src/string_contains.rs index 21e6995bf..f0f60cf0d 100644 --- a/native-engine/datafusion-ext-exprs/src/string_contains.rs +++ b/native-engine/datafusion-ext-exprs/src/string_contains.rs @@ -119,7 +119,7 @@ impl PhysicalExpr for StringContainsExpr { #[cfg(test)] mod test { - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ArrayRef, BooleanArray, StringArray}, @@ -131,7 +131,7 @@ mod test { use crate::string_contains::StringContainsExpr; #[test] - fn test_ok() -> std::result::Result<(), Box> { + fn test_ok() -> Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("abrr".to_string()), @@ -172,7 +172,7 @@ mod test { } #[test] - fn test_scalar_string() -> std::result::Result<(), Box> { + fn test_scalar_string() -> Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef 
= Arc::new(StringArray::from(vec![ Some("abrr".to_string()), diff --git a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs index a4ec448ae..77b55f241 100644 --- a/native-engine/datafusion-ext-exprs/src/string_ends_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_ends_with.rs @@ -121,8 +121,7 @@ impl PhysicalExpr for StringEndsWithExpr { #[cfg(test)] mod test { - - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ArrayRef, BooleanArray, StringArray}, @@ -134,7 +133,7 @@ mod test { use crate::string_ends_with::StringEndsWithExpr; #[test] - fn test_array() -> std::result::Result<(), Box> { + fn test_array() -> Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("abrrbrr".to_string()), Some("rrjndebcsabdji".to_string()), @@ -173,7 +172,7 @@ mod test { } #[test] - fn test_scalar_string() -> std::result::Result<(), Box> { + fn test_scalar_string() -> Result<(), Box> { // create a StringArray from the vector let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("Hello, Rust".to_string()), diff --git a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs index 4be5216c4..68a2813ea 100644 --- a/native-engine/datafusion-ext-exprs/src/string_starts_with.rs +++ b/native-engine/datafusion-ext-exprs/src/string_starts_with.rs @@ -121,8 +121,7 @@ impl PhysicalExpr for StringStartsWithExpr { #[cfg(test)] mod test { - - use std::sync::Arc; + use std::{error::Error, sync::Arc}; use arrow::{ array::{ArrayRef, BooleanArray, StringArray}, @@ -134,7 +133,7 @@ mod test { use crate::string_starts_with::StringStartsWithExpr; #[test] - fn test_ok() -> std::result::Result<(), Box> { + fn test_ok() -> Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ None, Some("rabaok".to_string()), @@ -170,7 +169,7 @@ mod test { } #[test] - fn test_scalar_string() -> std::result::Result<(), Box> { + fn test_scalar_string() -> Result<(), Box> { let string_array: ArrayRef = Arc::new(StringArray::from(vec![ Some("Hello, Rust".to_string()), Some("Hello, He".to_string()), diff --git a/native-engine/datafusion-ext-functions/src/spark_crypto.rs b/native-engine/datafusion-ext-functions/src/spark_crypto.rs index 188a9bd9a..de406f307 100644 --- a/native-engine/datafusion-ext-functions/src/spark_crypto.rs +++ b/native-engine/datafusion-ext-functions/src/spark_crypto.rs @@ -99,7 +99,7 @@ fn hex_encode>(data: T) -> String { let mut s = String::with_capacity(data.as_ref().len() * 2); for b in data.as_ref() { // Writing to a string never errors, so we can unwrap here. 
- write!(&mut s, "{b:02x}").expect("data"); + write!(&mut s, "{b:02x}").expect("writing to String should not fail"); } s } diff --git a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs index c9d9b6bb8..9d5c4078f 100644 --- a/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs +++ b/native-engine/datafusion-ext-functions/src/spark_get_json_object.rs @@ -170,9 +170,10 @@ pub fn spark_get_parsed_json_object(args: &[ColumnarValue]) -> Result { - fallback_results_iter.next().expect("next").map(Cow::from) - } + ParsedJsonValue::Fallback(_) => fallback_results_iter + .next() + .expect("fallback result iterator must have a next element") + .map(Cow::from), } }) })); @@ -205,9 +206,10 @@ pub fn spark_get_parsed_json_simple_field( .and_then(|object| object.get(field)) .and_then(|v| sonic_value_to_string(v).unwrap_or_default()) .map(Cow::from), - ParsedJsonValue::Fallback(_) => { - fallback_results_iter.next().expect("next").map(Cow::from) - } + ParsedJsonValue::Fallback(_) => fallback_results_iter + .next() + .expect("fallback result iterator must have a next element") + .map(Cow::from), } }) })); @@ -275,17 +277,6 @@ enum HiveGetJsonObjectError { InvalidInput, } -impl std::fmt::Display for HiveGetJsonObjectError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - HiveGetJsonObjectError::InvalidJsonPath => "InvalidJsonPath", - HiveGetJsonObjectError::InvalidInput => "InvalidInput", - }) - } -} - -impl std::error::Error for HiveGetJsonObjectError {} - struct HiveGetJsonObjectEvaluator { matchers: Vec, } @@ -564,9 +555,9 @@ impl HiveGetJsonObjectMatcher { .flat_map(|r| { // keep consistent with hive UDFJson let iter: Box> = match r { - v if v.is_array() => { - Box::new(v.into_array().expect("array").into_iter()) - } + v if v.is_array() => Box::new( + v.into_array().expect("expected array value").into_iter(), + ), other => Box::new(std::iter::once(other)), }; iter @@ -634,73 +625,109 @@ mod test { let path = "$.owner"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("amy".to_owned()) ); let path = "$. owner"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("amy".to_owned()) ); let path = "$.store.bicycle.price"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("19.95".to_owned()) ); let path = "$. store. bicycle. price"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("19.95".to_owned()) ); let path = "$.store.fruit[0]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$. store. 
fruit[0]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some(r#"{"weight":8,"type":"apple"}"#.to_owned()) ); let path = "$.store.fruit[1].weight"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("9".to_owned()) ); let path = "$.store.fruit[*]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$. store. fruit[*]"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some(r#"[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]"#.to_owned()) ); let path = "$.store.fruit.[1].type"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("pear".to_owned()) ); let path = "$. store. fruit. [1]. type"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), Some("pear".to_owned()) ); let path = "$.non_exist_key"; assert_eq!( - HiveGetJsonObjectEvaluator::try_new(path)?.evaluate(input)?, + HiveGetJsonObjectEvaluator::try_new(path) + .expect("failed to create evaluator") + .evaluate(input) + .expect("evaluation failed"), None ); Ok(()) @@ -738,32 +765,57 @@ mod test { let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); assert_eq!(v, Some(r#"["浦东","西直门"]"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.NOT_EXISTED")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); assert_eq!(v, None); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.name")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v").ok_or("v")?; + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element") + .ok_or("value is NULL")?; assert!(v.contains("Asher")); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location.city")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); assert_eq!(v, Some(r#"["1.234",1.234]"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[0]")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - 
let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); assert_eq!(v, Some(r#"{"city":"1.234","county":"浦东"}"#)); let path = ColumnarValue::Scalar(ScalarValue::from("$.message.location[].county")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); assert_eq!(v, Some(r#"["浦东","西直门"]"#)); Ok(()) } @@ -800,7 +852,11 @@ mod test { let path = ColumnarValue::Scalar(ScalarValue::from("$.i1.j2")); let r = spark_get_parsed_json_object(&[parsed.clone(), path])?.into_array(1)?; - let v = r.as_string::().iter().next().expect("v"); + let v = r + .as_string::() + .iter() + .next() + .expect("missing first element"); // NOTE: // standard jsonpath should output [[200,300],[400, 500],null,"other"] diff --git a/native-engine/datafusion-ext-functions/src/spark_strings.rs b/native-engine/datafusion-ext-functions/src/spark_strings.rs index 94b2ef7c5..0e5a12816 100644 --- a/native-engine/datafusion-ext-functions/src/spark_strings.rs +++ b/native-engine/datafusion-ext-functions/src/spark_strings.rs @@ -143,7 +143,7 @@ pub fn string_concat(args: &[ColumnarValue]) -> Result { } ColumnarValue::Array(v) => { if v.is_valid(index) { - let v = as_string_array(v).expect("string array"); + let v = as_string_array(v).expect("Expected a StringArray"); owned_string.push_str(v.value(index)); } else { is_not_null = false; @@ -293,7 +293,8 @@ pub fn string_concat_ws(args: &[ColumnarValue]) -> Result { } Arg::List(list) => { if list.is_valid(i) { - let strings = as_string_array(list.values()).expect("string array"); + let strings = + as_string_array(list.values()).expect("Expected a StringArray"); let offsets = list.value_offsets(); let l = offsets[i] as usize; let r = offsets[i + 1] as usize; diff --git a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs index 8c3ccdb9b..2794684f9 100644 --- a/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs +++ b/native-engine/datafusion-ext-plans/src/agg/agg_ctx.rs @@ -283,7 +283,7 @@ impl AggContext { .iter() .skip(batch_start_idx) .take(batch_end_idx - batch_start_idx) - .map(|bytes| bytes.expect("bytes")) + .map(|bytes| bytes.expect("non-null bytes")) .collect::>(); let mut cursors = array .iter() diff --git a/native-engine/datafusion-ext-plans/src/agg/agg_table.rs b/native-engine/datafusion-ext-plans/src/agg/agg_table.rs index d1c26e072..c432c892d 100644 --- a/native-engine/datafusion-ext-plans/src/agg/agg_table.rs +++ b/native-engine/datafusion-ext-plans/src/agg/agg_table.rs @@ -821,8 +821,8 @@ impl<'a> RecordsSpillCursor<'a> { )?; // load next bucket head - self.cur_bucket_idx = read_len(&mut self.input).expect("read_len"); - self.cur_bucket_count = read_len(&mut self.input).expect("read_len"); + self.cur_bucket_idx = read_len(&mut self.input).expect("read bucket index failed"); + self.cur_bucket_count = read_len(&mut self.input).expect("read bucket count failed"); Ok((acc_table, keys)) } } diff --git a/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs b/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs index b939c0796..6aff3c849 100644 --- a/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs +++ b/native-engine/datafusion-ext-plans/src/agg/bloom_filter.rs @@ -127,7 +127,7 @@ impl Agg for AggBloomFilter { self.num_bits, )); } - bf.as_mut().expect("bf") + 
bf.as_mut().expect("bloom_filter missing") } _ => return df_unimplemented_err!("AggBloomFilter only supports one bloom filter"), }; diff --git a/native-engine/datafusion-ext-plans/src/agg/collect.rs b/native-engine/datafusion-ext-plans/src/agg/collect.rs index e03725196..225d4303d 100644 --- a/native-engine/datafusion-ext-plans/src/agg/collect.rs +++ b/native-engine/datafusion-ext-plans/src/agg/collect.rs @@ -442,7 +442,7 @@ impl AccList { } pub fn append(&mut self, value: &ScalarValue, nullable: bool) { - write_scalar(&value, nullable, &mut self.raw).expect("write_scalar"); + write_scalar(&value, nullable, &mut self.raw).expect("write scalar failed"); } pub fn merge(&mut self, other: &mut Self) { @@ -456,7 +456,9 @@ impl AccList { fn next(&mut self) -> Option { if self.0.position() < self.0.get_ref().len() as u64 { - return Some(read_scalar(&mut self.0, &self.1, self.2).expect("read_scalar")); + return Some( + read_scalar(&mut self.0, &self.1, self.2).expect("read scalar failed"), + ); } None } @@ -534,7 +536,7 @@ impl AccSet { pub fn append(&mut self, value: &ScalarValue, nullable: bool) { let old_raw_len = self.list.raw.len(); - write_scalar(value, nullable, &mut self.list.raw).expect("write_scalar"); + write_scalar(value, nullable, &mut self.list.raw).expect("write scalar failed"); self.append_raw_inline(old_raw_len); } diff --git a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs index cc747d79c..15dd82bfc 100644 --- a/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs +++ b/native-engine/datafusion-ext-plans/src/agg/spark_udaf_wrapper.rs @@ -227,14 +227,14 @@ impl Agg for SparkUDAFWrapper { } fn create_acc_column(&self, num_rows: usize) -> AccColumnRef { - let jcontext = self.jcontext().expect("jcontext"); + let jcontext = self.jcontext().expect("jcontext must be initialized"); let rows = jni_call!(SparkUDAFWrapperContext(jcontext.as_obj()).initialize( num_rows as i32, )-> JObject) - .expect("num_rows"); + .expect("init rows failed"); - let jcontext = self.jcontext().expect("jcontext"); - let obj = jni_new_global_ref!(rows.as_obj()).expect("rows"); + let jcontext = self.jcontext().expect("jcontext must be initialized"); + let obj = jni_new_global_ref!(rows.as_obj()).expect("failed to create global ref for rows"); Box::new(AccUDAFBufferRowsColumn { obj, jcontext }) } diff --git a/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs b/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs index 38c4cff34..a5c4d1ed0 100644 --- a/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs +++ b/native-engine/datafusion-ext-plans/src/common/cached_exprs_evaluator.rs @@ -479,7 +479,7 @@ fn prune_expr_cols(expr: &PhysicalExprRef) -> (PhysicalExprRef, Vec) { Ok(Transformed::yes(expr)) } }) - .expect("transform") + .expect("expr transform failed") .data; let mapped_cols: Vec = used_cols diff --git a/native-engine/datafusion-ext-plans/src/common/column_pruning.rs b/native-engine/datafusion-ext-plans/src/common/column_pruning.rs index fa29324ba..143aaa170 100644 --- a/native-engine/datafusion-ext-plans/src/common/column_pruning.rs +++ b/native-engine/datafusion-ext-plans/src/common/column_pruning.rs @@ -114,7 +114,7 @@ pub fn extend_projection_by_expr( )) }) .map(|r| r.data) - .expect("transform") + .expect("expr transform failed") } pub fn map_columns(expr: &PhysicalExprRef, mapping: &HashMap) -> PhysicalExprRef { @@ -130,5 +130,5 @@ pub fn 
map_columns(expr: &PhysicalExprRef, mapping: &HashMap) -> P
 ))
 })
 .map(|r| r.data)
-        .expect("transform")
+        .expect("expr transform failed")
 }

diff --git a/native-engine/datafusion-ext-plans/src/common/execution_context.rs b/native-engine/datafusion-ext-plans/src/common/execution_context.rs
index 4dd4e3193..8f2dd14fc 100644
--- a/native-engine/datafusion-ext-plans/src/common/execution_context.rs
+++ b/native-engine/datafusion-ext-plans/src/common/execution_context.rs
@@ -724,7 +724,7 @@ impl WrappedSender {
 send_time.inspect(|send_time| {
 exclude_time
 .as_ref()
-                .expect("exclude_time")
+                .expect("exclude_time must be set")
 .sub_duration(send_time.elapsed());
 });
 }

diff --git a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs
index 73e04353e..50a7cdfe1 100644
--- a/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs
+++ b/native-engine/datafusion-ext-plans/src/common/row_null_checker.rs
@@ -77,7 +77,9 @@ impl RowNullChecker {
 DataType::Boolean => FieldConfig::new_boolean(sort_options),
 dt if dt.is_primitive() => FieldConfig::new_primitive(
 sort_options,
-                1 + dt.primitive_width().expect("primitive_width"),
+                1 + dt
+                    .primitive_width()
+                    .expect("primitive_width must be present"),
 ),
 // DataType::Int8 => FieldConfig::new_primitive(sort_options, 2), // 1 byte null flag +
 // // 1 byte value

diff --git a/native-engine/datafusion-ext-plans/src/generate_exec.rs b/native-engine/datafusion-ext-plans/src/generate_exec.rs
index 2faf09b8e..ae975237f 100644
--- a/native-engine/datafusion-ext-plans/src/generate_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/generate_exec.rs
@@ -106,7 +106,7 @@ impl GenerateExec {
 self.generator_output_schema.clone(),
 outer,
 )
-        .expect("try_new")
+        .expect("GenerateExec::try_new failed")
 }
 }

diff --git a/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs b/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
index 43383353a..c4c51d6a7 100644
--- a/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
+++ b/native-engine/datafusion-ext-plans/src/joins/join_hash_map.rs
@@ -146,8 +146,8 @@ impl Table {
 match len {
 0 => unreachable!(),
 1 => {
-                let single = mapped_indices.pop().expect("pop");
-                let _len = mapped_indices.pop().expect("pop");
+                let single = mapped_indices.pop().expect("missing index");
+                let _len = mapped_indices.pop().expect("missing length");
 MapValue::new_single(single)
 }
 _ => MapValue::new_range(start),

diff --git a/native-engine/datafusion-ext-plans/src/parquet_exec.rs b/native-engine/datafusion-ext-plans/src/parquet_exec.rs
index f8c8a2b0c..da272c805 100644
--- a/native-engine/datafusion-ext-plans/src/parquet_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/parquet_exec.rs
@@ -332,7 +332,7 @@ impl AsyncFileReader for ParquetFileReaderRef {
 continue;
 }

-            let last_merged_range = merged_ranges.last_mut().expect("last_mut");
+            let last_merged_range = merged_ranges.last_mut().expect("missing last range");
 if range.start <= last_merged_range.end + max_over_read_size as u64 {
 last_merged_range.end = range.end.max(last_merged_range.end);
 } else {

diff --git a/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs b/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs
index c7d2cd81d..647e7eff3 100644
--- a/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs
+++ b/native-engine/datafusion-ext-plans/src/parquet_sink_exec.rs
@@ -291,7 +291,7 @@ fn execute_parquet_sink(
 tokio::task::spawn_blocking(move || {
let mut part_writer = part_writer.lock(); - let w = part_writer.as_mut().expect("part_writer"); + let w = part_writer.as_mut().expect("missing partition writer"); w.write(&sub_batch) }) .await diff --git a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs index 95c8ac4ef..f29f6d217 100644 --- a/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs +++ b/native-engine/datafusion-ext-plans/src/shuffle/buffered_data.rs @@ -311,7 +311,8 @@ fn sort_batches_by_partition_id( part_ids } Partitioning::RangePartitioning(sort_expr, _, bounds) => { - evaluate_range_partition_ids(&batch, sort_expr, bounds).expect("eval_part_ids") + evaluate_range_partition_ids(&batch, sort_expr, bounds) + .expect("failed to evaluate range partition ids") } _ => unreachable!("unsupported partitioning: {:?}", partitioning), }; diff --git a/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs b/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs index 787c23394..aceb1d00b 100644 --- a/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs +++ b/native-engine/datafusion-ext-plans/src/shuffle/single_repartitioner.rs @@ -57,7 +57,7 @@ impl SingleShuffleRepartitioner { .wrap_writer(open_shuffle_file(&self.output_data_file)?), )); } - Ok(output_data.as_mut().expect("output_data")) + Ok(output_data.as_mut().expect("missing output writer")) } } diff --git a/native-engine/datafusion-ext-plans/src/sort_exec.rs b/native-engine/datafusion-ext-plans/src/sort_exec.rs index 92c836aba..96d9ef376 100644 --- a/native-engine/datafusion-ext-plans/src/sort_exec.rs +++ b/native-engine/datafusion-ext-plans/src/sort_exec.rs @@ -460,7 +460,7 @@ impl SortedBlock for InMemSortedBlock { if let Some(batch) = batch { self.mem_used -= batch.get_batch_mem_size(); self.mem_used -= self.sorted_keys[0].mem_size(); - self.sorted_keys.pop_front().expect("pop_front"); + self.sorted_keys.pop_front().expect("missing key"); self.cur_row_idx = usize::MAX; Ok(Some(batch)) } else { @@ -1049,7 +1049,7 @@ fn create_zero_column_batch(num_rows: usize) -> RecordBatch { vec![], &RecordBatchOptions::new().with_row_count(Some(num_rows)), ) - .expect("record_batch") + .expect("failed to create empty RecordBatch") } struct PruneSortKeysFromBatch { @@ -1293,7 +1293,7 @@ impl KeyCollector for SqueezeKeyCollector { fn add_key(&mut self, key: &[u8]) { self.sorted_key_writer .write_key(key, &mut self.store) - .expect("write_key"); + .expect("failed to write key"); } fn freeze(&mut self) { @@ -1596,7 +1596,7 @@ mod fuzztest { None, )?); let sort = Arc::new(datafusion::physical_plan::sorts::sort::SortExec::new( - LexOrdering::new(sort_exprs.iter().cloned()).expect("sort_exprs"), + LexOrdering::new(sort_exprs.iter().cloned()).expect("invalid sort exprs"), input, )); let output = datafusion::physical_plan::collect(sort.clone(), task_ctx.clone()).await?; diff --git a/native-engine/datafusion-ext-plans/src/window/window_context.rs b/native-engine/datafusion-ext-plans/src/window/window_context.rs index 37e423747..a76eb1253 100644 --- a/native-engine/datafusion-ext-plans/src/window/window_context.rs +++ b/native-engine/datafusion-ext-plans/src/window/window_context.rs @@ -137,7 +137,7 @@ impl WindowContext { Ok(self .partition_row_converter .lock() - .expect("lock") + .expect("partition_row_converter lock poisoned") .convert_columns( &self .partition_spec @@ -154,7 +154,7 @@ impl WindowContext { Ok(self .order_row_converter .lock() - 
.expect("lock") + .expect("order_row_converter lock poisoned") .convert_columns( &self .order_spec From 198cdb4340ae1b89fe6ae6a2bb3acbf09389871c Mon Sep 17 00:00:00 2001 From: yew1eb Date: Tue, 2 Dec 2025 01:28:48 +0800 Subject: [PATCH 10/10] up --- native-engine/auron-serde/src/from_proto.rs | 4 ++-- native-engine/datafusion-ext-commons/src/arrow/cast.rs | 5 ++++- .../datafusion-ext-commons/src/io/scalar_serde.rs | 2 +- native-engine/datafusion-ext-plans/src/joins/test.rs | 7 +++++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/native-engine/auron-serde/src/from_proto.rs b/native-engine/auron-serde/src/from_proto.rs index 804e72d89..7b4ebc43e 100644 --- a/native-engine/auron-serde/src/from_proto.rs +++ b/native-engine/auron-serde/src/from_proto.rs @@ -1292,12 +1292,12 @@ impl From<&protobuf::ColumnStats> for ColumnStatistics { max_value: cs .max_value .as_ref() - .map(|m| Precision::Exact(m.try_into().expect("max_value"))) + .map(|m| Precision::Exact(m.try_into().expect("invalid max_value"))) .unwrap_or(Precision::Absent), min_value: cs .min_value .as_ref() - .map(|m| Precision::Exact(m.try_into().expect("min_value"))) + .map(|m| Precision::Exact(m.try_into().expect("invalid min_value"))) .unwrap_or(Precision::Absent), sum_value: Precision::Absent, distinct_count: Precision::Exact(cs.distinct_count as usize), diff --git a/native-engine/datafusion-ext-commons/src/arrow/cast.rs b/native-engine/datafusion-ext-commons/src/arrow/cast.rs index 293f76be1..8d1fd75bb 100644 --- a/native-engine/datafusion-ext-commons/src/arrow/cast.rs +++ b/native-engine/datafusion-ext-commons/src/arrow/cast.rs @@ -259,7 +259,10 @@ fn try_cast_string_array_to_integer(array: &dyn Array, cast_type: &DataType) -> macro_rules! cast { ($target_type:ident) => {{ type B = paste::paste! {[<$target_type Builder>]}; - let string_array = as_string_array(array); + let string_array = array + .as_any() + .downcast_ref::() + .expect("Expected a StringArray"); let mut builder = B::new(); for v in string_array.iter() { diff --git a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs index ce28e2bf6..90d6ec98b 100644 --- a/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs +++ b/native-engine/datafusion-ext-commons/src/io/scalar_serde.rs @@ -42,7 +42,7 @@ pub fn write_scalar(value: &ScalarValue, nullable: bool, output: &mut write_u8(0, output)?; } } else { - output.write_all(&$v.expect("ok").to_ne_bytes())?; + output.write_all(&$v.expect("non-null value required").to_ne_bytes())?; } }}; } diff --git a/native-engine/datafusion-ext-plans/src/joins/test.rs b/native-engine/datafusion-ext-plans/src/joins/test.rs index 90282bb41..ccdff53aa 100644 --- a/native-engine/datafusion-ext-plans/src/joins/test.rs +++ b/native-engine/datafusion-ext-plans/src/joins/test.rs @@ -91,8 +91,11 @@ mod tests { } fn build_table_from_batches(batches: Vec) -> Arc { - let schema = batches.first().expect("first").schema(); - Arc::new(TestMemoryExec::try_new(&[batches], schema, None).expect("memory_exec")) + let schema = batches.first().expect("missing first batch").schema(); + Arc::new( + TestMemoryExec::try_new(&[batches], schema, None) + .expect("failed to create memory exec"), + ) } fn build_date_table(