diff --git a/src/tests/float_cast_tie_regressions.rs b/src/tests/float_cast_tie_regressions.rs index cb7db70..04865ab 100644 --- a/src/tests/float_cast_tie_regressions.rs +++ b/src/tests/float_cast_tie_regressions.rs @@ -98,6 +98,57 @@ fn float_cast_i32_ties_to_even_across_backends() { } } +#[test] +fn float_cast_i32_respects_defined_range_boundaries_across_backends() { + let lower = i32::MIN as f32; + let upper_exclusive = -(i32::MIN as f32); + let values = vec![ + lower, + f32::from_bits(lower.to_bits() - 1), // one ULP toward zero, still >= i32::MIN + -65_537.5, + -0.5, + 0.5, + 65_537.5, + f32::from_bits(upper_exclusive.to_bits() - 2), + f32::from_bits(upper_exclusive.to_bits() - 1), + ]; + let expected = values + .iter() + .map(|&value| value.round_ties_even() as i32) + .collect::<Vec<_>>(); + + assert_eq!(run_f32(&values, cast_f32_to_i32), expected); + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + if std::arch::is_x86_feature_detected!("sse2") { + assert_eq!( + run_f32(&values, |values, out| unsafe { + cast_f32_to_i32_sse2(values, out) + }), + expected + ); + } + if std::arch::is_x86_feature_detected!("sse4.1") { + assert_eq!( + run_f32(&values, |values, out| unsafe { + cast_f32_to_i32_sse41(values, out) + }), + expected + ); + } + if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") + { + assert_eq!( + run_f32(&values, |values, out| unsafe { + cast_f32_to_i32_avx2(values, out) + }), + expected + ); + } + } +} + #[test] fn float_cast_i64_ties_to_even_across_backends() { let values: Vec = vec![ @@ -139,3 +190,54 @@ fn float_cast_i64_ties_to_even_across_backends() { } } } + +#[test] +fn float_cast_i64_respects_defined_range_boundaries_across_backends() { + let lower = i64::MIN as f64; + let upper_exclusive = -(i64::MIN as f64); + let values = vec![ + lower, + f64::from_bits(lower.to_bits() - 1), // one ULP toward zero, still >= i64::MIN + -4_503_599_627_370_495.5, + -0.5, + 0.5, + 4_503_599_627_370_495.5, + f64::from_bits(upper_exclusive.to_bits() - 2), +
f64::from_bits(upper_exclusive.to_bits() - 1), + ]; + let expected = values + .iter() + .map(|&value| value.round_ties_even() as i64) + .collect::>(); + + assert_eq!(run_f64(&values, cast_f64_to_i64), expected); + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + if std::arch::is_x86_feature_detected!("sse2") { + assert_eq!( + run_f64(&values, |values, out| unsafe { + cast_f64_to_i64_sse2(values, out) + }), + expected + ); + } + if std::arch::is_x86_feature_detected!("sse4.1") { + assert_eq!( + run_f64(&values, |values, out| unsafe { + cast_f64_to_i64_sse41(values, out) + }), + expected + ); + } + if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") + { + assert_eq!( + run_f64(&values, |values, out| unsafe { + cast_f64_to_i64_avx2(values, out) + }), + expected + ); + } + } +} diff --git a/src/tests/float_special_value_regressions.rs b/src/tests/float_special_value_regressions.rs new file mode 100644 index 0000000..d180a59 --- /dev/null +++ b/src/tests/float_special_value_regressions.rs @@ -0,0 +1,471 @@ +#![allow(dead_code, unused_parens)] + +use crate::prelude::*; + +fn assert_f32_bits_eq(actual: &[f32], expected: &[f32]) { + assert_eq!(actual.len(), expected.len()); + for (actual, expected) in actual.iter().zip(expected.iter()) { + if expected.is_nan() { + assert!(actual.is_nan()); + } else { + assert_eq!(actual.to_bits(), expected.to_bits()); + } + } +} + +fn assert_f64_bits_eq(actual: &[f64], expected: &[f64]) { + assert_eq!(actual.len(), expected.len()); + for (actual, expected) in actual.iter().zip(expected.iter()) { + if expected.is_nan() { + assert!(actual.is_nan()); + } else { + assert_eq!(actual.to_bits(), expected.to_bits()); + } + } +} + +fn assert_f32_scalarish_eq(actual: &[f32], expected: &[f32]) { + assert_eq!(actual.len(), expected.len()); + for (actual, expected) in actual.iter().zip(expected.iter()) { + if expected.is_nan() { + assert!(actual.is_nan()); + } else { + assert_eq!(*actual, 
*expected); + } + } +} + +fn assert_f64_scalarish_eq(actual: &[f64], expected: &[f64]) { + assert_eq!(actual.len(), expected.len()); + for (actual, expected) in actual.iter().zip(expected.iter()) { + if expected.is_nan() { + assert!(actual.is_nan()); + } else { + assert_eq!(*actual, *expected); + } + } +} + +simd_unsafe_generate_all!( + fn div_f32_edges(lhs: &[f32], rhs: &[f32], out: &mut [f32]) { + assert_eq!(lhs.len(), rhs.len()); + assert_eq!(lhs.len(), out.len()); + + let mut lhs = lhs; + let mut rhs = rhs; + let mut out = out; + + while lhs.len() >= S::Vf32::WIDTH { + let a = S::Vf32::load_from_slice(lhs); + let b = S::Vf32::load_from_slice(rhs); + (a / b).copy_to_slice(out); + + lhs = &lhs[S::Vf32::WIDTH..]; + rhs = &rhs[S::Vf32::WIDTH..]; + out = &mut out[S::Vf32::WIDTH..]; + } + + for ((&a, &b), slot) in lhs.iter().zip(rhs.iter()).zip(out.iter_mut()) { + *slot = a / b; + } + } +); + +simd_unsafe_generate_all!( + fn div_f64_edges(lhs: &[f64], rhs: &[f64], out: &mut [f64]) { + assert_eq!(lhs.len(), rhs.len()); + assert_eq!(lhs.len(), out.len()); + + let mut lhs = lhs; + let mut rhs = rhs; + let mut out = out; + + while lhs.len() >= S::Vf64::WIDTH { + let a = S::Vf64::load_from_slice(lhs); + let b = S::Vf64::load_from_slice(rhs); + (a / b).copy_to_slice(out); + + lhs = &lhs[S::Vf64::WIDTH..]; + rhs = &rhs[S::Vf64::WIDTH..]; + out = &mut out[S::Vf64::WIDTH..]; + } + + for ((&a, &b), slot) in lhs.iter().zip(rhs.iter()).zip(out.iter_mut()) { + *slot = a / b; + } + } +); + +simd_unsafe_generate_all!( + fn ceil_f32_edges(values: &[f32], out: &mut [f32]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf32::WIDTH { + let a = S::Vf32::load_from_slice(values); + a.ceil().copy_to_slice(out); + + values = &values[S::Vf32::WIDTH..]; + out = &mut out[S::Vf32::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.ceil(); + } + } +); + +simd_unsafe_generate_all!( + 
fn floor_f32_edges(values: &[f32], out: &mut [f32]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf32::WIDTH { + let a = S::Vf32::load_from_slice(values); + a.floor().copy_to_slice(out); + + values = &values[S::Vf32::WIDTH..]; + out = &mut out[S::Vf32::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.floor(); + } + } +); + +simd_unsafe_generate_all!( + fn round_f32_edges(values: &[f32], out: &mut [f32]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf32::WIDTH { + let a = S::Vf32::load_from_slice(values); + a.round().copy_to_slice(out); + + values = &values[S::Vf32::WIDTH..]; + out = &mut out[S::Vf32::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.round(); + } + } +); + +simd_unsafe_generate_all!( + fn sqrt_f32_edges(values: &[f32], out: &mut [f32]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf32::WIDTH { + let a = S::Vf32::load_from_slice(values); + a.sqrt().copy_to_slice(out); + + values = &values[S::Vf32::WIDTH..]; + out = &mut out[S::Vf32::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.sqrt(); + } + } +); + +simd_unsafe_generate_all!( + fn ceil_f64_edges(values: &[f64], out: &mut [f64]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf64::WIDTH { + let a = S::Vf64::load_from_slice(values); + a.ceil().copy_to_slice(out); + + values = &values[S::Vf64::WIDTH..]; + out = &mut out[S::Vf64::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.ceil(); + } + } +); + +simd_unsafe_generate_all!( + fn floor_f64_edges(values: &[f64], out: &mut [f64]) { + assert_eq!(values.len(), out.len()); + + let mut values = 
values; + let mut out = out; + + while values.len() >= S::Vf64::WIDTH { + let a = S::Vf64::load_from_slice(values); + a.floor().copy_to_slice(out); + + values = &values[S::Vf64::WIDTH..]; + out = &mut out[S::Vf64::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.floor(); + } + } +); + +simd_unsafe_generate_all!( + fn round_f64_edges(values: &[f64], out: &mut [f64]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf64::WIDTH { + let a = S::Vf64::load_from_slice(values); + a.round().copy_to_slice(out); + + values = &values[S::Vf64::WIDTH..]; + out = &mut out[S::Vf64::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.round(); + } + } +); + +simd_unsafe_generate_all!( + fn sqrt_f64_edges(values: &[f64], out: &mut [f64]) { + assert_eq!(values.len(), out.len()); + + let mut values = values; + let mut out = out; + + while values.len() >= S::Vf64::WIDTH { + let a = S::Vf64::load_from_slice(values); + a.sqrt().copy_to_slice(out); + + values = &values[S::Vf64::WIDTH..]; + out = &mut out[S::Vf64::WIDTH..]; + } + + for (&value, slot) in values.iter().zip(out.iter_mut()) { + *slot = value.sqrt(); + } + } +); + +fn run_f32_unary(values: &[f32], f: impl Fn(&[f32], &mut [f32])) -> Vec { + let mut out = vec![0.0; values.len()]; + f(values, &mut out); + out +} + +fn run_f64_unary(values: &[f64], f: impl Fn(&[f64], &mut [f64])) -> Vec { + let mut out = vec![0.0; values.len()]; + f(values, &mut out); + out +} + +fn run_f32_binary(lhs: &[f32], rhs: &[f32], f: impl Fn(&[f32], &[f32], &mut [f32])) -> Vec { + let mut out = vec![0.0; lhs.len()]; + f(lhs, rhs, &mut out); + out +} + +fn run_f64_binary(lhs: &[f64], rhs: &[f64], f: impl Fn(&[f64], &[f64], &mut [f64])) -> Vec { + let mut out = vec![0.0; lhs.len()]; + f(lhs, rhs, &mut out); + out +} + +#[test] +fn float_div_special_values_match_scalar_for_f32() { + let lhs = vec![ + -0.0, + 
0.0, + 1.0, + -1.0, + f32::MIN_POSITIVE, + f32::from_bits(1), + f32::INFINITY, + f32::NEG_INFINITY, + f32::NAN, + 9.0, + ]; + let rhs = vec![ + 1.0, + -1.0, + 0.0, + -0.0, + f32::MIN_POSITIVE, + -f32::from_bits(1), + f32::INFINITY, + f32::NEG_INFINITY, + 1.0, + f32::NAN, + ]; + let expected = lhs + .iter() + .zip(rhs.iter()) + .map(|(&a, &b)| a / b) + .collect::>(); + + assert_f32_bits_eq(&run_f32_binary(&lhs, &rhs, div_f32_edges), &expected); +} + +#[test] +fn float_div_special_values_match_scalar_for_f64() { + let lhs = vec![ + -0.0, + 0.0, + 1.0, + -1.0, + f64::MIN_POSITIVE, + f64::from_bits(1), + f64::INFINITY, + f64::NEG_INFINITY, + f64::NAN, + 9.0, + ]; + let rhs = vec![ + 1.0, + -1.0, + 0.0, + -0.0, + f64::MIN_POSITIVE, + -f64::from_bits(1), + f64::INFINITY, + f64::NEG_INFINITY, + 1.0, + f64::NAN, + ]; + let expected = lhs + .iter() + .zip(rhs.iter()) + .map(|(&a, &b)| a / b) + .collect::>(); + + assert_f64_bits_eq(&run_f64_binary(&lhs, &rhs, div_f64_edges), &expected); +} + +#[test] +fn float_rounding_special_values_match_scalar_for_f32() { + let values = vec![ + -f32::from_bits(1), + -f32::MIN_POSITIVE, + -1.5, + -1.0, + -0.75, + -0.25, + -0.0, + 0.0, + 0.25, + 0.75, + 1.0, + 1.5, + 8_388_608.0, + f32::INFINITY, + f32::NEG_INFINITY, + f32::NAN, + ]; + + assert_f32_bits_eq( + &run_f32_unary(&values, ceil_f32_edges), + &values.iter().map(|&v| v.ceil()).collect::>(), + ); + assert_f32_scalarish_eq( + &run_f32_unary(&values, floor_f32_edges), + &values.iter().map(|&v| v.floor()).collect::>(), + ); + assert_f32_scalarish_eq( + &run_f32_unary(&values, round_f32_edges), + &values.iter().map(|&v| v.round()).collect::>(), + ); +} + +#[test] +fn float_rounding_special_values_match_scalar_for_f64() { + let values = vec![ + -f64::from_bits(1), + -f64::MIN_POSITIVE, + -1.5, + -1.0, + -0.75, + -0.25, + -0.0, + 0.0, + 0.25, + 0.75, + 1.0, + 1.5, + 4_503_599_627_370_496.0, + f64::INFINITY, + f64::NEG_INFINITY, + f64::NAN, + ]; + + assert_f64_scalarish_eq( + 
&run_f64_unary(&values, ceil_f64_edges), + &values.iter().map(|&v| v.ceil()).collect::>(), + ); + assert_f64_scalarish_eq( + &run_f64_unary(&values, floor_f64_edges), + &values.iter().map(|&v| v.floor()).collect::>(), + ); + assert_f64_scalarish_eq( + &run_f64_unary(&values, round_f64_edges), + &values.iter().map(|&v| v.round()).collect::>(), + ); +} + +#[test] +fn float_sqrt_special_values_match_scalar_for_f32() { + let values = vec![ + -f32::INFINITY, + -4.0, + -f32::MIN_POSITIVE, + -0.0, + 0.0, + f32::from_bits(1), + f32::MIN_POSITIVE, + 4.0, + f32::INFINITY, + f32::NAN, + ]; + + assert_f32_bits_eq( + &run_f32_unary(&values, sqrt_f32_edges), + &values.iter().map(|&v| v.sqrt()).collect::>(), + ); +} + +#[test] +fn float_sqrt_special_values_match_scalar_for_f64() { + let values = vec![ + -f64::INFINITY, + -4.0, + -f64::MIN_POSITIVE, + -0.0, + 0.0, + f64::from_bits(1), + f64::MIN_POSITIVE, + 4.0, + f64::INFINITY, + f64::NAN, + ]; + + assert_f64_bits_eq( + &run_f64_unary(&values, sqrt_f64_edges), + &values.iter().map(|&v| v.sqrt()).collect::>(), + ); +} diff --git a/src/tests/lib/arbitrary.rs b/src/tests/lib/arbitrary.rs index 33d2ed9..f32ef71 100644 --- a/src/tests/lib/arbitrary.rs +++ b/src/tests/lib/arbitrary.rs @@ -13,27 +13,37 @@ use crate::{SimdBase, SimdBaseIo}; use super::ScalarNumber; -const IMPORTANT_F32: [f32; 10] = [ +const IMPORTANT_F32: [f32; 15] = [ 0.0, + -0.0, 1.0, -1.0, 0.5, -0.5, 1.5, -1.5, + f32::MIN_POSITIVE, + f32::from_bits(1), + f32::INFINITY, + f32::NEG_INFINITY, f32::MAX, f32::MIN, f32::NAN, ]; -const IMPORTANT_F64: [f64; 10] = [ +const IMPORTANT_F64: [f64; 15] = [ 0.0, + -0.0, 1.0, -1.0, 0.5, -0.5, 1.5, -1.5, + f64::MIN_POSITIVE, + f64::from_bits(1), + f64::INFINITY, + f64::NEG_INFINITY, f64::MAX, f64::MIN, f64::NAN, @@ -174,6 +184,7 @@ pub struct RandSimd; pub struct IterRandSimdForScalar, I2: Iterator> { any: Box I>, blendv: Box I2>, + important_len: usize, scalar_size: usize, } @@ -183,6 +194,7 @@ impl RandSimd { IterRandSimdForScalar { 
any: Box::new(iter_arbitrary_f32), blendv: Box::new(iter_arbitrary_blendv_f32), + important_len: IMPORTANT_F32.len(), scalar_size: 4, } } @@ -191,6 +203,7 @@ impl RandSimd { IterRandSimdForScalar { any: Box::new(iter_arbitrary_f64), blendv: Box::new(iter_arbitrary_blendv_f64), + important_len: IMPORTANT_F64.len(), scalar_size: 8, } } @@ -198,6 +211,7 @@ impl RandSimd { IterRandSimdForScalar { any: Box::new(iter_arbitrary_i8), blendv: Box::new(iter_arbitrary_blendv_i8), + important_len: IMPORTANT_I8.len(), scalar_size: 1, } } @@ -206,6 +220,7 @@ impl RandSimd { IterRandSimdForScalar { any: Box::new(iter_arbitrary_i16), blendv: Box::new(iter_arbitrary_blendv_i16), + important_len: IMPORTANT_I16.len(), scalar_size: 2, } } @@ -214,6 +229,7 @@ impl RandSimd { IterRandSimdForScalar { any: Box::new(iter_arbitrary_i32), blendv: Box::new(iter_arbitrary_blendv_i32), + important_len: IMPORTANT_I32.len(), scalar_size: 4, } } @@ -222,6 +238,7 @@ impl RandSimd { IterRandSimdForScalar { any: Box::new(iter_arbitrary_i64), blendv: Box::new(iter_arbitrary_blendv_i64), + important_len: IMPORTANT_I64.len(), scalar_size: 8, } } @@ -230,6 +247,10 @@ impl RandSimd { impl, I2: Iterator> IterRandSimdForScalar { + fn periodic_interval(&self, base: usize) -> usize { + base + (self.important_len + 1).saturating_sub(14) // uniform shift keeps periods distinct + } + /// Iterate 1000 random inputs, starting from the important numbers pub fn one_arg>(self) -> impl Iterator { let iter = iter_as_simd((self.any)(1000)); @@ -240,16 +261,21 @@ impl, I2: Iterator> /// the second arguming looping at 15 inputs, effectively putting each special number against each /// other one in the end.
pub fn two_arg>(self) -> impl Iterator { - let iter1 = iter_as_simd((self.any)(14)); - let iter2 = iter_as_simd((self.any)(15)); - iter1.zip(iter2).take(14 * 15 * 20 * S::WIDTH) + let period1 = self.periodic_interval(14); + let period2 = self.periodic_interval(15); + let iter1 = iter_as_simd((self.any)(period1)); + let iter2 = iter_as_simd((self.any)(period2)); + iter1.zip(iter2).take(period1 * period2 * 20 * S::WIDTH) } /// Same as two_arg except with periods of 14, 15 and 16. pub fn three_arg>(self) -> impl Iterator { - let mut iter1 = iter_as_simd((self.any)(14)); - let mut iter2 = iter_as_simd((self.any)(15)); - let mut iter3 = iter_as_simd((self.any)(16)); + let period1 = self.periodic_interval(14); + let period2 = self.periodic_interval(15); + let period3 = self.periodic_interval(16); + let mut iter1 = iter_as_simd((self.any)(period1)); + let mut iter2 = iter_as_simd((self.any)(period2)); + let mut iter3 = iter_as_simd((self.any)(period3)); iter::repeat_with(move || { ( @@ -258,7 +284,7 @@ impl, I2: Iterator> iter3.next().unwrap(), ) }) - .take(1680 * S::WIDTH) + .take(period1 * period2 * 8 * S::WIDTH) } /// Same as one_arg, except without values that can cause undefined behavior when rounding @@ -270,16 +296,18 @@ impl, I2: Iterator> /// Same as two_arg, except filtering out NaN floats pub fn two_arg_nan_filtered>(self) -> impl Iterator { - let iter1 = iter_as_simd((self.any)(15).filter(|v| !v.is_float_nan())); - let iter2 = iter_as_simd((self.any)(16).filter(|v| !v.is_float_nan())); - iter1.zip(iter2).take(14 * 15 * 20 * S::WIDTH) + let period1 = self.periodic_interval(15); + let period2 = self.periodic_interval(16); + let iter1 = iter_as_simd((self.any)(period1).filter(|v| !v.is_float_nan())); + let iter2 = iter_as_simd((self.any)(period2).filter(|v| !v.is_float_nan())); + iter1.zip(iter2).take(period1 * period2 * 16 * S::WIDTH) } /// A blendv mask that's not all 0's or all 1's is undefined behavior between different architectures. 
pub fn iter_blendv_ags>(self) -> impl Iterator { let mut mask_iter = iter_as_simd((self.blendv)()); - let mut iter2 = iter_as_simd((self.any)(15)); - let mut iter3 = iter_as_simd((self.any)(16)); + let mut iter2 = iter_as_simd((self.any)(self.periodic_interval(15))); + let mut iter3 = iter_as_simd((self.any)(self.periodic_interval(16))); iter::repeat_with(move || { ( diff --git a/src/tests/lib/numbers.rs b/src/tests/lib/numbers.rs index 6cdacf3..5503e41 100644 --- a/src/tests/lib/numbers.rs +++ b/src/tests/lib/numbers.rs @@ -288,4 +288,26 @@ mod tests { assert!(!f64::from_bits(upper.to_bits() - 1).is_undefined_behavior_when_casting()); assert!(!(i64::MIN as f64).is_undefined_behavior_when_casting()); } + + #[test] + fn float_to_int_cast_filters_reject_non_finite_values() { + for value in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] { + assert!(value.is_undefined_behavior_when_casting()); + } + + for value in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] { + assert!(value.is_undefined_behavior_when_casting()); + } + } + + #[test] + fn float_to_int_cast_filters_keep_signed_zero_and_subnormals_defined() { + for value in [0.0f32, -0.0, f32::MIN_POSITIVE, f32::from_bits(1)] { + assert!(!value.is_undefined_behavior_when_casting()); + } + + for value in [0.0f64, -0.0, f64::MIN_POSITIVE, f64::from_bits(1)] { + assert!(!value.is_undefined_behavior_when_casting()); + } + } } diff --git a/src/tests/mod.rs b/src/tests/mod.rs index c05956d..0614857 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -4,6 +4,7 @@ use lib::*; mod float_cast_tie_regressions; mod float_edge_contract_regressions; mod float_neq_regressions; +mod float_special_value_regressions; mod i64_regressions; mod i8_mask_regressions; mod i8_mul_regressions; diff --git a/src/tests/real_world/bytes.rs b/src/tests/real_world/bytes.rs index d311014..521add5 100644 --- a/src/tests/real_world/bytes.rs +++ b/src/tests/real_world/bytes.rs @@ -130,6 +130,24 @@ fn real_world_find_first_eq_i8_matches_reference() { } } 
+#[test] +fn real_world_find_first_eq_i8_respects_chunk_and_tail_boundaries() { + for len in [15usize, 16, 17, 31, 32, 33] { + let mut data = (0..len) + .map(|i| (i as i8).wrapping_mul(11).wrapping_sub(73)) + .collect::>(); + let needle = 101i8; + + assert_find_first_eq_matches_all_backends(&data, needle); + + for &index in &[0usize, len / 2, len - 1] { + data.fill(needle.wrapping_sub(1)); + data[index] = needle; + assert_find_first_eq_matches_all_backends(&data, needle); + } + } +} + // This mirrors byte-oriented checksums used in packet capture, texture uploads, and log shipping // where unsigned accumulation has to stay correct across chunked SIMD reductions and scalar tails. #[test] @@ -144,3 +162,19 @@ fn real_world_byte_checksum_i8_matches_reference() { assert_byte_checksum_matches_all_backends(&data[..63]); assert_byte_checksum_matches_all_backends(&data); } + +#[test] +fn real_world_byte_checksum_i8_respects_chunk_and_tail_boundaries() { + let data = (0..65) + .map(|i| match i % 4 { + 0 => i8::MIN, + 1 => i8::MAX, + 2 => -1, + _ => 1, + }) + .collect::>(); + + for &len in &[15usize, 16, 17, 31, 32, 33, data.len()] { + assert_byte_checksum_matches_all_backends(&data[..len]); + } +} diff --git a/src/tests/real_world/imaging.rs b/src/tests/real_world/imaging.rs index 625437d..411e2b6 100644 --- a/src/tests/real_world/imaging.rs +++ b/src/tests/real_world/imaging.rs @@ -189,3 +189,29 @@ fn real_world_normalize_luma_f32_matches_reference() { assert_normalize_luma_matches_all_backends(&input[..19]); assert_normalize_luma_matches_all_backends(&input); } + +#[test] +fn real_world_quantize_f32_to_i32_threshold_boundaries_match_reference() { + let input = vec![ + -0.125, -0.071_428, 0.0, 0.071_428, 0.499_999, 0.5, 0.500_001, 145.642_84, 145.642_85, + 145.642_86, 400.0, + ]; + assert_quantize_matches_all_backends(&input); +} + +#[test] +fn real_world_normalize_luma_f32_threshold_boundaries_match_reference() { + let threshold_input = 16.0 + 219.0 * 0.62; + let input = 
vec![ + 15.999, + 16.0, + 16.001, + threshold_input - 0.001, + threshold_input, + threshold_input + 0.001, + 234.999, + 235.0, + 235.001, + ]; + assert_normalize_luma_matches_all_backends(&input); +} diff --git a/src/tests/real_world/integers.rs b/src/tests/real_world/integers.rs index 198be7b..1944383 100644 --- a/src/tests/real_world/integers.rs +++ b/src/tests/real_world/integers.rs @@ -120,3 +120,33 @@ fn real_world_adaptive_select_i32_matches_reference() { assert_adaptive_select_matches_all_backends(&lhs[..19], &rhs[..19]); assert_adaptive_select_matches_all_backends(&lhs, &rhs); } + +#[test] +fn real_world_adaptive_select_i32_threshold_and_clamp_boundaries_match_reference() { + let lhs = vec![ + i32::MIN + 2048, + i32::MIN + 1025, + -4096, + -4090, + -1, + 0, + 4089, + 4090, + i32::MAX - 1025, + i32::MAX - 2048, + ]; + let rhs = vec![ + i32::MIN + 1024, + i32::MIN + 1, + -3072, + -3065, + 1023, + 1025, + 3065, + 3066, + i32::MAX - 1, + i32::MAX - 1024, + ]; + + assert_adaptive_select_matches_all_backends(&lhs, &rhs); +} diff --git a/src/tests/real_world/signal.rs b/src/tests/real_world/signal.rs index c461157..4c42747 100644 --- a/src/tests/real_world/signal.rs +++ b/src/tests/real_world/signal.rs @@ -260,3 +260,20 @@ fn real_world_window_energy_f32_matches_reference() { assert_window_energy_matches_all_backends(&input[..29]); assert_window_energy_matches_all_backends(&input); } + +#[test] +fn real_world_noise_gate_f32_threshold_boundaries_match_reference() { + let input = vec![ + -0.120_001, -0.12, -0.119_999, -0.020_001, -0.02, -0.019_999, 0.019_999, 0.02, 0.020_001, + 0.119_999, 0.12, 0.120_001, + ]; + assert_noise_gate_matches_all_backends(&input); +} + +#[test] +fn real_world_crossfade_f32_clamp_boundaries_match_reference() { + let lhs = vec![-1.0, -0.5, 0.25, 0.75, 1.0, 1.5]; + let rhs = vec![1.0, 0.5, -0.25, -0.75, -1.0, -1.5]; + let weights = vec![-1.0e-6, 0.0, 0.5, 1.0, 1.0 + 1.0e-6, 2.0]; + assert_crossfade_matches_all_backends(&lhs, &rhs, 
&weights); +} diff --git a/src/tests/wasm_unaligned_regressions.rs b/src/tests/wasm_unaligned_regressions.rs index 54366ea..d08b467 100644 --- a/src/tests/wasm_unaligned_regressions.rs +++ b/src/tests/wasm_unaligned_regressions.rs @@ -10,8 +10,6 @@ macro_rules! wasm_unaligned_roundtrip_test { fn $name() { let input: [$scalar_ty; <$vec_ty as SimdConsts>::WIDTH] = [$($value),+]; let mut expected_bytes = [0u8; 16]; - let mut bytes = vec![0u8; 16 + 3]; - let offset = 1usize; unsafe { core::ptr::copy_nonoverlapping( @@ -19,36 +17,42 @@ macro_rules! wasm_unaligned_roundtrip_test { expected_bytes.as_mut_ptr(), 16, ); - core::ptr::copy_nonoverlapping( - input.as_ptr() as *const u8, - bytes.as_mut_ptr().add(offset), - 16, - ); + for offset in 1usize..16 { + let mut bytes = vec![0xA5u8; 16 + offset + 16]; + core::ptr::copy_nonoverlapping( + input.as_ptr() as *const u8, + bytes.as_mut_ptr().add(offset), + 16, + ); - let loaded = <$vec_ty as SimdBaseIo>::load_from_ptr_unaligned( - bytes.as_ptr().add(offset) as *const $scalar_ty, - ); - let loaded_array = loaded.as_array(); - let mut loaded_bytes = [0u8; 16]; - core::ptr::copy_nonoverlapping( - loaded_array.as_ptr() as *const u8, - loaded_bytes.as_mut_ptr(), - 16, - ); - assert_eq!(loaded_bytes, expected_bytes); + let loaded = <$vec_ty as SimdBaseIo>::load_from_ptr_unaligned( + bytes.as_ptr().add(offset) as *const $scalar_ty, + ); + let loaded_array = loaded.as_array(); + let mut loaded_bytes = [0u8; 16]; + core::ptr::copy_nonoverlapping( + loaded_array.as_ptr() as *const u8, + loaded_bytes.as_mut_ptr(), + 16, + ); + assert_eq!(loaded_bytes, expected_bytes); + assert!(bytes[..offset].iter().all(|&byte| byte == 0xA5)); + assert!(bytes[offset + 16..].iter().all(|&byte| byte == 0xA5)); - let output = <$vec_ty as SimdBaseIo>::load_from_array(input); - let mut dest = vec![0u8; 16 + 5]; - let store_offset = 3usize; - output.copy_to_ptr_unaligned(dest.as_mut_ptr().add(store_offset) as *mut $scalar_ty); + let output = <$vec_ty as 
SimdBaseIo>::load_from_array(input); + let mut dest = vec![0x5Au8; 16 + offset + 16]; + output.copy_to_ptr_unaligned(dest.as_mut_ptr().add(offset) as *mut $scalar_ty); - let mut roundtrip_bytes = [0u8; 16]; - core::ptr::copy_nonoverlapping( - dest.as_ptr().add(store_offset), - roundtrip_bytes.as_mut_ptr(), - 16, - ); - assert_eq!(roundtrip_bytes, expected_bytes); + let mut roundtrip_bytes = [0u8; 16]; + core::ptr::copy_nonoverlapping( + dest.as_ptr().add(offset), + roundtrip_bytes.as_mut_ptr(), + 16, + ); + assert_eq!(roundtrip_bytes, expected_bytes); + assert!(dest[..offset].iter().all(|&byte| byte == 0x5A)); + assert!(dest[offset + 16..].iter().all(|&byte| byte == 0x5A)); + } } } };