diff --git a/src/lib.rs b/src/lib.rs index 404368f..a3a5060 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4086,3 +4086,966 @@ mod gpu_tests { ); } } + +// ===== EXPANDED COVERAGE (P1) ===== +// +// The following modules add direct unit tests for foundational ops and core +// neural-network layers that previously had only indirect coverage. They +// follow the existing convention of inline `#[cfg(test)] mod` blocks and rely +// on `RawTensor::check_gradients_simple` for backward-pass validation. + +#[cfg(test)] +mod binary_ops_tests { + use super::*; + + fn make(shape: &[usize], data: Vec) -> Tensor { + RawTensor::new(data, shape, true) + } + + #[test] + fn add_forward_value_and_shape() { + let a = make(&[2, 2], vec![1.0, 2.0, 3.0, 4.0]); + let b = make(&[2, 2], vec![10.0, 20.0, 30.0, 40.0]); + let c = a.add(&b); + assert_eq!(c.borrow().shape, vec![2, 2]); + assert_eq!(c.borrow().data.to_vec(), vec![11.0, 22.0, 33.0, 44.0]); + } + + #[test] + fn add_gradcheck() { + let x = make(&[3], vec![0.5, -1.0, 2.0]); + let y = RawTensor::new(vec![0.25, 0.75, -0.5], &[3], false); + let passed = RawTensor::check_gradients_simple(&x, |t| t.add(&y).sum()); + assert!(passed, "add gradient check failed"); + } + + #[test] + fn sub_forward_value() { + let a = make(&[3], vec![10.0, 5.0, 0.0]); + let b = make(&[3], vec![3.0, 4.0, -1.0]); + let c = a.sub(&b); + assert_eq!(c.borrow().data.to_vec(), vec![7.0, 1.0, 1.0]); + } + + #[test] + fn sub_gradcheck() { + let x = make(&[2, 2], vec![1.0, 2.0, 3.0, 4.0]); + let y = RawTensor::new(vec![0.5, 0.5, 0.5, 0.5], &[2, 2], false); + let passed = RawTensor::check_gradients_simple(&x, |t| t.sub(&y).sum()); + assert!(passed, "sub gradient check failed"); + } + + #[test] + fn elem_mul_forward_value() { + let a = make(&[3], vec![2.0, 3.0, 4.0]); + let b = make(&[3], vec![5.0, 6.0, 7.0]); + let c = a.elem_mul(&b); + assert_eq!(c.borrow().data.to_vec(), vec![10.0, 18.0, 28.0]); + } + + #[test] + fn elem_mul_gradcheck() { + let x = make(&[3], vec![1.5, 
-2.0, 0.5]); + let y = RawTensor::new(vec![0.5, 1.0, -1.5], &[3], false); + let passed = RawTensor::check_gradients_simple(&x, |t| t.elem_mul(&y).sum()); + assert!(passed, "elem_mul gradient check failed"); + } + + #[test] + fn div_forward_value() { + let a = make(&[3], vec![10.0, 9.0, 1.0]); + let b = make(&[3], vec![2.0, 3.0, 4.0]); + let c = a.div(&b); + let out = c.borrow().data.to_vec(); + assert!((out.first().copied().unwrap_or(0.0) - 5.0).abs() < 1e-6); + assert!((out.get(1).copied().unwrap_or(0.0) - 3.0).abs() < 1e-6); + assert!((out.get(2).copied().unwrap_or(0.0) - 0.25).abs() < 1e-6); + } + + #[test] + fn div_gradcheck() { + let x = make(&[3], vec![2.0, -3.0, 5.0]); + let y = RawTensor::new(vec![1.5, 2.0, 0.5], &[3], false); + let passed = RawTensor::check_gradients_simple(&x, |t| t.div(&y).sum()); + assert!(passed, "div gradient check failed"); + } + + #[test] + fn max_elem_forward_value() { + let a = make(&[4], vec![1.0, 5.0, 2.0, 8.0]); + let b = make(&[4], vec![3.0, 4.0, 7.0, 6.0]); + let c = a.max_elem(&b); + assert_eq!(c.borrow().data.to_vec(), vec![3.0, 5.0, 7.0, 8.0]); + } + + #[test] + fn max_elem_gradient_routes_to_winner() { + // Use distinct, unambiguous values to avoid tie-handling ambiguity. + let a = make(&[3], vec![5.0, 1.0, 9.0]); + let b = make(&[3], vec![2.0, 7.0, 4.0]); + let z = a.max_elem(&b); + z.sum().backward(); + // a wins at indices 0 and 2. 
+ assert_eq!(a.grad(), Some(vec![1.0, 0.0, 1.0])); + assert_eq!(b.grad(), Some(vec![0.0, 1.0, 0.0])); + } + + #[test] + fn modulo_forward_value() { + let a = RawTensor::new(vec![7.0, 8.0, 9.0], &[3], false); + let b = RawTensor::new(vec![3.0, 3.0, 3.0], &[3], false); + let c = a.modulo(&b); + let out = c.borrow().data.to_vec(); + assert!((out.first().copied().unwrap_or(0.0) - 1.0).abs() < 1e-6); + assert!((out.get(1).copied().unwrap_or(0.0) - 2.0).abs() < 1e-6); + assert!((out.get(2).copied().unwrap_or(0.0) - 0.0).abs() < 1e-6); + } + + #[test] + fn cmplt_forward_value() { + let a = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], false); + let b = RawTensor::new(vec![3.0, 1.0, 3.0, 5.0], &[4], false); + let c = a.cmplt(&b); + // 1<3=true, 2<1=false, 3<3=false, 4<5=true + assert_eq!(c.borrow().data.to_vec(), vec![1.0, 0.0, 0.0, 1.0]); + } + + #[test] + fn add_then_mul_chain_gradcheck() { + // (x + y) * z, gradient w.r.t. x should be z. + let x = make(&[3], vec![1.0, 2.0, 3.0]); + let y = RawTensor::new(vec![4.0, 5.0, 6.0], &[3], false); + let z = RawTensor::new(vec![0.5, -0.5, 1.0], &[3], false); + let passed = RawTensor::check_gradients_simple(&x, |t| t.add(&y).elem_mul(&z).sum()); + assert!(passed, "add+mul chain gradient check failed"); + } +} + +#[cfg(test)] +mod unary_ops_tests { + use super::*; + use approx::assert_relative_eq; + + fn shaped(data: Vec) -> Tensor { + let n = data.len(); + RawTensor::new(data, &[n], true) + } + + // Forward correctness ------------------------------------------------ + + #[test] + fn neg_forward() { + let x = shaped(vec![1.0, -2.0, 0.0]); + assert_eq!(x.neg().borrow().data.to_vec(), vec![-1.0, 2.0, 0.0]); + } + + #[test] + fn recip_forward() { + let x = shaped(vec![1.0, 2.0, 4.0]); + let out = x.recip().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 0.5, epsilon = 1e-6); + 
assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), 0.25, epsilon = 1e-6); + } + + #[test] + fn sqrt_forward() { + let x = shaped(vec![1.0, 4.0, 9.0]); + let out = x.sqrt().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 2.0, epsilon = 1e-6); + assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), 3.0, epsilon = 1e-6); + } + + #[test] + fn exp_forward() { + let x = shaped(vec![0.0, 1.0]); + let out = x.exp().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!( + out.get(1).copied().unwrap_or(0.0), + std::f32::consts::E, + epsilon = 1e-5 + ); + } + + #[test] + fn log_forward() { + let x = shaped(vec![1.0, std::f32::consts::E]); + let out = x.log().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 0.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 1.0, epsilon = 1e-5); + } + + #[test] + fn exp2_forward() { + let x = shaped(vec![0.0, 1.0, 3.0]); + let out = x.exp2().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 2.0, epsilon = 1e-6); + assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), 8.0, epsilon = 1e-5); + } + + #[test] + fn log2_forward() { + let x = shaped(vec![1.0, 2.0, 8.0]); + let out = x.log2().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 0.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), 3.0, epsilon = 1e-5); + } + + #[test] + fn sin_forward() { + let x = shaped(vec![0.0, std::f32::consts::FRAC_PI_2]); + let out = x.sin().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 0.0, epsilon = 1e-6); + 
assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + } + + #[test] + fn cos_forward() { + let x = shaped(vec![0.0, std::f32::consts::PI]); + let out = x.cos().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), -1.0, epsilon = 1e-5); + } + + #[test] + fn tanh_forward() { + let x = shaped(vec![0.0, 100.0, -100.0]); + let out = x.tanh().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 0.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), -1.0, epsilon = 1e-6); + } + + #[test] + fn sigmoid_forward() { + let x = shaped(vec![0.0, 100.0, -100.0]); + let out = x.sigmoid().borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 0.5, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 1.0, epsilon = 1e-6); + assert_relative_eq!(out.get(2).copied().unwrap_or(0.0), 0.0, epsilon = 1e-6); + } + + #[test] + fn relu_forward() { + let x = shaped(vec![1.0, -2.0, 0.0, 3.5]); + assert_eq!(x.relu().borrow().data.to_vec(), vec![1.0, 0.0, 0.0, 3.5]); + } + + // Backward correctness via gradient check ---------------------------- + + #[test] + fn neg_gradcheck() { + let x = shaped(vec![1.0, -1.0, 0.5]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.neg().sum())); + } + + #[test] + fn recip_gradcheck() { + let x = shaped(vec![0.5, 1.0, 2.0, 4.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.recip().sum())); + } + + #[test] + fn sqrt_gradcheck() { + let x = shaped(vec![0.5, 1.0, 4.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.sqrt().sum())); + } + + #[test] + fn exp_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.exp().sum())); + } + + #[test] + fn log_gradcheck() { + let x = 
shaped(vec![0.5, 1.0, 2.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.log().sum())); + } + + #[test] + fn exp2_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.exp2().sum())); + } + + #[test] + fn log2_gradcheck() { + let x = shaped(vec![0.5, 1.0, 2.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.log2().sum())); + } + + #[test] + fn sin_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.sin().sum())); + } + + #[test] + fn cos_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.cos().sum())); + } + + #[test] + fn tanh_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.tanh().sum())); + } + + #[test] + fn sigmoid_gradcheck() { + let x = shaped(vec![-1.0, 0.0, 1.0]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.sigmoid().sum())); + } + + #[test] + fn relu_gradcheck() { + // Avoid 0 to keep the gradient well-defined under finite differences. 
+ let x = shaped(vec![-1.5, -0.5, 0.5, 1.5]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.relu().sum())); + } + + #[test] + fn erf_gradcheck() { + let x = shaped(vec![-0.5, 0.0, 0.5]); + assert!(RawTensor::check_gradients_simple(&x, |t| t.erf().sum())); + } +} + +#[cfg(test)] +mod reduce_ops_tests { + use super::*; + use approx::assert_relative_eq; + + #[test] + fn sum_forward() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], false); + assert_relative_eq!( + x.sum().borrow().data.first().copied().unwrap_or(0.0), + 10.0, + epsilon = 1e-6 + ); + } + + #[test] + fn sum_backward_is_ones() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0], &[3], true); + x.sum().backward(); + assert_eq!(x.grad(), Some(vec![1.0, 1.0, 1.0])); + } + + #[test] + fn mean_forward() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], false); + assert_relative_eq!( + x.mean().borrow().data.first().copied().unwrap_or(0.0), + 2.5, + epsilon = 1e-6 + ); + } + + #[test] + fn mean_backward_is_uniform() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], true); + x.mean().backward(); + // dL/dx_i = 1/N + let g = x.grad().unwrap(); + for v in g { + assert_relative_eq!(v, 0.25, epsilon = 1e-6); + } + } + + #[test] + fn max_reduce_routes_grad_to_argmax() { + let x = RawTensor::new(vec![1.0, 7.0, 3.0, 2.0], &[4], true); + x.max_reduce().backward(); + assert_eq!(x.grad(), Some(vec![0.0, 1.0, 0.0, 0.0])); + } + + #[test] + fn sum_dim_keepdim_shape() { + // (2, 3) -> sum over dim 1 with keepdim -> (2, 1) + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], false); + let s = x.sum_dim(1, true); + assert_eq!(s.borrow().shape, vec![2, 1]); + assert_eq!(s.borrow().data.to_vec(), vec![6.0, 15.0]); + } + + #[test] + fn sum_dim_no_keepdim_shape() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], false); + let s = x.sum_dim(0, false); + // (2, 3) reduced over dim 0 -> (3,) + assert_eq!(s.borrow().shape, vec![3]); + 
assert_eq!(s.borrow().data.to_vec(), vec![5.0, 7.0, 9.0]); + } + + #[test] + fn sum_dim_gradcheck() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], true); + assert!(RawTensor::check_gradients_simple(&x, |t| t + .sum_dim(1, true) + .sum())); + } + + #[test] + fn mean_dim_keepdim_value() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], false); + let m = x.mean_dim(1, true); + assert_eq!(m.borrow().shape, vec![2, 1]); + let out = m.borrow().data.to_vec(); + assert_relative_eq!(out.first().copied().unwrap_or(0.0), 2.0, epsilon = 1e-6); + assert_relative_eq!(out.get(1).copied().unwrap_or(0.0), 5.0, epsilon = 1e-6); + } + + #[test] + fn mean_dim_gradcheck() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], true); + assert!(RawTensor::check_gradients_simple(&x, |t| t + .mean_dim(0, true) + .sum())); + } + + #[test] + fn max_dim_value_and_shape() { + // distinct values per row to avoid tie ambiguity + let x = RawTensor::new(vec![1.0, 5.0, 3.0, 9.0, 2.0, 7.0], &[2, 3], false); + let m = x.max_dim(1, true); + assert_eq!(m.borrow().shape, vec![2, 1]); + assert_eq!(m.borrow().data.to_vec(), vec![5.0, 9.0]); + } + + #[test] + fn max_dim_routes_grad_to_argmax_per_row() { + let x = RawTensor::new(vec![1.0, 5.0, 3.0, 9.0, 2.0, 7.0], &[2, 3], true); + x.max_dim(1, true).sum().backward(); + // argmax row 0 -> col 1; row 1 -> col 0 + assert_eq!(x.grad(), Some(vec![0.0, 1.0, 0.0, 1.0, 0.0, 0.0])); + } +} + +#[cfg(test)] +mod matmul_tests { + use super::*; + use approx::assert_relative_eq; + + #[test] + fn matmul_2d_2d_value() { + // (2,3) @ (3,2) -> (2,2). Use simple integer-valued data. 
+ let a = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], false); + let b = RawTensor::new(vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0], &[3, 2], false); + let c = a.matmul(&b); + assert_eq!(c.borrow().shape, vec![2, 2]); + // row 0: [1+0+3, 0+2+3] = [4, 5] + // row 1: [4+0+6, 0+5+6] = [10, 11] + assert_eq!(c.borrow().data.to_vec(), vec![4.0, 5.0, 10.0, 11.0]); + } + + #[test] + fn matmul_2d_2d_gradcheck() { + let a = RawTensor::new(vec![0.5, -1.0, 0.25, 1.0], &[2, 2], true); + let b = RawTensor::new(vec![1.0, 0.5, -0.5, 1.0], &[2, 2], false); + assert!(RawTensor::check_gradients_simple(&a, |t| t + .matmul(&b) + .sum())); + } + + #[test] + fn matmul_2d_1d_value() { + // (3,2) @ (2,) -> (3,) + let a = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2], false); + let v = RawTensor::new(vec![1.0, -1.0], &[2], false); + let z = a.matmul(&v); + assert_eq!(z.borrow().shape, vec![3]); + // [1-2, 3-4, 5-6] = [-1, -1, -1] + assert_eq!(z.borrow().data.to_vec(), vec![-1.0, -1.0, -1.0]); + } + + #[test] + fn matmul_2d_1d_gradcheck() { + let a = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[2, 2], true); + let v = RawTensor::new(vec![0.5, -0.5], &[2], false); + assert!(RawTensor::check_gradients_simple(&a, |t| t + .matmul(&v) + .sum())); + } + + #[test] + fn matmul_1d_2d_value() { + // (3,) @ (3,2) -> (2,) + let v = RawTensor::new(vec![1.0, 2.0, 3.0], &[3], false); + let m = RawTensor::new(vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0], &[3, 2], false); + let z = v.matmul(&m); + assert_eq!(z.borrow().shape, vec![2]); + // row dot col: col0 = 1*1+2*0+3*1 = 4, col1 = 1*0+2*1+3*1 = 5 + assert_eq!(z.borrow().data.to_vec(), vec![4.0, 5.0]); + } + + #[test] + fn matmul_1d_2d_gradcheck() { + let v = RawTensor::new(vec![0.5, -0.5, 1.0], &[3], true); + let m = RawTensor::new(vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0], &[3, 2], false); + assert!(RawTensor::check_gradients_simple(&v, |t| t + .matmul(&m) + .sum())); + } + + #[test] + fn matmul_1d_1d_dot_value() { + let a = RawTensor::new(vec![1.0, 2.0, 
3.0], &[3], false); + let b = RawTensor::new(vec![4.0, -1.0, 2.0], &[3], false); + let z = a.matmul(&b); + // 4 - 2 + 6 = 8; result is scalar shape + assert_relative_eq!( + z.borrow().data.first().copied().unwrap_or(0.0), + 8.0, + epsilon = 1e-6 + ); + } + + #[test] + fn matmul_1d_1d_dot_gradcheck() { + let a = RawTensor::new(vec![1.0, 2.0, 3.0], &[3], true); + let b = RawTensor::new(vec![0.5, -0.5, 1.0], &[3], false); + assert!(RawTensor::check_gradients_simple(&a, |t| t + .matmul(&b) + .sum())); + } + + #[test] + fn matmul_batched_value_and_shape() { + // (2, 2, 3) @ (2, 3, 2) -> (2, 2, 2). All-ones inputs => entries = 3. + let a = RawTensor::ones(&[2, 2, 3]); + let b = RawTensor::ones(&[2, 3, 2]); + let c = a.matmul(&b); + assert_eq!(c.borrow().shape, vec![2, 2, 2]); + for v in c.borrow().data.to_vec() { + assert_relative_eq!(v, 3.0, epsilon = 1e-6); + } + } + + #[test] + fn matmul_batched_gradcheck() { + // Small batched matmul gradient check on `a`. + let a = RawTensor::new( + vec![ + 0.5, -0.5, 1.0, 1.5, -1.0, 0.5, 0.25, 0.75, -0.25, 1.0, 0.5, -0.5, + ], + &[2, 2, 3], + true, + ); + let b = RawTensor::new( + vec![ + 0.5, 0.25, 1.0, -0.5, 0.0, 1.0, -1.0, 0.5, 0.25, 1.0, 0.5, -0.5, + ], + &[2, 3, 2], + false, + ); + assert!(RawTensor::check_gradients_simple(&a, |t| t + .matmul(&b) + .sum())); + } + + #[test] + fn transpose_2d_shape_and_value() { + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], false); + let t = x.transpose(); + assert_eq!(t.borrow().shape, vec![3, 2]); + // permuted: original (i,j) -> (j,i). 
Row-major: [1,4, 2,5, 3,6] + assert_eq!(t.borrow().data.to_vec(), vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]); + } +} + +#[cfg(test)] +mod linear_tests { + use super::*; + + #[test] + fn weight_shape_and_bias_present() { + let layer = Linear::new(4, 3, true); + assert_eq!(layer.weight.borrow().shape, vec![4, 3]); + assert!(layer.bias.is_some()); + let b = layer.bias.as_ref().unwrap(); + assert_eq!(b.borrow().shape, vec![3]); + } + + #[test] + fn no_bias_when_use_bias_false() { + let layer = Linear::new(4, 3, false); + assert!(layer.bias.is_none()); + assert_eq!(layer.parameters().len(), 1); + } + + #[test] + fn forward_shape_with_bias() { + let layer = Linear::new(5, 2, true); + let x = RawTensor::new(vec![1.0; 3 * 5], &[3, 5], true); + let y = layer.forward(&x); + assert_eq!(y.borrow().shape, vec![3, 2]); + } + + #[test] + fn forward_value_with_known_weights() { + // Build a deterministic Linear: y = xW + b with shape (1,2) input, + // (2,3) weight, (3,) bias. + let mut layer = Linear::new(2, 3, true); + // Override randomly initialized weights with known values. + { + let mut w = layer.weight.borrow_mut(); + w.data = Storage::cpu(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]); + } + { + let bias = layer.bias.as_mut().unwrap(); + let mut b = bias.borrow_mut(); + b.data = Storage::cpu(vec![10.0, 20.0, 30.0]); + } + let x = RawTensor::new(vec![1.0, 1.0], &[1, 2], false); + let y = layer.forward(&x); + // x @ W = [1*1+1*4, 1*2+1*5, 1*3+1*6] = [5, 7, 9]; + bias = [15, 27, 39] + assert_eq!(y.borrow().data.to_vec(), vec![15.0, 27.0, 39.0]); + } + + #[test] + fn backward_gradcheck_input() { + // Use a fixed weight tensor so the gradient check is deterministic. + let layer = Linear::new(3, 2, true); + // Snapshot the (random) weight so the closure captures stable values. 
+ let w_snapshot = layer.weight.borrow().data.to_vec(); + let b_snapshot = layer + .bias + .as_ref() + .map(|b| b.borrow().data.to_vec()) + .unwrap_or_default(); + let w_shape = layer.weight.borrow().shape.clone(); + let b_shape = layer + .bias + .as_ref() + .map(|b| b.borrow().shape.clone()) + .unwrap_or_default(); + let x = RawTensor::new(vec![0.5, -1.0, 1.5, 0.25, 0.0, 2.0], &[2, 3], true); + + let passed = RawTensor::check_gradients_simple(&x, |t| { + let w = RawTensor::new(w_snapshot.clone(), &w_shape, false); + let b = RawTensor::new(b_snapshot.clone(), &b_shape, false); + t.matmul(&w).add(&b).sum() + }); + assert!(passed, "Linear input gradient check failed"); + } +} + +#[cfg(test)] +mod activation_tests { + use super::*; + + #[test] + fn relu_layer_forward_matches_op() { + let layer = ReLU; + let x = RawTensor::new(vec![-1.0, 0.0, 2.0, -3.0], &[4], true); + let y = ::forward(&layer, &x); + assert_eq!(y.borrow().data.to_vec(), vec![0.0, 0.0, 2.0, 0.0]); + assert!(layer.parameters().is_empty()); + } + + #[test] + fn relu_layer_gradcheck() { + let layer = ReLU; + let x = RawTensor::new(vec![-1.5, -0.5, 0.5, 1.5], &[4], true); + assert!(RawTensor::check_gradients_simple(&x, |t| { + ::forward(&layer, t).sum() + })); + } + + #[test] + fn sigmoid_layer_forward_matches_op() { + let layer = Sigmoid; + let x = RawTensor::new(vec![0.0, 100.0, -100.0], &[3], true); + let y = ::forward(&layer, &x); + let out = y.borrow().data.to_vec(); + assert!((out.first().copied().unwrap_or(0.0) - 0.5).abs() < 1e-6); + assert!((out.get(1).copied().unwrap_or(0.0) - 1.0).abs() < 1e-6); + assert!(out.get(2).copied().unwrap_or(0.0).abs() < 1e-6); + } + + #[test] + fn sigmoid_layer_gradcheck() { + let layer = Sigmoid; + let x = RawTensor::new(vec![-1.0, 0.0, 1.0], &[3], true); + assert!(RawTensor::check_gradients_simple(&x, |t| { + ::forward(&layer, t).sum() + })); + } + + #[test] + fn tanh_layer_forward_matches_op() { + let layer = Tanh; + let x = RawTensor::new(vec![0.0, 100.0, -100.0], 
&[3], true); + let y = ::forward(&layer, &x); + let out = y.borrow().data.to_vec(); + assert!(out.first().copied().unwrap_or(0.0).abs() < 1e-6); + assert!((out.get(1).copied().unwrap_or(0.0) - 1.0).abs() < 1e-6); + assert!((out.get(2).copied().unwrap_or(0.0) + 1.0).abs() < 1e-6); + } + + #[test] + fn tanh_layer_gradcheck() { + let layer = Tanh; + let x = RawTensor::new(vec![-1.0, 0.0, 1.0], &[3], true); + assert!(RawTensor::check_gradients_simple(&x, |t| { + ::forward(&layer, t).sum() + })); + } +} + +#[cfg(test)] +mod dropout_tests { + use super::*; + + #[test] + fn p_zero_is_identity() { + let layer = Dropout::new(0.0); + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], true); + let y = ::forward(&layer, &x); + assert_eq!(y.borrow().data.to_vec(), vec![1.0, 2.0, 3.0, 4.0]); + assert_eq!(y.borrow().shape, vec![4]); + } + + #[test] + fn p_one_zeroes_all_elements() { + // With keep_prob=0, the mask is all zeros => output is all zeros. + let layer = Dropout::new(1.0); + let x = RawTensor::new(vec![1.0, 2.0, 3.0, 4.0], &[4], true); + let y = ::forward(&layer, &x); + for v in y.borrow().data.to_vec() { + assert_eq!(v, 0.0, "dropout p=1 should zero all elements"); + } + } + + #[test] + fn eval_mode_passthrough() { + // In eval mode, even high-p dropout returns x unchanged. + let mut layer = Dropout::new(0.9); + layer.train(false); + let x = RawTensor::new(vec![5.0; 16], &[16], true); + let y = ::forward(&layer, &x); + for v in y.borrow().data.to_vec() { + assert_eq!(v, 5.0); + } + } + + #[test] + fn shape_preserved_under_training() { + let layer = Dropout::new(0.5); + let x = RawTensor::new(vec![1.0; 2 * 3 * 4], &[2, 3, 4], true); + let y = ::forward(&layer, &x); + assert_eq!(y.borrow().shape, vec![2, 3, 4]); + } + + #[test] + fn statistical_mean_preservation() { + // For inverted dropout, E[output] == input. With a large enough sample + // the empirical mean stays within ~5% of the input value. 
+ crate::tensor::manual_seed(0x00C0_FFEE); + let layer = Dropout::new(0.5); + let n = 20_000; + let x = RawTensor::new(vec![1.0; n], &[n], false); + let y = ::forward(&layer, &x); + let sum: f32 = y.borrow().data.to_vec().iter().sum(); + let mean = sum / n as f32; + // E[mean] = 1.0, std ≈ sqrt(p / ((1-p)*n)) ≈ 0.007 for p=0.5, n=20k. + // 0.05 is a comfortable bound that keeps this from flaking. + assert!( + (mean - 1.0).abs() < 0.05, + "dropout mean drifted: got {mean}, expected ~1.0" + ); + } +} + +#[cfg(test)] +mod batchnorm_tests { + use super::*; + use approx::assert_relative_eq; + + fn running_stats(state: &io::StateDict) -> (Vec, Vec) { + let rm = state + .get("running_mean") + .expect("state dict missing running_mean") + .data + .clone(); + let rv = state + .get("running_var") + .expect("state dict missing running_var") + .data + .clone(); + (rm, rv) + } + + // ---- BatchNorm1d ---- + + #[test] + fn bn1d_forward_shape_preserved() { + let layer = BatchNorm1d::new(4); + let x = RawTensor::new(vec![1.0; 8 * 4], &[8, 4], true); + let y = ::forward(&layer, &x); + assert_eq!(y.borrow().shape, vec![8, 4]); + } + + #[test] + fn bn1d_train_normalizes_to_zero_mean_unit_var() { + let layer = BatchNorm1d::new(2); + // Per-feature, batch-of-4: column 0 = {1, 2, 3, 4}, column 1 = {-1, 0, 1, 2} + let x = RawTensor::new( + vec![1.0, -1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 2.0], + &[4, 2], + false, + ); + let y = ::forward(&layer, &x); + let out = y.borrow().data.to_vec(); + // Output mean per column should be ~0 and stddev ~1 (gamma=1, beta=0). 
+ let col0: Vec = (0..4) + .map(|i| out.get(i * 2).copied().unwrap_or(0.0)) + .collect(); + let mean0: f32 = col0.iter().sum::() / 4.0; + let var0: f32 = col0.iter().map(|v| (v - mean0).powi(2)).sum::() / 4.0; + assert_relative_eq!(mean0, 0.0, epsilon = 1e-5); + assert_relative_eq!(var0, 1.0, max_relative = 1e-3); + } + + #[test] + fn bn1d_training_updates_running_stats() { + let layer = BatchNorm1d::new(3); + // Capture initial state (running_mean=0, running_var=1). + let (rm0, rv0) = running_stats(&::state_dict(&layer)); + assert_eq!(rm0, vec![0.0, 0.0, 0.0]); + assert_eq!(rv0, vec![1.0, 1.0, 1.0]); + + // Forward with a batch whose stats clearly differ from defaults. + let x = RawTensor::new( + vec![ + 1.0, 10.0, -5.0, 2.0, 12.0, -3.0, 3.0, 14.0, -1.0, 4.0, 16.0, 1.0, + ], + &[4, 3], + false, + ); + let _ = ::forward(&layer, &x); + + let (rm1, rv1) = running_stats(&::state_dict(&layer)); + // Running mean should have moved toward batch mean for each feature. + // Batch means: col0 = 2.5, col1 = 13, col2 = -2.0 + // With momentum=0.1: rm = 0.9*0 + 0.1*batch_mean + assert_relative_eq!(rm1.first().copied().unwrap_or(0.0), 0.25, epsilon = 1e-4); + assert_relative_eq!(rm1.get(1).copied().unwrap_or(0.0), 1.3, epsilon = 1e-4); + assert_relative_eq!(rm1.get(2).copied().unwrap_or(0.0), -0.2, epsilon = 1e-4); + // Variance updates should differ from the initial 1.0 for at least one feature. + let any_var_changed = rv1.iter().any(|v| (v - 1.0).abs() > 1e-3); + assert!( + any_var_changed, + "running_var should have moved after training step" + ); + } + + #[test] + fn bn1d_eval_uses_running_stats() { + let mut layer = BatchNorm1d::new(2); + // Pre-load known running stats via load_state_dict. 
+ let mut state = ::state_dict(&layer); + state.insert( + "running_mean".to_string(), + io::TensorData { + data: vec![5.0, -5.0], + shape: vec![2], + }, + ); + state.insert( + "running_var".to_string(), + io::TensorData { + data: vec![4.0, 4.0], + shape: vec![2], + }, + ); + ::load_state_dict(&mut layer, &state); + layer.train(false); + + // Input == running_mean => normalized output should be ~0 (then * gamma=1 + beta=0). + let x = RawTensor::new(vec![5.0, -5.0, 5.0, -5.0], &[2, 2], false); + let y = ::forward(&layer, &x); + for v in y.borrow().data.to_vec() { + assert!( + v.abs() < 1e-4, + "eval-mode normalized output should be ~0, got {v}" + ); + } + } + + #[test] + fn bn1d_gradcheck_input() { + let layer = BatchNorm1d::new(2); + let x = RawTensor::new(vec![1.0, -1.0, 2.0, 0.0, 3.0, 1.0, 4.0, 2.0], &[4, 2], true); + // Use stricter epsilon on BN since it's mean-centered (small grads). + let (_max_e, _mean_e, passed) = RawTensor::check_gradients( + &x, + |t| ::forward(&layer, t).sum(), + 1e-2, + 1e-2, + ); + assert!(passed, "BatchNorm1d input gradient check failed"); + } + + // ---- BatchNorm2d ---- + + #[test] + fn bn2d_forward_shape_preserved() { + let layer = BatchNorm2d::new(3); + // (B=2, C=3, H=2, W=2) + let x = RawTensor::new(vec![1.0; 2 * 3 * 2 * 2], &[2, 3, 2, 2], true); + let y = ::forward(&layer, &x); + assert_eq!(y.borrow().shape, vec![2, 3, 2, 2]); + } + + #[test] + fn bn2d_training_updates_running_stats() { + let layer = BatchNorm2d::new(2); + let (rm0, rv0) = running_stats(&::state_dict(&layer)); + assert_eq!(rm0, vec![0.0, 0.0]); + assert_eq!(rv0, vec![1.0, 1.0]); + + // Channel 0 = all 1.0, Channel 1 = all 5.0 => batch_mean = [1, 5] + let mut data = Vec::with_capacity(2 * 2 * 2 * 2); + for _b in 0..2 { + for c in 0..2 { + for _ in 0..(2 * 2) { + data.push(if c == 0 { 1.0 } else { 5.0 }); + } + } + } + let x = RawTensor::new(data, &[2, 2, 2, 2], false); + let _ = ::forward(&layer, &x); + + let (rm1, _rv1) = running_stats(&::state_dict(&layer)); + 
// momentum=0.1: rm = 0.9*0 + 0.1*[1, 5] = [0.1, 0.5] + assert_relative_eq!(rm1.first().copied().unwrap_or(0.0), 0.1, epsilon = 1e-5); + assert_relative_eq!(rm1.get(1).copied().unwrap_or(0.0), 0.5, epsilon = 1e-5); + } + + #[test] + fn bn2d_gradcheck_input() { + let layer = BatchNorm2d::new(2); + // Distinct values so BN actually computes a non-trivial normalization. + let mut data = Vec::with_capacity(2 * 2 * 2 * 2); + for i in 0..(2 * 2 * 2 * 2) { + data.push((i as f32) * 0.1 + 1.0); + } + let x = RawTensor::new(data, &[2, 2, 2, 2], true); + let (_max_e, _mean_e, passed) = RawTensor::check_gradients( + &x, + |t| ::forward(&layer, t).sum(), + 1e-2, + 1e-2, + ); + assert!(passed, "BatchNorm2d input gradient check failed"); + } +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..d95a707 --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1,265 @@ +//! Shared helpers for CPU/GPU parity tests. +//! +//! These helpers run the same op on a CPU tensor and a GPU copy of the same +//! tensor and assert that forward (and optionally backward) results match +//! within a tolerance. Tests early-exit cleanly via `skip_if_no_gpu` when +//! no GPU adapter is available, so the file can be compiled and linked +//! everywhere `--features gpu` is enabled. +//! +//! GPU broadcasting is not supported by Volta — every helper takes matched +//! shapes (or single-input variants) only. + +#![cfg(feature = "gpu")] +#![allow(dead_code)] // each test file uses only a subset + +use volta::gpu::is_gpu_available; +use volta::tensor::TensorOps; +use volta::{Device, RawTensor, Tensor}; + +pub const TOL_FWD_GPU: f32 = 1e-5; +pub const TOL_BWD_GPU: f32 = 1e-4; +pub const TOL_MATMUL_FWD: f32 = 1e-4; +pub const TOL_MATMUL_BWD: f32 = 1e-3; + +/// Returns `true` if the test should bail because no GPU is available. +/// Idiomatic use: `if skip_if_no_gpu() { return; }` at the top of each test. 
+#[must_use] +pub fn skip_if_no_gpu() -> bool { + !is_gpu_available() +} + +#[must_use] +pub fn gpu_device() -> Device { + Device::GPU("ParityTest".to_string()) +} + +/// Build a deterministic CPU tensor with values linearly spaced over `range`. +/// Avoids zero-magnitude inputs that would blow up `log`/`recip`/`sqrt` etc. +#[must_use] +pub fn make_input(shape: &[usize], requires_grad: bool, range: (f32, f32)) -> Tensor { + let n: usize = shape.iter().product(); + let (lo, hi) = range; + let denom = (n.max(1)) as f32; + let data: Vec = (0..n) + .map(|i| lo + ((i as f32) / denom) * (hi - lo)) + .collect(); + RawTensor::new(data, shape, requires_grad) +} + +/// Element-wise comparison of two flattened tensors with absolute tolerance. +pub fn assert_close(label: &str, cpu: &[f32], gpu: &[f32], tol: f32) { + assert_eq!( + cpu.len(), + gpu.len(), + "{label}: length mismatch (cpu={}, gpu={})", + cpu.len(), + gpu.len() + ); + for (i, (c, g)) in cpu.iter().zip(gpu.iter()).enumerate() { + let diff = (c - g).abs(); + assert!( + diff < tol, + "{label} mismatch at index {i}: cpu={c}, gpu={g}, diff={diff}, tol={tol}" + ); + } +} + +/// Forward parity for a unary op: same input data on CPU and GPU should +/// produce numerically equivalent outputs. +pub fn assert_unary_parity(op_name: &str, op: F, shape: &[usize], range: (f32, f32), tol: f32) +where + F: Fn(&Tensor) -> Tensor, +{ + if skip_if_no_gpu() { + return; + } + let x_cpu = make_input(shape, false, range); + let x_gpu = x_cpu.to_device(gpu_device()); + + let y_cpu = op(&x_cpu); + let y_gpu = op(&x_gpu).to_device(Device::CPU); + + assert_close( + op_name, + &y_cpu.borrow().data.to_vec(), + &y_gpu.borrow().data.to_vec(), + tol, + ); +} + +/// Forward + backward parity for a unary op: also compares ∂L/∂x with L = sum(y). 
+pub fn assert_unary_parity_backward( + op_name: &str, + op: F, + shape: &[usize], + range: (f32, f32), + tol_fwd: f32, + tol_bwd: f32, +) where + F: Fn(&Tensor) -> Tensor, +{ + if skip_if_no_gpu() { + return; + } + let x_cpu = make_input(shape, true, range); + let x_gpu = x_cpu.to_device(gpu_device()); + + let y_cpu = op(&x_cpu); + y_cpu.sum().backward(); + let y_gpu = op(&x_gpu); + y_gpu.sum().backward(); + + assert_close( + &format!("{op_name} fwd"), + &y_cpu.borrow().data.to_vec(), + &y_gpu.to_device(Device::CPU).borrow().data.to_vec(), + tol_fwd, + ); + + let g_cpu = x_cpu.grad().expect("CPU gradient missing"); + let g_gpu = x_gpu + .borrow() + .grad + .as_ref() + .expect("GPU gradient missing") + .to_vec(); + assert_close(&format!("{op_name} bwd"), &g_cpu, &g_gpu, tol_bwd); +} + +/// Forward parity for a binary op with matched (non-broadcast) shapes. +pub fn assert_binary_parity(op_name: &str, op: F, shape: &[usize], range: (f32, f32), tol: f32) +where + F: Fn(&Tensor, &Tensor) -> Tensor, +{ + if skip_if_no_gpu() { + return; + } + let a_cpu = make_input(shape, false, range); + // Slightly different value space for `b` to avoid degenerate cases. + let b_cpu = make_input(shape, false, (range.0 + 0.1, range.1 - 0.1)); + let a_gpu = a_cpu.to_device(gpu_device()); + let b_gpu = b_cpu.to_device(gpu_device()); + + let c_cpu = op(&a_cpu, &b_cpu); + let c_gpu = op(&a_gpu, &b_gpu).to_device(Device::CPU); + + assert_close( + op_name, + &c_cpu.borrow().data.to_vec(), + &c_gpu.borrow().data.to_vec(), + tol, + ); +} + +/// Forward + backward parity for a binary op with matched shapes. 
+pub fn assert_binary_parity_backward( + op_name: &str, + op: F, + shape: &[usize], + range: (f32, f32), + tol_fwd: f32, + tol_bwd: f32, +) where + F: Fn(&Tensor, &Tensor) -> Tensor, +{ + if skip_if_no_gpu() { + return; + } + let a_cpu = make_input(shape, true, range); + let b_cpu = make_input(shape, true, (range.0 + 0.1, range.1 - 0.1)); + let a_gpu = a_cpu.to_device(gpu_device()); + let b_gpu = b_cpu.to_device(gpu_device()); + + let c_cpu = op(&a_cpu, &b_cpu); + c_cpu.sum().backward(); + let c_gpu = op(&a_gpu, &b_gpu); + c_gpu.sum().backward(); + + assert_close( + &format!("{op_name} fwd"), + &c_cpu.borrow().data.to_vec(), + &c_gpu.to_device(Device::CPU).borrow().data.to_vec(), + tol_fwd, + ); + + let ga_cpu = a_cpu.grad().expect("CPU a-grad missing"); + let ga_gpu = a_gpu + .borrow() + .grad + .as_ref() + .expect("GPU a-grad missing") + .to_vec(); + assert_close(&format!("{op_name} ∂/∂a"), &ga_cpu, &ga_gpu, tol_bwd); + + let gb_cpu = b_cpu.grad().expect("CPU b-grad missing"); + let gb_gpu = b_gpu + .borrow() + .grad + .as_ref() + .expect("GPU b-grad missing") + .to_vec(); + assert_close(&format!("{op_name} ∂/∂b"), &gb_cpu, &gb_gpu, tol_bwd); +} + +/// Forward parity for a reduction (sum/mean/max + axis variants). +pub fn assert_reduce_parity(op_name: &str, op: F, shape: &[usize], tol: f32) +where + F: Fn(&Tensor) -> Tensor, +{ + if skip_if_no_gpu() { + return; + } + let x_cpu = make_input(shape, false, (-2.0, 2.0)); + let x_gpu = x_cpu.to_device(gpu_device()); + let y_cpu = op(&x_cpu); + let y_gpu = op(&x_gpu).to_device(Device::CPU); + assert_close( + op_name, + &y_cpu.borrow().data.to_vec(), + &y_gpu.borrow().data.to_vec(), + tol, + ); +} + +/// Forward + backward parity for matmul over a given (`a_shape`, `b_shape`) pair. +/// Useful for sweeping the matmul shape matrix. 
+pub fn assert_matmul_parity(a_shape: &[usize], b_shape: &[usize], tol_fwd: f32, tol_bwd: f32) { + if skip_if_no_gpu() { + return; + } + let a_cpu = make_input(a_shape, true, (-1.0, 1.0)); + let b_cpu = make_input(b_shape, true, (-1.0, 1.0)); + let a_gpu = a_cpu.to_device(gpu_device()); + let b_gpu = b_cpu.to_device(gpu_device()); + + let c_cpu = a_cpu.matmul(&b_cpu); + c_cpu.sum().backward(); + let c_gpu = a_gpu.matmul(&b_gpu); + c_gpu.sum().backward(); + + let label = format!("matmul {a_shape:?} @ {b_shape:?}"); + assert_close( + &format!("{label} fwd"), + &c_cpu.borrow().data.to_vec(), + &c_gpu.to_device(Device::CPU).borrow().data.to_vec(), + tol_fwd, + ); + + let ga_cpu = a_cpu.grad().expect("CPU a-grad missing"); + let ga_gpu = a_gpu + .borrow() + .grad + .as_ref() + .expect("GPU a-grad missing") + .to_vec(); + assert_close(&format!("{label} ∂/∂a"), &ga_cpu, &ga_gpu, tol_bwd); + + let gb_cpu = b_cpu.grad().expect("CPU b-grad missing"); + let gb_gpu = b_gpu + .borrow() + .grad + .as_ref() + .expect("GPU b-grad missing") + .to_vec(); + assert_close(&format!("{label} ∂/∂b"), &gb_cpu, &gb_gpu, tol_bwd); +} diff --git a/tests/parity_binary.rs b/tests/parity_binary.rs new file mode 100644 index 0000000..83cdeb4 --- /dev/null +++ b/tests/parity_binary.rs @@ -0,0 +1,88 @@ +//! CPU/GPU parity for binary tensor ops. +//! +//! Differentiable ops are checked forward + backward; non-differentiable ops +//! (modulo, cmplt) are checked forward only. All shapes are matched — Volta +//! does not support broadcasting on the GPU path. 
+ +#![cfg(feature = "gpu")] + +#[path = "common/mod.rs"] +mod common; + +use common::{TOL_BWD_GPU, TOL_FWD_GPU, assert_binary_parity, assert_binary_parity_backward}; +use volta::tensor::TensorOps; + +const SHAPE: &[usize] = &[3, 4]; + +#[test] +fn add_parity() { + assert_binary_parity_backward( + "add", + |a, b| a.add(b), + SHAPE, + (-2.0, 2.0), + TOL_FWD_GPU, + TOL_BWD_GPU, + ); +} + +#[test] +fn sub_parity() { + assert_binary_parity_backward( + "sub", + |a, b| a.sub(b), + SHAPE, + (-2.0, 2.0), + TOL_FWD_GPU, + TOL_BWD_GPU, + ); +} + +#[test] +fn elem_mul_parity() { + assert_binary_parity_backward( + "elem_mul", + |a, b| a.elem_mul(b), + SHAPE, + (-2.0, 2.0), + TOL_FWD_GPU, + TOL_BWD_GPU, + ); +} + +#[test] +fn div_parity() { + // Keep divisor away from 0 to keep gradient magnitudes well-conditioned. + assert_binary_parity_backward( + "div", + |a, b| a.div(b), + SHAPE, + (0.5, 2.0), + TOL_FWD_GPU, + TOL_BWD_GPU, + ); +} + +#[test] +fn max_elem_parity_forward_only() { + // Backward of max_elem can route gradient differently when ties occur — + // the make_input ranges don't guarantee strict inequality between a/b. + // Forward parity is the reliable invariant to check here. + assert_binary_parity( + "max_elem", + |a, b| a.max_elem(b), + SHAPE, + (-2.0, 2.0), + TOL_FWD_GPU, + ); +} + +#[test] +fn modulo_parity_forward() { + assert_binary_parity("modulo", |a, b| a.modulo(b), SHAPE, (1.0, 5.0), TOL_FWD_GPU); +} + +#[test] +fn cmplt_parity_forward() { + assert_binary_parity("cmplt", |a, b| a.cmplt(b), SHAPE, (-2.0, 2.0), TOL_FWD_GPU); +} diff --git a/tests/parity_matmul.rs b/tests/parity_matmul.rs new file mode 100644 index 0000000..56e77a8 --- /dev/null +++ b/tests/parity_matmul.rs @@ -0,0 +1,33 @@ +//! CPU/GPU parity for matmul across the supported shape matrix. 
+ +#![cfg(feature = "gpu")] + +#[path = "common/mod.rs"] +mod common; + +use common::{TOL_MATMUL_BWD, TOL_MATMUL_FWD, assert_matmul_parity}; + +#[test] +fn matmul_2d_2d_parity() { + assert_matmul_parity(&[3, 4], &[4, 5], TOL_MATMUL_FWD, TOL_MATMUL_BWD); +} + +#[test] +fn matmul_2d_2d_square_parity() { + assert_matmul_parity(&[8, 8], &[8, 8], TOL_MATMUL_FWD, TOL_MATMUL_BWD); +} + +#[test] +fn matmul_2d_1d_parity() { + assert_matmul_parity(&[4, 3], &[3], TOL_MATMUL_FWD, TOL_MATMUL_BWD); +} + +#[test] +fn matmul_1d_2d_parity() { + assert_matmul_parity(&[3], &[3, 5], TOL_MATMUL_FWD, TOL_MATMUL_BWD); +} + +#[test] +fn matmul_1d_1d_dot_parity() { + assert_matmul_parity(&[6], &[6], TOL_MATMUL_FWD, TOL_MATMUL_BWD); +} diff --git a/tests/parity_reduce.rs b/tests/parity_reduce.rs new file mode 100644 index 0000000..2a1f697 --- /dev/null +++ b/tests/parity_reduce.rs @@ -0,0 +1,74 @@ +//! CPU/GPU parity for reduction ops (forward only). +//! +//! Backward parity for reductions is exercised end-to-end in the unary and +//! binary parity tests (which call `.sum()` to produce a scalar loss). Here +//! we verify forward equivalence across whole-tensor and axis variants. 
+ +#![cfg(feature = "gpu")] + +#[path = "common/mod.rs"] +mod common; + +use common::{TOL_FWD_GPU, assert_reduce_parity}; +use volta::tensor::TensorOps; + +#[test] +fn sum_whole_tensor_parity() { + assert_reduce_parity("sum", |t| t.sum(), &[4, 5], TOL_FWD_GPU); +} + +#[test] +fn mean_whole_tensor_parity() { + assert_reduce_parity("mean", |t| t.mean(), &[4, 5], TOL_FWD_GPU); +} + +#[test] +fn max_reduce_whole_tensor_parity() { + assert_reduce_parity("max_reduce", |t| t.max_reduce(), &[4, 5], TOL_FWD_GPU); +} + +#[test] +fn sum_dim_keepdim_parity() { + assert_reduce_parity( + "sum_dim keepdim", + |t| t.sum_dim(1, true), + &[4, 5], + TOL_FWD_GPU, + ); +} + +#[test] +fn sum_dim_no_keepdim_parity() { + assert_reduce_parity("sum_dim", |t| t.sum_dim(0, false), &[4, 5], TOL_FWD_GPU); +} + +#[test] +fn mean_dim_keepdim_parity() { + assert_reduce_parity( + "mean_dim keepdim", + |t| t.mean_dim(1, true), + &[4, 5], + TOL_FWD_GPU, + ); +} + +#[test] +fn mean_dim_no_keepdim_parity() { + assert_reduce_parity("mean_dim", |t| t.mean_dim(0, false), &[4, 5], TOL_FWD_GPU); +} + +#[test] +fn max_dim_keepdim_parity() { + assert_reduce_parity( + "max_dim keepdim", + |t| t.max_dim(1, true), + &[4, 5], + TOL_FWD_GPU, + ); +} + +#[test] +fn sum_3d_parity() { + // Sanity: helper handles a non-2D shape. + assert_reduce_parity("sum 3d", |t| t.sum(), &[2, 3, 4], TOL_FWD_GPU); +} diff --git a/tests/parity_unary.rs b/tests/parity_unary.rs new file mode 100644 index 0000000..907effe --- /dev/null +++ b/tests/parity_unary.rs @@ -0,0 +1,178 @@ +//! CPU/GPU parity for unary tensor ops. +//! +//! Each test runs the op on a fixed CPU tensor and a GPU copy of the same +//! tensor and asserts that outputs (and gradients, where applicable) match +//! within tolerance. Tests early-exit if no GPU is available. 
#![cfg(feature = "gpu")]

#[path = "common/mod.rs"]
mod common;

use common::{TOL_BWD_GPU, TOL_FWD_GPU, assert_unary_parity, assert_unary_parity_backward};
use volta::tensor::TensorOps;

/// Input shape shared by the 2-D tests in this file (20 elements).
const SHAPE_2D: &[usize] = &[4, 5];

/// Generates a `#[test]` named `$test` that checks forward and backward
/// CPU/GPU parity of the `TensorOps` method `$method` over `SHAPE_2D`, with
/// inputs sampled from `$range`.
macro_rules! backward_parity_test {
    ($test:ident, $label:literal, $method:ident, $range:expr) => {
        #[test]
        fn $test() {
            assert_unary_parity_backward(
                $label,
                |t| t.$method(),
                SHAPE_2D,
                $range,
                TOL_FWD_GPU,
                TOL_BWD_GPU,
            );
        }
    };
}

backward_parity_test!(neg_parity, "neg", neg, (-2.0, 2.0));

// Avoid 0; keep magnitudes well above eps to keep gradients well-conditioned.
backward_parity_test!(recip_parity, "recip", recip, (0.5, 5.0));

backward_parity_test!(sqrt_parity, "sqrt", sqrt, (0.01, 5.0));
backward_parity_test!(exp_parity, "exp", exp, (-3.0, 3.0));
backward_parity_test!(log_parity, "log", log, (0.1, 5.0));
backward_parity_test!(exp2_parity, "exp2", exp2, (-3.0, 3.0));
backward_parity_test!(log2_parity, "log2", log2, (0.1, 5.0));
backward_parity_test!(sin_parity, "sin", sin, (-2.0, 2.0));
backward_parity_test!(cos_parity, "cos", cos, (-2.0, 2.0));
backward_parity_test!(tanh_parity, "tanh", tanh, (-2.0, 2.0));
backward_parity_test!(sigmoid_parity, "sigmoid", sigmoid, (-2.0, 2.0));

// relu has a kink at 0, so its gradient there is a matter of convention and
// the two backends need not agree on it. The range is deliberately asymmetric:
// with 20 evenly spaced samples a symmetric range such as (-1.5, 1.5) puts an
// exact 0.0 at index 10, whereas (-1.5, 1.6) gives -0.105 and 0.05 as the
// samples nearest to 0.
backward_parity_test!(relu_parity, "relu", relu, (-1.5, 1.6));

/// erf is checked forward-only.
#[test]
fn erf_parity() {
    assert_unary_parity("erf", |t| t.erf(), SHAPE_2D, (-1.5, 1.5), TOL_FWD_GPU);
}

#[test]
fn unary_parity_3d_shape() {
    // Sanity check: helper handles a non-2D shape.
    assert_unary_parity(
        "sigmoid 3d",
        |t| t.sigmoid(),
        &[2, 3, 4],
        (-2.0, 2.0),
        TOL_FWD_GPU,
    );
}