From 78bf2c3fa5da178c0b9b80f5f0f678972fb3f04a Mon Sep 17 00:00:00 2001
From: Roderick van Domburg <roderick@vandomburg.net>
Date: Sat, 6 Sep 2025 20:19:38 +0200
Subject: [PATCH] fix: use rounding for float-to-integer conversions

Replace truncating casts with proper rounding in float-to-integer sample
conversions to eliminate bias and preserve small signals.

Changes:
- Use f32::round() and f64::round() instead of truncating `as` casts
- Eliminates bias towards zero from truncation behavior
- Preserves small audio signals that would otherwise be truncated to zero
- Removes nonlinear distortion caused by all scaled values in (-1.0, 1.0)
  truncating to zero, a dead zone twice as wide as any other step

Inlines sqrt and round functions for performance.

Additional tests verify proper rounding behavior for cases that would
fail with truncation.
---
 CHANGELOG.md              |  1 +
 dasp_sample/src/conv.rs   | 28 +++++++++++++++-------------
 dasp_sample/src/ops.rs    | 28 ++++++++++++++++++++++++++++
 dasp_sample/tests/conv.rs | 22 +++++++++++-----------
 4 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5cb66e2d..69f8f170 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
   yielding samples when the underlying signal gets exhausted. This is a breaking
   change. The return type of the `IntoInterleavedSamples#next_sample` method was
   modified.
+- Improved float-to-integer conversions to use proper rounding instead of truncation.
 
 ---
 
diff --git a/dasp_sample/src/conv.rs b/dasp_sample/src/conv.rs
index f1ed3f90..57c9bbf8 100644
--- a/dasp_sample/src/conv.rs
+++ b/dasp_sample/src/conv.rs
@@ -126,7 +126,9 @@ macro_rules! conversion_fns {
 macro_rules! conversions {
     ($T:ident, $mod_name:ident { $($rest:tt)* }) => {
         pub mod $mod_name {
-            use $crate::types::{I24, U24, I48, U48};
+            #[allow(unused_imports)]
+            use $crate::ops;
+            use $crate::{types::{I24, U24, I48, U48}};
             conversion_fns!($T, $($rest)*);
         }
     };
@@ -531,12 +533,12 @@ conversions!(u64, u64 {
 // The following conversions assume `-1.0 <= s < 1.0` (note that +1.0 is excluded) and will
 // overflow otherwise.
 conversions!(f32, f32 {
-    s to_i8 { (s * 128.0) as i8 }
-    s to_i16 { (s * 32_768.0) as i16 }
-    s to_i24 { I24::new_unchecked((s * 8_388_608.0) as i32) }
-    s to_i32 { (s * 2_147_483_648.0) as i32 }
-    s to_i48 { I48::new_unchecked((s * 140_737_488_355_328.0) as i64) }
-    s to_i64 { (s * 9_223_372_036_854_775_808.0) as i64 }
+    s to_i8 { ops::f32::round(s * 128.0) as i8 }
+    s to_i16 { ops::f32::round(s * 32_768.0) as i16 }
+    s to_i24 { I24::new_unchecked(ops::f32::round(s * 8_388_608.0) as i32) }
+    s to_i32 { ops::f32::round(s * 2_147_483_648.0) as i32 }
+    s to_i48 { I48::new_unchecked(ops::f32::round(s * 140_737_488_355_328.0) as i64) }
+    s to_i64 { ops::f32::round(s * 9_223_372_036_854_775_808.0) as i64 }
     s to_u8 { super::i8::to_u8(to_i8(s)) }
     s to_u16 { super::i16::to_u16(to_i16(s)) }
     s to_u24 { super::i24::to_u24(to_i24(s)) }
@@ -549,12 +551,12 @@ conversions!(f32, f32 {
 // The following conversions assume `-1.0 <= s < 1.0` (note that +1.0 is excluded) and will
 // overflow otherwise.
 conversions!(f64, f64 {
-    s to_i8 { (s * 128.0) as i8 }
-    s to_i16 { (s * 32_768.0) as i16 }
-    s to_i24 { I24::new_unchecked((s * 8_388_608.0) as i32) }
-    s to_i32 { (s * 2_147_483_648.0) as i32 }
-    s to_i48 { I48::new_unchecked((s * 140_737_488_355_328.0) as i64) }
-    s to_i64 { (s * 9_223_372_036_854_775_808.0) as i64 }
+    s to_i8 { ops::f64::round(s * 128.0) as i8 }
+    s to_i16 { ops::f64::round(s * 32_768.0) as i16 }
+    s to_i24 { I24::new_unchecked(ops::f64::round(s * 8_388_608.0) as i32) }
+    s to_i32 { ops::f64::round(s * 2_147_483_648.0) as i32 }
+    s to_i48 { I48::new_unchecked(ops::f64::round(s * 140_737_488_355_328.0) as i64) }
+    s to_i64 { ops::f64::round(s * 9_223_372_036_854_775_808.0) as i64 }
     s to_u8 { super::i8::to_u8(to_i8(s)) }
     s to_u16 { super::i16::to_u16(to_i16(s)) }
     s to_u24 { super::i24::to_u24(to_i24(s)) }
diff --git a/dasp_sample/src/ops.rs b/dasp_sample/src/ops.rs
index 81a5b5c7..6fbd5a20 100644
--- a/dasp_sample/src/ops.rs
+++ b/dasp_sample/src/ops.rs
@@ -3,6 +3,7 @@ pub mod f32 {
     /// Uses bit manipulation for initial guess, then 3 iterations for ~6-7 decimal places.
     /// Accuracy: ~6-7 decimal places
     #[cfg(not(feature = "std"))]
+    #[inline]
     pub fn sqrt(x: f32) -> f32 {
         if x < 0.0 {
             return f32::NAN;
@@ -31,6 +32,19 @@ pub mod f32 {
     pub fn sqrt(x: f32) -> f32 {
         x.sqrt()
     }
+
+    #[cfg(not(feature = "std"))]
+    #[inline]
+    pub fn round(x: f32) -> f32 {
+        // Compare the exact remainder `x - t`; forming `x + 0.5` first can round up by one.
+        let (t, unit) = (x as i64 as f32, 1.0_f32.copysign(x));
+        if (x - t) * unit >= 0.5 { t + unit } else { t }
+    }
+    #[cfg(feature = "std")]
+    #[inline]
+    pub fn round(x: f32) -> f32 {
+        x.round()
+    }
 }
 
 pub mod f64 {
@@ -38,6 +52,7 @@ pub mod f64 {
     /// Uses bit manipulation for initial guess, then 4 iterations for ~14-15 decimal places.
     /// Accuracy: ~14-15 decimal places
     #[cfg(not(feature = "std"))]
+    #[inline]
     pub fn sqrt(x: f64) -> f64 {
         if x < 0.0 {
             return f64::NAN;
@@ -66,4 +81,17 @@ pub mod f64 {
     pub fn sqrt(x: f64) -> f64 {
         x.sqrt()
     }
+
+    #[cfg(not(feature = "std"))]
+    #[inline]
+    pub fn round(x: f64) -> f64 {
+        // Compare the exact remainder `x - t`; forming `x + 0.5` first can round up by one.
+        let (t, unit) = (x as i64 as f64, 1.0_f64.copysign(x));
+        if (x - t) * unit >= 0.5 { t + unit } else { t }
+    }
+    #[cfg(feature = "std")]
+    #[inline]
+    pub fn round(x: f64) -> f64 {
+        x.round()
+    }
 }
diff --git a/dasp_sample/tests/conv.rs b/dasp_sample/tests/conv.rs
index b442d246..fee081d1 100644
--- a/dasp_sample/tests/conv.rs
+++ b/dasp_sample/tests/conv.rs
@@ -479,11 +479,11 @@ tests!(u64 {
 });
 
 tests!(f32 {
-    to_i8  { -1.0, -128; 0.0, 0; }
-    to_i16 { -1.0, -32_768; 0.0, 0; }
-    to_i24 { -1.0, -8_388_608; 0.0, 0; }
-    to_i32 { -1.0, -2_147_483_648; 0.0, 0; }
-    to_i48 { -1.0, -140_737_488_355_328; 0.0, 0; }
+    to_i8  { -1.0, -128; 0.0, 0; 0.1, 13; 0.004, 1; -0.004, -1; 0.003, 0; }
+    to_i16 { -1.0, -32_768; 0.0, 0; 0.1, 3277; 0.00002, 1; 0.00001, 0; }
+    to_i24 { -1.0, -8_388_608; 0.0, 0; 0.1, 838861; 0.0000001, 1; -0.0000001, -1; 0.00000005, 0; }
+    to_i32 { -1.0, -2_147_483_648; 0.0, 0; 0.0000000004, 1; -0.0000000004, -1; 0.0000000002, 0; }
+    to_i48 { -1.0, -140_737_488_355_328; 0.0, 0; 0.000000000000006, 1; -0.000000000000006, -1; 0.000000000000003, 0; }
     to_i64 { -1.0, -9_223_372_036_854_775_808; 0.0, 0; }
     to_u8  { -1.0, 0; 0.0, 128; }
     to_u16 { -1.0, 0; 0.0, 32_768; }
@@ -495,12 +495,12 @@ tests!(f32 {
 });
 
 tests!(f64 {
-    to_i8  { -1.0, -128; 0.0, 0; }
-    to_i16 { -1.0, -32_768; 0.0, 0; }
-    to_i24 { -1.0, -8_388_608; 0.0, 0; }
-    to_i32 { -1.0, -2_147_483_648; 0.0, 0; }
-    to_i48 { -1.0, -140_737_488_355_328; 0.0, 0; }
-    to_i64 { -1.0, -9_223_372_036_854_775_808; 0.0, 0; }
+    to_i8  { -1.0, -128; 0.0, 0; 0.1, 13; 0.007, 1; -0.004, -1; 0.003, 0; }
+    to_i16 { -1.0, -32_768; 0.0, 0; 0.1, 3277; 0.00002, 1; -0.00002, -1; 0.00001, 0; }
+    to_i24 { -1.0, -8_388_608; 0.0, 0; 0.1, 838861; 0.0000001, 1; -0.0000001, -1; 0.00000005, 0; }
+    to_i32 { -1.0, -2_147_483_648; 0.0, 0; 0.1, 214748365; 0.0000000004, 1; -0.0000000004, -1; 0.0000000002, 0; }
+    to_i48 { -1.0, -140_737_488_355_328; 0.0, 0; 0.1, 14073748835533; 0.000000000000006, 1; -0.000000000000006, -1; 0.000000000000003, 0; }
+    to_i64 { -1.0, -9_223_372_036_854_775_808; 0.0, 0; 0.1, 922337203685477632; }
     to_u8  { -1.0, 0; 0.0, 128; }
     to_u16 { -1.0, 0; 0.0, 32_768; }
     to_u24 { -1.0, 0; 0.0, 8_388_608; }