livekit · theomonnom · Apr 15, 2025
diff --git a/livekit-rtc/livekit/rtc/audio_resampler.py b/livekit-rtc/livekit/rtc/audio_resampler.py
@@ -9,6 +9,8 @@
 from ._utils import get_address
 from .audio_frame import AudioFrame
 
+import numpy as np
+
 
 @unique
 class AudioResamplerQuality(str, Enum):
@@ -95,9 +97,18 @@ def push(self, data: bytearray | AudioFrame) -> list[AudioFrame]:
         """
         bdata = data if isinstance(data, bytearray) else data.data.cast("b")
 
+        # Temporary workaround: scale the input samples by 0.9 before pushing them to soxr.
+        # soxr: #define DITHERING + (1./32)*(int)(((ran1>>3)&31)-((ran2>>3)&31))
+        # That dithering seems to overflow the int16 range (it is unclear why this happens on
+        # our builds but not inside the soxr lib itself).
+        audio_array = np.frombuffer(bdata, dtype=np.int16).astype(np.float32)
+        scaled_int16 = (audio_array * 0.9).astype(np.int16)
+
+        audio_view = memoryview(scaled_int16)
+
         req = proto_ffi.FfiRequest()
         req.push_sox_resampler.resampler_handle = self._ffi_handle.handle
-        req.push_sox_resampler.data_ptr = get_address(memoryview(bdata))
+        req.push_sox_resampler.data_ptr = get_address(audio_view)
         req.push_sox_resampler.size = len(bdata)
 
         resp = FfiClient.instance.request(req)
@@ -111,6 +122,7 @@ def push(self, data: bytearray | AudioFrame) -> list[AudioFrame]:
         cdata = (ctypes.c_int8 * resp.push_sox_resampler.size).from_address(
             resp.push_sox_resampler.output_ptr
         )
+
         output_data = bytearray(cdata)
         return [
             AudioFrame(