feat: stereo audio output support (#23)

santhoshvai · coderabbitai[bot] · web-flow · commit 1e82d998011b · 2026-01-20T16:43:11.000+01:00
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **New Features**
  * Added comprehensive audio device management with hardware-based echo
    cancellation and noise suppression.
  * Introduced audio state monitoring and control capabilities.
  * Added audio level monitoring functionality.

* **Bug Fixes**
  * Adjusted audio session configuration for improved compatibility.

* **Refactor**
  * Restructured audio handling to use modern audio engine architecture.
  * Enhanced audio device initialization process.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
diff --git a/android/build.gradle b/android/build.gradle
@@ -36,8 +36,6 @@ repositories {
     google()
 }
 
-
-
 def safeExtGet(prop, fallback) {
     rootProject.ext.has(prop) ? rootProject.ext.get(prop) : fallback
 }
diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
@@ -1,5 +1,6 @@
 package com.oney.WebRTCModule;
 
+import android.os.Build;
 import android.util.Log;
 import android.util.Pair;
 import android.util.SparseArray;
@@ -82,11 +83,10 @@ public WebRTCModule(ReactApplicationContext reactContext) {
             EglBase.Context eglContext = EglUtils.getRootEglBaseContext();
             encoderFactory = new SimulcastAlignedVideoEncoderFactory(eglContext, true, true, ResolutionAdjustment.MULTIPLE_OF_16);
             decoderFactory = new SelectiveVideoDecoderFactory(eglContext, false, Arrays.asList("VP9", "AV1"));
-
         }
 
         if (adm == null) {
-            adm = JavaAudioDeviceModule.builder(reactContext).createAudioDeviceModule();
+            adm = createAudioDeviceModule(reactContext);
         }
 
         AudioProcessingFactory audioProcessingFactory = null;
@@ -123,6 +123,15 @@ public WebRTCModule(ReactApplicationContext reactContext) {
         getUserMediaImpl = new GetUserMediaImpl(this, reactContext);
     }
 
+    private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext reactContext) {
+        return JavaAudioDeviceModule
+                .builder(reactContext)
+                .setUseHardwareAcousticEchoCanceler(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
+                .setUseHardwareNoiseSuppressor(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
+                .setUseStereoOutput(true)
+                .createAudioDeviceModule();
+    }
+
     @NonNull
     @Override
     public String getName() {
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioEngineLevelNodeAdapter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioEngineLevelNodeAdapter.swift
@@ -0,0 +1,122 @@
+//
+// Copyright © 2025 Stream.io Inc. All rights reserved.
+//
+
+import Accelerate
+import AVFoundation
+import Combine
+import Foundation
+
+/// Describes an adapter that can attach a tap to an audio engine node and
+/// report readings through a Combine subject.
+protocol AudioEngineNodeAdapting {
+
+    /// Subject that receives the `Float` readings produced by the adapter.
+    /// It is optional, so an adapter may exist without a destination.
+    var subject: CurrentValueSubject<Float, Never>? { get set }
+
+    /// Installs a tap on `node`.
+    /// - Parameters:
+    ///   - node: The node to tap.
+    ///   - format: Audio format requested for the tap.
+    ///   - bus: Bus on `node` the tap is attached to.
+    ///   - bufferSize: Requested size of the tap buffers.
+    func installInputTap(on node: AVAudioNode, format: AVAudioFormat, bus: Int, bufferSize: UInt32)
+
+    /// Removes a previously installed tap.
+    /// - Parameter bus: Bus the tap should be removed from.
+    func uninstall(on bus: Int)
+}
+
+/// Observes an `AVAudioMixerNode` and publishes decibel readings for UI and
+/// analytics consumers.
+///
+/// Readings are computed from the first channel of every tapped buffer and
+/// are clamped to the range `Constant.silenceDB ... 0`.
+final class AudioEngineLevelNodeAdapter: AudioEngineNodeAdapting {
+
+    enum Constant {
+        /// Lower bound, in decibels, of the published level range. A reading
+        /// at this value is treated as silence.
+        static let silenceDB: Float = -160
+    }
+
+    /// Receives every computed level, plus `Constant.silenceDB` whenever
+    /// `uninstall(on:)` is called. Nothing is published while this is `nil`.
+    var subject: CurrentValueSubject<Float, Never>?
+
+    /// The mixer currently hosting the tap, or `nil` when no tap is installed.
+    private var inputTap: AVAudioMixerNode?
+
+    /// Installs a tap on the supplied audio node to monitor input levels.
+    ///
+    /// The call is a no-op when `node` is not an `AVAudioMixerNode` or when a
+    /// tap has already been installed through this adapter.
+    /// - Parameters:
+    ///   - node: The node to observe; must be an `AVAudioMixerNode`.
+    ///   - format: Audio format expected by the tap.
+    ///   - bus: Output bus to observe, defaults to `0`.
+    ///   - bufferSize: Tap buffer size, defaults to `1024`.
+    func installInputTap(
+        on node: AVAudioNode,
+        format: AVAudioFormat,
+        bus: Int = 0,
+        bufferSize: UInt32 = 1024
+    ) {
+        guard let mixer = node as? AVAudioMixerNode, inputTap == nil else { return }
+
+        mixer.installTap(
+            onBus: bus,
+            bufferSize: bufferSize,
+            format: format
+        ) { [weak self] buffer, _ in
+            self?.processInputBuffer(buffer)
+        }
+
+        inputTap = mixer
+        // log.debug("Input node installed", subsystems: .audioRecording)
+    }
+
+    /// Removes the tap and resets observed audio levels.
+    ///
+    /// `Constant.silenceDB` is published even when no tap was installed.
+    /// - Parameter bus: Bus to remove the tap from, defaults to `0`. Pass the
+    ///   same bus that was used in `installInputTap(on:format:bus:bufferSize:)`.
+    func uninstall(on bus: Int = 0) {
+        // Only touch the mixer while it is still attached to an engine.
+        if let mixer = inputTap, mixer.engine != nil {
+            // Honour the requested bus; a hard-coded `0` would leave a tap
+            // installed on any other bus in place.
+            mixer.removeTap(onBus: bus)
+        }
+        subject?.send(Constant.silenceDB)
+        inputTap = nil
+        // log.debug("Input node uninstalled", subsystems: .audioRecording)
+    }
+
+    // MARK: - Private Helpers
+
+    /// Processes the PCM buffer produced by the tap, computes the RMS level of
+    /// its first channel in decibels, clamps it and forwards it to `subject`.
+    private func processInputBuffer(_ buffer: AVAudioPCMBuffer) {
+        // Without a subject there is nobody to notify, without float channel
+        // data there is nothing to measure, and an empty buffer has no level
+        // to report. Exit early in all three cases.
+        guard
+            let subject,
+            let channelData = buffer.floatChannelData,
+            buffer.frameLength > 0
+        else { return }
+
+        // Number of samples per channel, expressed in the length type vDSP
+        // expects.
+        let frameCount = vDSP_Length(buffer.frameLength)
+
+        // Linear (not yet decibel) root-mean-square amplitude of the buffer.
+        var rms: Float = 0
+
+        // Compute the RMS of the first channel only, reading every sample
+        // (stride 1).
+        vDSP_rmsqv(channelData[0], 1, &rms, frameCount)
+
+        // Convert to decibels with 20 * log10(rms). `Float.ulpOfOne` keeps the
+        // argument strictly positive so the logarithm is always defined; it
+        // also means the computed value bottoms out at roughly -138 dB.
+        let rmsDB = 20 * log10(max(rms, Float.ulpOfOne))
+
+        // Clamp to the published range, from silence up to 0 dB (maximum).
+        let clampedDB = max(Constant.silenceDB, min(0, rmsDB))
+
+        // Hand the reading to subscribers (e.g. UI level meters or analytics).
+        subject.send(clampedDB)
+    }
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/RTCAudioDeviceModuleControlling.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/RTCAudioDeviceModuleControlling.swift
@@ -0,0 +1,47 @@
+//
+// Copyright © 2025 Stream.io Inc. All rights reserved.
+//
+
+import Combine
+import WebRTC
+
+/// Abstraction over `RTCAudioDeviceModule` so tests can provide fakes while
+/// production code continues to rely on the WebRTC-backed implementation.
+///
+/// Methods returning `Int` report a status code; `initAndStartPlayout()` in
+/// this file treats `0` as success.
+protocol RTCAudioDeviceModuleControlling: AnyObject {
+
+    // MARK: - Delegate
+
+    var observer: RTCAudioDeviceModuleDelegate? { get set }
+
+    // MARK: - Playout and recording state
+
+    var isPlaying: Bool { get }
+    var isRecording: Bool { get }
+    var isPlayoutInitialized: Bool { get }
+    var isRecordingInitialized: Bool { get }
+    var isMicrophoneMuted: Bool { get }
+
+    // MARK: - Stereo playout
+
+    var isStereoPlayoutEnabled: Bool { get }
+    var prefersStereoPlayout: Bool { get set }
+
+    // MARK: - Voice processing
+
+    var isVoiceProcessingBypassed: Bool { get set }
+    var isVoiceProcessingEnabled: Bool { get }
+    var isVoiceProcessingAGCEnabled: Bool { get }
+
+    // MARK: - Lifecycle
+
+    func reset() -> Int
+
+    // MARK: - Playout control
+
+    func initAndStartPlayout() -> Int
+    func startPlayout() -> Int
+    func stopPlayout() -> Int
+    func refreshStereoPlayoutState()
+
+    // MARK: - Recording control
+
+    func initAndStartRecording() -> Int
+    func startRecording() -> Int
+    func stopRecording() -> Int
+    func setMicrophoneMuted(_ isMuted: Bool) -> Int
+    func setMuteMode(_ mode: RTCAudioEngineMuteMode) -> Int
+    func setRecordingAlwaysPreparedMode(_ alwaysPreparedRecording: Bool) -> Int
+}
+
+extension RTCAudioDeviceModule: RTCAudioDeviceModuleControlling {
+    /// Initializes playout and, when that succeeds, starts it, so callers can
+    /// request playout with a single call.
+    ///
+    /// - Returns: The non-zero status from `initPlayout()` when initialization
+    ///   fails; otherwise the status returned by `startPlayout()`.
+    func initAndStartPlayout() -> Int {
+        let initStatus = initPlayout()
+        // Do not attempt to start playout if initialization reported an error.
+        guard initStatus == 0 else { return initStatus }
+        return startPlayout()
+    }
+}
diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
@@ -636,9 +636,7 @@ - (void)ensureAudioSessionWithRecording {
     [session lockForConfiguration];
     config.category = AVAudioSessionCategoryPlayAndRecord;
     config.categoryOptions =
-             AVAudioSessionCategoryOptionAllowAirPlay|
              AVAudioSessionCategoryOptionAllowBluetooth|
-             AVAudioSessionCategoryOptionAllowBluetoothA2DP|
              AVAudioSessionCategoryOptionDefaultToSpeaker;
     config.mode = AVAudioSessionModeVideoChat;
     NSError* error = nil;
diff --git a/ios/RCTWebRTC/WebRTCModule.h b/ios/RCTWebRTC/WebRTCModule.h
@@ -23,13 +23,16 @@ static NSString *const kEventMediaStreamTrackEnded = @"mediaStreamTrackEnded";
 static NSString *const kEventPeerConnectionOnRemoveTrack = @"peerConnectionOnRemoveTrack";
 static NSString *const kEventPeerConnectionOnTrack = @"peerConnectionOnTrack";
 
+@class AudioDeviceModule;
+
 @interface WebRTCModule : RCTEventEmitter<RCTBridgeModule>
 
 @property(nonatomic, strong) dispatch_queue_t workerQueue;
 
 @property(nonatomic, strong) RTCPeerConnectionFactory *peerConnectionFactory;
 @property(nonatomic, strong) id<RTCVideoDecoderFactory> decoderFactory;
 @property(nonatomic, strong) id<RTCVideoEncoderFactory> encoderFactory;
+@property(nonatomic, strong) AudioDeviceModule *audioDeviceModule;
 
 @property(nonatomic, strong) NSMutableDictionary<NSNumber *, RTCPeerConnection *> *peerConnections;
 @property(nonatomic, strong) NSMutableDictionary<NSString *, RTCMediaStream *> *localStreams;
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
@@ -11,6 +11,16 @@
 #import "WebRTCModule.h"
 #import "WebRTCModuleOptions.h"
 
+// Import Swift classes
+// We need the following if and elif directives to properly import the generated Swift header for the module,
+// handling both cases where CocoaPods module import path is available and where it is not.
+// This ensures compatibility regardless of whether the project is built with frameworks enabled or as static libraries.
+#if __has_include(<stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>)
+#import <stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>
+#elif __has_include("stream_react_native_webrtc-Swift.h")
+#import "stream_react_native_webrtc-Swift.h"
+#endif
+
 @interface WebRTCModule ()
 @end
 
@@ -78,7 +88,7 @@ - (instancetype)init {
             }
             RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class]));
             _peerConnectionFactory =
-                [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypePlatformDefault
+                [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
                                                                  encoderFactory:encoderFactory
                                                                  decoderFactory:decoderFactory
@@ -90,13 +100,15 @@ - (instancetype)init {
                                                                                   audioDevice:audioDevice];
         } else {
             _peerConnectionFactory =
-                [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypePlatformDefault
+                [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
                                                                  encoderFactory:encoderFactory
                                                                  decoderFactory:decoderFactory
                                                           audioProcessingModule:nil];
         }
 
+        _audioDeviceModule = [[AudioDeviceModule alloc] initWithSource:_peerConnectionFactory.audioDeviceModule];
+
         _peerConnections = [NSMutableDictionary new];
         _localStreams = [NSMutableDictionary new];
         _localTracks = [NSMutableDictionary new];
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@stream-io/react-native-webrtc",
-  "version": "137.0.2",
+  "version": "137.1.0-alpha.2",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/GetStream/react-native-webrtc.git"
diff --git a/stream-react-native-webrtc.podspec b/stream-react-native-webrtc.podspec
@@ -15,12 +15,13 @@ Pod::Spec.new do |s|
   s.platform            = :ios, '13.0'
 
   s.preserve_paths      = 'ios/**/*'
-  s.source_files        = 'ios/**/*.{h,m}'
+  s.source_files        = 'ios/**/*.{h,m,mm,swift}'
   s.libraries           = 'c', 'sqlite3', 'stdc++'
   s.framework           = 'AudioToolbox','AVFoundation', 'CoreAudio', 'CoreGraphics', 'CoreVideo', 'GLKit', 'VideoToolbox'
+  s.swift_version       = '5.0'
   s.dependency          'React-Core'
   # WebRTC version from https://github.com/GetStream/stream-video-swift-webrtc releases
-  s.dependency          'StreamWebRTC', '~>137.0.52'
+  s.dependency          'StreamWebRTC', '~>137.0.54'
   # Swift/Objective-C compatibility #https://blog.cocoapods.org/CocoaPods-1.5.0/
   s.pod_target_xcconfig = {
     'DEFINES_MODULE' => 'YES'

Original file line number	Diff line number	Diff line change
`@@ -36,8 +36,6 @@ repositories {`
`36`	`36`	`google()`
`37`	`37`	`}`
`38`	`38`
`39`		`-`
`40`		`-`
`41`	`39`	`def safeExtGet(prop, fallback) {`
`42`	`40`	`rootProject.ext.has(prop) ? rootProject.ext.get(prop) : fallback`
`43`	`41`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@stream-io/react-native-webrtc",`
`3`		`- "version": "137.0.2",`
	`3`	`+ "version": "137.1.0-alpha.2",`
`4`	`4`	`"repository": {`
`5`	`5`	`"type": "git",`
`6`	`6`	`"url": "git+https://github.com/GetStream/react-native-webrtc.git"`