From 1a6bdbb5e9cd6e1fd6eec30e70e421f5e463ed56 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Fri, 20 Feb 2026 15:24:21 +0100 Subject: [PATCH 01/14] feat: added captured audio for screen sharing for Android --- .../oney/WebRTCModule/GetUserMediaImpl.java | 15 ++++++- .../com/oney/WebRTCModule/WebRTCModule.java | 43 +++++++++++++++++++ .../WebRTCModule/WebRTCModuleOptions.java | 25 +++++++++++ .../audio/AudioProcessingController.java | 13 ++++-- 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java index 861919513..c17254a8b 100644 --- a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java +++ b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java @@ -39,7 +39,7 @@ * The implementation of {@code getUserMedia} extracted into a separate file in * order to reduce complexity and to (somewhat) separate concerns. */ -class GetUserMediaImpl { +public class GetUserMediaImpl { /** * The {@link Log} tag with which {@code GetUserMediaImpl} is to log. */ @@ -62,6 +62,15 @@ class GetUserMediaImpl { private Promise displayMediaPromise; private Intent mediaProjectionPermissionResultData; + /** + * Returns the MediaProjection permission result data Intent. + * This Intent can be used to create a MediaProjection for audio capture + * via AudioPlaybackCaptureConfiguration. + */ + public Intent getMediaProjectionPermissionResultData() { + return mediaProjectionPermissionResultData; + } + private final ServiceConnection mediaProjectionServiceConnection = new ServiceConnection() { @Override public void onServiceConnected(ComponentName name, IBinder service) { @@ -355,7 +364,9 @@ private void createScreenStream() { } // Cleanup - mediaProjectionPermissionResultData = null; + // Note: mediaProjectionPermissionResultData is intentionally NOT nulled here. + // It is retained so it can be reused to create a MediaProjection for + // screen share audio capture (AudioPlaybackCaptureConfiguration). displayMediaPromise = null; } diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java index 6e981f077..dd0a29e27 100644 --- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java +++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java @@ -129,9 +129,48 @@ private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext re .setUseHardwareAcousticEchoCanceler(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) .setUseHardwareNoiseSuppressor(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) .setUseStereoOutput(true) + .setAudioBufferCallback((audioBuffer, audioFormat, channelCount, sampleRate, bytesRead, captureTimeNs) -> { + if (bytesRead > 0) { + WebRTCModuleOptions.ScreenAudioBytesProvider provider = + WebRTCModuleOptions.getInstance().screenAudioBytesProvider; + if (provider != null) { + java.nio.ByteBuffer screenBuffer = provider.getScreenAudioBytes(bytesRead); + if (screenBuffer != null && screenBuffer.remaining() > 0) { + mixScreenAudioIntoBuffer(audioBuffer, screenBuffer, bytesRead); + } + } + } + return captureTimeNs; + }) .createAudioDeviceModule(); } + /** + * Mixes screen audio into the microphone buffer using PCM additive mixing with clamping. + */ + private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer, + java.nio.ByteBuffer screenBuffer, + int bytesRead) { + micBuffer.position(0); + screenBuffer.position(0); + + micBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN); + screenBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN); + + java.nio.ShortBuffer micShorts = micBuffer.asShortBuffer(); + java.nio.ShortBuffer screenShorts = screenBuffer.asShortBuffer(); + + int samplesToMix = Math.min(bytesRead / 2, + Math.min(micShorts.remaining(), screenShorts.remaining())); + + for (int i = 0; i < samplesToMix; i++) { + int sum = micShorts.get(i) + screenShorts.get(i); + if (sum > Short.MAX_VALUE) sum = Short.MAX_VALUE; + if (sum < Short.MIN_VALUE) sum = Short.MIN_VALUE; + micShorts.put(i, (short) sum); + } + } + @NonNull @Override public String getName() { @@ -142,6 +181,10 @@ public AudioDeviceModule getAudioDeviceModule() { return mAudioDeviceModule; } + public GetUserMediaImpl getUserMediaImpl() { + return getUserMediaImpl; + } + public PeerConnectionObserver getPeerConnectionObserver(int id) { return mPeerConnectionObservers.get(id); } diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java index 6187c9472..24e53f8ce 100644 --- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java +++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java @@ -8,6 +8,8 @@ import org.webrtc.VideoEncoderFactory; import org.webrtc.audio.AudioDeviceModule; +import java.nio.ByteBuffer; + public class WebRTCModuleOptions { private static WebRTCModuleOptions instance; @@ -20,6 +22,29 @@ public class WebRTCModuleOptions { public boolean enableMediaProjectionService; public AudioProcessingFactoryProvider audioProcessingFactoryProvider; + /** + * Provider for screen share audio bytes. When set, the AudioDeviceModule's + * AudioBufferCallback will mix screen audio into the mic buffer before + * WebRTC processing. This allows screen audio mixing to work alongside + * any audio processing factory (including noise cancellation). + * + * Set this when screen share audio capture starts, clear it when it stops. + */ + public volatile ScreenAudioBytesProvider screenAudioBytesProvider; + + /** + * Functional interface for providing screen audio bytes on demand. + */ + public interface ScreenAudioBytesProvider { + /** + * Returns a ByteBuffer containing screen audio PCM data. + * + * @param bytesRequested number of bytes to read (matching mic buffer size) + * @return ByteBuffer with screen audio, or null if not available + */ + ByteBuffer getScreenAudioBytes(int bytesRequested); + } + public static WebRTCModuleOptions getInstance() { if (instance == null) { instance = new WebRTCModuleOptions(); diff --git a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java index 9444eb781..17ae78679 100644 --- a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java +++ b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java @@ -17,13 +17,20 @@ public class AudioProcessingController implements AudioProcessingFactoryProvider public ExternalAudioProcessingFactory externalAudioProcessingFactory; public AudioProcessingController() { - this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory(); - this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing); - this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing); + // ExternalAudioProcessingFactory creation is deferred to getFactory() + // because its constructor calls JNI native methods that require the + // WebRTC native library to be loaded first (via PeerConnectionFactory.initialize()). + // This allows AudioProcessingController to be safely instantiated in + // MainApplication.onCreate() before the native library is loaded. } @Override public AudioProcessingFactory getFactory() { + if (this.externalAudioProcessingFactory == null) { + this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory(); + this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing); + this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing); + } return this.externalAudioProcessingFactory; } } \ No newline at end of file From c4637a204ade8e15cfa8a08a8d7e86cc067d4e6a Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Tue, 24 Feb 2026 13:31:43 +0100 Subject: [PATCH 02/14] feat: added in app screen sharing --- ios/RCTWebRTC/InAppScreenCaptureController.h | 18 ++++ ios/RCTWebRTC/InAppScreenCaptureController.m | 49 ++++++++++ ios/RCTWebRTC/InAppScreenCapturer.h | 21 +++++ ios/RCTWebRTC/InAppScreenCapturer.m | 98 ++++++++++++++++++++ ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m | 28 ++++-- ios/RCTWebRTC/WebRTCModuleOptions.h | 7 ++ 6 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 ios/RCTWebRTC/InAppScreenCaptureController.h create mode 100644 ios/RCTWebRTC/InAppScreenCaptureController.m create mode 100644 ios/RCTWebRTC/InAppScreenCapturer.h create mode 100644 ios/RCTWebRTC/InAppScreenCapturer.m diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.h b/ios/RCTWebRTC/InAppScreenCaptureController.h new file mode 100644 index 000000000..535f5863f --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCaptureController.h @@ -0,0 +1,18 @@ +#import +#import "CaptureController.h" +#import "CapturerEventsDelegate.h" + +NS_ASSUME_NONNULL_BEGIN + +@class InAppScreenCapturer; + +@interface InAppScreenCaptureController : CaptureController + +- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer; + +/// The underlying RPScreenRecorder-based capturer. +@property(nonatomic, strong, readonly) InAppScreenCapturer *capturer; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.m b/ios/RCTWebRTC/InAppScreenCaptureController.m new file mode 100644 index 000000000..1b9561d39 --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCaptureController.m @@ -0,0 +1,49 @@ +#if TARGET_OS_IOS + +#import "InAppScreenCaptureController.h" +#import "InAppScreenCapturer.h" + +@interface InAppScreenCaptureController () +@end + +@implementation InAppScreenCaptureController + +- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer { + self = [super init]; + if (self) { + _capturer = capturer; + _capturer.eventsDelegate = self; + self.deviceId = @"in-app-screen-capture"; + } + return self; +} + +- (void)dealloc { + [self.capturer stopCapture]; +} + +- (void)startCapture { + [self.capturer startCapture]; +} + +- (void)stopCapture { + [self.capturer stopCapture]; +} + +- (NSDictionary *)getSettings { + return @{ + @"deviceId": self.deviceId ?: @"in-app-screen-capture", + @"groupId": @"", + @"frameRate": @(30) + }; +} + +#pragma mark - CapturerEventsDelegate + +- (void)capturerDidEnd:(RTCVideoCapturer *)capturer { + [self.eventsDelegate capturerDidEnd:capturer]; +} + +@end + +#endif diff --git a/ios/RCTWebRTC/InAppScreenCapturer.h b/ios/RCTWebRTC/InAppScreenCapturer.h new file mode 100644 index 000000000..3a8566676 --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCapturer.h @@ -0,0 +1,21 @@ +#import +#import +#import "CapturerEventsDelegate.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface InAppScreenCapturer : RTCVideoCapturer + +@property(nonatomic, weak) id eventsDelegate; + +/// Callback invoked for each .audioApp CMSampleBuffer from RPScreenRecorder. +/// Set this before calling startCapture if audio mixing is desired. +@property(nonatomic, copy, nullable) void (^audioBufferHandler)(CMSampleBufferRef); + +- (instancetype)initWithDelegate:(__weak id)delegate; +- (void)startCapture; +- (void)stopCapture; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m new file mode 100644 index 000000000..9c86a378c --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -0,0 +1,98 @@ +#if TARGET_OS_IOS + +#import +#import +#import + +#import "InAppScreenCapturer.h" + +@implementation InAppScreenCapturer { + BOOL _capturing; +} + +- (instancetype)initWithDelegate:(__weak id)delegate { + self = [super initWithDelegate:delegate]; + return self; +} + +- (void)startCapture { + if (_capturing) { + return; + } + _capturing = YES; + + RPScreenRecorder *recorder = [RPScreenRecorder sharedRecorder]; + recorder.microphoneEnabled = NO; // WebRTC handles mic input + + __weak __typeof__(self) weakSelf = self; + [recorder startCaptureWithHandler:^(CMSampleBufferRef _Nonnull sampleBuffer, + RPSampleBufferType bufferType, + NSError * _Nullable error) { + __strong __typeof__(weakSelf) strongSelf = weakSelf; + if (!strongSelf || error || !strongSelf->_capturing) { + return; + } + + switch (bufferType) { + case RPSampleBufferTypeVideo: + [strongSelf processVideoSampleBuffer:sampleBuffer]; + break; + case RPSampleBufferTypeAudioApp: + if (strongSelf.audioBufferHandler) { + strongSelf.audioBufferHandler(sampleBuffer); + } + break; + case RPSampleBufferTypeAudioMic: + // Ignored — WebRTC handles mic capture via AudioDeviceModule + break; + } + } completionHandler:^(NSError * _Nullable error) { + if (error) { + NSLog(@"[InAppScreenCapturer] startCapture failed: %@", error.localizedDescription); + [weakSelf.eventsDelegate capturerDidEnd:weakSelf]; + } + }]; +} + +- (void)processVideoSampleBuffer:(CMSampleBufferRef)sampleBuffer { + CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer); + if (!pixelBuffer) { + return; + } + + int64_t timeStampNs = (int64_t)(CMTimeGetSeconds( + CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * NSEC_PER_SEC); + + RTCCVPixelBuffer *rtcPixelBuffer = [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer]; + RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithBuffer:rtcPixelBuffer + rotation:RTCVideoRotation_0 + timeStampNs:timeStampNs]; + + [self.delegate capturer:self didCaptureVideoFrame:videoFrame]; +} + +- (void)stopCapture { + if (!_capturing) { + return; + } + _capturing = NO; + self.audioBufferHandler = nil; + + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { + if (error) { + NSLog(@"[InAppScreenCapturer] stopCapture error: %@", error.localizedDescription); + } + }]; +} + +- (void)dealloc { + if (_capturing) { + _capturing = NO; + self.audioBufferHandler = nil; + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:nil]; + } +} + +@end + +#endif diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m index ae6f9fb93..a6b976f16 100644 --- a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m +++ b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m @@ -13,6 +13,8 @@ #import "WebRTCModule+VideoTrackAdapter.h" #import "ProcessorProvider.h" +#import "InAppScreenCaptureController.h" +#import "InAppScreenCapturer.h" #import "ScreenCaptureController.h" #import "ScreenCapturer.h" #import "TrackCapturerEventsEmitter.h" @@ -202,14 +204,28 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack { NSString *trackUUID = [[NSUUID UUID] UUIDString]; RTCVideoTrack *videoTrack = [self.peerConnectionFactory videoTrackWithSource:videoSource trackId:trackUUID]; - ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource]; - ScreenCaptureController *screenCaptureController = - [[ScreenCaptureController alloc] initWithCapturer:screenCapturer]; + WebRTCModuleOptions *options = [WebRTCModuleOptions sharedInstance]; + CaptureController *captureController; + + if (options.useInAppScreenCapture) { + // Clear the flag so subsequent getDisplayMedia calls use broadcast by default + options.useInAppScreenCapture = NO; + + InAppScreenCapturer *capturer = [[InAppScreenCapturer alloc] initWithDelegate:videoSource]; + InAppScreenCaptureController *controller = [[InAppScreenCaptureController alloc] initWithCapturer:capturer]; + captureController = controller; + } else { + // Existing broadcast extension path + ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource]; + ScreenCaptureController *screenCaptureController = + [[ScreenCaptureController alloc] initWithCapturer:screenCapturer]; + captureController = screenCaptureController; + } TrackCapturerEventsEmitter *emitter = [[TrackCapturerEventsEmitter alloc] initWith:trackUUID webRTCModule:self]; - screenCaptureController.eventsDelegate = emitter; - videoTrack.captureController = screenCaptureController; - [screenCaptureController startCapture]; + captureController.eventsDelegate = emitter; + videoTrack.captureController = captureController; + [captureController startCapture]; // Add dimension detection for local video tracks immediately [self addLocalVideoTrackDimensionDetection:videoTrack]; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h index d99cb8200..100995f9d 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.h +++ b/ios/RCTWebRTC/WebRTCModuleOptions.h @@ -13,6 +13,13 @@ NS_ASSUME_NONNULL_BEGIN @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity; @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess; +/// When YES, the next getDisplayMedia() call will use RPScreenRecorder (in-app capture) +/// instead of the broadcast extension. Auto-cleared after use. +@property(nonatomic, assign) BOOL useInAppScreenCapture; + +/// When YES, in-app screen capture will route .audioApp buffers to the audio mixer. +@property(nonatomic, assign) BOOL includeScreenShareAudio; + #pragma mark - This class is a singleton + (instancetype _Nonnull)sharedInstance; From 1364e73343458e71f8d5463ade5a2fb7b02c94e9 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Tue, 24 Feb 2026 13:52:14 +0100 Subject: [PATCH 03/14] feat: handle screen capturing on app state changes --- ios/RCTWebRTC/InAppScreenCapturer.m | 41 +++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m index 9c86a378c..de2d4f403 100644 --- a/ios/RCTWebRTC/InAppScreenCapturer.m +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -8,10 +8,21 @@ @implementation InAppScreenCapturer { BOOL _capturing; + BOOL _shouldResumeOnForeground; } - (instancetype)initWithDelegate:(__weak id)delegate { self = [super initWithDelegate:delegate]; + if (self) { + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appDidBecomeActive) + name:UIApplicationDidBecomeActiveNotification + object:nil]; + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appWillResignActive) + name:UIApplicationWillResignActiveNotification + object:nil]; + } return self; } @@ -21,6 +32,10 @@ - (void)startCapture { } _capturing = YES; + [self startRPScreenRecorder]; +} + +- (void)startRPScreenRecorder { RPScreenRecorder *recorder = [RPScreenRecorder sharedRecorder]; recorder.microphoneEnabled = NO; // WebRTC handles mic input @@ -76,8 +91,11 @@ - (void)stopCapture { return; } _capturing = NO; + _shouldResumeOnForeground = NO; self.audioBufferHandler = nil; + [[NSNotificationCenter defaultCenter] removeObserver:self]; + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { if (error) { NSLog(@"[InAppScreenCapturer] stopCapture error: %@", error.localizedDescription); @@ -85,7 +103,30 @@ - (void)stopCapture { }]; } +#pragma mark - App Lifecycle + +- (void)appWillResignActive { + if (_capturing) { + _shouldResumeOnForeground = YES; + // Stop the RPScreenRecorder session — iOS suspends it in background anyway + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { + if (error) { + NSLog(@"[InAppScreenCapturer] background stop error: %@", error.localizedDescription); + } + }]; + } +} + +- (void)appDidBecomeActive { + if (_shouldResumeOnForeground && _capturing) { + _shouldResumeOnForeground = NO; + NSLog(@"[InAppScreenCapturer] Resuming capture after returning to foreground"); + [self startRPScreenRecorder]; + } +} + - (void)dealloc { + [[NSNotificationCenter defaultCenter] removeObserver:self]; if (_capturing) { _capturing = NO; self.audioBufferHandler = nil; From bc5de6ab919a399270ab14d8eb2a2d311fc89a1d Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Thu, 26 Feb 2026 10:42:54 +0100 Subject: [PATCH 04/14] feat: added ios screen share audio capturing --- ios/RCTWebRTC/InAppScreenCapturer.m | 16 +- .../AudioDeviceModule/AudioDeviceModule.swift | 46 +++ .../AudioGraphConfigurationDelegate.swift | 38 +++ .../ScreenShareAudioConverter.swift | 208 +++++++++++++ .../ScreenShareAudioMixer.swift | 285 ++++++++++++++++++ ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m | 24 ++ ios/RCTWebRTC/WebRTCModule.m | 21 +- ios/RCTWebRTC/WebRTCModuleOptions.h | 19 ++ ios/RCTWebRTC/WebRTCModuleOptions.m | 7 + 9 files changed, 655 insertions(+), 9 deletions(-) create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m index de2d4f403..8eb6cb3aa 100644 --- a/ios/RCTWebRTC/InAppScreenCapturer.m +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -14,14 +14,14 @@ @implementation InAppScreenCapturer { - (instancetype)initWithDelegate:(__weak id)delegate { self = [super initWithDelegate:delegate]; if (self) { - [[NSNotificationCenter defaultCenter] addObserver:self - selector:@selector(appDidBecomeActive) - name:UIApplicationDidBecomeActiveNotification - object:nil]; - [[NSNotificationCenter defaultCenter] addObserver:self - selector:@selector(appWillResignActive) - name:UIApplicationWillResignActiveNotification - object:nil]; + // [[NSNotificationCenter defaultCenter] addObserver:self + // selector:@selector(appDidBecomeActive) + // name:UIApplicationDidBecomeActiveNotification + // object:nil]; + // [[NSNotificationCenter defaultCenter] addObserver:self + // selector:@selector(appWillResignActive) + // name:UIApplicationWillResignActiveNotification + // object:nil]; } return self; } diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index f46c2c911..23a728760 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -179,6 +179,17 @@ import WebRTC /// Strong reference to the current engine so we can introspect it if needed. @objc public var engine: AVAudioEngine? + /// Delegate that receives synchronous input graph configuration callbacks. + /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing. + @objc public weak var audioGraphDelegate: AudioGraphConfigurationDelegate? + + /// Cached input context from the last `configureInputFromSource` callback. + /// These allow `startMixing` to configure the graph immediately when the + /// engine is already running, without waiting for the next callback. + @objc public private(set) weak var lastInputSource: AVAudioNode? + @objc public private(set) weak var lastInputDestination: AVAudioNode? + @objc public private(set) var lastInputFormat: AVAudioFormat? + /// Secondary observer that receives forwarded delegate callbacks. /// This allows the AudioDeviceModuleObserver to receive events and forward them to JS. private let delegateObserver: RTCAudioDeviceModuleDelegate @@ -268,6 +279,15 @@ import WebRTC source.isVoiceProcessingBypassed = isPreferred } + /// Sets voice processing bypass on the underlying audio device module. + /// When bypassed, echo cancellation / AGC / noise suppression are disabled, + /// which prevents the system from treating screen share audio as echo. + /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore. + @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) { + source.isVoiceProcessingBypassed = isBypassed + NSLog("[AudioDeviceModule] setVoiceProcessingBypassed: %@", isBypassed ? "YES" : "NO") + } + /// Starts or stops speaker playout on the ADM, retrying transient failures. /// - Parameter isActive: `true` to start playout, `false` to stop. /// - Throws: `AudioDeviceError` when WebRTC returns a non-zero status. @@ -445,6 +465,8 @@ import WebRTC isPlayoutEnabled: Bool, isRecordingEnabled: Bool ) -> Int { + audioGraphDelegate?.onDidStopEngine?(engine) + subject.send( .didStopAudioEngine( engine, @@ -474,6 +496,8 @@ import WebRTC isPlayoutEnabled: Bool, isRecordingEnabled: Bool ) -> Int { + audioGraphDelegate?.onDidDisableEngine?(engine) + subject.send( .didDisableAudioEngine( engine, @@ -500,7 +524,14 @@ import WebRTC _ audioDeviceModule: RTCAudioDeviceModule, willReleaseEngine engine: AVAudioEngine ) -> Int { + // Notify delegate BEFORE clearing cached context so it can + // tear down its graph while references are still valid. + audioGraphDelegate?.onWillReleaseEngine?(engine) + self.engine = nil + lastInputSource = nil + lastInputDestination = nil + lastInputFormat = nil subject.send(.willReleaseAudioEngine(engine)) audioLevelsAdapter.uninstall(on: 0) @@ -520,6 +551,11 @@ import WebRTC format: AVAudioFormat, context: [AnyHashable: Any] ) -> Int { + // Cache the input context for on-demand use by ScreenShareAudioMixer. + lastInputSource = source + lastInputDestination = destination + lastInputFormat = format + subject.send( .configureInputFromSource( engine, @@ -528,6 +564,16 @@ import WebRTC format: format ) ) + + // Notify the audio graph delegate synchronously — this must happen + // BEFORE the audio levels tap so the mixer can modify the graph first. + audioGraphDelegate?.onConfigureInputFromSource( + engine, + source: source, + destination: destination, + format: format + ) + audioLevelsAdapter.installInputTap( on: destination, format: format, diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift new file mode 100644 index 000000000..3d5a372e1 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift @@ -0,0 +1,38 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import AVFoundation + +/// Protocol that allows external code to hook into AVAudioEngine lifecycle +/// events synchronously. Callbacks fire on WebRTC's audio thread. +/// +/// Implementations must perform any AVAudioEngine graph modifications +/// synchronously within the callback — async dispatch will race with +/// WebRTC's `ConfigureVoiceProcessingNode`. +@objc public protocol AudioGraphConfigurationDelegate: AnyObject { + + /// Called when WebRTC (re)configures the engine's input graph. + /// This fires during engine setup, **before** `willStartEngine`. + /// + /// - Parameters: + /// - engine: The current `AVAudioEngine` instance. + /// - source: The upstream node (VP input), or `nil` when voice processing is disabled. + /// - destination: The node that receives the input stream (WebRTC capture mixer). + /// - format: The expected audio format for the input path. + func onConfigureInputFromSource( + _ engine: AVAudioEngine, + source: AVAudioNode?, + destination: AVAudioNode, + format: AVAudioFormat + ) + + /// Called when the engine is about to be released/deallocated. + @objc optional func onWillReleaseEngine(_ engine: AVAudioEngine) + + /// Called after the engine has fully stopped. + @objc optional func onDidStopEngine(_ engine: AVAudioEngine) + + /// Called after the engine has been disabled. + @objc optional func onDidDisableEngine(_ engine: AVAudioEngine) +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift new file mode 100644 index 000000000..45144553e --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift @@ -0,0 +1,208 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import Accelerate +import AVFoundation +import CoreMedia + +/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into +/// `AVAudioPCMBuffer`s suitable for scheduling on an `AVAudioPlayerNode`. +/// +/// Handles: +/// - CMSampleBuffer → AVAudioPCMBuffer extraction (float32, int16, interleaved, non-interleaved) +/// - Sample rate / channel / format conversion via cached AVAudioConverter +/// - Silence detection via vDSP RMS analysis +final class ScreenShareAudioConverter { + + // MARK: - Constants + + /// Buffers with RMS below this threshold (in dB) are considered silent. + private static let silenceThresholdDB: Float = -60.0 + + // MARK: - Cached converter + + private var converter: AVAudioConverter? + private var converterInputFormat: AVAudioFormat? + private var converterOutputFormat: AVAudioFormat? + + // MARK: - CMSampleBuffer → AVAudioPCMBuffer + + /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer`. + /// + /// Supports float32 and int16 PCM formats, both interleaved and + /// non-interleaved layouts. + func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? { + guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else { + NSLog("[ScreenShareAudio] Converter: no format description in CMSampleBuffer") + return nil + } + + guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else { + NSLog("[ScreenShareAudio] Converter: no ASBD in format description") + return nil + } + + guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else { + NSLog("[ScreenShareAudio] Converter: failed to create AVAudioFormat from ASBD") + return nil + } + + let frameCount = CMSampleBufferGetNumSamples(sampleBuffer) + guard frameCount > 0, + let pcmBuffer = AVAudioPCMBuffer(pcmFormat: avFormat, frameCapacity: AVAudioFrameCount(frameCount)) else { + return nil + } + + pcmBuffer.frameLength = AVAudioFrameCount(frameCount) + + guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { + return nil + } + + var lengthAtOffset: Int = 0 + var totalLength: Int = 0 + var dataPointer: UnsafeMutablePointer? + + let status = CMBlockBufferGetDataPointer( + blockBuffer, + atOffset: 0, + lengthAtOffsetOut: &lengthAtOffset, + totalLengthOut: &totalLength, + dataPointerOut: &dataPointer + ) + guard status == kCMBlockBufferNoErr, let dataPointer = dataPointer else { + return nil + } + + // Copy audio data into PCM buffer + if let floatData = pcmBuffer.floatChannelData { + let channelCount = Int(avFormat.channelCount) + let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame) + + if avFormat.isInterleaved { + // Interleaved: single buffer, copy all at once + memcpy(floatData[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame)) + } else { + // Non-interleaved: separate buffers per channel + let framesSize = Int(frameCount) * MemoryLayout.size + for ch in 0.. AVAudioPCMBuffer? { + // Identity optimization: skip conversion when formats match + if formatsMatch(inputBuffer.format, outputFormat) { + return inputBuffer + } + + // Create or reuse converter for current format pair + if converter == nil + || !formatsMatch(converterInputFormat, inputBuffer.format) + || !formatsMatch(converterOutputFormat, outputFormat) { + converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat) + converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue + converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering + converterInputFormat = inputBuffer.format + converterOutputFormat = outputFormat + } + + guard let converter = converter else { + NSLog("[ScreenShareAudio] Converter: AVAudioConverter creation failed") + return nil + } + + // Calculate output frame capacity from sample rate ratio + let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate + let outputFrameCapacity = AVAudioFrameCount(ceil(Double(inputBuffer.frameLength) * ratio)) + + guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else { + return nil + } + + var error: NSError? + var hasData = true + converter.convert(to: outputBuffer, error: &error) { _, outStatus in + if hasData { + outStatus.pointee = .haveData + hasData = false + return inputBuffer + } else { + outStatus.pointee = .noDataNow + return nil + } + } + + if let error = error { + NSLog("[ScreenShareAudio] Converter: conversion error: \(error.localizedDescription)") + return nil + } + + return outputBuffer + } + + // MARK: - Silence detection + + /// Returns `true` if the buffer is silent (RMS below -60 dB). + /// + /// For non-float formats (e.g., int16 from RPScreenRecorder), this returns + /// `false` — silence detection requires float data for vDSP, and these + /// buffers will be converted before scheduling anyway. + static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool { + guard let channelData = buffer.floatChannelData else { + return false + } + + let frameCount = vDSP_Length(buffer.frameLength) + guard frameCount > 0 else { + return true + } + + var rms: Float = 0 + vDSP_rmsqv(channelData[0], 1, &rms, frameCount) + + let rmsDB = 20 * log10(max(rms, Float.ulpOfOne)) + return rmsDB <= silenceThresholdDB + } + + // MARK: - Cleanup + + func reset() { + converter = nil + converterInputFormat = nil + converterOutputFormat = nil + } + + // MARK: - Private + + /// Compares two formats by sample rate, channel count, common format, + /// and interleaving — matching the Swift SDK's `AVAudioFormat+Equality`. + private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool { + guard let lhs = lhs, let rhs = rhs else { return false } + return lhs.sampleRate == rhs.sampleRate + && lhs.channelCount == rhs.channelCount + && lhs.commonFormat == rhs.commonFormat + && lhs.isInterleaved == rhs.isInterleaved + } +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift new file mode 100644 index 000000000..3c9871f72 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift @@ -0,0 +1,285 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import AVFoundation +import CoreMedia +import WebRTC + +/// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the +/// WebRTC microphone capture stream using `RTCAudioCustomProcessingDelegate`. +/// +/// Screen audio samples are written into a ring buffer. WebRTC's audio processing +/// pipeline calls `audioProcessingProcess(_:)` on its own thread; this method reads +/// from the ring buffer and additively mixes the screen audio into the mic samples. +@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate { + + // MARK: - Ring buffer + + private var ringBuffer: [Float] + private var writeIndex: Int = 0 + private var readIndex: Int = 0 + private let ringCapacity: Int + private let lock = NSLock() + + // MARK: - Audio conversion + + private let audioConverter = ScreenShareAudioConverter() + + // MARK: - State + + private var isMixing = false + private var processingFormat: AVAudioFormat? + + // MARK: - Diagnostics + + private var processCallCount: Int = 0 + private var processWithDataCount: Int = 0 + private var enqueueCallCount: Int = 0 + private var enqueueWrittenCount: Int = 0 + private var enqueueSilenceCount: Int = 0 + private var enqueuePcmFailCount: Int = 0 + private var enqueueConvFailCount: Int = 0 + private var enqueueNoFormatCount: Int = 0 + private var formatLogged = false + + // MARK: - Init + + @objc public override init() { + // 1 second at 48 kHz — enough to absorb jitter between + // RPScreenRecorder delivery and WebRTC processing cadence. + ringCapacity = 48000 + ringBuffer = [Float](repeating: 0, count: ringCapacity) + super.init() + NSLog("[ScreenShareAudio] Mixer instance created") + } + + deinit { + NSLog("[ScreenShareAudio] Mixer instance deallocated!") + } + + // MARK: - RTCAudioCustomProcessingDelegate + + public func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) { + lock.lock() + defer { lock.unlock() } + processingFormat = AVAudioFormat( + standardFormatWithSampleRate: Double(sampleRateHz), + channels: AVAudioChannelCount(channels) + ) + writeIndex = 0 + readIndex = 0 + NSLog("[ScreenShareAudio] audioProcessingInitialize: %dHz, %dch", sampleRateHz, channels) + } + + public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) { + guard isMixing else { return } + lock.lock() + defer { lock.unlock() } + + processCallCount += 1 + + let frames = audioBuffer.frames + let channelBuffer = audioBuffer.rawBuffer(forChannel: 0) + + // Mix ring buffer data into the mic capture if available + let available = writeIndex - readIndex + if available > 0 { + let framesToRead = min(frames, available) + for i in 0.. ringCapacity { + readIndex = writeIndex + frames - ringCapacity + } + + for i in 0..) +#import +#elif __has_include("stream_react_native_webrtc-Swift.h") +#import "stream_react_native_webrtc-Swift.h" +#endif #import "VideoCaptureController.h" @implementation WebRTCModule (RTCMediaStream) @@ -213,6 +220,23 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack { InAppScreenCapturer *capturer = [[InAppScreenCapturer alloc] initWithDelegate:videoSource]; InAppScreenCaptureController *controller = [[InAppScreenCaptureController alloc] initWithCapturer:capturer]; + + // Store weak reference for audio mixing wiring + options.activeInAppScreenCapturer = capturer; + + // If audio mixing is requested, set up the audio buffer handler. + // The handler forwards .audioApp CMSampleBuffers to the mixer's enqueue method. + // The mixer may not exist yet (created by startScreenShareAudioMixing), + // so we check at each callback invocation. + if (options.includeScreenShareAudio) { + capturer.audioBufferHandler = ^(CMSampleBufferRef sampleBuffer) { + ScreenShareAudioMixer *mixer = [WebRTCModuleOptions sharedInstance].screenShareAudioMixer; + if (mixer) { + [mixer enqueue:sampleBuffer]; + } + }; + } + captureController = controller; } else { // Existing broadcast extension path diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m index da9a335a2..25e1b5120 100644 --- a/ios/RCTWebRTC/WebRTCModule.m +++ b/ios/RCTWebRTC/WebRTCModule.m @@ -91,6 +91,13 @@ - (instancetype)init { NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice."); } RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class])); + + // Store reference to the default APM if it is one, so we can set + // capturePostProcessingDelegate later for screen share audio mixing. + if ([audioProcessingModule isKindOfClass:[RTCDefaultAudioProcessingModule class]]) { + options.defaultAudioProcessingModule = (RTCDefaultAudioProcessingModule *)audioProcessingModule; + } + _peerConnectionFactory = [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine bypassVoiceProcessing:NO @@ -103,12 +110,24 @@ - (instancetype)init { decoderFactory:decoderFactory audioDevice:audioDevice]; } else { + // No custom APM provided — create a mixer eagerly and set it as + // capturePostProcessingDelegate at APM creation time (not runtime). + // The mixer stays dormant (isMixing=false) until startMixing is called. + ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init]; + options.screenShareAudioMixer = mixer; + + RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc] + initWithConfig:nil + capturePostProcessingDelegate:mixer + renderPreProcessingDelegate:nil]; + options.defaultAudioProcessingModule = defaultAPM; + _peerConnectionFactory = [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine bypassVoiceProcessing:NO encoderFactory:encoderFactory decoderFactory:decoderFactory - audioProcessingModule:nil]; + audioProcessingModule:defaultAPM]; } _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self]; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h index 100995f9d..c964df4ce 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.h +++ b/ios/RCTWebRTC/WebRTCModuleOptions.h @@ -1,14 +1,25 @@ #import #import +@class InAppScreenCapturer; +@class RTCDefaultAudioProcessingModule; + NS_ASSUME_NONNULL_BEGIN +// Forward declare the Swift class — the actual import happens in the .m file. +@class ScreenShareAudioMixer; + @interface WebRTCModuleOptions : NSObject @property(nonatomic, strong, nullable) id videoDecoderFactory; @property(nonatomic, strong, nullable) id videoEncoderFactory; @property(nonatomic, strong, nullable) id audioDevice; @property(nonatomic, strong, nullable) id audioProcessingModule; + +/// Retained reference to the default audio processing module. +/// Used to dynamically set capturePostProcessingDelegate for screen share audio mixing. +@property(nonatomic, strong, nullable) RTCDefaultAudioProcessingModule *defaultAudioProcessingModule; + @property(nonatomic, strong, nullable) NSDictionary *fieldTrials; @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity; @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess; @@ -20,6 +31,14 @@ NS_ASSUME_NONNULL_BEGIN /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer. @property(nonatomic, assign) BOOL includeScreenShareAudio; +/// The active screen share audio mixer instance. Created by +/// `startScreenShareAudioMixing` and cleared by `stopScreenShareAudioMixing`. +@property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer; + +/// Weak reference to the current in-app screen capturer, set during +/// `createScreenCaptureVideoTrack` when in-app mode is used. +@property(nonatomic, weak, nullable) InAppScreenCapturer *activeInAppScreenCapturer; + #pragma mark - This class is a singleton + (instancetype _Nonnull)sharedInstance; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.m b/ios/RCTWebRTC/WebRTCModuleOptions.m index ba108da6e..f29ae67f9 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.m +++ b/ios/RCTWebRTC/WebRTCModuleOptions.m @@ -1,5 +1,12 @@ #import "WebRTCModuleOptions.h" +// Import Swift-generated header for ScreenShareAudioMixer +#if __has_include() +#import +#elif __has_include("stream_react_native_webrtc-Swift.h") +#import "stream_react_native_webrtc-Swift.h" +#endif + @implementation WebRTCModuleOptions #pragma mark - This class is a singleton From f64916ffc3ee8695551dbddf356738c82d22bca4 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Tue, 10 Mar 2026 12:24:48 +0100 Subject: [PATCH 05/14] chore: adjusted android mix summing --- .../com/oney/WebRTCModule/WebRTCModule.java | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java index dd0a29e27..831232cef 100644 --- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java +++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java @@ -146,7 +146,10 @@ private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext re } /** - * Mixes screen audio into the microphone buffer using PCM additive mixing with clamping. + * Mixes screen audio into the microphone buffer using PCM 16-bit additive mixing + * with clamping. Handles different buffer sizes safely: each buffer is read only + * within its own bounds. When one buffer is shorter, the other's samples pass + * through unmodified (mic samples stay as-is, or screen-only samples are written). */ private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer, java.nio.ByteBuffer screenBuffer, @@ -160,11 +163,22 @@ private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer, java.nio.ShortBuffer micShorts = micBuffer.asShortBuffer(); java.nio.ShortBuffer screenShorts = screenBuffer.asShortBuffer(); - int samplesToMix = Math.min(bytesRead / 2, - Math.min(micShorts.remaining(), screenShorts.remaining())); - - for (int i = 0; i < samplesToMix; i++) { - int sum = micShorts.get(i) + screenShorts.get(i); + int micSamples = Math.min(bytesRead / 2, micShorts.remaining()); + int screenSamples = screenShorts.remaining(); + int totalSamples = Math.max(micSamples, screenSamples); + + for (int i = 0; i < totalSamples; i++) { + int sum; + if (i >= micSamples) { + // Screen-only: mic buffer is shorter — write screen sample directly + sum = screenShorts.get(i); + } else if (i >= screenSamples) { + // Mic-only: screen buffer is shorter — keep mic sample as-is + break; + } else { + // Both buffers have data — add samples + sum = micShorts.get(i) + screenShorts.get(i); + } if (sum > Short.MAX_VALUE) sum = Short.MAX_VALUE; if (sum < Short.MIN_VALUE) sum = Short.MIN_VALUE; micShorts.put(i, (short) sum); From e34e2578bf43ca1aace9780a9a32a52f6b954d7b Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Tue, 10 Mar 2026 16:38:36 +0100 Subject: [PATCH 06/14] chore: ios moved to mixer node implementation --- .../ScreenShareAudioMixer.swift | 269 ++++++++---------- ios/RCTWebRTC/WebRTCModule.m | 28 +- 2 files changed, 134 insertions(+), 163 deletions(-) diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift index 3c9871f72..5af2dc686 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift @@ -7,20 +7,24 @@ import CoreMedia import WebRTC /// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the -/// WebRTC microphone capture stream using `RTCAudioCustomProcessingDelegate`. +/// WebRTC microphone capture stream by inserting an `AVAudioPlayerNode` and +/// `AVAudioMixerNode` into the engine's input graph. /// -/// Screen audio samples are written into a ring buffer. WebRTC's audio processing -/// pipeline calls `audioProcessingProcess(_:)` on its own thread; this method reads -/// from the ring buffer and additively mixes the screen audio into the mic samples. -@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate { +/// Graph topology (wired in `onConfigureInputFromSource`): +/// ``` +/// source (mic VP) --> mixerNode --> destination (WebRTC capture) +/// ^ +/// playerNode -----------/ +/// ``` +/// +/// The mixer stays dormant (no nodes attached) until `startMixing` is called. +/// Screen audio buffers are scheduled on the player node via `enqueue(_:)`. +@objc public final class ScreenShareAudioMixer: NSObject, AudioGraphConfigurationDelegate { - // MARK: - Ring buffer + // MARK: - Audio graph nodes - private var ringBuffer: [Float] - private var writeIndex: Int = 0 - private var readIndex: Int = 0 - private let ringCapacity: Int - private let lock = NSLock() + private let playerNode = AVAudioPlayerNode() + private let mixerNode = AVAudioMixerNode() // MARK: - Audio conversion @@ -29,142 +33,113 @@ import WebRTC // MARK: - State private var isMixing = false - private var processingFormat: AVAudioFormat? + + /// The engine reference from the last `onConfigureInputFromSource` call. + /// Used to detach nodes on cleanup. + private weak var currentEngine: AVAudioEngine? + + /// Format of the input graph path, used for converting screen audio. + private var graphFormat: AVAudioFormat? + + /// Whether our nodes are currently attached to the engine. + private var nodesAttached = false // MARK: - Diagnostics - private var processCallCount: Int = 0 - private var processWithDataCount: Int = 0 private var enqueueCallCount: Int = 0 - private var enqueueWrittenCount: Int = 0 + private var enqueueScheduledCount: Int = 0 private var enqueueSilenceCount: Int = 0 private var enqueuePcmFailCount: Int = 0 private var enqueueConvFailCount: Int = 0 - private var enqueueNoFormatCount: Int = 0 private var formatLogged = false // MARK: - Init @objc public override init() { - // 1 second at 48 kHz — enough to absorb jitter between - // RPScreenRecorder delivery and WebRTC processing cadence. - ringCapacity = 48000 - ringBuffer = [Float](repeating: 0, count: ringCapacity) super.init() - NSLog("[ScreenShareAudio] Mixer instance created") + NSLog("[ScreenShareAudio] Mixer instance created (graph approach)") } deinit { NSLog("[ScreenShareAudio] Mixer instance deallocated!") } - // MARK: - RTCAudioCustomProcessingDelegate - - public func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) { - lock.lock() - defer { lock.unlock() } - processingFormat = AVAudioFormat( - standardFormatWithSampleRate: Double(sampleRateHz), - channels: AVAudioChannelCount(channels) - ) - writeIndex = 0 - readIndex = 0 - NSLog("[ScreenShareAudio] audioProcessingInitialize: %dHz, %dch", sampleRateHz, channels) - } + // MARK: - AudioGraphConfigurationDelegate - public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) { - guard isMixing else { return } - lock.lock() - defer { lock.unlock() } + public func onConfigureInputFromSource( + _ engine: AVAudioEngine, + source: AVAudioNode?, + destination: AVAudioNode, + format: AVAudioFormat + ) { + currentEngine = engine + graphFormat = format - processCallCount += 1 + guard isMixing else { + NSLog("[ScreenShareAudio] onConfigureInputFromSource: not mixing, skipping graph modification") + return + } - let frames = audioBuffer.frames - let channelBuffer = audioBuffer.rawBuffer(forChannel: 0) + attachAndWireNodes(engine: engine, source: source, destination: destination, format: format) + } - // Mix ring buffer data into the mic capture if available - let available = writeIndex - readIndex - if available > 0 { - let framesToRead = min(frames, available) - for i in 0.. ringCapacity { - readIndex = writeIndex + frames - ringCapacity + // Start playback if not already playing + if !playerNode.isPlaying { + playerNode.play() } - for i in 0.. Date: Tue, 10 Mar 2026 16:43:00 +0100 Subject: [PATCH 07/14] chore: removed diagnostic logs --- .../ScreenShareAudioConverter.swift | 8 +- .../ScreenShareAudioMixer.swift | 102 ++---------------- 2 files changed, 9 insertions(+), 101 deletions(-) diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift index 45144553e..83e27273c 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift @@ -34,17 +34,14 @@ final class ScreenShareAudioConverter { /// non-interleaved layouts. func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? { guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else { - NSLog("[ScreenShareAudio] Converter: no format description in CMSampleBuffer") return nil } guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else { - NSLog("[ScreenShareAudio] Converter: no ASBD in format description") return nil } guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else { - NSLog("[ScreenShareAudio] Converter: failed to create AVAudioFormat from ASBD") return nil } @@ -94,7 +91,6 @@ final class ScreenShareAudioConverter { let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame) memcpy(int16Data[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame)) } else { - NSLog("[ScreenShareAudio] Converter: unsupported PCM format (no float or int16 channel data)") return nil } @@ -129,7 +125,6 @@ final class ScreenShareAudioConverter { } guard let converter = converter else { - NSLog("[ScreenShareAudio] Converter: AVAudioConverter creation failed") return nil } @@ -154,8 +149,7 @@ final class ScreenShareAudioConverter { } } - if let error = error { - NSLog("[ScreenShareAudio] Converter: conversion error: \(error.localizedDescription)") + if error != nil { return nil } diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift index 5af2dc686..011f9d46a 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift @@ -44,24 +44,10 @@ import WebRTC /// Whether our nodes are currently attached to the engine. private var nodesAttached = false - // MARK: - Diagnostics - - private var enqueueCallCount: Int = 0 - private var enqueueScheduledCount: Int = 0 - private var enqueueSilenceCount: Int = 0 - private var enqueuePcmFailCount: Int = 0 - private var enqueueConvFailCount: Int = 0 - private var formatLogged = false - // MARK: - Init @objc public override init() { super.init() - NSLog("[ScreenShareAudio] Mixer instance created (graph approach)") - } - - deinit { - NSLog("[ScreenShareAudio] Mixer instance deallocated!") } // MARK: - AudioGraphConfigurationDelegate @@ -75,10 +61,7 @@ import WebRTC currentEngine = engine graphFormat = format - guard isMixing else { - NSLog("[ScreenShareAudio] onConfigureInputFromSource: not mixing, skipping graph modification") - return - } + guard isMixing else { return } attachAndWireNodes(engine: engine, source: source, destination: destination, format: format) } @@ -105,37 +88,15 @@ import WebRTC /// already fired), this triggers an ADM reconfiguration so the graph gets /// rewired with our nodes. @objc public func startMixing() { - guard !isMixing else { - NSLog("[ScreenShareAudio] startMixing called but already mixing") - return - } + guard !isMixing else { return } isMixing = true - - // Reset diagnostic counters - enqueueCallCount = 0 - enqueueScheduledCount = 0 - enqueueSilenceCount = 0 - enqueuePcmFailCount = 0 - enqueueConvFailCount = 0 - formatLogged = false - - NSLog("[ScreenShareAudio] startMixing (graphFormat=%@)", - graphFormat != nil ? "\(graphFormat!.sampleRate)Hz/\(graphFormat!.channelCount)ch" : "nil") } /// Stop audio mixing and detach nodes from the engine. @objc public func stopMixing() { - guard isMixing else { - NSLog("[ScreenShareAudio] stopMixing called but not mixing") - return - } + guard isMixing else { return } isMixing = false - NSLog("[ScreenShareAudio] stopMixing — FINAL STATS: enqueue=%d (scheduled=%d, silence=%d, pcmFail=%d, convFail=%d)", - enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount, - enqueuePcmFailCount, enqueueConvFailCount) - - // Stop player and detach nodes playerNode.stop() if let engine = currentEngine { detachNodes(from: engine) @@ -145,37 +106,13 @@ import WebRTC /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer. @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) { - guard isMixing else { return } - - guard let targetFormat = graphFormat else { - return - } - - enqueueCallCount += 1 + guard isMixing, let targetFormat = graphFormat else { return } // 1. CMSampleBuffer → AVAudioPCMBuffer - guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { - enqueuePcmFailCount += 1 - if enqueuePcmFailCount <= 5 { - NSLog("[ScreenShareAudio] ENQUEUE: pcmBuffer extraction failed (count=%d)", enqueuePcmFailCount) - } - return - } - - // One-time format logging - if !formatLogged { - formatLogged = true - let srcFmt = pcm.format - NSLog("[ScreenShareAudio] ENQUEUE FORMAT: screen=%gHz/%dch → target=%gHz/%dch", - srcFmt.sampleRate, srcFmt.channelCount, - targetFormat.sampleRate, targetFormat.channelCount) - } + guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return } // 2. Silence detection - if ScreenShareAudioConverter.isSilent(pcm) { - enqueueSilenceCount += 1 - return - } + if ScreenShareAudioConverter.isSilent(pcm) { return } // 3. Convert to graph format (e.g. 48 kHz / 1 ch / float32) let buffer: AVAudioPCMBuffer @@ -183,36 +120,20 @@ import WebRTC || pcm.format.channelCount != targetFormat.channelCount || pcm.format.commonFormat != targetFormat.commonFormat || pcm.format.isInterleaved != targetFormat.isInterleaved { - guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else { - enqueueConvFailCount += 1 - if enqueueConvFailCount <= 5 { - NSLog("[ScreenShareAudio] ENQUEUE: conversion failed (count=%d)", enqueueConvFailCount) - } - return - } + guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else { return } buffer = converted } else { buffer = pcm } // 4. Schedule on player node - guard nodesAttached else { - return - } + guard nodesAttached else { return } playerNode.scheduleBuffer(buffer) - enqueueScheduledCount += 1 - // Start playback if not already playing if !playerNode.isPlaying { playerNode.play() } - - // Periodic stats (every ~50 buffers ≈ ~1s) - if enqueueScheduledCount % 50 == 0 { - NSLog("[ScreenShareAudio] ENQUEUE stats: calls=%d, scheduled=%d, silence=%d", - enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount) - } } // MARK: - Private graph management @@ -223,13 +144,11 @@ import WebRTC destination: AVAudioNode, format: AVAudioFormat ) { - // Detach if previously attached (e.g., engine reconfiguration) detachNodes(from: engine) engine.attach(mixerNode) engine.attach(playerNode) - // Wire: source → mixerNode → destination if let source = source { engine.connect(source, to: mixerNode, format: format) } @@ -237,18 +156,13 @@ import WebRTC engine.connect(mixerNode, to: destination, format: format) nodesAttached = true - NSLog("[ScreenShareAudio] Graph wired: source(%@) → mixer → destination, format=%gHz/%dch", - source != nil ? "VP" : "nil", format.sampleRate, format.channelCount) } private func detachNodes(from engine: AVAudioEngine) { guard nodesAttached else { return } - // Detaching automatically disconnects all connections engine.detach(playerNode) engine.detach(mixerNode) nodesAttached = false - - NSLog("[ScreenShareAudio] Nodes detached from engine") } } From 9fa28d3e720671aca69ac3985e1b0863291fe11e Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Wed, 11 Mar 2026 11:21:55 +0100 Subject: [PATCH 08/14] chore: added in app screen sharing restoration --- ios/RCTWebRTC/InAppScreenCapturer.m | 56 +++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m index 8eb6cb3aa..cf14a938b 100644 --- a/ios/RCTWebRTC/InAppScreenCapturer.m +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -9,20 +9,11 @@ @implementation InAppScreenCapturer { BOOL _capturing; BOOL _shouldResumeOnForeground; + BOOL _observingAppState; } - (instancetype)initWithDelegate:(__weak id)delegate { self = [super initWithDelegate:delegate]; - if (self) { - // [[NSNotificationCenter defaultCenter] addObserver:self - // selector:@selector(appDidBecomeActive) - // name:UIApplicationDidBecomeActiveNotification - // object:nil]; - // [[NSNotificationCenter defaultCenter] addObserver:self - // selector:@selector(appWillResignActive) - // name:UIApplicationWillResignActiveNotification - // object:nil]; - } return self; } @@ -62,10 +53,20 @@ - (void)startRPScreenRecorder { break; } } completionHandler:^(NSError * _Nullable error) { + __strong __typeof__(weakSelf) strongSelf = weakSelf; + if (!strongSelf) return; + if (error) { NSLog(@"[InAppScreenCapturer] startCapture failed: %@", error.localizedDescription); - [weakSelf.eventsDelegate capturerDidEnd:weakSelf]; + strongSelf->_capturing = NO; + [strongSelf.eventsDelegate capturerDidEnd:strongSelf]; + return; } + + // Capture started successfully — register for app lifecycle events. + // Done here (not in startCapture) so the RPScreenRecorder permission + // dialog doesn't trigger appWillResignActive before capture begins. + [strongSelf registerAppStateObservers]; }]; } @@ -94,7 +95,7 @@ - (void)stopCapture { _shouldResumeOnForeground = NO; self.audioBufferHandler = nil; - [[NSNotificationCenter defaultCenter] removeObserver:self]; + [self unregisterAppStateObservers]; [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { if (error) { @@ -105,6 +106,34 @@ - (void)stopCapture { #pragma mark - App Lifecycle +- (void)registerAppStateObservers { + if (_observingAppState) return; + _observingAppState = YES; + + dispatch_async(dispatch_get_main_queue(), ^{ + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appDidBecomeActive) + name:UIApplicationDidBecomeActiveNotification + object:nil]; + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appWillResignActive) + name:UIApplicationWillResignActiveNotification + object:nil]; + }); +} + +- (void)unregisterAppStateObservers { + if (!_observingAppState) return; + _observingAppState = NO; + + [[NSNotificationCenter defaultCenter] removeObserver:self + name:UIApplicationDidBecomeActiveNotification + object:nil]; + [[NSNotificationCenter defaultCenter] removeObserver:self + name:UIApplicationWillResignActiveNotification + object:nil]; +} + - (void)appWillResignActive { if (_capturing) { _shouldResumeOnForeground = YES; @@ -120,13 +149,12 @@ - (void)appWillResignActive { - (void)appDidBecomeActive { if (_shouldResumeOnForeground && _capturing) { _shouldResumeOnForeground = NO; - NSLog(@"[InAppScreenCapturer] Resuming capture after returning to foreground"); [self startRPScreenRecorder]; } } - (void)dealloc { - [[NSNotificationCenter defaultCenter] removeObserver:self]; + [self unregisterAppStateObservers]; if (_capturing) { _capturing = NO; self.audioBufferHandler = nil; From b099459ff205c884aeda98a456927560837683fa Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Thu, 19 Mar 2026 17:32:18 +0100 Subject: [PATCH 09/14] chore: code cleanup --- .../audio/AudioProcessingController.java | 13 +++---------- ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m | 13 ------------- ios/RCTWebRTC/WebRTCModule.m | 12 +----------- ios/RCTWebRTC/WebRTCModuleOptions.h | 10 ++-------- ios/RCTWebRTC/WebRTCModuleOptions.m | 7 ------- 5 files changed, 6 insertions(+), 49 deletions(-) diff --git a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java index 17ae78679..9444eb781 100644 --- a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java +++ b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java @@ -17,20 +17,13 @@ public class AudioProcessingController implements AudioProcessingFactoryProvider public ExternalAudioProcessingFactory externalAudioProcessingFactory; public AudioProcessingController() { - // ExternalAudioProcessingFactory creation is deferred to getFactory() - // because its constructor calls JNI native methods that require the - // WebRTC native library to be loaded first (via PeerConnectionFactory.initialize()). - // This allows AudioProcessingController to be safely instantiated in - // MainApplication.onCreate() before the native library is loaded. + this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory(); + this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing); + this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing); } @Override public AudioProcessingFactory getFactory() { - if (this.externalAudioProcessingFactory == null) { - this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory(); - this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing); - this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing); - } return this.externalAudioProcessingFactory; } } \ No newline at end of file diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m index 92edc3eef..c4562df70 100644 --- a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m +++ b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m @@ -224,19 +224,6 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack { // Store weak reference for audio mixing wiring options.activeInAppScreenCapturer = capturer; - // If audio mixing is requested, set up the audio buffer handler. - // The handler forwards .audioApp CMSampleBuffers to the mixer's enqueue method. - // The mixer may not exist yet (created by startScreenShareAudioMixing), - // so we check at each callback invocation. - if (options.includeScreenShareAudio) { - capturer.audioBufferHandler = ^(CMSampleBufferRef sampleBuffer) { - ScreenShareAudioMixer *mixer = [WebRTCModuleOptions sharedInstance].screenShareAudioMixer; - if (mixer) { - [mixer enqueue:sampleBuffer]; - } - }; - } - captureController = controller; } else { // Existing broadcast extension path diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m index dd07e312b..9d417cc7d 100644 --- a/ios/RCTWebRTC/WebRTCModule.m +++ b/ios/RCTWebRTC/WebRTCModule.m @@ -98,11 +98,6 @@ - (instancetype)init { } RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class])); - // Store reference to the default APM if it is one. - if ([audioProcessingModule isKindOfClass:[RTCDefaultAudioProcessingModule class]]) { - options.defaultAudioProcessingModule = (RTCDefaultAudioProcessingModule *)audioProcessingModule; - } - _peerConnectionFactory = [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine bypassVoiceProcessing:NO @@ -115,17 +110,12 @@ - (instancetype)init { decoderFactory:decoderFactory audioDevice:audioDevice]; } else { - // No custom APM provided — create a default one (no capturePostProcessingDelegate needed; - // screen share audio mixing uses the AVAudioEngine graph approach via audioGraphDelegate). - RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc] init]; - options.defaultAudioProcessingModule = defaultAPM; - _peerConnectionFactory = [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine bypassVoiceProcessing:NO encoderFactory:encoderFactory decoderFactory:decoderFactory - audioProcessingModule:defaultAPM]; + audioProcessingModule:nil]; } _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self]; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h index c964df4ce..229c4716c 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.h +++ b/ios/RCTWebRTC/WebRTCModuleOptions.h @@ -2,8 +2,6 @@ #import @class InAppScreenCapturer; -@class RTCDefaultAudioProcessingModule; - NS_ASSUME_NONNULL_BEGIN // Forward declare the Swift class — the actual import happens in the .m file. @@ -16,10 +14,6 @@ NS_ASSUME_NONNULL_BEGIN @property(nonatomic, strong, nullable) id audioDevice; @property(nonatomic, strong, nullable) id audioProcessingModule; -/// Retained reference to the default audio processing module. -/// Used to dynamically set capturePostProcessingDelegate for screen share audio mixing. -@property(nonatomic, strong, nullable) RTCDefaultAudioProcessingModule *defaultAudioProcessingModule; - @property(nonatomic, strong, nullable) NSDictionary *fieldTrials; @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity; @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess; @@ -31,8 +25,8 @@ NS_ASSUME_NONNULL_BEGIN /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer. @property(nonatomic, assign) BOOL includeScreenShareAudio; -/// The active screen share audio mixer instance. Created by -/// `startScreenShareAudioMixing` and cleared by `stopScreenShareAudioMixing`. +/// The screen share audio mixer instance. Created eagerly during WebRTCModule +/// init and retained for the lifetime of the module (never cleared). @property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer; /// Weak reference to the current in-app screen capturer, set during diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.m b/ios/RCTWebRTC/WebRTCModuleOptions.m index f29ae67f9..ba108da6e 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.m +++ b/ios/RCTWebRTC/WebRTCModuleOptions.m @@ -1,12 +1,5 @@ #import "WebRTCModuleOptions.h" -// Import Swift-generated header for ScreenShareAudioMixer -#if __has_include() -#import -#elif __has_include("stream_react_native_webrtc-Swift.h") -#import "stream_react_native_webrtc-Swift.h" -#endif - @implementation WebRTCModuleOptions #pragma mark - This class is a singleton From 160d8b113b7b15e7486a33a8f323afe9b69124c9 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Mon, 23 Mar 2026 14:59:12 +0100 Subject: [PATCH 10/14] chore: made mixer initialization lazy --- .../Utils/AudioDeviceModule/AudioDeviceModule.swift | 2 +- ios/RCTWebRTC/WebRTCModule.m | 10 ---------- ios/RCTWebRTC/WebRTCModuleOptions.h | 7 ------- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index 23a728760..854f756f2 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -181,7 +181,7 @@ import WebRTC /// Delegate that receives synchronous input graph configuration callbacks. /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing. - @objc public weak var audioGraphDelegate: AudioGraphConfigurationDelegate? + @objc public var audioGraphDelegate: AudioGraphConfigurationDelegate? /// Cached input context from the last `configureInputFromSource` callback. /// These allow `startMixing` to configure the graph immediately when the diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m index 9d417cc7d..0c14160d1 100644 --- a/ios/RCTWebRTC/WebRTCModule.m +++ b/ios/RCTWebRTC/WebRTCModule.m @@ -86,12 +86,6 @@ - (instancetype)init { RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class])); RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class])); - // Always create the screen share audio mixer eagerly. - // It stays dormant (isMixing=false) until startMixing is called. - // It will be wired as audioGraphDelegate on the ADM after factory creation. - ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init]; - options.screenShareAudioMixer = mixer; - if (audioProcessingModule != nil) { if (audioDevice != nil) { NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice."); @@ -122,10 +116,6 @@ - (instancetype)init { _audioDeviceModule = [[AudioDeviceModule alloc] initWithSource:_peerConnectionFactory.audioDeviceModule delegateObserver:_rtcAudioDeviceModuleObserver]; - // Wire the mixer as the audio graph delegate so it receives - // onConfigureInputFromSource callbacks to modify the engine graph. - _audioDeviceModule.audioGraphDelegate = mixer; - _peerConnections = [NSMutableDictionary new]; _localStreams = [NSMutableDictionary new]; _localTracks = [NSMutableDictionary new]; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h index 229c4716c..b363cc4ff 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.h +++ b/ios/RCTWebRTC/WebRTCModuleOptions.h @@ -4,9 +4,6 @@ @class InAppScreenCapturer; NS_ASSUME_NONNULL_BEGIN -// Forward declare the Swift class — the actual import happens in the .m file. -@class ScreenShareAudioMixer; - @interface WebRTCModuleOptions : NSObject @property(nonatomic, strong, nullable) id videoDecoderFactory; @@ -25,10 +22,6 @@ NS_ASSUME_NONNULL_BEGIN /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer. @property(nonatomic, assign) BOOL includeScreenShareAudio; -/// The screen share audio mixer instance. Created eagerly during WebRTCModule -/// init and retained for the lifetime of the module (never cleared). -@property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer; - /// Weak reference to the current in-app screen capturer, set during /// `createScreenCaptureVideoTrack` when in-app mode is used. @property(nonatomic, weak, nullable) InAppScreenCapturer *activeInAppScreenCapturer; From 351879c3f680bff66acdbfee48fb533f0d8a4a05 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Mon, 23 Mar 2026 18:17:11 +0100 Subject: [PATCH 11/14] chore: pr comment fix --- ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index 854f756f2..474bbdd7f 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -285,7 +285,7 @@ import WebRTC /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore. @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) { source.isVoiceProcessingBypassed = isBypassed - NSLog("[AudioDeviceModule] setVoiceProcessingBypassed: %@", isBypassed ? "YES" : "NO") + isVoiceProcessingBypassedSubject.send(isBypassed) } /// Starts or stops speaker playout on the ADM, retrying transient failures. From 7e4c0155f8d326a309b8666088637ad04b53d988 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Mon, 23 Mar 2026 18:21:01 +0100 Subject: [PATCH 12/14] chore: audio capture improvement --- .../AudioDeviceModule/AudioDeviceModule.swift | 39 +-- .../AudioGraphConfigurationDelegate.swift | 38 --- .../ScreenShare/AudioRingBuffer.swift | 115 +++++++++ .../ScreenShareAudioConverter.swift | 233 ++++++++++++++++++ .../ScreenShare/ScreenShareAudioMixer.swift | 143 +++++++++++ .../ScreenShareAudioConverter.swift | 202 --------------- .../ScreenShareAudioMixer.swift | 168 ------------- ios/RCTWebRTC/WebRTCModule.m | 11 + 8 files changed, 506 insertions(+), 443 deletions(-) delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index 474bbdd7f..c71dd7cbd 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -179,16 +179,10 @@ import WebRTC /// Strong reference to the current engine so we can introspect it if needed. @objc public var engine: AVAudioEngine? - /// Delegate that receives synchronous input graph configuration callbacks. - /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing. - @objc public var audioGraphDelegate: AudioGraphConfigurationDelegate? - - /// Cached input context from the last `configureInputFromSource` callback. - /// These allow `startMixing` to configure the graph immediately when the - /// engine is already running, without waiting for the next callback. - @objc public private(set) weak var lastInputSource: AVAudioNode? - @objc public private(set) weak var lastInputDestination: AVAudioNode? - @objc public private(set) var lastInputFormat: AVAudioFormat? + /// Screen share audio mixer. Implements `RTCAudioCustomProcessingDelegate` + /// and is set as `capturePostProcessingDelegate` on the + /// `RTCDefaultAudioProcessingModule` when screen share audio mixing starts. + @objc public let screenShareAudioMixer = ScreenShareAudioMixer() /// Secondary observer that receives forwarded delegate callbacks. /// This allows the AudioDeviceModuleObserver to receive events and forward them to JS. @@ -239,7 +233,6 @@ import WebRTC .eraseToAnyPublisher() super.init() - _ = source.setMuteMode(.inputMixer) audioLevelsAdapter.subject = audioLevelSubject source.observer = self } @@ -465,8 +458,6 @@ import WebRTC isPlayoutEnabled: Bool, isRecordingEnabled: Bool ) -> Int { - audioGraphDelegate?.onDidStopEngine?(engine) - subject.send( .didStopAudioEngine( engine, @@ -496,8 +487,6 @@ import WebRTC isPlayoutEnabled: Bool, isRecordingEnabled: Bool ) -> Int { - audioGraphDelegate?.onDidDisableEngine?(engine) - subject.send( .didDisableAudioEngine( engine, @@ -524,14 +513,7 @@ import WebRTC _ audioDeviceModule: RTCAudioDeviceModule, willReleaseEngine engine: AVAudioEngine ) -> Int { - // Notify delegate BEFORE clearing cached context so it can - // tear down its graph while references are still valid. - audioGraphDelegate?.onWillReleaseEngine?(engine) - self.engine = nil - lastInputSource = nil - lastInputDestination = nil - lastInputFormat = nil subject.send(.willReleaseAudioEngine(engine)) audioLevelsAdapter.uninstall(on: 0) @@ -551,11 +533,6 @@ import WebRTC format: AVAudioFormat, context: [AnyHashable: Any] ) -> Int { - // Cache the input context for on-demand use by ScreenShareAudioMixer. - lastInputSource = source - lastInputDestination = destination - lastInputFormat = format - subject.send( .configureInputFromSource( engine, @@ -566,14 +543,6 @@ import WebRTC ) // Notify the audio graph delegate synchronously — this must happen - // BEFORE the audio levels tap so the mixer can modify the graph first. - audioGraphDelegate?.onConfigureInputFromSource( - engine, - source: source, - destination: destination, - format: format - ) - audioLevelsAdapter.installInputTap( on: destination, format: format, diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift deleted file mode 100644 index 3d5a372e1..000000000 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2026 Stream.io Inc. All rights reserved. -// - -import AVFoundation - -/// Protocol that allows external code to hook into AVAudioEngine lifecycle -/// events synchronously. Callbacks fire on WebRTC's audio thread. -/// -/// Implementations must perform any AVAudioEngine graph modifications -/// synchronously within the callback — async dispatch will race with -/// WebRTC's `ConfigureVoiceProcessingNode`. -@objc public protocol AudioGraphConfigurationDelegate: AnyObject { - - /// Called when WebRTC (re)configures the engine's input graph. - /// This fires during engine setup, **before** `willStartEngine`. - /// - /// - Parameters: - /// - engine: The current `AVAudioEngine` instance. - /// - source: The upstream node (VP input), or `nil` when voice processing is disabled. - /// - destination: The node that receives the input stream (WebRTC capture mixer). - /// - format: The expected audio format for the input path. - func onConfigureInputFromSource( - _ engine: AVAudioEngine, - source: AVAudioNode?, - destination: AVAudioNode, - format: AVAudioFormat - ) - - /// Called when the engine is about to be released/deallocated. - @objc optional func onWillReleaseEngine(_ engine: AVAudioEngine) - - /// Called after the engine has fully stopped. - @objc optional func onDidStopEngine(_ engine: AVAudioEngine) - - /// Called after the engine has been disabled. - @objc optional func onDidDisableEngine(_ engine: AVAudioEngine) -} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift new file mode 100644 index 000000000..10e589ff1 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift @@ -0,0 +1,115 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import Darwin +import Foundation + +/// Thread-safe single-producer single-consumer ring buffer for Float32 audio samples. +/// +/// Uses `os_unfair_lock` for minimal-overhead synchronization between the +/// ReplayKit callback thread (writer) and the audio render thread (reader). +/// The lock is uncontended in the vast majority of cases (different cadences), +/// making it suitable for real-time audio contexts. +final class AudioRingBuffer { + + private let buffer: UnsafeMutablePointer + private let capacity: Int + private var writePos: Int = 0 + private var readPos: Int = 0 + private var lock = os_unfair_lock_s() + + /// Creates a ring buffer with the given capacity in frames. + /// - Parameter capacity: Maximum number of Float32 samples the buffer can hold. + init(capacity: Int) { + self.capacity = capacity + self.buffer = .allocate(capacity: capacity) + self.buffer.initialize(repeating: 0, count: capacity) + } + + deinit { + buffer.deallocate() + } + + /// Number of frames available to read (thread-safe). + var availableToRead: Int { + os_unfair_lock_lock(&lock) + let result = _availableToRead + os_unfair_lock_unlock(&lock) + return result + } + + // MARK: - Internal (lock held) + + private var _availableToRead: Int { + let w = writePos + let r = readPos + return (w >= r) ? (w - r) : (capacity - r + w) + } + + private var _availableToWrite: Int { + // Reserve 1 slot to distinguish full from empty. + return capacity - 1 - _availableToRead + } + + // MARK: - Producer API (ReplayKit thread) + + /// Writes up to `count` samples from `source` into the ring buffer. + /// - Returns: The number of samples actually written (may be less if buffer is full). + @discardableResult + func write(_ source: UnsafePointer, count: Int) -> Int { + os_unfair_lock_lock(&lock) + defer { os_unfair_lock_unlock(&lock) } + + let toWrite = min(count, _availableToWrite) + guard toWrite > 0 else { return 0 } + + let w = writePos + let firstPart = min(toWrite, capacity - w) + let secondPart = toWrite - firstPart + + memcpy(buffer.advanced(by: w), source, firstPart * MemoryLayout.size) + if secondPart > 0 { + memcpy(buffer, source.advanced(by: firstPart), secondPart * MemoryLayout.size) + } + + writePos = (w + toWrite) % capacity + return toWrite + } + + // MARK: - Consumer API (audio render thread) + + /// Reads up to `count` samples into `destination` from the ring buffer. + /// - Returns: The number of samples actually read (may be less if buffer is empty). + @discardableResult + func read(into destination: UnsafeMutablePointer, count: Int) -> Int { + os_unfair_lock_lock(&lock) + defer { os_unfair_lock_unlock(&lock) } + + let toRead = min(count, _availableToRead) + guard toRead > 0 else { return 0 } + + let r = readPos + let firstPart = min(toRead, capacity - r) + let secondPart = toRead - firstPart + + memcpy(destination, buffer.advanced(by: r), firstPart * MemoryLayout.size) + if secondPart > 0 { + memcpy(destination.advanced(by: firstPart), buffer, secondPart * MemoryLayout.size) + } + + readPos = (r + toRead) % capacity + return toRead + } + + // MARK: - Reset + + /// Clears all buffered data. Call when not concurrently accessed by both + /// producer and consumer, or when it is acceptable to lose data. + func reset() { + os_unfair_lock_lock(&lock) + writePos = 0 + readPos = 0 + os_unfair_lock_unlock(&lock) + } +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift new file mode 100644 index 000000000..bc55fb988 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift @@ -0,0 +1,233 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import Accelerate +import AudioToolbox +import AVFoundation +import CoreMedia + +/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into +/// `AVAudioPCMBuffer`s suitable for scheduling on an `AVAudioPlayerNode`. +/// +/// Handles: +/// - CMSampleBuffer → AVAudioPCMBuffer extraction via `CMSampleBufferCopyPCMDataIntoAudioBufferList` +/// - Sample rate / channel / format conversion via cached AVAudioConverter +/// - Silence detection via vDSP RMS analysis +final class ScreenShareAudioConverter { + + // MARK: - Constants + + /// Buffers with RMS below this threshold (in dB) are considered silent. + private static let silenceThresholdDB: Float = -60.0 + + // MARK: - Cached converter + + private var converter: AVAudioConverter? + private var converterInputFormat: AVAudioFormat? + private var converterOutputFormat: AVAudioFormat? + + // MARK: - CMSampleBuffer → AVAudioPCMBuffer + + /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer` + /// using Apple's `CMSampleBufferCopyPCMDataIntoAudioBufferList`. + /// + /// Matches the Swift SDK's `AVAudioPCMBuffer.from(_:)` implementation. + func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? { + guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer), + let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else { + return nil + } + + // Only linear PCM can be copied into AVAudioPCMBuffer. + guard asbd.pointee.mFormatID == kAudioFormatLinearPCM else { + return nil + } + + // Inspect format flags to build the correct AVAudioFormat. + let formatFlags = asbd.pointee.mFormatFlags + let isFloat = (formatFlags & kAudioFormatFlagIsFloat) != 0 + let isSignedInt = (formatFlags & kAudioFormatFlagIsSignedInteger) != 0 + let isBigEndian = (formatFlags & kAudioFormatFlagIsBigEndian) != 0 + let isInterleaved = (formatFlags & kAudioFormatFlagIsNonInterleaved) == 0 + let bitsPerChannel = Int(asbd.pointee.mBitsPerChannel) + + // Choose an AVAudioCommonFormat compatible with the sample format. + let commonFormat: AVAudioCommonFormat + if isFloat, bitsPerChannel == 32 { + commonFormat = .pcmFormatFloat32 + } else if isSignedInt, bitsPerChannel == 16 { + commonFormat = .pcmFormatInt16 + } else { + return nil + } + + // Build AVAudioFormat from explicit parameters (not streamDescription) + // to ensure consistent format identity for downstream comparisons. + guard let inputFormat = AVAudioFormat( + commonFormat: commonFormat, + sampleRate: asbd.pointee.mSampleRate, + channels: asbd.pointee.mChannelsPerFrame, + interleaved: isInterleaved + ) else { + return nil + } + + let frameCount = AVAudioFrameCount(CMSampleBufferGetNumSamples(sampleBuffer)) + guard frameCount > 0, + let pcmBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCount) else { + return nil + } + + pcmBuffer.frameLength = frameCount + + let bytesPerFrame = Int(asbd.pointee.mBytesPerFrame) + guard bytesPerFrame > 0 else { + return nil + } + + // Prepare the destination AudioBufferList with correct byte sizes. + let destinationList = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList) + let bytesToCopy = Int(frameCount) * bytesPerFrame + for index in 0...size + let intPtr = mData.assumingMemoryBound(to: Int16.self) + for i in 0...size + let intPtr = mData.assumingMemoryBound(to: UInt32.self) + for i in 0.. AVAudioPCMBuffer? { + if formatsMatch(inputBuffer.format, outputFormat) { + return inputBuffer + } + + // Create or reuse converter for current format pair + if converter == nil + || !formatsMatch(converterInputFormat, inputBuffer.format) + || !formatsMatch(converterOutputFormat, outputFormat) { + converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat) + converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue + converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering + converterInputFormat = inputBuffer.format + converterOutputFormat = outputFormat + } + + guard let converter = converter else { + return nil + } + + // Calculate output frame capacity from sample rate ratio + let inputFrames = Double(inputBuffer.frameLength) + let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate + let outputFrameCapacity = AVAudioFrameCount(max(1, ceil(inputFrames * ratio))) + + guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else { + return nil + } + + var error: NSError? + var didProvideData = false + let status = converter.convert(to: outputBuffer, error: &error) { _, outStatus in + if didProvideData { + outStatus.pointee = .noDataNow + return nil + } + guard inputBuffer.frameLength > 0 else { + outStatus.pointee = .noDataNow + return nil + } + didProvideData = true + outStatus.pointee = .haveData + return inputBuffer + } + + if status == .error || error != nil { + return nil + } + + guard outputBuffer.frameLength > 0 else { + return nil + } + + return outputBuffer + } + + // MARK: - Silence detection + + /// Returns `true` if the buffer is silent (RMS below -60 dB). + static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool { + guard let channelData = buffer.floatChannelData else { + return false + } + + let frameCount = vDSP_Length(buffer.frameLength) + guard frameCount > 0 else { + return true + } + + var rms: Float = 0 + vDSP_rmsqv(channelData[0], 1, &rms, frameCount) + + let rmsDB = 20 * log10(max(rms, Float.ulpOfOne)) + return rmsDB <= silenceThresholdDB + } + + // MARK: - Cleanup + + func reset() { + converter = nil + converterInputFormat = nil + converterOutputFormat = nil + } + + // MARK: - Private + + private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool { + guard let lhs = lhs, let rhs = rhs else { return false } + return lhs.sampleRate == rhs.sampleRate + && lhs.channelCount == rhs.channelCount + && lhs.commonFormat == rhs.commonFormat + && lhs.isInterleaved == rhs.isInterleaved + } +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift new file mode 100644 index 000000000..173e6750e --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift @@ -0,0 +1,143 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import AVFoundation +import CoreMedia +import WebRTC + +/// Mixes screen share audio into the WebRTC microphone capture stream via +/// `RTCAudioCustomProcessingDelegate` — direct PCM additive mixing in the +/// WebRTC capture post-processing pipeline. +/// +/// Set as `capturePostProcessingDelegate` on `RTCDefaultAudioProcessingModule`. +/// The delegate callback runs after AEC/AGC/NS, so screen audio passes through +/// without echo cancellation interference. +/// +/// ``` +/// RPScreenRecorder → convert → ring buffer → audioProcessingProcess → encoding +/// (44100→48k) (producer) (consumer) +/// ``` +/// +/// **Important:** `RTCAudioBuffer` uses FloatS16 format (Float32 in the Int16 +/// range -32768…32767). Audio from `AVAudioConverter` (normalized -1…1) must +/// be scaled by 32768 before mixing. +@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate { + + /// Ring buffer for passing converted audio from the RPScreenRecorder callback + /// thread (producer) to the audio processing thread (consumer). + /// Capacity: 1 second of mono Float32 at 48 kHz. + private let ringBuffer = AudioRingBuffer(capacity: 48000) + private let audioConverter = ScreenShareAudioConverter() + + private var isMixing = false + /// Processing format from `audioProcessingInitialize`. + private var processingSampleRate: Double = 0 + private var processingChannels: Int = 0 + /// Target format for conversion, built from processing parameters. + private var targetFormat: AVAudioFormat? + + /// Scale factor: RTCAudioBuffer uses FloatS16 format (Float32 values in the + /// Int16 range -32768…32767), NOT normalized Float32 (-1…1). + /// AVAudioConverter produces normalized Float32, so we must scale up. + private static let floatS16Scale: Float = 32768.0 + + // MARK: - RTCAudioCustomProcessingDelegate + + /// Called by WebRTC when the processing pipeline initializes or reconfigures. + /// May be called multiple times (e.g., on route changes). + public func audioProcessingInitialize(sampleRate: Int, channels: Int) { + processingSampleRate = Double(sampleRate) + processingChannels = channels + + targetFormat = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: processingSampleRate, + channels: AVAudioChannelCount(channels), + interleaved: false + ) + + ringBuffer.reset() + audioConverter.reset() + } + + /// Called on the audio processing thread for each captured audio chunk. + /// Reads from the ring buffer and ADDs screen audio samples to the mic buffer. + public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) { + guard isMixing else { return } + + let frames = Int(audioBuffer.frames) + let channels = Int(audioBuffer.channels) + guard frames > 0, channels > 0 else { return } + + mixFromRingBuffer(into: audioBuffer, frames: frames, channels: channels) + } + + /// Called when the processing pipeline is released. + public func audioProcessingRelease() { + ringBuffer.reset() + targetFormat = nil + } + + // MARK: - Public API + + /// Enable audio mixing. After this, `enqueue(_:)` writes to the ring buffer + /// and the processing callback reads from it. + @objc public func startMixing() { + guard !isMixing else { return } + ringBuffer.reset() + isMixing = true + } + + /// Stop audio mixing. + @objc public func stopMixing() { + guard isMixing else { return } + isMixing = false + ringBuffer.reset() + audioConverter.reset() + } + + /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer. + /// Converts to the processing format and writes to the ring buffer. + @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) { + guard isMixing, let targetFmt = targetFormat else { return } + + guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return } + + let buffer: AVAudioPCMBuffer + if pcm.format.sampleRate != targetFmt.sampleRate + || pcm.format.channelCount != targetFmt.channelCount + || pcm.format.commonFormat != targetFmt.commonFormat + || pcm.format.isInterleaved != targetFmt.isInterleaved { + guard let converted = audioConverter.convertIfRequired(pcm, to: targetFmt) else { return } + buffer = converted + } else { + buffer = pcm + } + + if ScreenShareAudioConverter.isSilent(buffer) { return } + + guard let channelData = buffer.floatChannelData else { return } + ringBuffer.write(channelData[0], count: Int(buffer.frameLength)) + } + + // MARK: - Private mixing + + /// Read from ring buffer and ADD to the mic audio buffer (additive mixing). + /// Ring buffer contains normalized Float32 [-1,1] from AVAudioConverter; + /// RTCAudioBuffer uses FloatS16 [-32768,32767], so we scale before adding. + private func mixFromRingBuffer(into audioBuffer: RTCAudioBuffer, frames: Int, channels: Int) { + let tempBuffer = UnsafeMutablePointer.allocate(capacity: frames) + defer { tempBuffer.deallocate() } + + let framesRead = ringBuffer.read(into: tempBuffer, count: frames) + guard framesRead > 0 else { return } + + for ch in 0.. AVAudioPCMBuffer? { - guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else { - return nil - } - - guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else { - return nil - } - - guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else { - return nil - } - - let frameCount = CMSampleBufferGetNumSamples(sampleBuffer) - guard frameCount > 0, - let pcmBuffer = AVAudioPCMBuffer(pcmFormat: avFormat, frameCapacity: AVAudioFrameCount(frameCount)) else { - return nil - } - - pcmBuffer.frameLength = AVAudioFrameCount(frameCount) - - guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { - return nil - } - - var lengthAtOffset: Int = 0 - var totalLength: Int = 0 - var dataPointer: UnsafeMutablePointer? - - let status = CMBlockBufferGetDataPointer( - blockBuffer, - atOffset: 0, - lengthAtOffsetOut: &lengthAtOffset, - totalLengthOut: &totalLength, - dataPointerOut: &dataPointer - ) - guard status == kCMBlockBufferNoErr, let dataPointer = dataPointer else { - return nil - } - - // Copy audio data into PCM buffer - if let floatData = pcmBuffer.floatChannelData { - let channelCount = Int(avFormat.channelCount) - let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame) - - if avFormat.isInterleaved { - // Interleaved: single buffer, copy all at once - memcpy(floatData[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame)) - } else { - // Non-interleaved: separate buffers per channel - let framesSize = Int(frameCount) * MemoryLayout.size - for ch in 0.. AVAudioPCMBuffer? { - // Identity optimization: skip conversion when formats match - if formatsMatch(inputBuffer.format, outputFormat) { - return inputBuffer - } - - // Create or reuse converter for current format pair - if converter == nil - || !formatsMatch(converterInputFormat, inputBuffer.format) - || !formatsMatch(converterOutputFormat, outputFormat) { - converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat) - converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue - converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering - converterInputFormat = inputBuffer.format - converterOutputFormat = outputFormat - } - - guard let converter = converter else { - return nil - } - - // Calculate output frame capacity from sample rate ratio - let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate - let outputFrameCapacity = AVAudioFrameCount(ceil(Double(inputBuffer.frameLength) * ratio)) - - guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else { - return nil - } - - var error: NSError? - var hasData = true - converter.convert(to: outputBuffer, error: &error) { _, outStatus in - if hasData { - outStatus.pointee = .haveData - hasData = false - return inputBuffer - } else { - outStatus.pointee = .noDataNow - return nil - } - } - - if error != nil { - return nil - } - - return outputBuffer - } - - // MARK: - Silence detection - - /// Returns `true` if the buffer is silent (RMS below -60 dB). - /// - /// For non-float formats (e.g., int16 from RPScreenRecorder), this returns - /// `false` — silence detection requires float data for vDSP, and these - /// buffers will be converted before scheduling anyway. - static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool { - guard let channelData = buffer.floatChannelData else { - return false - } - - let frameCount = vDSP_Length(buffer.frameLength) - guard frameCount > 0 else { - return true - } - - var rms: Float = 0 - vDSP_rmsqv(channelData[0], 1, &rms, frameCount) - - let rmsDB = 20 * log10(max(rms, Float.ulpOfOne)) - return rmsDB <= silenceThresholdDB - } - - // MARK: - Cleanup - - func reset() { - converter = nil - converterInputFormat = nil - converterOutputFormat = nil - } - - // MARK: - Private - - /// Compares two formats by sample rate, channel count, common format, - /// and interleaving — matching the Swift SDK's `AVAudioFormat+Equality`. - private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool { - guard let lhs = lhs, let rhs = rhs else { return false } - return lhs.sampleRate == rhs.sampleRate - && lhs.channelCount == rhs.channelCount - && lhs.commonFormat == rhs.commonFormat - && lhs.isInterleaved == rhs.isInterleaved - } -} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift deleted file mode 100644 index 011f9d46a..000000000 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift +++ /dev/null @@ -1,168 +0,0 @@ -// -// Copyright © 2026 Stream.io Inc. All rights reserved. -// - -import AVFoundation -import CoreMedia -import WebRTC - -/// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the -/// WebRTC microphone capture stream by inserting an `AVAudioPlayerNode` and -/// `AVAudioMixerNode` into the engine's input graph. -/// -/// Graph topology (wired in `onConfigureInputFromSource`): -/// ``` -/// source (mic VP) --> mixerNode --> destination (WebRTC capture) -/// ^ -/// playerNode -----------/ -/// ``` -/// -/// The mixer stays dormant (no nodes attached) until `startMixing` is called. -/// Screen audio buffers are scheduled on the player node via `enqueue(_:)`. -@objc public final class ScreenShareAudioMixer: NSObject, AudioGraphConfigurationDelegate { - - // MARK: - Audio graph nodes - - private let playerNode = AVAudioPlayerNode() - private let mixerNode = AVAudioMixerNode() - - // MARK: - Audio conversion - - private let audioConverter = ScreenShareAudioConverter() - - // MARK: - State - - private var isMixing = false - - /// The engine reference from the last `onConfigureInputFromSource` call. - /// Used to detach nodes on cleanup. - private weak var currentEngine: AVAudioEngine? - - /// Format of the input graph path, used for converting screen audio. - private var graphFormat: AVAudioFormat? - - /// Whether our nodes are currently attached to the engine. - private var nodesAttached = false - - // MARK: - Init - - @objc public override init() { - super.init() - } - - // MARK: - AudioGraphConfigurationDelegate - - public func onConfigureInputFromSource( - _ engine: AVAudioEngine, - source: AVAudioNode?, - destination: AVAudioNode, - format: AVAudioFormat - ) { - currentEngine = engine - graphFormat = format - - guard isMixing else { return } - - attachAndWireNodes(engine: engine, source: source, destination: destination, format: format) - } - - public func onDidStopEngine(_ engine: AVAudioEngine) { - detachNodes(from: engine) - } - - public func onDidDisableEngine(_ engine: AVAudioEngine) { - detachNodes(from: engine) - } - - public func onWillReleaseEngine(_ engine: AVAudioEngine) { - detachNodes(from: engine) - currentEngine = nil - graphFormat = nil - } - - // MARK: - Public API - - /// Enable audio mixing. Call when screen share with audio starts. - /// - /// If the engine is already running (i.e., `onConfigureInputFromSource` has - /// already fired), this triggers an ADM reconfiguration so the graph gets - /// rewired with our nodes. - @objc public func startMixing() { - guard !isMixing else { return } - isMixing = true - } - - /// Stop audio mixing and detach nodes from the engine. - @objc public func stopMixing() { - guard isMixing else { return } - isMixing = false - - playerNode.stop() - if let engine = currentEngine { - detachNodes(from: engine) - } - audioConverter.reset() - } - - /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer. - @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) { - guard isMixing, let targetFormat = graphFormat else { return } - - // 1. CMSampleBuffer → AVAudioPCMBuffer - guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return } - - // 2. Silence detection - if ScreenShareAudioConverter.isSilent(pcm) { return } - - // 3. Convert to graph format (e.g. 48 kHz / 1 ch / float32) - let buffer: AVAudioPCMBuffer - if pcm.format.sampleRate != targetFormat.sampleRate - || pcm.format.channelCount != targetFormat.channelCount - || pcm.format.commonFormat != targetFormat.commonFormat - || pcm.format.isInterleaved != targetFormat.isInterleaved { - guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else { return } - buffer = converted - } else { - buffer = pcm - } - - // 4. Schedule on player node - guard nodesAttached else { return } - - playerNode.scheduleBuffer(buffer) - - if !playerNode.isPlaying { - playerNode.play() - } - } - - // MARK: - Private graph management - - private func attachAndWireNodes( - engine: AVAudioEngine, - source: AVAudioNode?, - destination: AVAudioNode, - format: AVAudioFormat - ) { - detachNodes(from: engine) - - engine.attach(mixerNode) - engine.attach(playerNode) - - if let source = source { - engine.connect(source, to: mixerNode, format: format) - } - engine.connect(playerNode, to: mixerNode, format: format) - engine.connect(mixerNode, to: destination, format: format) - - nodesAttached = true - } - - private func detachNodes(from engine: AVAudioEngine) { - guard nodesAttached else { return } - - engine.detach(playerNode) - engine.detach(mixerNode) - nodesAttached = false - } -} diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m index 0c14160d1..4455e60f2 100644 --- a/ios/RCTWebRTC/WebRTCModule.m +++ b/ios/RCTWebRTC/WebRTCModule.m @@ -86,6 +86,17 @@ - (instancetype)init { RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class])); RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class])); + // Always ensure an audio processing module exists so screen share + // audio mixing can use capturePostProcessingDelegate at runtime. + if (audioProcessingModule == nil && audioDevice == nil) { + audioProcessingModule = [[RTCDefaultAudioProcessingModule alloc] + initWithConfig:nil + capturePostProcessingDelegate:nil + renderPreProcessingDelegate:nil]; + options.audioProcessingModule = audioProcessingModule; + RCTLogInfo(@"Created default audio processing module for screen share audio mixing"); + } + if (audioProcessingModule != nil) { if (audioDevice != nil) { NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice."); From f8929b5293ec1efd647de2ed21c1714a0dea4a7a Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Wed, 25 Mar 2026 11:36:51 +0100 Subject: [PATCH 13/14] chore: code cleanup --- .../Utils/AudioDeviceModule/AudioDeviceModule.swift | 11 ----------- .../ScreenShare/ScreenShareAudioMixer.swift | 1 - 2 files changed, 12 deletions(-) diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index c71dd7cbd..91f0cee3a 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -272,15 +272,6 @@ import WebRTC source.isVoiceProcessingBypassed = isPreferred } - /// Sets voice processing bypass on the underlying audio device module. - /// When bypassed, echo cancellation / AGC / noise suppression are disabled, - /// which prevents the system from treating screen share audio as echo. - /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore. - @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) { - source.isVoiceProcessingBypassed = isBypassed - isVoiceProcessingBypassedSubject.send(isBypassed) - } - /// Starts or stops speaker playout on the ADM, retrying transient failures. /// - Parameter isActive: `true` to start playout, `false` to stop. /// - Throws: `AudioDeviceError` when WebRTC returns a non-zero status. @@ -541,8 +532,6 @@ import WebRTC format: format ) ) - - // Notify the audio graph delegate synchronously — this must happen audioLevelsAdapter.installInputTap( on: destination, format: format, diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift index 173e6750e..d45a10e4e 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift @@ -34,7 +34,6 @@ import WebRTC /// Processing format from `audioProcessingInitialize`. private var processingSampleRate: Double = 0 private var processingChannels: Int = 0 - /// Target format for conversion, built from processing parameters. private var targetFormat: AVAudioFormat? /// Scale factor: RTCAudioBuffer uses FloatS16 format (Float32 values in the From c1b1d6c186f45ec9be5edcb14b37579d58faf995 Mon Sep 17 00:00:00 2001 From: Artem Grintsevich Date: Fri, 27 Mar 2026 16:33:25 +0100 Subject: [PATCH 14/14] chore: small tweak --- ios/RCTWebRTC/InAppScreenCapturer.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m index cf14a938b..893761ae9 100644 --- a/ios/RCTWebRTC/InAppScreenCapturer.m +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -108,9 +108,11 @@ - (void)stopCapture { - (void)registerAppStateObservers { if (_observingAppState) return; - _observingAppState = YES; dispatch_async(dispatch_get_main_queue(), ^{ + if (self->_observingAppState || !self->_capturing) return; + self->_observingAppState = YES; + [[NSNotificationCenter defaultCenter] addObserver:self selector:@selector(appDidBecomeActive) name:UIApplicationDidBecomeActiveNotification