From 1a6bdbb5e9cd6e1fd6eec30e70e421f5e463ed56 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Fri, 20 Feb 2026 15:24:21 +0100
Subject: [PATCH 01/14] feat: added captured audio for screen sharing for
 Android

---
 .../oney/WebRTCModule/GetUserMediaImpl.java   | 15 ++++++-
 .../com/oney/WebRTCModule/WebRTCModule.java   | 43 +++++++++++++++++++
 .../WebRTCModule/WebRTCModuleOptions.java     | 25 +++++++++++
 .../audio/AudioProcessingController.java      | 13 ++++--
 4 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java
index 861919513..c17254a8b 100644
--- a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java
+++ b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java
@@ -39,7 +39,7 @@
  * The implementation of {@code getUserMedia} extracted into a separate file in
  * order to reduce complexity and to (somewhat) separate concerns.
  */
-class GetUserMediaImpl {
+public class GetUserMediaImpl {
     /**
      * The {@link Log} tag with which {@code GetUserMediaImpl} is to log.
      */
@@ -62,6 +62,15 @@ class GetUserMediaImpl {
     private Promise displayMediaPromise;
     private Intent mediaProjectionPermissionResultData;
 
+    /**
+     * Returns the MediaProjection permission result data Intent.
+     * This Intent can be used to create a MediaProjection for audio capture
+     * via AudioPlaybackCaptureConfiguration.
+     */
+    public Intent getMediaProjectionPermissionResultData() {
+        return mediaProjectionPermissionResultData;
+    }
+
     private final ServiceConnection mediaProjectionServiceConnection = new ServiceConnection() {
         @Override
         public void onServiceConnected(ComponentName name, IBinder service) {
@@ -355,7 +364,9 @@ private void createScreenStream() {
         }
 
         // Cleanup
-        mediaProjectionPermissionResultData = null;
+        // Note: mediaProjectionPermissionResultData is intentionally NOT nulled here; it is
+        // retained to create a MediaProjection for screen share audio capture (AudioPlaybackCaptureConfiguration).
+        // NOTE(review): apps targeting Android 14+ may not pass one result Intent to getMediaProjection() twice - confirm.
         displayMediaPromise = null;
     }
 
diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
index 6e981f077..dd0a29e27 100644
--- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
+++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
@@ -129,9 +129,48 @@ private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext re
                 .setUseHardwareAcousticEchoCanceler(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
                 .setUseHardwareNoiseSuppressor(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
                 .setUseStereoOutput(true)
+                .setAudioBufferCallback((audioBuffer, audioFormat, channelCount, sampleRate, bytesRead, captureTimeNs) -> {
+                    if (bytesRead > 0) {
+                        WebRTCModuleOptions.ScreenAudioBytesProvider provider =
+                                WebRTCModuleOptions.getInstance().screenAudioBytesProvider;
+                        if (provider != null) {
+                            java.nio.ByteBuffer screenBuffer = provider.getScreenAudioBytes(bytesRead);
+                            if (screenBuffer != null && screenBuffer.remaining() > 0) {
+                                mixScreenAudioIntoBuffer(audioBuffer, screenBuffer, bytesRead);
+                            }
+                        }
+                    }
+                    return captureTimeNs;
+                })
                 .createAudioDeviceModule();
     }
 
+    /**
+     * Mixes screen audio into the microphone buffer in place: 16-bit little-endian PCM
+     * samples are summed and clamped to the short range. Works on duplicate views so the
+     * position, limit and byte order of the caller-owned buffers are left untouched.
+     *
+     * @param micBuffer    captured microphone PCM; its content is overwritten with the mix
+     * @param screenBuffer screen audio PCM, read from index 0 up to its limit
+     * @param bytesRead    number of valid microphone bytes; at most this much is mixed
+     */
+    private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer,
+                                                  java.nio.ByteBuffer screenBuffer,
+                                                  int bytesRead) {
+        // duplicate() shares the bytes but has independent position/limit/order state.
+        java.nio.ShortBuffer micShorts = ((java.nio.ByteBuffer) micBuffer.duplicate().position(0))
+                .order(java.nio.ByteOrder.LITTLE_ENDIAN).asShortBuffer();
+        java.nio.ShortBuffer screenShorts = ((java.nio.ByteBuffer) screenBuffer.duplicate().position(0))
+                .order(java.nio.ByteOrder.LITTLE_ENDIAN).asShortBuffer();
+
+        int samplesToMix = Math.min(bytesRead / 2,
+                Math.min(micShorts.remaining(), screenShorts.remaining()));
+        for (int i = 0; i < samplesToMix; i++) {
+            int sum = micShorts.get(i) + screenShorts.get(i);
+            micShorts.put(i, (short) Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, sum)));
+        }
+    }
+
     @NonNull
     @Override
     public String getName() {
@@ -142,6 +181,10 @@ public AudioDeviceModule getAudioDeviceModule() {
         return mAudioDeviceModule;
     }
 
+    public GetUserMediaImpl getUserMediaImpl() {
+        return getUserMediaImpl;
+    }
+
     public PeerConnectionObserver getPeerConnectionObserver(int id) {
         return mPeerConnectionObservers.get(id);
     }
diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
index 6187c9472..24e53f8ce 100644
--- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
+++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
@@ -8,6 +8,8 @@
 import org.webrtc.VideoEncoderFactory;
 import org.webrtc.audio.AudioDeviceModule;
 
+import java.nio.ByteBuffer;
+
 public class WebRTCModuleOptions {
     private static WebRTCModuleOptions instance;
 
@@ -20,6 +22,29 @@ public class WebRTCModuleOptions {
     public boolean enableMediaProjectionService;
     public AudioProcessingFactoryProvider audioProcessingFactoryProvider;
 
+    /**
+     * Provider for screen share audio bytes. When set, the AudioDeviceModule's
+     * AudioBufferCallback will mix screen audio into the mic buffer before
+     * WebRTC processing. This allows screen audio mixing to work alongside
+     * any audio processing factory (including noise cancellation).
+     *
+     * Set this when screen share audio capture starts, clear it when it stops.
+     */
+    public volatile ScreenAudioBytesProvider screenAudioBytesProvider;
+
+    /**
+     * Functional interface for providing screen audio bytes on demand.
+     */
+    public interface ScreenAudioBytesProvider {
+        /**
+         * Returns screen audio PCM; it is mixed from index 0 as 16-bit little-endian samples.
+         *
+         * @param bytesRequested number of bytes to read (matching mic buffer size)
+         * @return ByteBuffer with screen audio, or null if not available (null/empty is skipped)
+         */
+        ByteBuffer getScreenAudioBytes(int bytesRequested);
+    }
+
     public static WebRTCModuleOptions getInstance() {
         if (instance == null) {
             instance = new WebRTCModuleOptions();
diff --git a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
index 9444eb781..17ae78679 100644
--- a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
+++ b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
@@ -17,13 +17,20 @@ public class AudioProcessingController implements AudioProcessingFactoryProvider
     public ExternalAudioProcessingFactory externalAudioProcessingFactory;
 
     public AudioProcessingController() {
-        this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory();
-        this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing);
-        this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing);
+        // ExternalAudioProcessingFactory creation is deferred to getFactory()
+        // because its constructor calls JNI native methods that require the
+        // WebRTC native library to be loaded first (via PeerConnectionFactory.initialize()).
+        // This allows AudioProcessingController to be safely instantiated in
+        // MainApplication.onCreate() before the native library is loaded.
     }
 
     @Override
     public AudioProcessingFactory getFactory() {
+        if (this.externalAudioProcessingFactory == null) {
+            this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory();
+            this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing);
+            this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing);
+        }
         return this.externalAudioProcessingFactory;
     }
 }
\ No newline at end of file

From c4637a204ade8e15cfa8a08a8d7e86cc067d4e6a Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Tue, 24 Feb 2026 13:31:43 +0100
Subject: [PATCH 02/14] feat: added in app screen sharing

---
 ios/RCTWebRTC/InAppScreenCaptureController.h | 18 ++++
 ios/RCTWebRTC/InAppScreenCaptureController.m | 49 ++++++++++
 ios/RCTWebRTC/InAppScreenCapturer.h          | 21 +++++
 ios/RCTWebRTC/InAppScreenCapturer.m          | 98 ++++++++++++++++++++
 ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m  | 28 ++++--
 ios/RCTWebRTC/WebRTCModuleOptions.h          |  7 ++
 6 files changed, 215 insertions(+), 6 deletions(-)
 create mode 100644 ios/RCTWebRTC/InAppScreenCaptureController.h
 create mode 100644 ios/RCTWebRTC/InAppScreenCaptureController.m
 create mode 100644 ios/RCTWebRTC/InAppScreenCapturer.h
 create mode 100644 ios/RCTWebRTC/InAppScreenCapturer.m

diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.h b/ios/RCTWebRTC/InAppScreenCaptureController.h
new file mode 100644
index 000000000..535f5863f
--- /dev/null
+++ b/ios/RCTWebRTC/InAppScreenCaptureController.h
@@ -0,0 +1,18 @@
+#import <Foundation/Foundation.h>
+#import "CaptureController.h"
+#import "CapturerEventsDelegate.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@class InAppScreenCapturer;
+
+@interface InAppScreenCaptureController : CaptureController
+
+- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer;
+
+/// The underlying RPScreenRecorder-based capturer.
+@property(nonatomic, strong, readonly) InAppScreenCapturer *capturer;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.m b/ios/RCTWebRTC/InAppScreenCaptureController.m
new file mode 100644
index 000000000..1b9561d39
--- /dev/null
+++ b/ios/RCTWebRTC/InAppScreenCaptureController.m
@@ -0,0 +1,49 @@
+#if TARGET_OS_IOS
+
+#import "InAppScreenCaptureController.h"
+#import "InAppScreenCapturer.h"
+
+@interface InAppScreenCaptureController () <CapturerEventsDelegate>
+@end
+
+@implementation InAppScreenCaptureController
+
+- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer {
+    self = [super init];
+    if (self) {
+        _capturer = capturer;
+        _capturer.eventsDelegate = self;
+        self.deviceId = @"in-app-screen-capture";
+    }
+    return self;
+}
+
+- (void)dealloc {
+    [self.capturer stopCapture];
+}
+
+- (void)startCapture {
+    [self.capturer startCapture];
+}
+
+- (void)stopCapture {
+    [self.capturer stopCapture];
+}
+
+- (NSDictionary *)getSettings {
+    return @{
+        @"deviceId": self.deviceId ?: @"in-app-screen-capture",
+        @"groupId": @"",
+        @"frameRate": @(30)
+    };
+}
+
+#pragma mark - CapturerEventsDelegate
+
+- (void)capturerDidEnd:(RTCVideoCapturer *)capturer {
+    [self.eventsDelegate capturerDidEnd:capturer];
+}
+
+@end
+
+#endif
diff --git a/ios/RCTWebRTC/InAppScreenCapturer.h b/ios/RCTWebRTC/InAppScreenCapturer.h
new file mode 100644
index 000000000..3a8566676
--- /dev/null
+++ b/ios/RCTWebRTC/InAppScreenCapturer.h
@@ -0,0 +1,21 @@
+#import <AVFoundation/AVFoundation.h>
+#import <WebRTC/RTCVideoCapturer.h>
+#import "CapturerEventsDelegate.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface InAppScreenCapturer : RTCVideoCapturer
+
+@property(nonatomic, weak) id<CapturerEventsDelegate> eventsDelegate;
+
+/// Callback invoked for each .audioApp CMSampleBuffer from RPScreenRecorder.
+/// Set this before calling startCapture if audio mixing is desired.
+@property(nonatomic, copy, nullable) void (^audioBufferHandler)(CMSampleBufferRef);
+
+- (instancetype)initWithDelegate:(__weak id<RTCVideoCapturerDelegate>)delegate;
+- (void)startCapture;
+- (void)stopCapture;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m
new file mode 100644
index 000000000..9c86a378c
--- /dev/null
+++ b/ios/RCTWebRTC/InAppScreenCapturer.m
@@ -0,0 +1,113 @@
+#if TARGET_OS_IOS
+
+#import <ReplayKit/ReplayKit.h>
+#import <WebRTC/RTCCVPixelBuffer.h>
+#import <WebRTC/RTCVideoFrameBuffer.h>
+
+#import "InAppScreenCapturer.h"
+
+@implementation InAppScreenCapturer {
+    BOOL _capturing;
+}
+
+- (instancetype)initWithDelegate:(__weak id<RTCVideoCapturerDelegate>)delegate {
+    self = [super initWithDelegate:delegate];
+    return self;
+}
+
+- (void)startCapture {
+    if (_capturing) {
+        return;
+    }
+    _capturing = YES;
+
+    RPScreenRecorder *recorder = [RPScreenRecorder sharedRecorder];
+    recorder.microphoneEnabled = NO; // WebRTC handles mic input
+
+    __weak __typeof__(self) weakSelf = self;
+    [recorder startCaptureWithHandler:^(CMSampleBufferRef _Nonnull sampleBuffer,
+                                        RPSampleBufferType bufferType,
+                                        NSError * _Nullable error) {
+        // Invoked by ReplayKit for every sample; drop samples once the capturer
+        // is gone, stopped, or ReplayKit reports an error.
+        __strong __typeof__(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf || error || !strongSelf->_capturing) {
+            return;
+        }
+
+        switch (bufferType) {
+            case RPSampleBufferTypeVideo:
+                [strongSelf processVideoSampleBuffer:sampleBuffer];
+                break;
+            case RPSampleBufferTypeAudioApp: {
+                // Read the block once: stopCapture may nil the property between
+                // a nil-check and the call, and invoking a nil block crashes.
+                void (^audioHandler)(CMSampleBufferRef) = strongSelf.audioBufferHandler;
+                if (audioHandler) {
+                    audioHandler(sampleBuffer);
+                }
+                break;
+            }
+            case RPSampleBufferTypeAudioMic:
+                // Ignored — WebRTC handles mic capture via AudioDeviceModule
+                break;
+        }
+    } completionHandler:^(NSError * _Nullable error) {
+        if (error) {
+            NSLog(@"[InAppScreenCapturer] startCapture failed: %@", error.localizedDescription);
+            __strong __typeof__(weakSelf) strongSelf = weakSelf;
+            if (strongSelf) {
+                // Capture never started: clear the flag so a later startCapture can
+                // retry and stopCapture does not stop a recorder that is not running.
+                strongSelf->_capturing = NO;
+                [strongSelf.eventsDelegate capturerDidEnd:strongSelf];
+            }
+        }
+    }];
+}
+
+// Wraps the video sample's pixel buffer in an RTCVideoFrame (rotation 0, timestamp
+// taken from the sample's presentation time, in nanoseconds) and forwards it to the
+// capturer delegate. Samples without an image buffer are dropped.
+- (void)processVideoSampleBuffer:(CMSampleBufferRef)sampleBuffer {
+    CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
+    if (!pixelBuffer) {
+        return;
+    }
+
+    int64_t timeStampNs = (int64_t)(CMTimeGetSeconds(
+        CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * NSEC_PER_SEC);
+
+    RTCCVPixelBuffer *rtcPixelBuffer = [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer];
+    RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithBuffer:rtcPixelBuffer
+                                                              rotation:RTCVideoRotation_0
+                                                           timeStampNs:timeStampNs];
+
+    [self.delegate capturer:self didCaptureVideoFrame:videoFrame];
+}
+
+- (void)stopCapture {
+    if (!_capturing) {
+        return;
+    }
+    _capturing = NO;
+    self.audioBufferHandler = nil;
+
+    [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) {
+        if (error) {
+            NSLog(@"[InAppScreenCapturer] stopCapture error: %@", error.localizedDescription);
+        }
+    }];
+}
+
+- (void)dealloc {
+    if (_capturing) {
+        _capturing = NO;
+        self.audioBufferHandler = nil;
+        [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:nil];
+    }
+}
+
+@end
+
+#endif
diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
index ae6f9fb93..a6b976f16 100644
--- a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
+++ b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
@@ -13,6 +13,8 @@
 #import "WebRTCModule+VideoTrackAdapter.h"
 
 #import "ProcessorProvider.h"
+#import "InAppScreenCaptureController.h"
+#import "InAppScreenCapturer.h"
 #import "ScreenCaptureController.h"
 #import "ScreenCapturer.h"
 #import "TrackCapturerEventsEmitter.h"
@@ -202,14 +204,28 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack {
     NSString *trackUUID = [[NSUUID UUID] UUIDString];
     RTCVideoTrack *videoTrack = [self.peerConnectionFactory videoTrackWithSource:videoSource trackId:trackUUID];
 
-    ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource];
-    ScreenCaptureController *screenCaptureController =
-        [[ScreenCaptureController alloc] initWithCapturer:screenCapturer];
+    WebRTCModuleOptions *options = [WebRTCModuleOptions sharedInstance];
+    CaptureController *captureController;
+
+    if (options.useInAppScreenCapture) {
+        // Clear the flag so subsequent getDisplayMedia calls use broadcast by default
+        options.useInAppScreenCapture = NO;
+
+        InAppScreenCapturer *capturer = [[InAppScreenCapturer alloc] initWithDelegate:videoSource];
+        InAppScreenCaptureController *controller = [[InAppScreenCaptureController alloc] initWithCapturer:capturer];
+        captureController = controller;
+    } else {
+        // Existing broadcast extension path
+        ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource];
+        ScreenCaptureController *screenCaptureController =
+            [[ScreenCaptureController alloc] initWithCapturer:screenCapturer];
+        captureController = screenCaptureController;
+    }
 
     TrackCapturerEventsEmitter *emitter = [[TrackCapturerEventsEmitter alloc] initWith:trackUUID webRTCModule:self];
-    screenCaptureController.eventsDelegate = emitter;
-    videoTrack.captureController = screenCaptureController;
-    [screenCaptureController startCapture];
+    captureController.eventsDelegate = emitter;
+    videoTrack.captureController = captureController;
+    [captureController startCapture];
 
     // Add dimension detection for local video tracks immediately
     [self addLocalVideoTrackDimensionDetection:videoTrack];
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h
index d99cb8200..100995f9d 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.h
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.h
@@ -13,6 +13,13 @@ NS_ASSUME_NONNULL_BEGIN
 @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity;
 @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess;
 
+/// When YES, the next getDisplayMedia() call will use RPScreenRecorder (in-app capture)
+/// instead of the broadcast extension. Auto-cleared after use.
+@property(nonatomic, assign) BOOL useInAppScreenCapture;
+
+/// When YES, in-app screen capture will route .audioApp buffers to the audio mixer.
+@property(nonatomic, assign) BOOL includeScreenShareAudio;
+
 #pragma mark - This class is a singleton
 
 + (instancetype _Nonnull)sharedInstance;

From 1364e73343458e71f8d5463ade5a2fb7b02c94e9 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Tue, 24 Feb 2026 13:52:14 +0100
Subject: [PATCH 03/14] feat: handle screen capturing on app state changes

---
 ios/RCTWebRTC/InAppScreenCapturer.m | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m
index 9c86a378c..de2d4f403 100644
--- a/ios/RCTWebRTC/InAppScreenCapturer.m
+++ b/ios/RCTWebRTC/InAppScreenCapturer.m
@@ -8,10 +8,21 @@
 
 @implementation InAppScreenCapturer {
     BOOL _capturing;
+    BOOL _shouldResumeOnForeground;
 }
 
 - (instancetype)initWithDelegate:(__weak id<RTCVideoCapturerDelegate>)delegate {
     self = [super initWithDelegate:delegate];
+    if (self) {
+        [[NSNotificationCenter defaultCenter] addObserver:self
+                                                 selector:@selector(appDidBecomeActive)
+                                                     name:UIApplicationDidBecomeActiveNotification
+                                                   object:nil];
+        [[NSNotificationCenter defaultCenter] addObserver:self
+                                                 selector:@selector(appWillResignActive)
+                                                     name:UIApplicationWillResignActiveNotification
+                                                   object:nil];
+    }
     return self;
 }
 
@@ -21,6 +32,10 @@ - (void)startCapture {
     }
     _capturing = YES;
 
+    [self startRPScreenRecorder];
+}
+
+- (void)startRPScreenRecorder {
     RPScreenRecorder *recorder = [RPScreenRecorder sharedRecorder];
     recorder.microphoneEnabled = NO; // WebRTC handles mic input
 
@@ -76,8 +91,11 @@ - (void)stopCapture {
         return;
     }
     _capturing = NO;
+    _shouldResumeOnForeground = NO;
     self.audioBufferHandler = nil;
 
+    [[NSNotificationCenter defaultCenter] removeObserver:self];
+
     [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) {
         if (error) {
             NSLog(@"[InAppScreenCapturer] stopCapture error: %@", error.localizedDescription);
@@ -85,7 +103,30 @@ - (void)stopCapture {
     }];
 }
 
+#pragma mark - App Lifecycle
+
+- (void)appWillResignActive {
+    if (_capturing) {
+        _shouldResumeOnForeground = YES;
+        // Stop the RPScreenRecorder session — iOS suspends it in background anyway
+        [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) {
+            if (error) {
+                NSLog(@"[InAppScreenCapturer] background stop error: %@", error.localizedDescription);
+            }
+        }];
+    }
+}
+
+- (void)appDidBecomeActive {
+    if (_shouldResumeOnForeground && _capturing) {
+        _shouldResumeOnForeground = NO;
+        NSLog(@"[InAppScreenCapturer] Resuming capture after returning to foreground");
+        [self startRPScreenRecorder];
+    }
+}
+
 - (void)dealloc {
+    [[NSNotificationCenter defaultCenter] removeObserver:self];
     if (_capturing) {
         _capturing = NO;
         self.audioBufferHandler = nil;

From bc5de6ab919a399270ab14d8eb2a2d311fc89a1d Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Thu, 26 Feb 2026 10:42:54 +0100
Subject: [PATCH 04/14] feat: added ios screen share audio capturing

---
 ios/RCTWebRTC/InAppScreenCapturer.m           |  16 +-
 .../AudioDeviceModule/AudioDeviceModule.swift |  46 +++
 .../AudioGraphConfigurationDelegate.swift     |  38 +++
 .../ScreenShareAudioConverter.swift           | 208 +++++++++++++
 .../ScreenShareAudioMixer.swift               | 285 ++++++++++++++++++
 ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m   |  24 ++
 ios/RCTWebRTC/WebRTCModule.m                  |  21 +-
 ios/RCTWebRTC/WebRTCModuleOptions.h           |  19 ++
 ios/RCTWebRTC/WebRTCModuleOptions.m           |   7 +
 9 files changed, 655 insertions(+), 9 deletions(-)
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift

diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m
index de2d4f403..8eb6cb3aa 100644
--- a/ios/RCTWebRTC/InAppScreenCapturer.m
+++ b/ios/RCTWebRTC/InAppScreenCapturer.m
@@ -14,14 +14,14 @@ @implementation InAppScreenCapturer {
 - (instancetype)initWithDelegate:(__weak id<RTCVideoCapturerDelegate>)delegate {
     self = [super initWithDelegate:delegate];
     if (self) {
-        [[NSNotificationCenter defaultCenter] addObserver:self
-                                                 selector:@selector(appDidBecomeActive)
-                                                     name:UIApplicationDidBecomeActiveNotification
-                                                   object:nil];
-        [[NSNotificationCenter defaultCenter] addObserver:self
-                                                 selector:@selector(appWillResignActive)
-                                                     name:UIApplicationWillResignActiveNotification
-                                                   object:nil];
+        // [[NSNotificationCenter defaultCenter] addObserver:self
+        //                                          selector:@selector(appDidBecomeActive)
+        //                                              name:UIApplicationDidBecomeActiveNotification
+        //                                            object:nil];
+        // [[NSNotificationCenter defaultCenter] addObserver:self
+        //                                          selector:@selector(appWillResignActive)
+        //                                              name:UIApplicationWillResignActiveNotification
+        //                                            object:nil];
     }
     return self;
 }
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
index f46c2c911..23a728760 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
@@ -179,6 +179,17 @@ import WebRTC
     /// Strong reference to the current engine so we can introspect it if needed.
     @objc public var engine: AVAudioEngine?
 
+    /// Delegate that receives synchronous input graph configuration callbacks.
+    /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing.
+    @objc public weak var audioGraphDelegate: AudioGraphConfigurationDelegate?
+
+    /// Cached input context from the last `configureInputFromSource` callback.
+    /// These allow `startMixing` to configure the graph immediately when the
+    /// engine is already running, without waiting for the next callback.
+    @objc public private(set) weak var lastInputSource: AVAudioNode?
+    @objc public private(set) weak var lastInputDestination: AVAudioNode?
+    @objc public private(set) var lastInputFormat: AVAudioFormat?
+
     /// Secondary observer that receives forwarded delegate callbacks.
     /// This allows the AudioDeviceModuleObserver to receive events and forward them to JS.
     private let delegateObserver: RTCAudioDeviceModuleDelegate
@@ -268,6 +279,15 @@ import WebRTC
         source.isVoiceProcessingBypassed = isPreferred
     }
 
+    /// Sets voice processing bypass on the underlying audio device module.
+    /// When bypassed, echo cancellation / AGC / noise suppression are disabled,
+    /// which prevents the system from treating screen share audio as echo.
+    /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore.
+    @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) {
+        source.isVoiceProcessingBypassed = isBypassed
+        NSLog("[AudioDeviceModule] setVoiceProcessingBypassed: %@", isBypassed ? "YES" : "NO")
+    }
+
     /// Starts or stops speaker playout on the ADM, retrying transient failures.
     /// - Parameter isActive: `true` to start playout, `false` to stop.
     /// - Throws: `AudioDeviceError` when WebRTC returns a non-zero status.
@@ -445,6 +465,8 @@ import WebRTC
         isPlayoutEnabled: Bool,
         isRecordingEnabled: Bool
     ) -> Int {
+        audioGraphDelegate?.onDidStopEngine?(engine)
+
         subject.send(
             .didStopAudioEngine(
                 engine,
@@ -474,6 +496,8 @@ import WebRTC
         isPlayoutEnabled: Bool,
         isRecordingEnabled: Bool
     ) -> Int {
+        audioGraphDelegate?.onDidDisableEngine?(engine)
+
         subject.send(
             .didDisableAudioEngine(
                 engine,
@@ -500,7 +524,14 @@ import WebRTC
         _ audioDeviceModule: RTCAudioDeviceModule,
         willReleaseEngine engine: AVAudioEngine
     ) -> Int {
+        // Notify delegate BEFORE clearing cached context so it can
+        // tear down its graph while references are still valid.
+        audioGraphDelegate?.onWillReleaseEngine?(engine)
+
         self.engine = nil
+        lastInputSource = nil
+        lastInputDestination = nil
+        lastInputFormat = nil
         subject.send(.willReleaseAudioEngine(engine))
         audioLevelsAdapter.uninstall(on: 0)
         
@@ -520,6 +551,11 @@ import WebRTC
         format: AVAudioFormat,
         context: [AnyHashable: Any]
     ) -> Int {
+        // Cache the input context for on-demand use by ScreenShareAudioMixer.
+        lastInputSource = source
+        lastInputDestination = destination
+        lastInputFormat = format
+
         subject.send(
             .configureInputFromSource(
                 engine,
@@ -528,6 +564,16 @@ import WebRTC
                 format: format
             )
         )
+
+        // Notify the audio graph delegate synchronously — this must happen
+        // BEFORE the audio levels tap so the mixer can modify the graph first.
+        audioGraphDelegate?.onConfigureInputFromSource(
+            engine,
+            source: source,
+            destination: destination,
+            format: format
+        )
+
         audioLevelsAdapter.installInputTap(
             on: destination,
             format: format,
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
new file mode 100644
index 000000000..3d5a372e1
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
@@ -0,0 +1,38 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import AVFoundation
+
+/// Protocol that allows external code to hook into AVAudioEngine lifecycle
+/// events synchronously. Callbacks fire on WebRTC's audio thread.
+///
+/// Implementations must perform any AVAudioEngine graph modifications
+/// synchronously within the callback — async dispatch will race with
+/// WebRTC's `ConfigureVoiceProcessingNode`.
+@objc public protocol AudioGraphConfigurationDelegate: AnyObject {
+
+    /// Called when WebRTC (re)configures the engine's input graph.
+    /// This fires during engine setup, **before** `willStartEngine`.
+    ///
+    /// - Parameters:
+    ///   - engine: The current `AVAudioEngine` instance.
+    ///   - source: The upstream node (VP input), or `nil` when voice processing is disabled.
+    ///   - destination: The node that receives the input stream (WebRTC capture mixer).
+    ///   - format: The expected audio format for the input path.
+    func onConfigureInputFromSource(
+        _ engine: AVAudioEngine,
+        source: AVAudioNode?,
+        destination: AVAudioNode,
+        format: AVAudioFormat
+    )
+
+    /// Called when the engine is about to be released/deallocated.
+    @objc optional func onWillReleaseEngine(_ engine: AVAudioEngine)
+
+    /// Called after the engine has fully stopped.
+    @objc optional func onDidStopEngine(_ engine: AVAudioEngine)
+
+    /// Called after the engine has been disabled.
+    @objc optional func onDidDisableEngine(_ engine: AVAudioEngine)
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
new file mode 100644
index 000000000..45144553e
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
@@ -0,0 +1,208 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import Accelerate
+import AVFoundation
+import CoreMedia
+
+/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into
+/// `AVAudioPCMBuffer`s suitable for scheduling on an `AVAudioPlayerNode`.
+///
+/// Handles:
+/// - CMSampleBuffer → AVAudioPCMBuffer extraction (float32, int16, interleaved, non-interleaved)
+/// - Sample rate / channel / format conversion via cached AVAudioConverter
+/// - Silence detection via vDSP RMS analysis
+final class ScreenShareAudioConverter {
+
+    // MARK: - Constants
+
+    /// Buffers with RMS below this threshold (in dB) are considered silent.
+    private static let silenceThresholdDB: Float = -60.0
+
+    // MARK: - Cached converter
+
+    private var converter: AVAudioConverter?
+    private var converterInputFormat: AVAudioFormat?
+    private var converterOutputFormat: AVAudioFormat?
+
+    // MARK: - CMSampleBuffer → AVAudioPCMBuffer
+
+    /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer`.
+    ///
+    /// The PCM buffer is allocated with the sample buffer's own stream format and
+    /// filled by `CMSampleBufferCopyPCMDataIntoAudioBufferList`, so one code path
+    /// serves interleaved and non-interleaved layouts and fills every channel.
+    ///
+    /// CoreMedia performs the copy, sized by the destination buffer list. That
+    /// avoids manual pointer arithmetic into the block buffer, which would have
+    /// to validate per-channel offsets against the block length and cope with
+    /// block buffers whose memory is not contiguous.
+    ///
+    /// Only formats that `AVAudioPCMBuffer` exposes as float32 or int16 channel
+    /// data are accepted; any other format is logged and rejected.
+    ///
+    /// No format or sample-rate conversion happens here; see `convertIfRequired`.
+    ///
+    /// - Parameter sampleBuffer: Audio sample buffer to read from.
+    /// - Returns: A buffer in the sample buffer's own format whose `frameLength`
+    ///   equals the sample count, or `nil` when the format cannot be read, the
+    ///   buffer holds no samples or no data, the format is unsupported, or the
+    ///   copy fails.
+    func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
+        guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else {
+            NSLog("[ScreenShareAudio] Converter: no format description in CMSampleBuffer")
+            return nil
+        }
+
+        guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else {
+            NSLog("[ScreenShareAudio] Converter: no ASBD in format description")
+            return nil
+        }
+
+        guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else {
+            NSLog("[ScreenShareAudio] Converter: failed to create AVAudioFormat from ASBD")
+            return nil
+        }
+
+        // The CoreMedia copy API counts frames as Int32; reject anything larger.
+        let sampleCount = CMSampleBufferGetNumSamples(sampleBuffer)
+        guard sampleCount > 0,
+              let frameCount = Int32(exactly: sampleCount),
+              let pcmBuffer = AVAudioPCMBuffer(pcmFormat: avFormat, frameCapacity: AVAudioFrameCount(frameCount)) else {
+            return nil
+        }
+
+        // A sample buffer without a data buffer carries no audio to copy.
+        guard CMSampleBufferGetDataBuffer(sampleBuffer) != nil else {
+            return nil
+        }
+
+        // Supported sample types: float32 or int16 channel data only.
+        guard pcmBuffer.floatChannelData != nil || pcmBuffer.int16ChannelData != nil else {
+            NSLog("[ScreenShareAudio] Converter: unsupported PCM format (no float or int16 channel data)")
+            return nil
+        }
+
+        // Set frameLength before copying: the byte sizes in
+        // `mutableAudioBufferList` reflect the buffer's current frameLength.
+        pcmBuffer.frameLength = AVAudioFrameCount(frameCount)
+
+        let status = CMSampleBufferCopyPCMDataIntoAudioBufferList(
+            sampleBuffer,
+            at: 0,
+            frameCount: frameCount,
+            into: pcmBuffer.mutableAudioBufferList
+        )
+        guard status == noErr else {
+            return nil
+        }
+
+        return pcmBuffer
+    }
+
+    // MARK: - Format conversion
+
+    /// Returns `inputBuffer` expressed in `outputFormat`.
+    ///
+    /// When the two formats already agree (see `formatsMatch`) the input buffer
+    /// itself is returned. Otherwise a cached `AVAudioConverter` — rebuilt whenever
+    /// the format pair changes, with max quality and the mastering sample-rate
+    /// algorithm — renders into a new buffer. Returns `nil` on any failure.
+    func convertIfRequired(
+        _ inputBuffer: AVAudioPCMBuffer,
+        to outputFormat: AVAudioFormat
+    ) -> AVAudioPCMBuffer? {
+        let inputFormat = inputBuffer.format
+        guard !formatsMatch(inputFormat, outputFormat) else {
+            return inputBuffer  // nothing to convert
+        }
+
+        // The cached converter is valid only for the exact pair it was built for.
+        let cacheIsValid = converter != nil
+            && formatsMatch(converterInputFormat, inputFormat)
+            && formatsMatch(converterOutputFormat, outputFormat)
+        if !cacheIsValid {
+            let fresh = AVAudioConverter(from: inputFormat, to: outputFormat)
+            fresh?.sampleRateConverterQuality = AVAudioQuality.max.rawValue
+            fresh?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering
+            converter = fresh
+            converterInputFormat = inputFormat
+            converterOutputFormat = outputFormat
+        }
+
+        guard let activeConverter = converter else {
+            NSLog("[ScreenShareAudio] Converter: AVAudioConverter creation failed")
+            return nil
+        }
+
+        // Size the destination by the sample-rate ratio, rounded up.
+        let scaledLength = Double(inputBuffer.frameLength) * (outputFormat.sampleRate / inputFormat.sampleRate)
+        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(ceil(scaledLength))) else {
+            return nil
+        }
+
+        // Hand the input buffer to the converter exactly once per call.
+        var conversionError: NSError?
+        var inputPending = true
+        activeConverter.convert(to: outputBuffer, error: &conversionError) { _, outStatus in
+            guard inputPending else {
+                outStatus.pointee = .noDataNow
+                return nil
+            }
+            inputPending = false
+            outStatus.pointee = .haveData
+            return inputBuffer
+        }
+
+        if let conversionError = conversionError {
+            NSLog("[ScreenShareAudio] Converter: conversion error: \(conversionError.localizedDescription)")
+            return nil
+        }
+        return outputBuffer
+    }
+
+    // MARK: - Silence detection
+
+    /// Returns `true` if the buffer is silent (RMS at or below -60 dB).
+    ///
+    /// Every channel is measured, so audio that is present only on a channel
+    /// other than the first is not misclassified as silence. Non-float buffers
+    /// return `false`, because the vDSP analysis requires float samples.
+    static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool {
+        guard let channelData = buffer.floatChannelData else {
+            return false
+        }
+        guard buffer.frameLength > 0 else {
+            return true
+        }
+        // Interleaved: a single plane of frameLength × stride samples; else one plane per channel.
+        let planeCount = buffer.format.isInterleaved ? 1 : Int(buffer.format.channelCount)
+        let samplesPerPlane = vDSP_Length(Int(buffer.frameLength) * buffer.stride)
+        return (0..<planeCount).allSatisfy { plane in
+            var rms: Float = 0
+            vDSP_rmsqv(channelData[plane], 1, &rms, samplesPerPlane)
+            return 20 * log10(max(rms, Float.ulpOfOne)) <= silenceThresholdDB
+        }
+    }
+
+    // MARK: - Cleanup
+
+    /// Drops the cached converter and the format pair it was built for, so the
+    /// next conversion constructs a fresh `AVAudioConverter`.
+    func reset() {
+        (converter, converterInputFormat, converterOutputFormat) = (nil, nil, nil)
+    }
+
+    // MARK: - Private
+
+    /// Structural equality for audio formats: sample rate, channel count, common
+    /// format and interleaving must all agree. A `nil` operand never matches.
+    private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool {
+        guard let first = lhs, let second = rhs else { return false }
+        if first.sampleRate != second.sampleRate { return false }
+        if first.channelCount != second.channelCount { return false }
+        if first.commonFormat != second.commonFormat { return false }
+        return first.isInterleaved == second.isInterleaved
+    }
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
new file mode 100644
index 000000000..3c9871f72
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
@@ -0,0 +1,285 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import AVFoundation
+import CoreMedia
+import WebRTC
+
+/// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the
+/// WebRTC microphone capture stream using `RTCAudioCustomProcessingDelegate`.
+///
+/// Screen audio samples are written into a ring buffer. WebRTC's audio processing
+/// pipeline calls `audioProcessingProcess(_:)` on its own thread; this method reads
+/// from the ring buffer and additively mixes the screen audio into the mic samples.
+@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate {
+
+    // MARK: - Ring buffer
+
+    private var ringBuffer: [Float]
+    private var writeIndex: Int = 0
+    private var readIndex: Int = 0
+    private let ringCapacity: Int
+    private let lock = NSLock()
+
+    // MARK: - Audio conversion
+
+    private let audioConverter = ScreenShareAudioConverter()
+
+    // MARK: - State
+
+    private var isMixing = false
+    private var processingFormat: AVAudioFormat?
+
+    // MARK: - Diagnostics
+
+    private var processCallCount: Int = 0
+    private var processWithDataCount: Int = 0
+    private var enqueueCallCount: Int = 0
+    private var enqueueWrittenCount: Int = 0
+    private var enqueueSilenceCount: Int = 0
+    private var enqueuePcmFailCount: Int = 0
+    private var enqueueConvFailCount: Int = 0
+    private var enqueueNoFormatCount: Int = 0
+    private var formatLogged = false
+
+    // MARK: - Init
+
+    @objc public override init() {
+        // 1 second at 48 kHz — enough to absorb jitter between
+        // RPScreenRecorder delivery and WebRTC processing cadence.
+        ringCapacity = 48000
+        ringBuffer = [Float](repeating: 0, count: ringCapacity)
+        super.init()
+        NSLog("[ScreenShareAudio] Mixer instance created")
+    }
+
+    deinit {
+        NSLog("[ScreenShareAudio] Mixer instance deallocated!")
+    }
+
+    // MARK: - RTCAudioCustomProcessingDelegate
+
+    public func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) {
+        lock.lock()
+        defer { lock.unlock() }
+        processingFormat = AVAudioFormat(
+            standardFormatWithSampleRate: Double(sampleRateHz),
+            channels: AVAudioChannelCount(channels)
+        )
+        writeIndex = 0
+        readIndex = 0
+        NSLog("[ScreenShareAudio] audioProcessingInitialize: %dHz, %dch", sampleRateHz, channels)
+    }
+
+    public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) {
+        guard isMixing else { return }
+        lock.lock()
+        defer { lock.unlock() }
+
+        processCallCount += 1
+
+        let frames = audioBuffer.frames
+        let channelBuffer = audioBuffer.rawBuffer(forChannel: 0)
+
+        // Mix ring buffer data into the mic capture if available
+        let available = writeIndex - readIndex
+        if available > 0 {
+            let framesToRead = min(frames, available)
+            for i in 0..<framesToRead {
+                channelBuffer[i] += ringBuffer[(readIndex + i) % ringCapacity]
+            }
+            readIndex += framesToRead
+            processWithDataCount += 1
+        }
+
+        // Periodic stats (every ~1s at 10ms cadence = 100 calls)
+        if processCallCount % 100 == 0 {
+            // Sample ring buffer amplitude at current read position
+            var ringPeak: Float = 0
+            let ringAvail = writeIndex - readIndex
+            let samplesToCheck = min(ringAvail, 480)
+            for i in 0..<samplesToCheck {
+                ringPeak = max(ringPeak, abs(ringBuffer[(readIndex + i) % ringCapacity]))
+            }
+            NSLog("[ScreenShareAudio] PROCESS stats: calls=%d, withData=%d, ringAvail=%d, ringPeak=%g, enqueued=%d, written=%d",
+                  processCallCount, processWithDataCount, ringAvail, ringPeak,
+                  enqueueCallCount, enqueueWrittenCount)
+        }
+    }
+
+    public func audioProcessingRelease() {
+        lock.lock()
+        defer { lock.unlock() }
+        writeIndex = 0
+        readIndex = 0
+        processingFormat = nil
+        NSLog("[ScreenShareAudio] audioProcessingRelease")
+    }
+
+    // MARK: - Public API
+
+    /// Enable audio buffer processing. Call when screen share with audio starts.
+    @objc public func startMixing() {
+        lock.lock()
+        defer { lock.unlock() }
+
+        guard !isMixing else {
+            NSLog("[ScreenShareAudio] startMixing called but already mixing")
+            return
+        }
+        isMixing = true
+        writeIndex = 0
+        readIndex = 0
+
+        // Reset diagnostic counters
+        processCallCount = 0
+        processWithDataCount = 0
+        enqueueCallCount = 0
+        enqueueWrittenCount = 0
+        enqueueSilenceCount = 0
+        enqueuePcmFailCount = 0
+        enqueueConvFailCount = 0
+        enqueueNoFormatCount = 0
+        formatLogged = false
+
+        NSLog("[ScreenShareAudio] startMixing (processingFormat=%@)",
+              processingFormat != nil ? "\(processingFormat!.sampleRate)Hz/\(processingFormat!.channelCount)ch" : "nil")
+    }
+
+    /// Stop processing audio buffers.
+    @objc public func stopMixing() {
+        lock.lock()
+        defer { lock.unlock() }
+
+        guard isMixing else {
+            NSLog("[ScreenShareAudio] stopMixing called but not mixing")
+            return
+        }
+        isMixing = false
+
+        NSLog("[ScreenShareAudio] stopMixing — FINAL STATS: process=%d (withData=%d), enqueue=%d (written=%d, silence=%d, pcmFail=%d, convFail=%d, noFmt=%d)",
+              processCallCount, processWithDataCount,
+              enqueueCallCount, enqueueWrittenCount, enqueueSilenceCount,
+              enqueuePcmFailCount, enqueueConvFailCount, enqueueNoFormatCount)
+
+        writeIndex = 0
+        readIndex = 0
+        audioConverter.reset()
+    }
+
+    /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer.
+    @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) {
+        guard isMixing else { return }
+
+        guard let targetFormat = processingFormat else {
+            enqueueNoFormatCount += 1
+            if enqueueNoFormatCount <= 5 {
+                NSLog("[ScreenShareAudio] ENQUEUE: no processingFormat yet (count=%d)", enqueueNoFormatCount)
+            }
+            return
+        }
+
+        enqueueCallCount += 1
+
+        // 1. CMSampleBuffer → AVAudioPCMBuffer
+        guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else {
+            enqueuePcmFailCount += 1
+            if enqueuePcmFailCount <= 5 {
+                NSLog("[ScreenShareAudio] ENQUEUE: pcmBuffer extraction failed (count=%d)", enqueuePcmFailCount)
+            }
+            return
+        }
+
+        // One-time format logging with full ASBD details
+        if !formatLogged {
+            formatLogged = true
+            let srcFmt = pcm.format
+            let asbd = srcFmt.streamDescription.pointee
+            NSLog("[ScreenShareAudio] ENQUEUE FORMAT: screen=%gHz/%dch/fmt%d/interleaved=%d → target=%gHz/%dch",
+                  srcFmt.sampleRate, srcFmt.channelCount, srcFmt.commonFormat.rawValue,
+                  srcFmt.isInterleaved ? 1 : 0,
+                  targetFormat.sampleRate, targetFormat.channelCount)
+            NSLog("[ScreenShareAudio] ASBD: bitsPerCh=%d, bytesPerFrame=%d, bytesPerPacket=%d, formatFlags=0x%X, formatID=%d",
+                  asbd.mBitsPerChannel, asbd.mBytesPerFrame, asbd.mBytesPerPacket,
+                  asbd.mFormatFlags, asbd.mFormatID)
+            // Check raw PCM amplitude
+            var rawPeak: Float = 0
+            if let floatCh = pcm.floatChannelData {
+                for i in 0..<min(Int(pcm.frameLength), 1024) {
+                    rawPeak = max(rawPeak, abs(floatCh[0][i]))
+                }
+                NSLog("[ScreenShareAudio] RAW PCM peak (float ch0): %g", rawPeak)
+            } else if let int16Ch = pcm.int16ChannelData {
+                var int16Peak: Int32 = 0  // Int32: abs(Int16.min) is not representable in Int16 and traps
+                for i in 0..<min(Int(pcm.frameLength), 1024) {
+                    int16Peak = max(int16Peak, abs(Int32(int16Ch[0][i])))
+                }
+                NSLog("[ScreenShareAudio] RAW PCM peak (int16 ch0): %d", int16Peak)
+            } else {
+                NSLog("[ScreenShareAudio] RAW PCM: NO float or int16 channel data! commonFormat=%d", srcFmt.commonFormat.rawValue)
+            }
+        }
+
+        // 2. Silence detection
+        if ScreenShareAudioConverter.isSilent(pcm) {
+            enqueueSilenceCount += 1
+            return
+        }
+
+        // 3. Convert to processing format (e.g. 48 kHz / 1 ch / float32)
+        let buffer: AVAudioPCMBuffer
+        if pcm.format.sampleRate != targetFormat.sampleRate
+            || pcm.format.channelCount != targetFormat.channelCount
+            || pcm.format.commonFormat != targetFormat.commonFormat
+            || pcm.format.isInterleaved != targetFormat.isInterleaved {
+            guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else {
+                enqueueConvFailCount += 1
+                if enqueueConvFailCount <= 5 {
+                    NSLog("[ScreenShareAudio] ENQUEUE: conversion failed (count=%d)", enqueueConvFailCount)
+                }
+                return
+            }
+            buffer = converted
+        } else {
+            buffer = pcm
+        }
+
+        // 4. Write to ring buffer
+        guard let floatData = buffer.floatChannelData else {
+            NSLog("[ScreenShareAudio] ENQUEUE: no floatChannelData after conversion!")
+            return
+        }
+        let frames = Int(buffer.frameLength)
+
+        // Periodic amplitude check on converted buffer (every 50th write)
+        if enqueueWrittenCount % 50 == 0 {
+            var peak: Float = 0
+            for i in 0..<min(frames, 1024) {
+                peak = max(peak, abs(floatData[0][i]))
+            }
+            NSLog("[ScreenShareAudio] CONVERTED peak amplitude: %g (frames=%d)", peak, frames)
+        }
+
+        lock.lock()
+        defer { lock.unlock() }
+
+        // Handle overflow: if ring is too full, advance read index
+        let available = writeIndex - readIndex
+        if available + frames > ringCapacity {
+            readIndex = writeIndex + frames - ringCapacity
+        }
+
+        for i in 0..<frames {
+            ringBuffer[(writeIndex + i) % ringCapacity] = floatData[0][i]
+        }
+        writeIndex += frames
+        enqueueWrittenCount += 1
+
+        // Periodic enqueue stats (every 50 ≈ ~1s)
+        if enqueueWrittenCount % 50 == 0 {
+            NSLog("[ScreenShareAudio] ENQUEUE stats: calls=%d, written=%d, frames=%d, ringAvail=%d, silence=%d",
+                  enqueueCallCount, enqueueWrittenCount, frames, writeIndex - readIndex, enqueueSilenceCount)
+        }
+    }
+}
diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
index a6b976f16..92edc3eef 100644
--- a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
+++ b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
@@ -18,6 +18,13 @@
 #import "ScreenCaptureController.h"
 #import "ScreenCapturer.h"
 #import "TrackCapturerEventsEmitter.h"
+
+// Import Swift-generated header for ScreenShareAudioMixer
+#if __has_include(<stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>)
+#import <stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>
+#elif __has_include("stream_react_native_webrtc-Swift.h")
+#import "stream_react_native_webrtc-Swift.h"
+#endif
 #import "VideoCaptureController.h"
 
 @implementation WebRTCModule (RTCMediaStream)
@@ -213,6 +220,23 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack {
 
         InAppScreenCapturer *capturer = [[InAppScreenCapturer alloc] initWithDelegate:videoSource];
         InAppScreenCaptureController *controller = [[InAppScreenCaptureController alloc] initWithCapturer:capturer];
+
+        // Store weak reference for audio mixing wiring
+        options.activeInAppScreenCapturer = capturer;
+
+        // If audio mixing is requested, set up the audio buffer handler.
+        // The handler forwards .audioApp CMSampleBuffers to the mixer's enqueue method.
+        // The mixer may not exist yet (created by startScreenShareAudioMixing),
+        // so we check at each callback invocation.
+        if (options.includeScreenShareAudio) {
+            capturer.audioBufferHandler = ^(CMSampleBufferRef sampleBuffer) {
+                ScreenShareAudioMixer *mixer = [WebRTCModuleOptions sharedInstance].screenShareAudioMixer;
+                if (mixer) {
+                    [mixer enqueue:sampleBuffer];
+                }
+            };
+        }
+
         captureController = controller;
     } else {
         // Existing broadcast extension path
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
index da9a335a2..25e1b5120 100644
--- a/ios/RCTWebRTC/WebRTCModule.m
+++ b/ios/RCTWebRTC/WebRTCModule.m
@@ -91,6 +91,13 @@ - (instancetype)init {
                 NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice.");
             }
             RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class]));
+
+            // Store reference to the default APM if it is one, so we can set
+            // capturePostProcessingDelegate later for screen share audio mixing.
+            if ([audioProcessingModule isKindOfClass:[RTCDefaultAudioProcessingModule class]]) {
+                options.defaultAudioProcessingModule = (RTCDefaultAudioProcessingModule *)audioProcessingModule;
+            }
+
             _peerConnectionFactory =
                 [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
@@ -103,12 +110,24 @@ - (instancetype)init {
                                                                                decoderFactory:decoderFactory
                                                                                   audioDevice:audioDevice];
         } else {
+            // No custom APM provided — create a mixer eagerly and set it as
+            // capturePostProcessingDelegate at APM creation time (not runtime).
+            // The mixer stays dormant (isMixing=false) until startMixing is called.
+            ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init];
+            options.screenShareAudioMixer = mixer;
+
+            RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc]
+                initWithConfig:nil
+                capturePostProcessingDelegate:mixer
+                renderPreProcessingDelegate:nil];
+            options.defaultAudioProcessingModule = defaultAPM;
+
             _peerConnectionFactory =
                 [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
                                                                  encoderFactory:encoderFactory
                                                                  decoderFactory:decoderFactory
-                                                          audioProcessingModule:nil];
+                                                          audioProcessingModule:defaultAPM];
         }
         
         _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self];
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h
index 100995f9d..c964df4ce 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.h
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.h
@@ -1,14 +1,25 @@
 #import <Foundation/Foundation.h>
 #import <WebRTC/WebRTC.h>
 
+@class InAppScreenCapturer;
+@class RTCDefaultAudioProcessingModule;
+
 NS_ASSUME_NONNULL_BEGIN
 
+// Forward declare the Swift class — the actual import happens in the .m file.
+@class ScreenShareAudioMixer;
+
 @interface WebRTCModuleOptions : NSObject
 
 @property(nonatomic, strong, nullable) id<RTCVideoDecoderFactory> videoDecoderFactory;
 @property(nonatomic, strong, nullable) id<RTCVideoEncoderFactory> videoEncoderFactory;
 @property(nonatomic, strong, nullable) id<RTCAudioDevice> audioDevice;
 @property(nonatomic, strong, nullable) id<RTCAudioProcessingModule> audioProcessingModule;
+
+/// Retained reference to the default audio processing module.
+/// Used to dynamically set capturePostProcessingDelegate for screen share audio mixing.
+@property(nonatomic, strong, nullable) RTCDefaultAudioProcessingModule *defaultAudioProcessingModule;
+
 @property(nonatomic, strong, nullable) NSDictionary *fieldTrials;
 @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity;
 @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess;
@@ -20,6 +31,14 @@ NS_ASSUME_NONNULL_BEGIN
 /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer.
 @property(nonatomic, assign) BOOL includeScreenShareAudio;
 
+/// The active screen share audio mixer instance. Created by
+/// `startScreenShareAudioMixing` and cleared by `stopScreenShareAudioMixing`.
+@property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer;
+
+/// Weak reference to the current in-app screen capturer, set during
+/// `createScreenCaptureVideoTrack` when in-app mode is used.
+@property(nonatomic, weak, nullable) InAppScreenCapturer *activeInAppScreenCapturer;
+
 #pragma mark - This class is a singleton
 
 + (instancetype _Nonnull)sharedInstance;
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.m b/ios/RCTWebRTC/WebRTCModuleOptions.m
index ba108da6e..f29ae67f9 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.m
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.m
@@ -1,5 +1,12 @@
 #import "WebRTCModuleOptions.h"
 
+// Import Swift-generated header for ScreenShareAudioMixer
+#if __has_include(<stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>)
+#import <stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>
+#elif __has_include("stream_react_native_webrtc-Swift.h")
+#import "stream_react_native_webrtc-Swift.h"
+#endif
+
 @implementation WebRTCModuleOptions
 
 #pragma mark - This class is a singleton

From f64916ffc3ee8695551dbddf356738c82d22bca4 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Tue, 10 Mar 2026 12:24:48 +0100
Subject: [PATCH 05/14] chore: adjusted android mix summing

---
 .../com/oney/WebRTCModule/WebRTCModule.java   | 26 ++++++++++++++-----
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
index dd0a29e27..831232cef 100644
--- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
+++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
@@ -146,7 +146,10 @@ private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext re
     }
 
     /**
-     * Mixes screen audio into the microphone buffer using PCM additive mixing with clamping.
+     * Mixes screen audio into the microphone buffer using PCM 16-bit additive mixing
+     * with clamping. Handles different buffer sizes safely: each buffer is read only
+     * within its own bounds. When one buffer is shorter, the other's samples pass
+     * through unmodified (mic samples stay as-is, or screen-only samples are written).
      */
     private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer,
                                                   java.nio.ByteBuffer screenBuffer,
@@ -160,11 +163,22 @@ private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer,
         java.nio.ShortBuffer micShorts = micBuffer.asShortBuffer();
         java.nio.ShortBuffer screenShorts = screenBuffer.asShortBuffer();
 
-        int samplesToMix = Math.min(bytesRead / 2,
-                Math.min(micShorts.remaining(), screenShorts.remaining()));
-
-        for (int i = 0; i < samplesToMix; i++) {
-            int sum = micShorts.get(i) + screenShorts.get(i);
+        int micSamples = Math.min(bytesRead / 2, micShorts.remaining());
+        int screenSamples = screenShorts.remaining();
+        int totalSamples = Math.max(micSamples, screenSamples);
+
+        for (int i = 0; i < totalSamples; i++) {
+            int sum;
+            if (i >= micSamples) {
+                // Screen-only: mic buffer is shorter — write screen sample directly
+                sum = screenShorts.get(i);
+            } else if (i >= screenSamples) {
+                // Mic-only: screen buffer is shorter — keep mic sample as-is
+                break;
+            } else {
+                // Both buffers have data — add samples
+                sum = micShorts.get(i) + screenShorts.get(i);
+            }
             if (sum > Short.MAX_VALUE) sum = Short.MAX_VALUE;
             if (sum < Short.MIN_VALUE) sum = Short.MIN_VALUE;
             micShorts.put(i, (short) sum);

From e34e2578bf43ca1aace9780a9a32a52f6b954d7b Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Tue, 10 Mar 2026 16:38:36 +0100
Subject: [PATCH 06/14] chore: ios moved to mixer node implementation

---
 .../ScreenShareAudioMixer.swift               | 269 ++++++++----------
 ios/RCTWebRTC/WebRTCModule.m                  |  28 +-
 2 files changed, 134 insertions(+), 163 deletions(-)

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
index 3c9871f72..5af2dc686 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
@@ -7,20 +7,24 @@ import CoreMedia
 import WebRTC
 
 /// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the
-/// WebRTC microphone capture stream using `RTCAudioCustomProcessingDelegate`.
+/// WebRTC microphone capture stream by inserting an `AVAudioPlayerNode` and
+/// `AVAudioMixerNode` into the engine's input graph.
 ///
-/// Screen audio samples are written into a ring buffer. WebRTC's audio processing
-/// pipeline calls `audioProcessingProcess(_:)` on its own thread; this method reads
-/// from the ring buffer and additively mixes the screen audio into the mic samples.
-@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate {
+/// Graph topology (wired in `onConfigureInputFromSource`):
+/// ```
+/// source (mic VP) --> mixerNode --> destination (WebRTC capture)
+///                        ^
+/// playerNode -----------/
+/// ```
+///
+/// The mixer stays dormant (no nodes attached) until `startMixing` is called.
+/// Screen audio buffers are scheduled on the player node via `enqueue(_:)`.
+@objc public final class ScreenShareAudioMixer: NSObject, AudioGraphConfigurationDelegate {
 
-    // MARK: - Ring buffer
+    // MARK: - Audio graph nodes
 
-    private var ringBuffer: [Float]
-    private var writeIndex: Int = 0
-    private var readIndex: Int = 0
-    private let ringCapacity: Int
-    private let lock = NSLock()
+    private let playerNode = AVAudioPlayerNode()
+    private let mixerNode = AVAudioMixerNode()
 
     // MARK: - Audio conversion
 
@@ -29,142 +33,113 @@ import WebRTC
     // MARK: - State
 
     private var isMixing = false
-    private var processingFormat: AVAudioFormat?
+
+    /// The engine reference from the last `onConfigureInputFromSource` call.
+    /// Used to detach nodes on cleanup.
+    private weak var currentEngine: AVAudioEngine?
+
+    /// Format of the input graph path, used for converting screen audio.
+    private var graphFormat: AVAudioFormat?
+
+    /// Whether our nodes are currently attached to the engine.
+    private var nodesAttached = false
 
     // MARK: - Diagnostics
 
-    private var processCallCount: Int = 0
-    private var processWithDataCount: Int = 0
     private var enqueueCallCount: Int = 0
-    private var enqueueWrittenCount: Int = 0
+    private var enqueueScheduledCount: Int = 0
     private var enqueueSilenceCount: Int = 0
     private var enqueuePcmFailCount: Int = 0
     private var enqueueConvFailCount: Int = 0
-    private var enqueueNoFormatCount: Int = 0
     private var formatLogged = false
 
     // MARK: - Init
 
     @objc public override init() {
-        // 1 second at 48 kHz — enough to absorb jitter between
-        // RPScreenRecorder delivery and WebRTC processing cadence.
-        ringCapacity = 48000
-        ringBuffer = [Float](repeating: 0, count: ringCapacity)
         super.init()
-        NSLog("[ScreenShareAudio] Mixer instance created")
+        NSLog("[ScreenShareAudio] Mixer instance created (graph approach)")
     }
 
     deinit {
         NSLog("[ScreenShareAudio] Mixer instance deallocated!")
     }
 
-    // MARK: - RTCAudioCustomProcessingDelegate
-
-    public func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) {
-        lock.lock()
-        defer { lock.unlock() }
-        processingFormat = AVAudioFormat(
-            standardFormatWithSampleRate: Double(sampleRateHz),
-            channels: AVAudioChannelCount(channels)
-        )
-        writeIndex = 0
-        readIndex = 0
-        NSLog("[ScreenShareAudio] audioProcessingInitialize: %dHz, %dch", sampleRateHz, channels)
-    }
+    // MARK: - AudioGraphConfigurationDelegate
 
-    public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) {
-        guard isMixing else { return }
-        lock.lock()
-        defer { lock.unlock() }
+    public func onConfigureInputFromSource(
+        _ engine: AVAudioEngine,
+        source: AVAudioNode?,
+        destination: AVAudioNode,
+        format: AVAudioFormat
+    ) {
+        currentEngine = engine
+        graphFormat = format
 
-        processCallCount += 1
+        guard isMixing else {
+            NSLog("[ScreenShareAudio] onConfigureInputFromSource: not mixing, skipping graph modification")
+            return
+        }
 
-        let frames = audioBuffer.frames
-        let channelBuffer = audioBuffer.rawBuffer(forChannel: 0)
+        attachAndWireNodes(engine: engine, source: source, destination: destination, format: format)
+    }
 
-        // Mix ring buffer data into the mic capture if available
-        let available = writeIndex - readIndex
-        if available > 0 {
-            let framesToRead = min(frames, available)
-            for i in 0..<framesToRead {
-                channelBuffer[i] += ringBuffer[(readIndex + i) % ringCapacity]
-            }
-            readIndex += framesToRead
-            processWithDataCount += 1
-        }
+    public func onDidStopEngine(_ engine: AVAudioEngine) {
+        detachNodes(from: engine)
+    }
 
-        // Periodic stats (every ~1s at 10ms cadence = 100 calls)
-        if processCallCount % 100 == 0 {
-            // Sample ring buffer amplitude at current read position
-            var ringPeak: Float = 0
-            let ringAvail = writeIndex - readIndex
-            let samplesToCheck = min(ringAvail, 480)
-            for i in 0..<samplesToCheck {
-                ringPeak = max(ringPeak, abs(ringBuffer[(readIndex + i) % ringCapacity]))
-            }
-            NSLog("[ScreenShareAudio] PROCESS stats: calls=%d, withData=%d, ringAvail=%d, ringPeak=%g, enqueued=%d, written=%d",
-                  processCallCount, processWithDataCount, ringAvail, ringPeak,
-                  enqueueCallCount, enqueueWrittenCount)
-        }
+    public func onDidDisableEngine(_ engine: AVAudioEngine) {
+        detachNodes(from: engine)
     }
 
-    public func audioProcessingRelease() {
-        lock.lock()
-        defer { lock.unlock() }
-        writeIndex = 0
-        readIndex = 0
-        processingFormat = nil
-        NSLog("[ScreenShareAudio] audioProcessingRelease")
+    public func onWillReleaseEngine(_ engine: AVAudioEngine) {
+        detachNodes(from: engine)
+        currentEngine = nil
+        graphFormat = nil
     }
 
     // MARK: - Public API
 
-    /// Enable audio buffer processing. Call when screen share with audio starts.
+    /// Enable audio mixing. Call when screen share with audio starts.
+    ///
+    /// If the engine is already running (i.e., `onConfigureInputFromSource` has
+    /// already fired), this triggers an ADM reconfiguration so the graph gets
+    /// rewired with our nodes.
     @objc public func startMixing() {
-        lock.lock()
-        defer { lock.unlock() }
-
         guard !isMixing else {
             NSLog("[ScreenShareAudio] startMixing called but already mixing")
             return
         }
         isMixing = true
-        writeIndex = 0
-        readIndex = 0
 
         // Reset diagnostic counters
-        processCallCount = 0
-        processWithDataCount = 0
         enqueueCallCount = 0
-        enqueueWrittenCount = 0
+        enqueueScheduledCount = 0
         enqueueSilenceCount = 0
         enqueuePcmFailCount = 0
         enqueueConvFailCount = 0
-        enqueueNoFormatCount = 0
         formatLogged = false
 
-        NSLog("[ScreenShareAudio] startMixing (processingFormat=%@)",
-              processingFormat != nil ? "\(processingFormat!.sampleRate)Hz/\(processingFormat!.channelCount)ch" : "nil")
+        NSLog("[ScreenShareAudio] startMixing (graphFormat=%@)",
+              graphFormat != nil ? "\(graphFormat!.sampleRate)Hz/\(graphFormat!.channelCount)ch" : "nil")
     }
 
-    /// Stop processing audio buffers.
+    /// Stop audio mixing and detach nodes from the engine.
     @objc public func stopMixing() {
-        lock.lock()
-        defer { lock.unlock() }
-
         guard isMixing else {
             NSLog("[ScreenShareAudio] stopMixing called but not mixing")
             return
         }
         isMixing = false
 
-        NSLog("[ScreenShareAudio] stopMixing — FINAL STATS: process=%d (withData=%d), enqueue=%d (written=%d, silence=%d, pcmFail=%d, convFail=%d, noFmt=%d)",
-              processCallCount, processWithDataCount,
-              enqueueCallCount, enqueueWrittenCount, enqueueSilenceCount,
-              enqueuePcmFailCount, enqueueConvFailCount, enqueueNoFormatCount)
+        NSLog("[ScreenShareAudio] stopMixing — FINAL STATS: enqueue=%d (scheduled=%d, silence=%d, pcmFail=%d, convFail=%d)",
+              enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount,
+              enqueuePcmFailCount, enqueueConvFailCount)
 
-        writeIndex = 0
-        readIndex = 0
+        // Stop player and detach nodes
+        playerNode.stop()
+        if let engine = currentEngine {
+            detachNodes(from: engine)
+        }
         audioConverter.reset()
     }
 
@@ -172,11 +147,7 @@ import WebRTC
     @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) {
         guard isMixing else { return }
 
-        guard let targetFormat = processingFormat else {
-            enqueueNoFormatCount += 1
-            if enqueueNoFormatCount <= 5 {
-                NSLog("[ScreenShareAudio] ENQUEUE: no processingFormat yet (count=%d)", enqueueNoFormatCount)
-            }
+        guard let targetFormat = graphFormat else {
             return
         }
 
@@ -191,34 +162,13 @@ import WebRTC
             return
         }
 
-        // One-time format logging with full ASBD details
+        // One-time format logging
         if !formatLogged {
             formatLogged = true
             let srcFmt = pcm.format
-            let asbd = srcFmt.streamDescription.pointee
-            NSLog("[ScreenShareAudio] ENQUEUE FORMAT: screen=%gHz/%dch/fmt%d/interleaved=%d → target=%gHz/%dch",
-                  srcFmt.sampleRate, srcFmt.channelCount, srcFmt.commonFormat.rawValue,
-                  srcFmt.isInterleaved ? 1 : 0,
+            NSLog("[ScreenShareAudio] ENQUEUE FORMAT: screen=%gHz/%dch → target=%gHz/%dch",
+                  srcFmt.sampleRate, srcFmt.channelCount,
                   targetFormat.sampleRate, targetFormat.channelCount)
-            NSLog("[ScreenShareAudio] ASBD: bitsPerCh=%d, bytesPerFrame=%d, bytesPerPacket=%d, formatFlags=0x%X, formatID=%d",
-                  asbd.mBitsPerChannel, asbd.mBytesPerFrame, asbd.mBytesPerPacket,
-                  asbd.mFormatFlags, asbd.mFormatID)
-            // Check raw PCM amplitude
-            var rawPeak: Float = 0
-            if let floatCh = pcm.floatChannelData {
-                for i in 0..<min(Int(pcm.frameLength), 1024) {
-                    rawPeak = max(rawPeak, abs(floatCh[0][i]))
-                }
-                NSLog("[ScreenShareAudio] RAW PCM peak (float ch0): %g", rawPeak)
-            } else if let int16Ch = pcm.int16ChannelData {
-                var int16Peak: Int16 = 0
-                for i in 0..<min(Int(pcm.frameLength), 1024) {
-                    int16Peak = max(int16Peak, abs(int16Ch[0][i]))
-                }
-                NSLog("[ScreenShareAudio] RAW PCM peak (int16 ch0): %d", int16Peak)
-            } else {
-                NSLog("[ScreenShareAudio] RAW PCM: NO float or int16 channel data! commonFormat=%d", srcFmt.commonFormat.rawValue)
-            }
         }
 
         // 2. Silence detection
@@ -227,7 +177,7 @@ import WebRTC
             return
         }
 
-        // 3. Convert to processing format (e.g. 48 kHz / 1 ch / float32)
+        // 3. Convert to graph format (e.g. 48 kHz / 1 ch / float32)
         let buffer: AVAudioPCMBuffer
         if pcm.format.sampleRate != targetFormat.sampleRate
             || pcm.format.channelCount != targetFormat.channelCount
@@ -245,41 +195,60 @@ import WebRTC
             buffer = pcm
         }
 
-        // 4. Write to ring buffer
-        guard let floatData = buffer.floatChannelData else {
-            NSLog("[ScreenShareAudio] ENQUEUE: no floatChannelData after conversion!")
+        // 4. Schedule on player node
+        guard nodesAttached else {
             return
         }
-        let frames = Int(buffer.frameLength)
 
-        // Periodic amplitude check on converted buffer (every 50th write)
-        if enqueueWrittenCount % 50 == 0 {
-            var peak: Float = 0
-            for i in 0..<min(frames, 1024) {
-                peak = max(peak, abs(floatData[0][i]))
-            }
-            NSLog("[ScreenShareAudio] CONVERTED peak amplitude: %g (frames=%d)", peak, frames)
-        }
+        playerNode.scheduleBuffer(buffer)
+        enqueueScheduledCount += 1
 
-        lock.lock()
-        defer { lock.unlock() }
-
-        // Handle overflow: if ring is too full, advance read index
-        let available = writeIndex - readIndex
-        if available + frames > ringCapacity {
-            readIndex = writeIndex + frames - ringCapacity
+        // Start playback if not already playing
+        if !playerNode.isPlaying {
+            playerNode.play()
         }
 
-        for i in 0..<frames {
-            ringBuffer[(writeIndex + i) % ringCapacity] = floatData[0][i]
+        // Periodic stats (every ~50 buffers ≈ ~1s)
+        if enqueueScheduledCount % 50 == 0 {
+            NSLog("[ScreenShareAudio] ENQUEUE stats: calls=%d, scheduled=%d, silence=%d",
+                  enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount)
         }
-        writeIndex += frames
-        enqueueWrittenCount += 1
+    }
 
-        // Periodic enqueue stats (every 50 ≈ ~1s)
-        if enqueueWrittenCount % 50 == 0 {
-            NSLog("[ScreenShareAudio] ENQUEUE stats: calls=%d, written=%d, frames=%d, ringAvail=%d, silence=%d",
-                  enqueueCallCount, enqueueWrittenCount, frames, writeIndex - readIndex, enqueueSilenceCount)
+    // MARK: - Private graph management
+
+    private func attachAndWireNodes(
+        engine: AVAudioEngine,
+        source: AVAudioNode?,
+        destination: AVAudioNode,
+        format: AVAudioFormat
+    ) {
+        // Detach if previously attached (e.g., engine reconfiguration)
+        detachNodes(from: engine)
+
+        engine.attach(mixerNode)
+        engine.attach(playerNode)
+
+        // Wire: source → mixerNode → destination
+        if let source = source {
+            engine.connect(source, to: mixerNode, format: format)
         }
+        engine.connect(playerNode, to: mixerNode, format: format)
+        engine.connect(mixerNode, to: destination, format: format)
+
+        nodesAttached = true
+        NSLog("[ScreenShareAudio] Graph wired: source(%@) → mixer → destination, format=%gHz/%dch",
+              source != nil ? "VP" : "nil", format.sampleRate, format.channelCount)
+    }
+
+    private func detachNodes(from engine: AVAudioEngine) {
+        guard nodesAttached else { return }
+
+        // Detaching automatically disconnects all connections
+        engine.detach(playerNode)
+        engine.detach(mixerNode)
+        nodesAttached = false
+
+        NSLog("[ScreenShareAudio] Nodes detached from engine")
     }
 }
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
index 25e1b5120..dd07e312b 100644
--- a/ios/RCTWebRTC/WebRTCModule.m
+++ b/ios/RCTWebRTC/WebRTCModule.m
@@ -86,14 +86,19 @@ - (instancetype)init {
         RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class]));
         RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class]));
 
+        // Always create the screen share audio mixer eagerly.
+        // It stays dormant (isMixing=false) until startMixing is called.
+        // It will be wired as audioGraphDelegate on the ADM after factory creation.
+        ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init];
+        options.screenShareAudioMixer = mixer;
+
         if (audioProcessingModule != nil) {
             if (audioDevice != nil) {
                 NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice.");
             }
             RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class]));
 
-            // Store reference to the default APM if it is one, so we can set
-            // capturePostProcessingDelegate later for screen share audio mixing.
+            // Store reference to the default APM if it is one.
             if ([audioProcessingModule isKindOfClass:[RTCDefaultAudioProcessingModule class]]) {
                 options.defaultAudioProcessingModule = (RTCDefaultAudioProcessingModule *)audioProcessingModule;
             }
@@ -110,16 +115,9 @@ - (instancetype)init {
                                                                                decoderFactory:decoderFactory
                                                                                   audioDevice:audioDevice];
         } else {
-            // No custom APM provided — create a mixer eagerly and set it as
-            // capturePostProcessingDelegate at APM creation time (not runtime).
-            // The mixer stays dormant (isMixing=false) until startMixing is called.
-            ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init];
-            options.screenShareAudioMixer = mixer;
-
-            RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc]
-                initWithConfig:nil
-                capturePostProcessingDelegate:mixer
-                renderPreProcessingDelegate:nil];
+            // No custom APM provided — create a default one (no capturePostProcessingDelegate needed;
+            // screen share audio mixing uses the AVAudioEngine graph approach via audioGraphDelegate).
+            RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc] init];
             options.defaultAudioProcessingModule = defaultAPM;
 
             _peerConnectionFactory =
@@ -129,11 +127,15 @@ - (instancetype)init {
                                                                  decoderFactory:decoderFactory
                                                           audioProcessingModule:defaultAPM];
         }
-        
+
         _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self];
         _audioDeviceModule = [[AudioDeviceModule alloc] initWithSource:_peerConnectionFactory.audioDeviceModule
                                                       delegateObserver:_rtcAudioDeviceModuleObserver];
 
+        // Wire the mixer as the audio graph delegate so it receives
+        // onConfigureInputFromSource callbacks to modify the engine graph.
+        _audioDeviceModule.audioGraphDelegate = mixer;
+
         _peerConnections = [NSMutableDictionary new];
         _localStreams = [NSMutableDictionary new];
         _localTracks = [NSMutableDictionary new];

From 3691b7e00404bd0574c93392e726724c70a074e3 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Tue, 10 Mar 2026 16:43:00 +0100
Subject: [PATCH 07/14] chore: removed diagnostic logs

---
 .../ScreenShareAudioConverter.swift           |   8 +-
 .../ScreenShareAudioMixer.swift               | 102 ++----------------
 2 files changed, 9 insertions(+), 101 deletions(-)

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
index 45144553e..83e27273c 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
@@ -34,17 +34,14 @@ final class ScreenShareAudioConverter {
     /// non-interleaved layouts.
     func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
         guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else {
-            NSLog("[ScreenShareAudio] Converter: no format description in CMSampleBuffer")
             return nil
         }
 
         guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else {
-            NSLog("[ScreenShareAudio] Converter: no ASBD in format description")
             return nil
         }
 
         guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else {
-            NSLog("[ScreenShareAudio] Converter: failed to create AVAudioFormat from ASBD")
             return nil
         }
 
@@ -94,7 +91,6 @@ final class ScreenShareAudioConverter {
             let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame)
             memcpy(int16Data[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame))
         } else {
-            NSLog("[ScreenShareAudio] Converter: unsupported PCM format (no float or int16 channel data)")
             return nil
         }
 
@@ -129,7 +125,6 @@ final class ScreenShareAudioConverter {
         }
 
         guard let converter = converter else {
-            NSLog("[ScreenShareAudio] Converter: AVAudioConverter creation failed")
             return nil
         }
 
@@ -154,8 +149,7 @@ final class ScreenShareAudioConverter {
             }
         }
 
-        if let error = error {
-            NSLog("[ScreenShareAudio] Converter: conversion error: \(error.localizedDescription)")
+        if error != nil {
             return nil
         }
 
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
index 5af2dc686..011f9d46a 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
@@ -44,24 +44,10 @@ import WebRTC
     /// Whether our nodes are currently attached to the engine.
     private var nodesAttached = false
 
-    // MARK: - Diagnostics
-
-    private var enqueueCallCount: Int = 0
-    private var enqueueScheduledCount: Int = 0
-    private var enqueueSilenceCount: Int = 0
-    private var enqueuePcmFailCount: Int = 0
-    private var enqueueConvFailCount: Int = 0
-    private var formatLogged = false
-
     // MARK: - Init
 
     @objc public override init() {
         super.init()
-        NSLog("[ScreenShareAudio] Mixer instance created (graph approach)")
-    }
-
-    deinit {
-        NSLog("[ScreenShareAudio] Mixer instance deallocated!")
     }
 
     // MARK: - AudioGraphConfigurationDelegate
@@ -75,10 +61,7 @@ import WebRTC
         currentEngine = engine
         graphFormat = format
 
-        guard isMixing else {
-            NSLog("[ScreenShareAudio] onConfigureInputFromSource: not mixing, skipping graph modification")
-            return
-        }
+        guard isMixing else { return }
 
         attachAndWireNodes(engine: engine, source: source, destination: destination, format: format)
     }
@@ -105,37 +88,15 @@ import WebRTC
     /// already fired), this triggers an ADM reconfiguration so the graph gets
     /// rewired with our nodes.
     @objc public func startMixing() {
-        guard !isMixing else {
-            NSLog("[ScreenShareAudio] startMixing called but already mixing")
-            return
-        }
+        guard !isMixing else { return }
         isMixing = true
-
-        // Reset diagnostic counters
-        enqueueCallCount = 0
-        enqueueScheduledCount = 0
-        enqueueSilenceCount = 0
-        enqueuePcmFailCount = 0
-        enqueueConvFailCount = 0
-        formatLogged = false
-
-        NSLog("[ScreenShareAudio] startMixing (graphFormat=%@)",
-              graphFormat != nil ? "\(graphFormat!.sampleRate)Hz/\(graphFormat!.channelCount)ch" : "nil")
     }
 
     /// Stop audio mixing and detach nodes from the engine.
     @objc public func stopMixing() {
-        guard isMixing else {
-            NSLog("[ScreenShareAudio] stopMixing called but not mixing")
-            return
-        }
+        guard isMixing else { return }
         isMixing = false
 
-        NSLog("[ScreenShareAudio] stopMixing — FINAL STATS: enqueue=%d (scheduled=%d, silence=%d, pcmFail=%d, convFail=%d)",
-              enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount,
-              enqueuePcmFailCount, enqueueConvFailCount)
-
-        // Stop player and detach nodes
         playerNode.stop()
         if let engine = currentEngine {
             detachNodes(from: engine)
@@ -145,37 +106,13 @@ import WebRTC
 
     /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer.
     @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) {
-        guard isMixing else { return }
-
-        guard let targetFormat = graphFormat else {
-            return
-        }
-
-        enqueueCallCount += 1
+        guard isMixing, let targetFormat = graphFormat else { return }
 
         // 1. CMSampleBuffer → AVAudioPCMBuffer
-        guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else {
-            enqueuePcmFailCount += 1
-            if enqueuePcmFailCount <= 5 {
-                NSLog("[ScreenShareAudio] ENQUEUE: pcmBuffer extraction failed (count=%d)", enqueuePcmFailCount)
-            }
-            return
-        }
-
-        // One-time format logging
-        if !formatLogged {
-            formatLogged = true
-            let srcFmt = pcm.format
-            NSLog("[ScreenShareAudio] ENQUEUE FORMAT: screen=%gHz/%dch → target=%gHz/%dch",
-                  srcFmt.sampleRate, srcFmt.channelCount,
-                  targetFormat.sampleRate, targetFormat.channelCount)
-        }
+        guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return }
 
         // 2. Silence detection
-        if ScreenShareAudioConverter.isSilent(pcm) {
-            enqueueSilenceCount += 1
-            return
-        }
+        if ScreenShareAudioConverter.isSilent(pcm) { return }
 
         // 3. Convert to graph format (e.g. 48 kHz / 1 ch / float32)
         let buffer: AVAudioPCMBuffer
@@ -183,36 +120,20 @@ import WebRTC
             || pcm.format.channelCount != targetFormat.channelCount
             || pcm.format.commonFormat != targetFormat.commonFormat
             || pcm.format.isInterleaved != targetFormat.isInterleaved {
-            guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else {
-                enqueueConvFailCount += 1
-                if enqueueConvFailCount <= 5 {
-                    NSLog("[ScreenShareAudio] ENQUEUE: conversion failed (count=%d)", enqueueConvFailCount)
-                }
-                return
-            }
+            guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else { return }
             buffer = converted
         } else {
             buffer = pcm
         }
 
         // 4. Schedule on player node
-        guard nodesAttached else {
-            return
-        }
+        guard nodesAttached else { return }
 
         playerNode.scheduleBuffer(buffer)
-        enqueueScheduledCount += 1
 
-        // Start playback if not already playing
         if !playerNode.isPlaying {
             playerNode.play()
         }
-
-        // Periodic stats (every ~50 buffers ≈ ~1s)
-        if enqueueScheduledCount % 50 == 0 {
-            NSLog("[ScreenShareAudio] ENQUEUE stats: calls=%d, scheduled=%d, silence=%d",
-                  enqueueCallCount, enqueueScheduledCount, enqueueSilenceCount)
-        }
     }
 
     // MARK: - Private graph management
@@ -223,13 +144,11 @@ import WebRTC
         destination: AVAudioNode,
         format: AVAudioFormat
     ) {
-        // Detach if previously attached (e.g., engine reconfiguration)
         detachNodes(from: engine)
 
         engine.attach(mixerNode)
         engine.attach(playerNode)
 
-        // Wire: source → mixerNode → destination
         if let source = source {
             engine.connect(source, to: mixerNode, format: format)
         }
@@ -237,18 +156,13 @@ import WebRTC
         engine.connect(mixerNode, to: destination, format: format)
 
         nodesAttached = true
-        NSLog("[ScreenShareAudio] Graph wired: source(%@) → mixer → destination, format=%gHz/%dch",
-              source != nil ? "VP" : "nil", format.sampleRate, format.channelCount)
     }
 
     private func detachNodes(from engine: AVAudioEngine) {
         guard nodesAttached else { return }
 
-        // Detaching automatically disconnects all connections
         engine.detach(playerNode)
         engine.detach(mixerNode)
         nodesAttached = false
-
-        NSLog("[ScreenShareAudio] Nodes detached from engine")
     }
 }

From 9fa28d3e720671aca69ac3985e1b0863291fe11e Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Wed, 11 Mar 2026 11:21:55 +0100
Subject: [PATCH 08/14] chore: added in app screen sharing restoration

---
 ios/RCTWebRTC/InAppScreenCapturer.m | 56 +++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m
index 8eb6cb3aa..cf14a938b 100644
--- a/ios/RCTWebRTC/InAppScreenCapturer.m
+++ b/ios/RCTWebRTC/InAppScreenCapturer.m
@@ -9,20 +9,11 @@
 @implementation InAppScreenCapturer {
     BOOL _capturing;
     BOOL _shouldResumeOnForeground;
+    BOOL _observingAppState;
 }
 
 - (instancetype)initWithDelegate:(__weak id<RTCVideoCapturerDelegate>)delegate {
     self = [super initWithDelegate:delegate];
-    if (self) {
-        // [[NSNotificationCenter defaultCenter] addObserver:self
-        //                                          selector:@selector(appDidBecomeActive)
-        //                                              name:UIApplicationDidBecomeActiveNotification
-        //                                            object:nil];
-        // [[NSNotificationCenter defaultCenter] addObserver:self
-        //                                          selector:@selector(appWillResignActive)
-        //                                              name:UIApplicationWillResignActiveNotification
-        //                                            object:nil];
-    }
     return self;
 }
 
@@ -62,10 +53,20 @@ - (void)startRPScreenRecorder {
                 break;
         }
     } completionHandler:^(NSError * _Nullable error) {
+        __strong __typeof__(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf) return;
+
         if (error) {
             NSLog(@"[InAppScreenCapturer] startCapture failed: %@", error.localizedDescription);
-            [weakSelf.eventsDelegate capturerDidEnd:weakSelf];
+            strongSelf->_capturing = NO;
+            [strongSelf.eventsDelegate capturerDidEnd:strongSelf];
+            return;
         }
+
+        // Capture started successfully — register for app lifecycle events.
+        // Done here (not in startCapture) so the RPScreenRecorder permission
+        // dialog doesn't trigger appWillResignActive before capture begins.
+        [strongSelf registerAppStateObservers];
     }];
 }
 
@@ -94,7 +95,7 @@ - (void)stopCapture {
     _shouldResumeOnForeground = NO;
     self.audioBufferHandler = nil;
 
-    [[NSNotificationCenter defaultCenter] removeObserver:self];
+    [self unregisterAppStateObservers];
 
     [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) {
         if (error) {
@@ -105,6 +106,34 @@ - (void)stopCapture {
 
 #pragma mark - App Lifecycle
 
+- (void)registerAppStateObservers {
+    if (_observingAppState) return;
+    _observingAppState = YES;
+
+    // Register synchronously (NSNotificationCenter is thread-safe) so that a
+    // racing unregisterAppStateObservers cannot run before the observers exist.
+    [[NSNotificationCenter defaultCenter] addObserver:self
+                                             selector:@selector(appDidBecomeActive)
+                                                 name:UIApplicationDidBecomeActiveNotification
+                                               object:nil];
+    [[NSNotificationCenter defaultCenter] addObserver:self
+                                             selector:@selector(appWillResignActive)
+                                                 name:UIApplicationWillResignActiveNotification
+                                               object:nil];
+}
+
+- (void)unregisterAppStateObservers {
+    if (!_observingAppState) return;
+    _observingAppState = NO;
+
+    [[NSNotificationCenter defaultCenter] removeObserver:self
+                                                    name:UIApplicationDidBecomeActiveNotification
+                                                  object:nil];
+    [[NSNotificationCenter defaultCenter] removeObserver:self
+                                                    name:UIApplicationWillResignActiveNotification
+                                                  object:nil];
+}
+
 - (void)appWillResignActive {
     if (_capturing) {
         _shouldResumeOnForeground = YES;
@@ -120,13 +149,12 @@ - (void)appWillResignActive {
 - (void)appDidBecomeActive {
     if (_shouldResumeOnForeground && _capturing) {
         _shouldResumeOnForeground = NO;
-        NSLog(@"[InAppScreenCapturer] Resuming capture after returning to foreground");
         [self startRPScreenRecorder];
     }
 }
 
 - (void)dealloc {
-    [[NSNotificationCenter defaultCenter] removeObserver:self];
+    [self unregisterAppStateObservers];
     if (_capturing) {
         _capturing = NO;
         self.audioBufferHandler = nil;

From b099459ff205c884aeda98a456927560837683fa Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Thu, 19 Mar 2026 17:32:18 +0100
Subject: [PATCH 09/14] chore: code cleanup

---
 .../audio/AudioProcessingController.java            | 13 +++----------
 ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m         | 13 -------------
 ios/RCTWebRTC/WebRTCModule.m                        | 12 +-----------
 ios/RCTWebRTC/WebRTCModuleOptions.h                 | 10 ++--------
 ios/RCTWebRTC/WebRTCModuleOptions.m                 |  7 -------
 5 files changed, 6 insertions(+), 49 deletions(-)

diff --git a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
index 17ae78679..9444eb781 100644
--- a/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
+++ b/android/src/main/java/com/oney/WebRTCModule/audio/AudioProcessingController.java
@@ -17,20 +17,13 @@ public class AudioProcessingController implements AudioProcessingFactoryProvider
     public ExternalAudioProcessingFactory externalAudioProcessingFactory;
 
     public AudioProcessingController() {
-        // ExternalAudioProcessingFactory creation is deferred to getFactory()
-        // because its constructor calls JNI native methods that require the
-        // WebRTC native library to be loaded first (via PeerConnectionFactory.initialize()).
-        // This allows AudioProcessingController to be safely instantiated in
-        // MainApplication.onCreate() before the native library is loaded.
+        this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory();
+        this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing);
+        this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing);
     }
 
     @Override
     public AudioProcessingFactory getFactory() {
-        if (this.externalAudioProcessingFactory == null) {
-            this.externalAudioProcessingFactory = new ExternalAudioProcessingFactory();
-            this.externalAudioProcessingFactory.setCapturePostProcessing(capturePostProcessing);
-            this.externalAudioProcessingFactory.setRenderPreProcessing(renderPreProcessing);
-        }
         return this.externalAudioProcessingFactory;
     }
 }
\ No newline at end of file
diff --git a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
index 92edc3eef..c4562df70 100644
--- a/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
+++ b/ios/RCTWebRTC/WebRTCModule+RTCMediaStream.m
@@ -224,19 +224,6 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack {
         // Store weak reference for audio mixing wiring
         options.activeInAppScreenCapturer = capturer;
 
-        // If audio mixing is requested, set up the audio buffer handler.
-        // The handler forwards .audioApp CMSampleBuffers to the mixer's enqueue method.
-        // The mixer may not exist yet (created by startScreenShareAudioMixing),
-        // so we check at each callback invocation.
-        if (options.includeScreenShareAudio) {
-            capturer.audioBufferHandler = ^(CMSampleBufferRef sampleBuffer) {
-                ScreenShareAudioMixer *mixer = [WebRTCModuleOptions sharedInstance].screenShareAudioMixer;
-                if (mixer) {
-                    [mixer enqueue:sampleBuffer];
-                }
-            };
-        }
-
         captureController = controller;
     } else {
         // Existing broadcast extension path
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
index dd07e312b..9d417cc7d 100644
--- a/ios/RCTWebRTC/WebRTCModule.m
+++ b/ios/RCTWebRTC/WebRTCModule.m
@@ -98,11 +98,6 @@ - (instancetype)init {
             }
             RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class]));
 
-            // Store reference to the default APM if it is one.
-            if ([audioProcessingModule isKindOfClass:[RTCDefaultAudioProcessingModule class]]) {
-                options.defaultAudioProcessingModule = (RTCDefaultAudioProcessingModule *)audioProcessingModule;
-            }
-
             _peerConnectionFactory =
                 [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
@@ -115,17 +110,12 @@ - (instancetype)init {
                                                                                decoderFactory:decoderFactory
                                                                                   audioDevice:audioDevice];
         } else {
-            // No custom APM provided — create a default one (no capturePostProcessingDelegate needed;
-            // screen share audio mixing uses the AVAudioEngine graph approach via audioGraphDelegate).
-            RTCDefaultAudioProcessingModule *defaultAPM = [[RTCDefaultAudioProcessingModule alloc] init];
-            options.defaultAudioProcessingModule = defaultAPM;
-
             _peerConnectionFactory =
                 [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine
                                                           bypassVoiceProcessing:NO
                                                                  encoderFactory:encoderFactory
                                                                  decoderFactory:decoderFactory
-                                                          audioProcessingModule:defaultAPM];
+                                                          audioProcessingModule:nil];
         }
 
         _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self];
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h
index c964df4ce..229c4716c 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.h
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.h
@@ -2,8 +2,6 @@
 #import <WebRTC/WebRTC.h>
 
 @class InAppScreenCapturer;
-@class RTCDefaultAudioProcessingModule;
-
 NS_ASSUME_NONNULL_BEGIN
 
 // Forward declare the Swift class — the actual import happens in the .m file.
@@ -16,10 +14,6 @@ NS_ASSUME_NONNULL_BEGIN
 @property(nonatomic, strong, nullable) id<RTCAudioDevice> audioDevice;
 @property(nonatomic, strong, nullable) id<RTCAudioProcessingModule> audioProcessingModule;
 
-/// Retained reference to the default audio processing module.
-/// Used to dynamically set capturePostProcessingDelegate for screen share audio mixing.
-@property(nonatomic, strong, nullable) RTCDefaultAudioProcessingModule *defaultAudioProcessingModule;
-
 @property(nonatomic, strong, nullable) NSDictionary *fieldTrials;
 @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity;
 @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess;
@@ -31,8 +25,8 @@ NS_ASSUME_NONNULL_BEGIN
 /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer.
 @property(nonatomic, assign) BOOL includeScreenShareAudio;
 
-/// The active screen share audio mixer instance. Created by
-/// `startScreenShareAudioMixing` and cleared by `stopScreenShareAudioMixing`.
+/// The screen share audio mixer instance. Created eagerly during WebRTCModule
+/// init and retained for the lifetime of the module (never cleared).
 @property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer;
 
 /// Weak reference to the current in-app screen capturer, set during
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.m b/ios/RCTWebRTC/WebRTCModuleOptions.m
index f29ae67f9..ba108da6e 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.m
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.m
@@ -1,12 +1,5 @@
 #import "WebRTCModuleOptions.h"
 
-// Import Swift-generated header for ScreenShareAudioMixer
-#if __has_include(<stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>)
-#import <stream_react_native_webrtc/stream_react_native_webrtc-Swift.h>
-#elif __has_include("stream_react_native_webrtc-Swift.h")
-#import "stream_react_native_webrtc-Swift.h"
-#endif
-
 @implementation WebRTCModuleOptions
 
 #pragma mark - This class is a singleton

From 160d8b113b7b15e7486a33a8f323afe9b69124c9 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Mon, 23 Mar 2026 14:59:12 +0100
Subject: [PATCH 10/14] chore: made mixer initialization lazy

---
 .../Utils/AudioDeviceModule/AudioDeviceModule.swift    |  2 +-
 ios/RCTWebRTC/WebRTCModule.m                           | 10 ----------
 ios/RCTWebRTC/WebRTCModuleOptions.h                    |  7 -------
 3 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
index 23a728760..854f756f2 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
@@ -181,7 +181,7 @@ import WebRTC
 
     /// Delegate that receives synchronous input graph configuration callbacks.
     /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing.
-    @objc public weak var audioGraphDelegate: AudioGraphConfigurationDelegate?
+    @objc public var audioGraphDelegate: AudioGraphConfigurationDelegate?
 
     /// Cached input context from the last `configureInputFromSource` callback.
     /// These allow `startMixing` to configure the graph immediately when the
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
index 9d417cc7d..0c14160d1 100644
--- a/ios/RCTWebRTC/WebRTCModule.m
+++ b/ios/RCTWebRTC/WebRTCModule.m
@@ -86,12 +86,6 @@ - (instancetype)init {
         RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class]));
         RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class]));
 
-        // Always create the screen share audio mixer eagerly.
-        // It stays dormant (isMixing=false) until startMixing is called.
-        // It will be wired as audioGraphDelegate on the ADM after factory creation.
-        ScreenShareAudioMixer *mixer = [[ScreenShareAudioMixer alloc] init];
-        options.screenShareAudioMixer = mixer;
-
         if (audioProcessingModule != nil) {
             if (audioDevice != nil) {
                 NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice.");
@@ -122,10 +116,6 @@ - (instancetype)init {
         _audioDeviceModule = [[AudioDeviceModule alloc] initWithSource:_peerConnectionFactory.audioDeviceModule
                                                       delegateObserver:_rtcAudioDeviceModuleObserver];
 
-        // Wire the mixer as the audio graph delegate so it receives
-        // onConfigureInputFromSource callbacks to modify the engine graph.
-        _audioDeviceModule.audioGraphDelegate = mixer;
-
         _peerConnections = [NSMutableDictionary new];
         _localStreams = [NSMutableDictionary new];
         _localTracks = [NSMutableDictionary new];
diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h
index 229c4716c..b363cc4ff 100644
--- a/ios/RCTWebRTC/WebRTCModuleOptions.h
+++ b/ios/RCTWebRTC/WebRTCModuleOptions.h
@@ -4,9 +4,6 @@
 @class InAppScreenCapturer;
 NS_ASSUME_NONNULL_BEGIN
 
-// Forward declare the Swift class — the actual import happens in the .m file.
-@class ScreenShareAudioMixer;
-
 @interface WebRTCModuleOptions : NSObject
 
 @property(nonatomic, strong, nullable) id<RTCVideoDecoderFactory> videoDecoderFactory;
@@ -25,10 +22,6 @@ NS_ASSUME_NONNULL_BEGIN
 /// When YES, in-app screen capture will route .audioApp buffers to the audio mixer.
 @property(nonatomic, assign) BOOL includeScreenShareAudio;
 
-/// The screen share audio mixer instance. Created eagerly during WebRTCModule
-/// init and retained for the lifetime of the module (never cleared).
-@property(nonatomic, strong, nullable) ScreenShareAudioMixer *screenShareAudioMixer;
-
 /// Weak reference to the current in-app screen capturer, set during
 /// `createScreenCaptureVideoTrack` when in-app mode is used.
 @property(nonatomic, weak, nullable) InAppScreenCapturer *activeInAppScreenCapturer;

From 351879c3f680bff66acdbfee48fb533f0d8a4a05 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Mon, 23 Mar 2026 18:17:11 +0100
Subject: [PATCH 11/14] chore: pr comment fix

---
 ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
index 854f756f2..474bbdd7f 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
@@ -285,7 +285,7 @@ import WebRTC
     /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore.
     @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) {
         source.isVoiceProcessingBypassed = isBypassed
-        NSLog("[AudioDeviceModule] setVoiceProcessingBypassed: %@", isBypassed ? "YES" : "NO")
+        isVoiceProcessingBypassedSubject.send(isBypassed)
     }
 
     /// Starts or stops speaker playout on the ADM, retrying transient failures.

From 7e4c0155f8d326a309b8666088637ad04b53d988 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Mon, 23 Mar 2026 18:21:01 +0100
Subject: [PATCH 12/14] chore: audio capture improvement

---
 .../AudioDeviceModule/AudioDeviceModule.swift |  39 +--
 .../AudioGraphConfigurationDelegate.swift     |  38 ---
 .../ScreenShare/AudioRingBuffer.swift         | 115 +++++++++
 .../ScreenShareAudioConverter.swift           | 233 ++++++++++++++++++
 .../ScreenShare/ScreenShareAudioMixer.swift   | 143 +++++++++++
 .../ScreenShareAudioConverter.swift           | 202 ---------------
 .../ScreenShareAudioMixer.swift               | 168 -------------
 ios/RCTWebRTC/WebRTCModule.m                  |  11 +
 8 files changed, 506 insertions(+), 443 deletions(-)
 delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift
 create mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
 delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
 delete mode 100644 ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
index 474bbdd7f..c71dd7cbd 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
@@ -179,16 +179,10 @@ import WebRTC
     /// Strong reference to the current engine so we can introspect it if needed.
     @objc public var engine: AVAudioEngine?
 
-    /// Delegate that receives synchronous input graph configuration callbacks.
-    /// Used by `ScreenShareAudioMixer` to modify the engine graph during mixing.
-    @objc public var audioGraphDelegate: AudioGraphConfigurationDelegate?
-
-    /// Cached input context from the last `configureInputFromSource` callback.
-    /// These allow `startMixing` to configure the graph immediately when the
-    /// engine is already running, without waiting for the next callback.
-    @objc public private(set) weak var lastInputSource: AVAudioNode?
-    @objc public private(set) weak var lastInputDestination: AVAudioNode?
-    @objc public private(set) var lastInputFormat: AVAudioFormat?
+    /// Screen share audio mixer. Implements `RTCAudioCustomProcessingDelegate`
+    /// and is set as `capturePostProcessingDelegate` on the
+    /// `RTCDefaultAudioProcessingModule` when screen share audio mixing starts.
+    @objc public let screenShareAudioMixer = ScreenShareAudioMixer()
 
     /// Secondary observer that receives forwarded delegate callbacks.
     /// This allows the AudioDeviceModuleObserver to receive events and forward them to JS.
@@ -239,7 +233,6 @@ import WebRTC
             .eraseToAnyPublisher()
         super.init()
 
-        _ = source.setMuteMode(.inputMixer)
         audioLevelsAdapter.subject = audioLevelSubject
         source.observer = self
     }
@@ -465,8 +458,6 @@ import WebRTC
         isPlayoutEnabled: Bool,
         isRecordingEnabled: Bool
     ) -> Int {
-        audioGraphDelegate?.onDidStopEngine?(engine)
-
         subject.send(
             .didStopAudioEngine(
                 engine,
@@ -496,8 +487,6 @@ import WebRTC
         isPlayoutEnabled: Bool,
         isRecordingEnabled: Bool
     ) -> Int {
-        audioGraphDelegate?.onDidDisableEngine?(engine)
-
         subject.send(
             .didDisableAudioEngine(
                 engine,
@@ -524,14 +513,7 @@ import WebRTC
         _ audioDeviceModule: RTCAudioDeviceModule,
         willReleaseEngine engine: AVAudioEngine
     ) -> Int {
-        // Notify delegate BEFORE clearing cached context so it can
-        // tear down its graph while references are still valid.
-        audioGraphDelegate?.onWillReleaseEngine?(engine)
-
         self.engine = nil
-        lastInputSource = nil
-        lastInputDestination = nil
-        lastInputFormat = nil
         subject.send(.willReleaseAudioEngine(engine))
         audioLevelsAdapter.uninstall(on: 0)
         
@@ -551,11 +533,6 @@ import WebRTC
         format: AVAudioFormat,
         context: [AnyHashable: Any]
     ) -> Int {
-        // Cache the input context for on-demand use by ScreenShareAudioMixer.
-        lastInputSource = source
-        lastInputDestination = destination
-        lastInputFormat = format
-
         subject.send(
             .configureInputFromSource(
                 engine,
@@ -566,14 +543,6 @@ import WebRTC
         )
 
         // Notify the audio graph delegate synchronously — this must happen
-        // BEFORE the audio levels tap so the mixer can modify the graph first.
-        audioGraphDelegate?.onConfigureInputFromSource(
-            engine,
-            source: source,
-            destination: destination,
-            format: format
-        )
-
         audioLevelsAdapter.installInputTap(
             on: destination,
             format: format,
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
deleted file mode 100644
index 3d5a372e1..000000000
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioGraphConfigurationDelegate.swift
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// Copyright © 2026 Stream.io Inc. All rights reserved.
-//
-
-import AVFoundation
-
-/// Protocol that allows external code to hook into AVAudioEngine lifecycle
-/// events synchronously. Callbacks fire on WebRTC's audio thread.
-///
-/// Implementations must perform any AVAudioEngine graph modifications
-/// synchronously within the callback — async dispatch will race with
-/// WebRTC's `ConfigureVoiceProcessingNode`.
-@objc public protocol AudioGraphConfigurationDelegate: AnyObject {
-
-    /// Called when WebRTC (re)configures the engine's input graph.
-    /// This fires during engine setup, **before** `willStartEngine`.
-    ///
-    /// - Parameters:
-    ///   - engine: The current `AVAudioEngine` instance.
-    ///   - source: The upstream node (VP input), or `nil` when voice processing is disabled.
-    ///   - destination: The node that receives the input stream (WebRTC capture mixer).
-    ///   - format: The expected audio format for the input path.
-    func onConfigureInputFromSource(
-        _ engine: AVAudioEngine,
-        source: AVAudioNode?,
-        destination: AVAudioNode,
-        format: AVAudioFormat
-    )
-
-    /// Called when the engine is about to be released/deallocated.
-    @objc optional func onWillReleaseEngine(_ engine: AVAudioEngine)
-
-    /// Called after the engine has fully stopped.
-    @objc optional func onDidStopEngine(_ engine: AVAudioEngine)
-
-    /// Called after the engine has been disabled.
-    @objc optional func onDidDisableEngine(_ engine: AVAudioEngine)
-}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift
new file mode 100644
index 000000000..10e589ff1
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift
@@ -0,0 +1,115 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import Darwin
+import Foundation
+
+/// Thread-safe single-producer single-consumer ring buffer for Float32 audio samples.
+///
+/// Uses `os_unfair_lock` for minimal-overhead synchronization between the
+/// ReplayKit callback thread (writer) and the audio render thread (reader).
+/// The lock is uncontended in the vast majority of cases (different cadences),
+/// making it suitable for real-time audio contexts.
+final class AudioRingBuffer {
+    private let buffer: UnsafeMutablePointer<Float>
+    private let capacity: Int
+    private var writePos: Int = 0
+    private var readPos: Int = 0
+    /// Heap-allocated: `&` on a stored property may pass the C lock API a temporary copy.
+    private let lock: UnsafeMutablePointer<os_unfair_lock>
+
+    /// Creates a ring buffer backed by `capacity` Float32 slots.
+    /// - Parameter capacity: Slot count; one slot is reserved, so `capacity - 1` samples fit.
+    init(capacity: Int) {
+        self.capacity = capacity
+        self.buffer = .allocate(capacity: capacity)
+        self.buffer.initialize(repeating: 0, count: capacity)
+        self.lock = .allocate(capacity: 1)
+        self.lock.initialize(to: os_unfair_lock())
+    }
+
+    deinit {
+        lock.deinitialize(count: 1)
+        lock.deallocate()
+        buffer.deallocate()
+    }
+
+    /// Number of frames available to read (thread-safe).
+    var availableToRead: Int {
+        os_unfair_lock_lock(lock)
+        defer { os_unfair_lock_unlock(lock) }
+        return _availableToRead
+    }
+
+    // MARK: - Internal (lock held)
+
+    private var _availableToRead: Int {
+        return (writePos >= readPos) ? (writePos - readPos) : (capacity - readPos + writePos)
+    }
+
+    private var _availableToWrite: Int {
+        return capacity - 1 - _availableToRead // One slot stays empty to tell full from empty.
+    }
+
+    // MARK: - Producer API (ReplayKit thread)
+
+    /// Writes up to `count` samples from `source` into the ring buffer.
+    /// - Returns: The number of samples actually written (may be less if buffer is full).
+    @discardableResult
+    func write(_ source: UnsafePointer<Float>, count: Int) -> Int {
+        os_unfair_lock_lock(lock)
+        defer { os_unfair_lock_unlock(lock) }
+
+        let toWrite = min(count, _availableToWrite)
+        guard toWrite > 0 else { return 0 }
+
+        let w = writePos
+        let firstPart = min(toWrite, capacity - w)
+        let secondPart = toWrite - firstPart
+
+        memcpy(buffer.advanced(by: w), source, firstPart * MemoryLayout<Float>.size)
+        if secondPart > 0 {
+            memcpy(buffer, source.advanced(by: firstPart), secondPart * MemoryLayout<Float>.size)
+        }
+
+        writePos = (w + toWrite) % capacity
+        return toWrite
+    }
+
+    // MARK: - Consumer API (audio render thread)
+
+    /// Reads up to `count` samples into `destination` from the ring buffer.
+    /// - Returns: The number of samples actually read (may be less if buffer is empty).
+    @discardableResult
+    func read(into destination: UnsafeMutablePointer<Float>, count: Int) -> Int {
+        os_unfair_lock_lock(lock)
+        defer { os_unfair_lock_unlock(lock) }
+
+        let toRead = min(count, _availableToRead)
+        guard toRead > 0 else { return 0 }
+
+        let r = readPos
+        let firstPart = min(toRead, capacity - r)
+        let secondPart = toRead - firstPart
+
+        memcpy(destination, buffer.advanced(by: r), firstPart * MemoryLayout<Float>.size)
+        if secondPart > 0 {
+            memcpy(destination.advanced(by: firstPart), buffer, secondPart * MemoryLayout<Float>.size)
+        }
+
+        readPos = (r + toRead) % capacity
+        return toRead
+    }
+
+    // MARK: - Reset
+
+    /// Clears all buffered data. Call when not concurrently accessed by both
+    /// producer and consumer, or when it is acceptable to lose data.
+    func reset() {
+        os_unfair_lock_lock(lock)
+        writePos = 0
+        readPos = 0
+        os_unfair_lock_unlock(lock)
+    }
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift
new file mode 100644
index 000000000..bc55fb988
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift
@@ -0,0 +1,233 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import Accelerate
+import AudioToolbox
+import AVFoundation
+import CoreMedia
+
+/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into
+/// `AVAudioPCMBuffer`s in the format requested by the caller.
+///
+/// Handles:
+/// - CMSampleBuffer → AVAudioPCMBuffer extraction via `CMSampleBufferCopyPCMDataIntoAudioBufferList`
+/// - Sample rate / channel / format conversion via cached AVAudioConverter
+/// - Silence detection via vDSP RMS analysis
+final class ScreenShareAudioConverter {
+
+    // MARK: - Constants
+
+    /// RMS level (in dB) at or below which a buffer is treated as silent.
+    private static let silenceThresholdDB: Float = -60.0
+
+    // MARK: - Cached converter
+
+    private var converter: AVAudioConverter?
+    private var converterInputFormat: AVAudioFormat?
+    private var converterOutputFormat: AVAudioFormat?
+
+    // MARK: - CMSampleBuffer → AVAudioPCMBuffer
+
+    /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer`
+    /// using Apple's `CMSampleBufferCopyPCMDataIntoAudioBufferList`. Returns `nil` unless the
+    /// payload is linear PCM stored as 32-bit float or 16-bit signed integer samples.
+    /// Matches the Swift SDK's `AVAudioPCMBuffer.from(_:)` implementation.
+    func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
+        guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer),
+              let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else {
+            return nil
+        }
+
+        // Only linear PCM can be copied into AVAudioPCMBuffer.
+        guard asbd.pointee.mFormatID == kAudioFormatLinearPCM else {
+            return nil
+        }
+
+        // Inspect format flags to build the correct AVAudioFormat.
+        let formatFlags = asbd.pointee.mFormatFlags
+        let isFloat = (formatFlags & kAudioFormatFlagIsFloat) != 0
+        let isSignedInt = (formatFlags & kAudioFormatFlagIsSignedInteger) != 0
+        let isBigEndian = (formatFlags & kAudioFormatFlagIsBigEndian) != 0
+        let isInterleaved = (formatFlags & kAudioFormatFlagIsNonInterleaved) == 0
+        let bitsPerChannel = Int(asbd.pointee.mBitsPerChannel)
+
+        // Choose an AVAudioCommonFormat compatible with the sample format.
+        let commonFormat: AVAudioCommonFormat
+        if isFloat, bitsPerChannel == 32 {
+            commonFormat = .pcmFormatFloat32
+        } else if isSignedInt, bitsPerChannel == 16 {
+            commonFormat = .pcmFormatInt16
+        } else {
+            return nil
+        }
+
+        // Build AVAudioFormat from explicit parameters (not streamDescription)
+        // to ensure consistent format identity for downstream comparisons.
+        guard let inputFormat = AVAudioFormat(
+            commonFormat: commonFormat,
+            sampleRate: asbd.pointee.mSampleRate,
+            channels: asbd.pointee.mChannelsPerFrame,
+            interleaved: isInterleaved
+        ) else {
+            return nil
+        }
+
+        let frameCount = AVAudioFrameCount(CMSampleBufferGetNumSamples(sampleBuffer))
+        guard frameCount > 0,
+              let pcmBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCount) else {
+            return nil
+        }
+
+        pcmBuffer.frameLength = frameCount
+
+        let bytesPerFrame = Int(asbd.pointee.mBytesPerFrame)
+        guard bytesPerFrame > 0 else {
+            return nil
+        }
+
+        // Prepare the destination AudioBufferList with correct byte sizes.
+        let destinationList = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList)
+        let bytesToCopy = Int(frameCount) * bytesPerFrame
+        for index in 0..<destinationList.count {
+            var destinationBuffer = destinationList[index]
+            destinationBuffer.mDataByteSize = UInt32(bytesToCopy)
+            destinationList[index] = destinationBuffer
+        }
+
+        // Use Apple's official API to copy PCM data into the AudioBufferList.
+        let status = CMSampleBufferCopyPCMDataIntoAudioBufferList(
+            sampleBuffer,
+            at: 0,
+            frameCount: Int32(frameCount),
+            into: destinationList.unsafeMutablePointer
+        )
+        guard status == noErr else {
+            return nil
+        }
+
+        // Convert big-endian samples to native endianness in place.
+        if isBigEndian {
+            let bufferList = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList)
+            for buffer in bufferList {
+                guard let mData = buffer.mData else { continue }
+                if commonFormat == .pcmFormatInt16 {
+                    let sampleCount = Int(buffer.mDataByteSize) / MemoryLayout<Int16>.size
+                    let intPtr = mData.assumingMemoryBound(to: Int16.self)
+                    for i in 0..<sampleCount {
+                        intPtr[i] = Int16(bigEndian: intPtr[i])
+                    }
+                } else if commonFormat == .pcmFormatFloat32 {
+                    let sampleCount = Int(buffer.mDataByteSize) / MemoryLayout<UInt32>.size
+                    let intPtr = mData.assumingMemoryBound(to: UInt32.self)
+                    for i in 0..<sampleCount {
+                        intPtr[i] = intPtr[i].byteSwapped
+                    }
+                }
+            }
+        }
+
+        return pcmBuffer
+    }
+
+    // MARK: - Format conversion
+
+    /// Converts `inputBuffer` to `outputFormat` if the formats differ.
+    /// Returns the input unchanged when formats already match, or `nil` if conversion fails.
+    func convertIfRequired(
+        _ inputBuffer: AVAudioPCMBuffer,
+        to outputFormat: AVAudioFormat
+    ) -> AVAudioPCMBuffer? {
+        if formatsMatch(inputBuffer.format, outputFormat) {
+            return inputBuffer
+        }
+
+        // Create or reuse converter for current format pair
+        if converter == nil
+            || !formatsMatch(converterInputFormat, inputBuffer.format)
+            || !formatsMatch(converterOutputFormat, outputFormat) {
+            converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat)
+            converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue
+            converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering
+            converterInputFormat = inputBuffer.format
+            converterOutputFormat = outputFormat
+        }
+
+        guard let converter = converter else {
+            return nil
+        }
+
+        // Calculate output frame capacity from sample rate ratio
+        let inputFrames = Double(inputBuffer.frameLength)
+        let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate
+        let outputFrameCapacity = AVAudioFrameCount(max(1, ceil(inputFrames * ratio)))
+
+        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else {
+            return nil
+        }
+
+        var error: NSError?
+        var didProvideData = false
+        let status = converter.convert(to: outputBuffer, error: &error) { _, outStatus in
+            if didProvideData {
+                outStatus.pointee = .noDataNow
+                return nil
+            }
+            guard inputBuffer.frameLength > 0 else {
+                outStatus.pointee = .noDataNow
+                return nil
+            }
+            didProvideData = true
+            outStatus.pointee = .haveData
+            return inputBuffer
+        }
+
+        if status == .error || error != nil {
+            return nil
+        }
+
+        guard outputBuffer.frameLength > 0 else {
+            return nil
+        }
+
+        return outputBuffer
+    }
+
+    // MARK: - Silence detection
+
+    /// Returns `true` when channel 0's RMS is at or below -60 dB; `false` for non-float buffers.
+    static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool {
+        guard let channelData = buffer.floatChannelData else {
+            return false
+        }
+
+        let frameCount = vDSP_Length(buffer.frameLength)
+        guard frameCount > 0 else {
+            return true
+        }
+
+        var rms: Float = 0
+        vDSP_rmsqv(channelData[0], 1, &rms, frameCount)
+
+        let rmsDB = 20 * log10(max(rms, Float.ulpOfOne))
+        return rmsDB <= silenceThresholdDB
+    }
+
+    // MARK: - Cleanup
+
+    func reset() {
+        converter = nil
+        converterInputFormat = nil
+        converterOutputFormat = nil
+    }
+
+    // MARK: - Private
+
+    private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool {
+        guard let lhs = lhs, let rhs = rhs else { return false }
+        return lhs.sampleRate == rhs.sampleRate
+            && lhs.channelCount == rhs.channelCount
+            && lhs.commonFormat == rhs.commonFormat
+            && lhs.isInterleaved == rhs.isInterleaved
+    }
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
new file mode 100644
index 000000000..173e6750e
--- /dev/null
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
@@ -0,0 +1,184 @@
+//
+// Copyright © 2026 Stream.io Inc. All rights reserved.
+//
+
+import AVFoundation
+import CoreMedia
+import WebRTC
+
+/// Mixes screen share audio into the WebRTC microphone capture stream via
+/// `RTCAudioCustomProcessingDelegate` — direct PCM additive mixing in the
+/// WebRTC capture post-processing pipeline.
+///
+/// Set as `capturePostProcessingDelegate` on `RTCDefaultAudioProcessingModule`.
+/// The delegate callback runs after AEC/AGC/NS, so screen audio passes through
+/// without echo cancellation interference.
+///
+/// ```
+/// RPScreenRecorder → convert → ring buffer → audioProcessingProcess → encoding
+///                   (44100→48k)   (producer)       (consumer)
+/// ```
+///
+/// **Important:** `RTCAudioBuffer` uses FloatS16 format (Float32 in the Int16
+/// range -32768…32767). Audio from `AVAudioConverter` (normalized -1…1) must
+/// be scaled by 32768 before mixing.
+@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate {
+
+    /// Ring buffer for passing converted audio from the RPScreenRecorder callback
+    /// thread (producer) to the audio processing thread (consumer).
+    /// Capacity: 1 second of mono Float32 at 48 kHz.
+    private let ringBuffer = AudioRingBuffer(capacity: 48000)
+    private let audioConverter = ScreenShareAudioConverter()
+
+    private var isMixing = false
+    /// Processing format from `audioProcessingInitialize`.
+    private var processingSampleRate: Double = 0
+    private var processingChannels: Int = 0
+    /// Target format for conversion, built from processing parameters.
+    private var targetFormat: AVAudioFormat?
+
+    /// Scale factor: RTCAudioBuffer uses FloatS16 format (Float32 values in the
+    /// Int16 range -32768…32767), NOT normalized Float32 (-1…1).
+    /// AVAudioConverter produces normalized Float32, so we must scale up.
+    private static let floatS16Scale: Float = 32768.0
+
+    /// Bounds of the FloatS16 sample range; mixed samples are clamped to them.
+    private static let floatS16Min: Float = -32768.0
+    private static let floatS16Max: Float = 32767.0
+
+    /// Scratch storage reused across processing callbacks so that mixing does
+    /// not allocate a temporary buffer for every chunk. It only grows, and is
+    /// touched exclusively by `mixFromRingBuffer`.
+    private var scratch = [Float]()
+
+    // MARK: - RTCAudioCustomProcessingDelegate
+
+    /// Called by WebRTC when the processing pipeline initializes or reconfigures.
+    /// May be called multiple times (e.g., on route changes).
+    ///
+    /// Rebuilds the conversion target format from the new parameters and resets
+    /// the ring buffer and the converter. If either parameter is not positive,
+    /// no target format is set, so `enqueue(_:)` discards audio until the next
+    /// valid call.
+    public func audioProcessingInitialize(sampleRate: Int, channels: Int) {
+        processingSampleRate = Double(sampleRate)
+        processingChannels = channels
+
+        // `AVAudioChannelCount(_:)` traps on a negative value, so validate
+        // before building the format.
+        if sampleRate > 0, channels > 0 {
+            targetFormat = AVAudioFormat(
+                commonFormat: .pcmFormatFloat32,
+                sampleRate: processingSampleRate,
+                channels: AVAudioChannelCount(channels),
+                interleaved: false
+            )
+        } else {
+            targetFormat = nil
+        }
+
+        ringBuffer.reset()
+        audioConverter.reset()
+    }
+
+    /// Called on the audio processing thread for each captured audio chunk.
+    /// Reads from the ring buffer and ADDs screen audio samples to the mic buffer.
+    /// Does nothing while mixing is disabled or when the chunk is empty.
+    public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) {
+        guard isMixing else { return }
+
+        let frames = Int(audioBuffer.frames)
+        let channels = Int(audioBuffer.channels)
+        guard frames > 0, channels > 0 else { return }
+
+        mixFromRingBuffer(into: audioBuffer, frames: frames, channels: channels)
+    }
+
+    /// Called when the processing pipeline is released.
+    /// Resets the ring buffer and the converter and clears the target format,
+    /// so `enqueue(_:)` discards audio until the pipeline is initialized again.
+    public func audioProcessingRelease() {
+        ringBuffer.reset()
+        audioConverter.reset()
+        targetFormat = nil
+    }
+
+    // MARK: - Public API
+
+    /// Enable audio mixing. After this, `enqueue(_:)` writes to the ring buffer
+    /// and the processing callback reads from it. No-op when already mixing.
+    @objc public func startMixing() {
+        guard !isMixing else { return }
+        ringBuffer.reset()
+        isMixing = true
+    }
+
+    /// Stop audio mixing and reset the ring buffer and the converter.
+    /// No-op when mixing is not active.
+    @objc public func stopMixing() {
+        guard isMixing else { return }
+        isMixing = false
+        ringBuffer.reset()
+        audioConverter.reset()
+    }
+
+    /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer.
+    /// Converts to the processing format and writes to the ring buffer.
+    ///
+    /// The buffer is dropped when mixing is disabled, when no target format is
+    /// available, when extraction or conversion fails, or when the converted
+    /// audio is silent. Only the first channel is written to the ring buffer.
+    ///
+    /// NOTE(review): `isMixing` and `targetFormat` are read here without
+    /// synchronization although other threads may write them — confirm.
+    @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) {
+        guard isMixing, let targetFmt = targetFormat else { return }
+
+        guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return }
+
+        let buffer: AVAudioPCMBuffer
+        if pcm.format.sampleRate != targetFmt.sampleRate
+            || pcm.format.channelCount != targetFmt.channelCount
+            || pcm.format.commonFormat != targetFmt.commonFormat
+            || pcm.format.isInterleaved != targetFmt.isInterleaved {
+            guard let converted = audioConverter.convertIfRequired(pcm, to: targetFmt) else { return }
+            buffer = converted
+        } else {
+            buffer = pcm
+        }
+
+        if ScreenShareAudioConverter.isSilent(buffer) { return }
+
+        guard let channelData = buffer.floatChannelData else { return }
+        ringBuffer.write(channelData[0], count: Int(buffer.frameLength))
+    }
+
+    // MARK: - Private mixing
+
+    /// Read from ring buffer and ADD to the mic audio buffer (additive mixing).
+    /// Ring buffer contains normalized Float32 [-1,1] from AVAudioConverter;
+    /// RTCAudioBuffer uses FloatS16 [-32768,32767], so we scale before adding.
+    /// The sum is clamped to the FloatS16 range so loud mic + screen audio
+    /// cannot leave it. The same screen samples are added to every channel.
+    private func mixFromRingBuffer(into audioBuffer: RTCAudioBuffer, frames: Int, channels: Int) {
+        // Grow the scratch storage only when a larger chunk arrives.
+        if scratch.count < frames {
+            scratch = [Float](repeating: 0, count: frames)
+        }
+
+        scratch.withUnsafeMutableBufferPointer { storage in
+            guard let samples = storage.baseAddress else { return }
+
+            let framesRead = ringBuffer.read(into: samples, count: frames)
+            guard framesRead > 0 else { return }
+
+            for ch in 0..<channels {
+                let channelData = audioBuffer.rawBuffer(forChannel: ch)
+                for i in 0..<framesRead {
+                    let mixed = channelData[i] + samples[i] * Self.floatS16Scale
+                    channelData[i] = min(max(mixed, Self.floatS16Min), Self.floatS16Max)
+                }
+            }
+        }
+    }
+}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
deleted file mode 100644
index 83e27273c..000000000
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioConverter.swift
+++ /dev/null
@@ -1,202 +0,0 @@
-//
-// Copyright © 2026 Stream.io Inc. All rights reserved.
-//
-
-import Accelerate
-import AVFoundation
-import CoreMedia
-
-/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into
-/// `AVAudioPCMBuffer`s suitable for scheduling on an `AVAudioPlayerNode`.
-///
-/// Handles:
-/// - CMSampleBuffer → AVAudioPCMBuffer extraction (float32, int16, interleaved, non-interleaved)
-/// - Sample rate / channel / format conversion via cached AVAudioConverter
-/// - Silence detection via vDSP RMS analysis
-final class ScreenShareAudioConverter {
-
-    // MARK: - Constants
-
-    /// Buffers with RMS below this threshold (in dB) are considered silent.
-    private static let silenceThresholdDB: Float = -60.0
-
-    // MARK: - Cached converter
-
-    private var converter: AVAudioConverter?
-    private var converterInputFormat: AVAudioFormat?
-    private var converterOutputFormat: AVAudioFormat?
-
-    // MARK: - CMSampleBuffer → AVAudioPCMBuffer
-
-    /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer`.
-    ///
-    /// Supports float32 and int16 PCM formats, both interleaved and
-    /// non-interleaved layouts.
-    func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? {
-        guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer) else {
-            return nil
-        }
-
-        guard let asbdPtr = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else {
-            return nil
-        }
-
-        guard let avFormat = AVAudioFormat(streamDescription: asbdPtr) else {
-            return nil
-        }
-
-        let frameCount = CMSampleBufferGetNumSamples(sampleBuffer)
-        guard frameCount > 0,
-              let pcmBuffer = AVAudioPCMBuffer(pcmFormat: avFormat, frameCapacity: AVAudioFrameCount(frameCount)) else {
-            return nil
-        }
-
-        pcmBuffer.frameLength = AVAudioFrameCount(frameCount)
-
-        guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
-            return nil
-        }
-
-        var lengthAtOffset: Int = 0
-        var totalLength: Int = 0
-        var dataPointer: UnsafeMutablePointer<Int8>?
-
-        let status = CMBlockBufferGetDataPointer(
-            blockBuffer,
-            atOffset: 0,
-            lengthAtOffsetOut: &lengthAtOffset,
-            totalLengthOut: &totalLength,
-            dataPointerOut: &dataPointer
-        )
-        guard status == kCMBlockBufferNoErr, let dataPointer = dataPointer else {
-            return nil
-        }
-
-        // Copy audio data into PCM buffer
-        if let floatData = pcmBuffer.floatChannelData {
-            let channelCount = Int(avFormat.channelCount)
-            let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame)
-
-            if avFormat.isInterleaved {
-                // Interleaved: single buffer, copy all at once
-                memcpy(floatData[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame))
-            } else {
-                // Non-interleaved: separate buffers per channel
-                let framesSize = Int(frameCount) * MemoryLayout<Float>.size
-                for ch in 0..<channelCount {
-                    memcpy(floatData[ch], dataPointer.advanced(by: ch * framesSize), framesSize)
-                }
-            }
-        } else if let int16Data = pcmBuffer.int16ChannelData {
-            let bytesPerFrame = Int(avFormat.streamDescription.pointee.mBytesPerFrame)
-            memcpy(int16Data[0], dataPointer, min(totalLength, Int(frameCount) * bytesPerFrame))
-        } else {
-            return nil
-        }
-
-        return pcmBuffer
-    }
-
-    // MARK: - Format conversion
-
-    /// Converts `inputBuffer` to `outputFormat` if the formats differ.
-    /// Returns the input buffer unchanged when formats already match.
-    ///
-    /// Uses mastering-quality sample rate conversion, matching the Swift SDK's
-    /// `AudioConverter` implementation.
-    func convertIfRequired(
-        _ inputBuffer: AVAudioPCMBuffer,
-        to outputFormat: AVAudioFormat
-    ) -> AVAudioPCMBuffer? {
-        // Identity optimization: skip conversion when formats match
-        if formatsMatch(inputBuffer.format, outputFormat) {
-            return inputBuffer
-        }
-
-        // Create or reuse converter for current format pair
-        if converter == nil
-            || !formatsMatch(converterInputFormat, inputBuffer.format)
-            || !formatsMatch(converterOutputFormat, outputFormat) {
-            converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat)
-            converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue
-            converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering
-            converterInputFormat = inputBuffer.format
-            converterOutputFormat = outputFormat
-        }
-
-        guard let converter = converter else {
-            return nil
-        }
-
-        // Calculate output frame capacity from sample rate ratio
-        let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate
-        let outputFrameCapacity = AVAudioFrameCount(ceil(Double(inputBuffer.frameLength) * ratio))
-
-        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else {
-            return nil
-        }
-
-        var error: NSError?
-        var hasData = true
-        converter.convert(to: outputBuffer, error: &error) { _, outStatus in
-            if hasData {
-                outStatus.pointee = .haveData
-                hasData = false
-                return inputBuffer
-            } else {
-                outStatus.pointee = .noDataNow
-                return nil
-            }
-        }
-
-        if error != nil {
-            return nil
-        }
-
-        return outputBuffer
-    }
-
-    // MARK: - Silence detection
-
-    /// Returns `true` if the buffer is silent (RMS below -60 dB).
-    ///
-    /// For non-float formats (e.g., int16 from RPScreenRecorder), this returns
-    /// `false` — silence detection requires float data for vDSP, and these
-    /// buffers will be converted before scheduling anyway.
-    static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool {
-        guard let channelData = buffer.floatChannelData else {
-            return false
-        }
-
-        let frameCount = vDSP_Length(buffer.frameLength)
-        guard frameCount > 0 else {
-            return true
-        }
-
-        var rms: Float = 0
-        vDSP_rmsqv(channelData[0], 1, &rms, frameCount)
-
-        let rmsDB = 20 * log10(max(rms, Float.ulpOfOne))
-        return rmsDB <= silenceThresholdDB
-    }
-
-    // MARK: - Cleanup
-
-    func reset() {
-        converter = nil
-        converterInputFormat = nil
-        converterOutputFormat = nil
-    }
-
-    // MARK: - Private
-
-    /// Compares two formats by sample rate, channel count, common format,
-    /// and interleaving — matching the Swift SDK's `AVAudioFormat+Equality`.
-    private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool {
-        guard let lhs = lhs, let rhs = rhs else { return false }
-        return lhs.sampleRate == rhs.sampleRate
-            && lhs.channelCount == rhs.channelCount
-            && lhs.commonFormat == rhs.commonFormat
-            && lhs.isInterleaved == rhs.isInterleaved
-    }
-}
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
deleted file mode 100644
index 011f9d46a..000000000
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShareAudioMixer.swift
+++ /dev/null
@@ -1,168 +0,0 @@
-//
-// Copyright © 2026 Stream.io Inc. All rights reserved.
-//
-
-import AVFoundation
-import CoreMedia
-import WebRTC
-
-/// Mixes screen share audio (from RPScreenRecorder `.audioApp` buffers) into the
-/// WebRTC microphone capture stream by inserting an `AVAudioPlayerNode` and
-/// `AVAudioMixerNode` into the engine's input graph.
-///
-/// Graph topology (wired in `onConfigureInputFromSource`):
-/// ```
-/// source (mic VP) --> mixerNode --> destination (WebRTC capture)
-///                        ^
-/// playerNode -----------/
-/// ```
-///
-/// The mixer stays dormant (no nodes attached) until `startMixing` is called.
-/// Screen audio buffers are scheduled on the player node via `enqueue(_:)`.
-@objc public final class ScreenShareAudioMixer: NSObject, AudioGraphConfigurationDelegate {
-
-    // MARK: - Audio graph nodes
-
-    private let playerNode = AVAudioPlayerNode()
-    private let mixerNode = AVAudioMixerNode()
-
-    // MARK: - Audio conversion
-
-    private let audioConverter = ScreenShareAudioConverter()
-
-    // MARK: - State
-
-    private var isMixing = false
-
-    /// The engine reference from the last `onConfigureInputFromSource` call.
-    /// Used to detach nodes on cleanup.
-    private weak var currentEngine: AVAudioEngine?
-
-    /// Format of the input graph path, used for converting screen audio.
-    private var graphFormat: AVAudioFormat?
-
-    /// Whether our nodes are currently attached to the engine.
-    private var nodesAttached = false
-
-    // MARK: - Init
-
-    @objc public override init() {
-        super.init()
-    }
-
-    // MARK: - AudioGraphConfigurationDelegate
-
-    public func onConfigureInputFromSource(
-        _ engine: AVAudioEngine,
-        source: AVAudioNode?,
-        destination: AVAudioNode,
-        format: AVAudioFormat
-    ) {
-        currentEngine = engine
-        graphFormat = format
-
-        guard isMixing else { return }
-
-        attachAndWireNodes(engine: engine, source: source, destination: destination, format: format)
-    }
-
-    public func onDidStopEngine(_ engine: AVAudioEngine) {
-        detachNodes(from: engine)
-    }
-
-    public func onDidDisableEngine(_ engine: AVAudioEngine) {
-        detachNodes(from: engine)
-    }
-
-    public func onWillReleaseEngine(_ engine: AVAudioEngine) {
-        detachNodes(from: engine)
-        currentEngine = nil
-        graphFormat = nil
-    }
-
-    // MARK: - Public API
-
-    /// Enable audio mixing. Call when screen share with audio starts.
-    ///
-    /// If the engine is already running (i.e., `onConfigureInputFromSource` has
-    /// already fired), this triggers an ADM reconfiguration so the graph gets
-    /// rewired with our nodes.
-    @objc public func startMixing() {
-        guard !isMixing else { return }
-        isMixing = true
-    }
-
-    /// Stop audio mixing and detach nodes from the engine.
-    @objc public func stopMixing() {
-        guard isMixing else { return }
-        isMixing = false
-
-        playerNode.stop()
-        if let engine = currentEngine {
-            detachNodes(from: engine)
-        }
-        audioConverter.reset()
-    }
-
-    /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer.
-    @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) {
-        guard isMixing, let targetFormat = graphFormat else { return }
-
-        // 1. CMSampleBuffer → AVAudioPCMBuffer
-        guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return }
-
-        // 2. Silence detection
-        if ScreenShareAudioConverter.isSilent(pcm) { return }
-
-        // 3. Convert to graph format (e.g. 48 kHz / 1 ch / float32)
-        let buffer: AVAudioPCMBuffer
-        if pcm.format.sampleRate != targetFormat.sampleRate
-            || pcm.format.channelCount != targetFormat.channelCount
-            || pcm.format.commonFormat != targetFormat.commonFormat
-            || pcm.format.isInterleaved != targetFormat.isInterleaved {
-            guard let converted = audioConverter.convertIfRequired(pcm, to: targetFormat) else { return }
-            buffer = converted
-        } else {
-            buffer = pcm
-        }
-
-        // 4. Schedule on player node
-        guard nodesAttached else { return }
-
-        playerNode.scheduleBuffer(buffer)
-
-        if !playerNode.isPlaying {
-            playerNode.play()
-        }
-    }
-
-    // MARK: - Private graph management
-
-    private func attachAndWireNodes(
-        engine: AVAudioEngine,
-        source: AVAudioNode?,
-        destination: AVAudioNode,
-        format: AVAudioFormat
-    ) {
-        detachNodes(from: engine)
-
-        engine.attach(mixerNode)
-        engine.attach(playerNode)
-
-        if let source = source {
-            engine.connect(source, to: mixerNode, format: format)
-        }
-        engine.connect(playerNode, to: mixerNode, format: format)
-        engine.connect(mixerNode, to: destination, format: format)
-
-        nodesAttached = true
-    }
-
-    private func detachNodes(from engine: AVAudioEngine) {
-        guard nodesAttached else { return }
-
-        engine.detach(playerNode)
-        engine.detach(mixerNode)
-        nodesAttached = false
-    }
-}
diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m
index 0c14160d1..4455e60f2 100644
--- a/ios/RCTWebRTC/WebRTCModule.m
+++ b/ios/RCTWebRTC/WebRTCModule.m
@@ -86,6 +86,17 @@ - (instancetype)init {
         RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class]));
         RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class]));
 
+        // Unless a custom audio device is supplied, ensure an audio processing module
+        // exists so screen share audio mixing can use capturePostProcessingDelegate.
+        if (audioProcessingModule == nil && audioDevice == nil) {
+            audioProcessingModule = [[RTCDefaultAudioProcessingModule alloc]
+                initWithConfig:nil
+                capturePostProcessingDelegate:nil
+                renderPreProcessingDelegate:nil];
+            options.audioProcessingModule = audioProcessingModule;
+            RCTLogInfo(@"Created default audio processing module for screen share audio mixing");
+        }
+
         if (audioProcessingModule != nil) {
             if (audioDevice != nil) {
                 NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. Ignoring audioDevice.");

From f8929b5293ec1efd647de2ed21c1714a0dea4a7a Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Wed, 25 Mar 2026 11:36:51 +0100
Subject: [PATCH 13/14] chore: code cleanup

---
 .../Utils/AudioDeviceModule/AudioDeviceModule.swift   | 11 -----------
 .../ScreenShare/ScreenShareAudioMixer.swift           |  1 -
 2 files changed, 12 deletions(-)

diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
index c71dd7cbd..91f0cee3a 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift
@@ -272,15 +272,6 @@ import WebRTC
         source.isVoiceProcessingBypassed = isPreferred
     }
 
-    /// Sets voice processing bypass on the underlying audio device module.
-    /// When bypassed, echo cancellation / AGC / noise suppression are disabled,
-    /// which prevents the system from treating screen share audio as echo.
-    /// - Parameter isBypassed: `true` to bypass voice processing, `false` to restore.
-    @objc public func setVoiceProcessingBypassed(_ isBypassed: Bool) {
-        source.isVoiceProcessingBypassed = isBypassed
-        isVoiceProcessingBypassedSubject.send(isBypassed)
-    }
-
     /// Starts or stops speaker playout on the ADM, retrying transient failures.
     /// - Parameter isActive: `true` to start playout, `false` to stop.
     /// - Throws: `AudioDeviceError` when WebRTC returns a non-zero status.
@@ -541,8 +532,6 @@ import WebRTC
                 format: format
             )
         )
-
-        // Notify the audio graph delegate synchronously — this must happen
         audioLevelsAdapter.installInputTap(
             on: destination,
             format: format,
diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
index 173e6750e..d45a10e4e 100644
--- a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
+++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift
@@ -34,7 +34,6 @@ import WebRTC
     /// Processing format from `audioProcessingInitialize`.
     private var processingSampleRate: Double = 0
     private var processingChannels: Int = 0
-    /// Target format for conversion, built from processing parameters.
     private var targetFormat: AVAudioFormat?
 
     /// Scale factor: RTCAudioBuffer uses FloatS16 format (Float32 values in the

From c1b1d6c186f45ec9be5edcb14b37579d58faf995 Mon Sep 17 00:00:00 2001
From: Artem Grintsevich <greenfrvr@gmail.com>
Date: Fri, 27 Mar 2026 16:33:25 +0100
Subject: [PATCH 14/14] chore: small tweak

---
 ios/RCTWebRTC/InAppScreenCapturer.m | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m
index cf14a938b..893761ae9 100644
--- a/ios/RCTWebRTC/InAppScreenCapturer.m
+++ b/ios/RCTWebRTC/InAppScreenCapturer.m
@@ -108,9 +108,11 @@ - (void)stopCapture {
 
 - (void)registerAppStateObservers {
     if (_observingAppState) return;
-    _observingAppState = YES;
 
     dispatch_async(dispatch_get_main_queue(), ^{
+        if (self->_observingAppState || !self->_capturing) return;
+        self->_observingAppState = YES;
+
         [[NSNotificationCenter defaultCenter] addObserver:self
                                                  selector:@selector(appDidBecomeActive)
                                                      name:UIApplicationDidBecomeActiveNotification