diff --git a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java index 861919513..c17254a8b 100644 --- a/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java +++ b/android/src/main/java/com/oney/WebRTCModule/GetUserMediaImpl.java @@ -39,7 +39,7 @@ * The implementation of {@code getUserMedia} extracted into a separate file in * order to reduce complexity and to (somewhat) separate concerns. */ -class GetUserMediaImpl { +public class GetUserMediaImpl { /** * The {@link Log} tag with which {@code GetUserMediaImpl} is to log. */ @@ -62,6 +62,15 @@ class GetUserMediaImpl { private Promise displayMediaPromise; private Intent mediaProjectionPermissionResultData; + /** + * Returns the retained MediaProjection permission result data Intent, or {@code null} if none is stored. + * It is meant for creating a MediaProjection for audio capture via AudioPlaybackCaptureConfiguration. + * NOTE(review): Android 14+ documents a consent Intent as single-use for getMediaProjection - confirm reuse is safe. + */ + public Intent getMediaProjectionPermissionResultData() { + return mediaProjectionPermissionResultData; + } + private final ServiceConnection mediaProjectionServiceConnection = new ServiceConnection() { @Override public void onServiceConnected(ComponentName name, IBinder service) { @@ -355,7 +364,9 @@ private void createScreenStream() { } // Cleanup - mediaProjectionPermissionResultData = null; + // Note: mediaProjectionPermissionResultData is intentionally NOT nulled here. + // It is retained so it can be reused to create a MediaProjection for + // screen share audio capture (AudioPlaybackCaptureConfiguration). 
displayMediaPromise = null;
     }

diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
index 6e981f077..831232cef 100644
--- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
+++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModule.java
@@ -129,9 +129,66 @@ private JavaAudioDeviceModule createAudioDeviceModule(ReactApplicationContext re
                 .setUseHardwareAcousticEchoCanceler(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
                 .setUseHardwareNoiseSuppressor(Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q)
                 .setUseStereoOutput(true)
+                .setAudioBufferCallback((audioBuffer, audioFormat, channelCount, sampleRate, bytesRead, captureTimeNs) -> {
+                    if (bytesRead > 0) {
+                        WebRTCModuleOptions.ScreenAudioBytesProvider provider =
+                                WebRTCModuleOptions.getInstance().screenAudioBytesProvider;
+                        if (provider != null) {
+                            java.nio.ByteBuffer screenBuffer = provider.getScreenAudioBytes(bytesRead);
+                            if (screenBuffer != null && screenBuffer.remaining() > 0) {
+                                mixScreenAudioIntoBuffer(audioBuffer, screenBuffer, bytesRead);
+                            }
+                        }
+                    }
+                    return captureTimeNs;
+                })
                 .createAudioDeviceModule();
     }

+    /**
+     * Mixes screen audio into the microphone buffer in place using PCM 16-bit
+     * little-endian additive mixing, clamping each sum to the {@code short} range.
+     *
+     * <p>Both buffers are read from index 0 and only their overlapping range is
+     * mixed: the first {@code min(micSamples, screenSamples)} samples, where
+     * {@code micSamples} is bounded by {@code bytesRead} and by the mic buffer's
+     * limit. Mic samples past the end of the screen data are left as-is. Screen
+     * samples past the valid mic range are dropped instead of written: an absolute
+     * {@code put} beyond the mic view's limit throws
+     * {@link IndexOutOfBoundsException}, and bytes past {@code bytesRead} are
+     * outside the range the caller passed in as valid.
+     *
+     * <p>Side effects: both buffers get position 0 and little-endian byte order.
+     *
+     * @param micBuffer    microphone PCM buffer, modified in place
+     * @param screenBuffer screen audio PCM buffer, only read from
+     * @param bytesRead    number of valid bytes in {@code micBuffer}
+     */
+    private static void mixScreenAudioIntoBuffer(java.nio.ByteBuffer micBuffer,
+                                                 java.nio.ByteBuffer screenBuffer,
+                                                 int bytesRead) {
+        micBuffer.position(0);
+        screenBuffer.position(0);
+
+        micBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
+        screenBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
+
+        java.nio.ShortBuffer micShorts = micBuffer.asShortBuffer();
+        java.nio.ShortBuffer screenShorts = screenBuffer.asShortBuffer();
+
+        // Two bytes per 16-bit sample; never trust bytesRead beyond the buffer itself.
+        int micSamples = Math.min(bytesRead / 2, micShorts.remaining());
+        // Mix only where both buffers have data so every index is valid for both views.
+        int samplesToMix = Math.min(micSamples, screenShorts.remaining());
+
+        for (int i = 0; i < samplesToMix; i++) {
+            int sum = micShorts.get(i) + screenShorts.get(i);
+            // Clamp instead of wrapping so overflow saturates rather than inverting sign.
+            sum = Math.max(Short.MIN_VALUE, Math.min(Short.MAX_VALUE, sum));
+            micShorts.put(i, (short) sum);
+        }
+    }
+
     @NonNull
     @Override
     public String getName() {
@@ -142,6 +199,10 @@ public AudioDeviceModule getAudioDeviceModule() {
         return mAudioDeviceModule;
     }

+    public GetUserMediaImpl getUserMediaImpl() {
+        return getUserMediaImpl;
+    }
+
     public PeerConnectionObserver getPeerConnectionObserver(int id) {
         return mPeerConnectionObservers.get(id);
     }
diff --git a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
index 6187c9472..24e53f8ce 100644
--- a/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
+++ b/android/src/main/java/com/oney/WebRTCModule/WebRTCModuleOptions.java
@@ -8,6 +8,8 @@
 import org.webrtc.VideoEncoderFactory;
 import org.webrtc.audio.AudioDeviceModule;

+import java.nio.ByteBuffer;
+
 public class WebRTCModuleOptions {
     private static
WebRTCModuleOptions instance; @@ -20,6 +22,29 @@ public class WebRTCModuleOptions { public boolean enableMediaProjectionService; public AudioProcessingFactoryProvider audioProcessingFactoryProvider; + /** + * Provider for screen share audio bytes. When set, the AudioDeviceModule's + * AudioBufferCallback will mix screen audio into the mic buffer before + * WebRTC processing. This allows screen audio mixing to work alongside + * any audio processing factory (including noise cancellation). + * + * Set this when screen share audio capture starts, clear it when it stops. + */ + public volatile ScreenAudioBytesProvider screenAudioBytesProvider; + + /** + * Functional interface for providing screen audio bytes on demand. + */ + public interface ScreenAudioBytesProvider { + /** + * Returns a ByteBuffer containing screen audio PCM data. + * + * @param bytesRequested number of bytes to read (matching mic buffer size) + * @return ByteBuffer with screen audio, or null if not available + */ + ByteBuffer getScreenAudioBytes(int bytesRequested); + } + public static WebRTCModuleOptions getInstance() { if (instance == null) { instance = new WebRTCModuleOptions(); diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.h b/ios/RCTWebRTC/InAppScreenCaptureController.h new file mode 100644 index 000000000..535f5863f --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCaptureController.h @@ -0,0 +1,18 @@ +#import +#import "CaptureController.h" +#import "CapturerEventsDelegate.h" + +NS_ASSUME_NONNULL_BEGIN + +@class InAppScreenCapturer; + +@interface InAppScreenCaptureController : CaptureController + +- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer; + +/// The underlying RPScreenRecorder-based capturer. 
+@property(nonatomic, strong, readonly) InAppScreenCapturer *capturer; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios/RCTWebRTC/InAppScreenCaptureController.m b/ios/RCTWebRTC/InAppScreenCaptureController.m new file mode 100644 index 000000000..1b9561d39 --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCaptureController.m @@ -0,0 +1,49 @@ +#if TARGET_OS_IOS + +#import "InAppScreenCaptureController.h" +#import "InAppScreenCapturer.h" + +@interface InAppScreenCaptureController () +@end + +@implementation InAppScreenCaptureController + +- (instancetype)initWithCapturer:(nonnull InAppScreenCapturer *)capturer { + self = [super init]; + if (self) { + _capturer = capturer; + _capturer.eventsDelegate = self; + self.deviceId = @"in-app-screen-capture"; + } + return self; +} + +- (void)dealloc { + [self.capturer stopCapture]; +} + +- (void)startCapture { + [self.capturer startCapture]; +} + +- (void)stopCapture { + [self.capturer stopCapture]; +} + +- (NSDictionary *)getSettings { + return @{ + @"deviceId": self.deviceId ?: @"in-app-screen-capture", + @"groupId": @"", + @"frameRate": @(30) + }; +} + +#pragma mark - CapturerEventsDelegate + +- (void)capturerDidEnd:(RTCVideoCapturer *)capturer { + [self.eventsDelegate capturerDidEnd:capturer]; +} + +@end + +#endif diff --git a/ios/RCTWebRTC/InAppScreenCapturer.h b/ios/RCTWebRTC/InAppScreenCapturer.h new file mode 100644 index 000000000..3a8566676 --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCapturer.h @@ -0,0 +1,21 @@ +#import +#import +#import "CapturerEventsDelegate.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface InAppScreenCapturer : RTCVideoCapturer + +@property(nonatomic, weak) id eventsDelegate; + +/// Callback invoked for each .audioApp CMSampleBuffer from RPScreenRecorder. +/// Set this before calling startCapture if audio mixing is desired. 
+@property(nonatomic, copy, nullable) void (^audioBufferHandler)(CMSampleBufferRef); + +- (instancetype)initWithDelegate:(__weak id)delegate; +- (void)startCapture; +- (void)stopCapture; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios/RCTWebRTC/InAppScreenCapturer.m b/ios/RCTWebRTC/InAppScreenCapturer.m new file mode 100644 index 000000000..893761ae9 --- /dev/null +++ b/ios/RCTWebRTC/InAppScreenCapturer.m @@ -0,0 +1,169 @@ +#if TARGET_OS_IOS + +#import +#import +#import + +#import "InAppScreenCapturer.h" + +@implementation InAppScreenCapturer { + BOOL _capturing; + BOOL _shouldResumeOnForeground; + BOOL _observingAppState; +} + +- (instancetype)initWithDelegate:(__weak id)delegate { + self = [super initWithDelegate:delegate]; + return self; +} + +- (void)startCapture { + if (_capturing) { + return; + } + _capturing = YES; + + [self startRPScreenRecorder]; +} + +- (void)startRPScreenRecorder { + RPScreenRecorder *recorder = [RPScreenRecorder sharedRecorder]; + recorder.microphoneEnabled = NO; // WebRTC handles mic input + + __weak __typeof__(self) weakSelf = self; + [recorder startCaptureWithHandler:^(CMSampleBufferRef _Nonnull sampleBuffer, + RPSampleBufferType bufferType, + NSError * _Nullable error) { + __strong __typeof__(weakSelf) strongSelf = weakSelf; + if (!strongSelf || error || !strongSelf->_capturing) { + return; + } + + switch (bufferType) { + case RPSampleBufferTypeVideo: + [strongSelf processVideoSampleBuffer:sampleBuffer]; + break; + case RPSampleBufferTypeAudioApp: + if (strongSelf.audioBufferHandler) { + strongSelf.audioBufferHandler(sampleBuffer); + } + break; + case RPSampleBufferTypeAudioMic: + // Ignored — WebRTC handles mic capture via AudioDeviceModule + break; + } + } completionHandler:^(NSError * _Nullable error) { + __strong __typeof__(weakSelf) strongSelf = weakSelf; + if (!strongSelf) return; + + if (error) { + NSLog(@"[InAppScreenCapturer] startCapture failed: %@", error.localizedDescription); + strongSelf->_capturing = NO; + 
[strongSelf.eventsDelegate capturerDidEnd:strongSelf]; + return; + } + + // Capture started successfully — register for app lifecycle events. + // Done here (not in startCapture) so the RPScreenRecorder permission + // dialog doesn't trigger appWillResignActive before capture begins. + [strongSelf registerAppStateObservers]; + }]; +} + +- (void)processVideoSampleBuffer:(CMSampleBufferRef)sampleBuffer { + CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer); + if (!pixelBuffer) { + return; + } + + int64_t timeStampNs = (int64_t)(CMTimeGetSeconds( + CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * NSEC_PER_SEC); + + RTCCVPixelBuffer *rtcPixelBuffer = [[RTCCVPixelBuffer alloc] initWithPixelBuffer:pixelBuffer]; + RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithBuffer:rtcPixelBuffer + rotation:RTCVideoRotation_0 + timeStampNs:timeStampNs]; + + [self.delegate capturer:self didCaptureVideoFrame:videoFrame]; +} + +- (void)stopCapture { + if (!_capturing) { + return; + } + _capturing = NO; + _shouldResumeOnForeground = NO; + self.audioBufferHandler = nil; + + [self unregisterAppStateObservers]; + + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { + if (error) { + NSLog(@"[InAppScreenCapturer] stopCapture error: %@", error.localizedDescription); + } + }]; +} + +#pragma mark - App Lifecycle + +- (void)registerAppStateObservers { + if (_observingAppState) return; + + dispatch_async(dispatch_get_main_queue(), ^{ + if (self->_observingAppState || !self->_capturing) return; + self->_observingAppState = YES; + + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appDidBecomeActive) + name:UIApplicationDidBecomeActiveNotification + object:nil]; + [[NSNotificationCenter defaultCenter] addObserver:self + selector:@selector(appWillResignActive) + name:UIApplicationWillResignActiveNotification + object:nil]; + }); +} + +- (void)unregisterAppStateObservers { + if (!_observingAppState) 
return; + _observingAppState = NO; + + [[NSNotificationCenter defaultCenter] removeObserver:self + name:UIApplicationDidBecomeActiveNotification + object:nil]; + [[NSNotificationCenter defaultCenter] removeObserver:self + name:UIApplicationWillResignActiveNotification + object:nil]; +} + +- (void)appWillResignActive { + if (_capturing) { + _shouldResumeOnForeground = YES; + // Stop the RPScreenRecorder session — iOS suspends it in background anyway + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:^(NSError * _Nullable error) { + if (error) { + NSLog(@"[InAppScreenCapturer] background stop error: %@", error.localizedDescription); + } + }]; + } +} + +- (void)appDidBecomeActive { + if (_shouldResumeOnForeground && _capturing) { + _shouldResumeOnForeground = NO; + [self startRPScreenRecorder]; + } +} + +- (void)dealloc { + [self unregisterAppStateObservers]; + if (_capturing) { + _capturing = NO; + self.audioBufferHandler = nil; + [[RPScreenRecorder sharedRecorder] stopCaptureWithHandler:nil]; + } +} + +@end + +#endif diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift index f46c2c911..91f0cee3a 100644 --- a/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/AudioDeviceModule.swift @@ -179,6 +179,11 @@ import WebRTC /// Strong reference to the current engine so we can introspect it if needed. @objc public var engine: AVAudioEngine? + /// Screen share audio mixer. Implements `RTCAudioCustomProcessingDelegate` + /// and is set as `capturePostProcessingDelegate` on the + /// `RTCDefaultAudioProcessingModule` when screen share audio mixing starts. + @objc public let screenShareAudioMixer = ScreenShareAudioMixer() + /// Secondary observer that receives forwarded delegate callbacks. /// This allows the AudioDeviceModuleObserver to receive events and forward them to JS. 
private let delegateObserver: RTCAudioDeviceModuleDelegate @@ -228,7 +233,6 @@ import WebRTC .eraseToAnyPublisher() super.init() - _ = source.setMuteMode(.inputMixer) audioLevelsAdapter.subject = audioLevelSubject source.observer = self } diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift new file mode 100644 index 000000000..10e589ff1 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/AudioRingBuffer.swift @@ -0,0 +1,115 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import Darwin +import Foundation + +/// Thread-safe single-producer single-consumer ring buffer for Float32 audio samples. +/// +/// Uses `os_unfair_lock` for minimal-overhead synchronization between the +/// ReplayKit callback thread (writer) and the audio render thread (reader). +/// The lock is uncontended in the vast majority of cases (different cadences), +/// making it suitable for real-time audio contexts. +final class AudioRingBuffer { + + private let buffer: UnsafeMutablePointer + private let capacity: Int + private var writePos: Int = 0 + private var readPos: Int = 0 + private var lock = os_unfair_lock_s() + + /// Creates a ring buffer with the given capacity in frames. + /// - Parameter capacity: Maximum number of Float32 samples the buffer can hold. + init(capacity: Int) { + self.capacity = capacity + self.buffer = .allocate(capacity: capacity) + self.buffer.initialize(repeating: 0, count: capacity) + } + + deinit { + buffer.deallocate() + } + + /// Number of frames available to read (thread-safe). + var availableToRead: Int { + os_unfair_lock_lock(&lock) + let result = _availableToRead + os_unfair_lock_unlock(&lock) + return result + } + + // MARK: - Internal (lock held) + + private var _availableToRead: Int { + let w = writePos + let r = readPos + return (w >= r) ? 
(w - r) : (capacity - r + w) + } + + private var _availableToWrite: Int { + // Reserve 1 slot to distinguish full from empty. + return capacity - 1 - _availableToRead + } + + // MARK: - Producer API (ReplayKit thread) + + /// Writes up to `count` samples from `source` into the ring buffer. + /// - Returns: The number of samples actually written (may be less if buffer is full). + @discardableResult + func write(_ source: UnsafePointer, count: Int) -> Int { + os_unfair_lock_lock(&lock) + defer { os_unfair_lock_unlock(&lock) } + + let toWrite = min(count, _availableToWrite) + guard toWrite > 0 else { return 0 } + + let w = writePos + let firstPart = min(toWrite, capacity - w) + let secondPart = toWrite - firstPart + + memcpy(buffer.advanced(by: w), source, firstPart * MemoryLayout.size) + if secondPart > 0 { + memcpy(buffer, source.advanced(by: firstPart), secondPart * MemoryLayout.size) + } + + writePos = (w + toWrite) % capacity + return toWrite + } + + // MARK: - Consumer API (audio render thread) + + /// Reads up to `count` samples into `destination` from the ring buffer. + /// - Returns: The number of samples actually read (may be less if buffer is empty). + @discardableResult + func read(into destination: UnsafeMutablePointer, count: Int) -> Int { + os_unfair_lock_lock(&lock) + defer { os_unfair_lock_unlock(&lock) } + + let toRead = min(count, _availableToRead) + guard toRead > 0 else { return 0 } + + let r = readPos + let firstPart = min(toRead, capacity - r) + let secondPart = toRead - firstPart + + memcpy(destination, buffer.advanced(by: r), firstPart * MemoryLayout.size) + if secondPart > 0 { + memcpy(destination.advanced(by: firstPart), buffer, secondPart * MemoryLayout.size) + } + + readPos = (r + toRead) % capacity + return toRead + } + + // MARK: - Reset + + /// Clears all buffered data. Call when not concurrently accessed by both + /// producer and consumer, or when it is acceptable to lose data. 
+ func reset() { + os_unfair_lock_lock(&lock) + writePos = 0 + readPos = 0 + os_unfair_lock_unlock(&lock) + } +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift new file mode 100644 index 000000000..bc55fb988 --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioConverter.swift @@ -0,0 +1,233 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import Accelerate +import AudioToolbox +import AVFoundation +import CoreMedia + +/// Converts RPScreenRecorder `.audioApp` CMSampleBuffers into +/// `AVAudioPCMBuffer`s suitable for scheduling on an `AVAudioPlayerNode`. +/// +/// Handles: +/// - CMSampleBuffer → AVAudioPCMBuffer extraction via `CMSampleBufferCopyPCMDataIntoAudioBufferList` +/// - Sample rate / channel / format conversion via cached AVAudioConverter +/// - Silence detection via vDSP RMS analysis +final class ScreenShareAudioConverter { + + // MARK: - Constants + + /// Buffers with RMS below this threshold (in dB) are considered silent. + private static let silenceThresholdDB: Float = -60.0 + + // MARK: - Cached converter + + private var converter: AVAudioConverter? + private var converterInputFormat: AVAudioFormat? + private var converterOutputFormat: AVAudioFormat? + + // MARK: - CMSampleBuffer → AVAudioPCMBuffer + + /// Extracts audio data from a `CMSampleBuffer` into an `AVAudioPCMBuffer` + /// using Apple's `CMSampleBufferCopyPCMDataIntoAudioBufferList`. + /// + /// Matches the Swift SDK's `AVAudioPCMBuffer.from(_:)` implementation. + func pcmBuffer(from sampleBuffer: CMSampleBuffer) -> AVAudioPCMBuffer? { + guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer), + let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription) else { + return nil + } + + // Only linear PCM can be copied into AVAudioPCMBuffer. 
+ guard asbd.pointee.mFormatID == kAudioFormatLinearPCM else { + return nil + } + + // Inspect format flags to build the correct AVAudioFormat. + let formatFlags = asbd.pointee.mFormatFlags + let isFloat = (formatFlags & kAudioFormatFlagIsFloat) != 0 + let isSignedInt = (formatFlags & kAudioFormatFlagIsSignedInteger) != 0 + let isBigEndian = (formatFlags & kAudioFormatFlagIsBigEndian) != 0 + let isInterleaved = (formatFlags & kAudioFormatFlagIsNonInterleaved) == 0 + let bitsPerChannel = Int(asbd.pointee.mBitsPerChannel) + + // Choose an AVAudioCommonFormat compatible with the sample format. + let commonFormat: AVAudioCommonFormat + if isFloat, bitsPerChannel == 32 { + commonFormat = .pcmFormatFloat32 + } else if isSignedInt, bitsPerChannel == 16 { + commonFormat = .pcmFormatInt16 + } else { + return nil + } + + // Build AVAudioFormat from explicit parameters (not streamDescription) + // to ensure consistent format identity for downstream comparisons. + guard let inputFormat = AVAudioFormat( + commonFormat: commonFormat, + sampleRate: asbd.pointee.mSampleRate, + channels: asbd.pointee.mChannelsPerFrame, + interleaved: isInterleaved + ) else { + return nil + } + + let frameCount = AVAudioFrameCount(CMSampleBufferGetNumSamples(sampleBuffer)) + guard frameCount > 0, + let pcmBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: frameCount) else { + return nil + } + + pcmBuffer.frameLength = frameCount + + let bytesPerFrame = Int(asbd.pointee.mBytesPerFrame) + guard bytesPerFrame > 0 else { + return nil + } + + // Prepare the destination AudioBufferList with correct byte sizes. + let destinationList = UnsafeMutableAudioBufferListPointer(pcmBuffer.mutableAudioBufferList) + let bytesToCopy = Int(frameCount) * bytesPerFrame + for index in 0...size + let intPtr = mData.assumingMemoryBound(to: Int16.self) + for i in 0...size + let intPtr = mData.assumingMemoryBound(to: UInt32.self) + for i in 0.. AVAudioPCMBuffer? 
{ + if formatsMatch(inputBuffer.format, outputFormat) { + return inputBuffer + } + + // Create or reuse converter for current format pair + if converter == nil + || !formatsMatch(converterInputFormat, inputBuffer.format) + || !formatsMatch(converterOutputFormat, outputFormat) { + converter = AVAudioConverter(from: inputBuffer.format, to: outputFormat) + converter?.sampleRateConverterQuality = AVAudioQuality.max.rawValue + converter?.sampleRateConverterAlgorithm = AVSampleRateConverterAlgorithm_Mastering + converterInputFormat = inputBuffer.format + converterOutputFormat = outputFormat + } + + guard let converter = converter else { + return nil + } + + // Calculate output frame capacity from sample rate ratio + let inputFrames = Double(inputBuffer.frameLength) + let ratio = outputFormat.sampleRate / inputBuffer.format.sampleRate + let outputFrameCapacity = AVAudioFrameCount(max(1, ceil(inputFrames * ratio))) + + guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity) else { + return nil + } + + var error: NSError? + var didProvideData = false + let status = converter.convert(to: outputBuffer, error: &error) { _, outStatus in + if didProvideData { + outStatus.pointee = .noDataNow + return nil + } + guard inputBuffer.frameLength > 0 else { + outStatus.pointee = .noDataNow + return nil + } + didProvideData = true + outStatus.pointee = .haveData + return inputBuffer + } + + if status == .error || error != nil { + return nil + } + + guard outputBuffer.frameLength > 0 else { + return nil + } + + return outputBuffer + } + + // MARK: - Silence detection + + /// Returns `true` if the buffer is silent (RMS below -60 dB). 
+ static func isSilent(_ buffer: AVAudioPCMBuffer) -> Bool { + guard let channelData = buffer.floatChannelData else { + return false + } + + let frameCount = vDSP_Length(buffer.frameLength) + guard frameCount > 0 else { + return true + } + + var rms: Float = 0 + vDSP_rmsqv(channelData[0], 1, &rms, frameCount) + + let rmsDB = 20 * log10(max(rms, Float.ulpOfOne)) + return rmsDB <= silenceThresholdDB + } + + // MARK: - Cleanup + + func reset() { + converter = nil + converterInputFormat = nil + converterOutputFormat = nil + } + + // MARK: - Private + + private func formatsMatch(_ lhs: AVAudioFormat?, _ rhs: AVAudioFormat?) -> Bool { + guard let lhs = lhs, let rhs = rhs else { return false } + return lhs.sampleRate == rhs.sampleRate + && lhs.channelCount == rhs.channelCount + && lhs.commonFormat == rhs.commonFormat + && lhs.isInterleaved == rhs.isInterleaved + } +} diff --git a/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift new file mode 100644 index 000000000..d45a10e4e --- /dev/null +++ b/ios/RCTWebRTC/Utils/AudioDeviceModule/ScreenShare/ScreenShareAudioMixer.swift @@ -0,0 +1,142 @@ +// +// Copyright © 2026 Stream.io Inc. All rights reserved. +// + +import AVFoundation +import CoreMedia +import WebRTC + +/// Mixes screen share audio into the WebRTC microphone capture stream via +/// `RTCAudioCustomProcessingDelegate` — direct PCM additive mixing in the +/// WebRTC capture post-processing pipeline. +/// +/// Set as `capturePostProcessingDelegate` on `RTCDefaultAudioProcessingModule`. +/// The delegate callback runs after AEC/AGC/NS, so screen audio passes through +/// without echo cancellation interference. +/// +/// ``` +/// RPScreenRecorder → convert → ring buffer → audioProcessingProcess → encoding +/// (44100→48k) (producer) (consumer) +/// ``` +/// +/// **Important:** `RTCAudioBuffer` uses FloatS16 format (Float32 in the Int16 +/// range -32768…32767). 
Audio from `AVAudioConverter` (normalized -1…1) must +/// be scaled by 32768 before mixing. +@objc public final class ScreenShareAudioMixer: NSObject, RTCAudioCustomProcessingDelegate { + + /// Ring buffer for passing converted audio from the RPScreenRecorder callback + /// thread (producer) to the audio processing thread (consumer). + /// Capacity: 1 second of mono Float32 at 48 kHz. + private let ringBuffer = AudioRingBuffer(capacity: 48000) + private let audioConverter = ScreenShareAudioConverter() + + private var isMixing = false + /// Processing format from `audioProcessingInitialize`. + private var processingSampleRate: Double = 0 + private var processingChannels: Int = 0 + private var targetFormat: AVAudioFormat? + + /// Scale factor: RTCAudioBuffer uses FloatS16 format (Float32 values in the + /// Int16 range -32768…32767), NOT normalized Float32 (-1…1). + /// AVAudioConverter produces normalized Float32, so we must scale up. + private static let floatS16Scale: Float = 32768.0 + + // MARK: - RTCAudioCustomProcessingDelegate + + /// Called by WebRTC when the processing pipeline initializes or reconfigures. + /// May be called multiple times (e.g., on route changes). + public func audioProcessingInitialize(sampleRate: Int, channels: Int) { + processingSampleRate = Double(sampleRate) + processingChannels = channels + + targetFormat = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: processingSampleRate, + channels: AVAudioChannelCount(channels), + interleaved: false + ) + + ringBuffer.reset() + audioConverter.reset() + } + + /// Called on the audio processing thread for each captured audio chunk. + /// Reads from the ring buffer and ADDs screen audio samples to the mic buffer. 
+ public func audioProcessingProcess(audioBuffer: RTCAudioBuffer) { + guard isMixing else { return } + + let frames = Int(audioBuffer.frames) + let channels = Int(audioBuffer.channels) + guard frames > 0, channels > 0 else { return } + + mixFromRingBuffer(into: audioBuffer, frames: frames, channels: channels) + } + + /// Called when the processing pipeline is released. + public func audioProcessingRelease() { + ringBuffer.reset() + targetFormat = nil + } + + // MARK: - Public API + + /// Enable audio mixing. After this, `enqueue(_:)` writes to the ring buffer + /// and the processing callback reads from it. + @objc public func startMixing() { + guard !isMixing else { return } + ringBuffer.reset() + isMixing = true + } + + /// Stop audio mixing. + @objc public func stopMixing() { + guard isMixing else { return } + isMixing = false + ringBuffer.reset() + audioConverter.reset() + } + + /// Receive a screen audio CMSampleBuffer from InAppScreenCapturer. + /// Converts to the processing format and writes to the ring buffer. + @objc public func enqueue(_ sampleBuffer: CMSampleBuffer) { + guard isMixing, let targetFmt = targetFormat else { return } + + guard let pcm = audioConverter.pcmBuffer(from: sampleBuffer) else { return } + + let buffer: AVAudioPCMBuffer + if pcm.format.sampleRate != targetFmt.sampleRate + || pcm.format.channelCount != targetFmt.channelCount + || pcm.format.commonFormat != targetFmt.commonFormat + || pcm.format.isInterleaved != targetFmt.isInterleaved { + guard let converted = audioConverter.convertIfRequired(pcm, to: targetFmt) else { return } + buffer = converted + } else { + buffer = pcm + } + + if ScreenShareAudioConverter.isSilent(buffer) { return } + + guard let channelData = buffer.floatChannelData else { return } + ringBuffer.write(channelData[0], count: Int(buffer.frameLength)) + } + + // MARK: - Private mixing + + /// Read from ring buffer and ADD to the mic audio buffer (additive mixing). 
+ /// Ring buffer contains normalized Float32 [-1,1] from AVAudioConverter; + /// RTCAudioBuffer uses FloatS16 [-32768,32767], so we scale before adding. + private func mixFromRingBuffer(into audioBuffer: RTCAudioBuffer, frames: Int, channels: Int) { + let tempBuffer = UnsafeMutablePointer.allocate(capacity: frames) + defer { tempBuffer.deallocate() } + + let framesRead = ringBuffer.read(into: tempBuffer, count: frames) + guard framesRead > 0 else { return } + + for ch in 0..) +#import +#elif __has_include("stream_react_native_webrtc-Swift.h") +#import "stream_react_native_webrtc-Swift.h" +#endif #import "VideoCaptureController.h" @implementation WebRTCModule (RTCMediaStream) @@ -202,14 +211,32 @@ - (RTCVideoTrack *)createScreenCaptureVideoTrack { NSString *trackUUID = [[NSUUID UUID] UUIDString]; RTCVideoTrack *videoTrack = [self.peerConnectionFactory videoTrackWithSource:videoSource trackId:trackUUID]; - ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource]; - ScreenCaptureController *screenCaptureController = - [[ScreenCaptureController alloc] initWithCapturer:screenCapturer]; + WebRTCModuleOptions *options = [WebRTCModuleOptions sharedInstance]; + CaptureController *captureController; + + if (options.useInAppScreenCapture) { + // Clear the flag so subsequent getDisplayMedia calls use broadcast by default + options.useInAppScreenCapture = NO; + + InAppScreenCapturer *capturer = [[InAppScreenCapturer alloc] initWithDelegate:videoSource]; + InAppScreenCaptureController *controller = [[InAppScreenCaptureController alloc] initWithCapturer:capturer]; + + // Store weak reference for audio mixing wiring + options.activeInAppScreenCapturer = capturer; + + captureController = controller; + } else { + // Existing broadcast extension path + ScreenCapturer *screenCapturer = [[ScreenCapturer alloc] initWithDelegate:videoSource]; + ScreenCaptureController *screenCaptureController = + [[ScreenCaptureController alloc] 
initWithCapturer:screenCapturer]; + captureController = screenCaptureController; + } TrackCapturerEventsEmitter *emitter = [[TrackCapturerEventsEmitter alloc] initWith:trackUUID webRTCModule:self]; - screenCaptureController.eventsDelegate = emitter; - videoTrack.captureController = screenCaptureController; - [screenCaptureController startCapture]; + captureController.eventsDelegate = emitter; + videoTrack.captureController = captureController; + [captureController startCapture]; // Add dimension detection for local video tracks immediately [self addLocalVideoTrackDimensionDetection:videoTrack]; diff --git a/ios/RCTWebRTC/WebRTCModule.m b/ios/RCTWebRTC/WebRTCModule.m index da9a335a2..4455e60f2 100644 --- a/ios/RCTWebRTC/WebRTCModule.m +++ b/ios/RCTWebRTC/WebRTCModule.m @@ -86,11 +86,23 @@ - (instancetype)init { RCTLogInfo(@"Using video encoder factory: %@", NSStringFromClass([encoderFactory class])); RCTLogInfo(@"Using video decoder factory: %@", NSStringFromClass([decoderFactory class])); + // Always ensure an audio processing module exists so screen share + // audio mixing can use capturePostProcessingDelegate at runtime. + if (audioProcessingModule == nil && audioDevice == nil) { + audioProcessingModule = [[RTCDefaultAudioProcessingModule alloc] + initWithConfig:nil + capturePostProcessingDelegate:nil + renderPreProcessingDelegate:nil]; + options.audioProcessingModule = audioProcessingModule; + RCTLogInfo(@"Created default audio processing module for screen share audio mixing"); + } + if (audioProcessingModule != nil) { if (audioDevice != nil) { NSLog(@"Both audioProcessingModule and audioDevice are provided, but only one can be used. 
Ignoring audioDevice."); } RCTLogInfo(@"Using audio processing module: %@", NSStringFromClass([audioProcessingModule class])); + _peerConnectionFactory = [[RTCPeerConnectionFactory alloc] initWithAudioDeviceModuleType:RTCAudioDeviceModuleTypeAudioEngine bypassVoiceProcessing:NO @@ -110,7 +122,7 @@ - (instancetype)init { decoderFactory:decoderFactory audioProcessingModule:nil]; } - + _rtcAudioDeviceModuleObserver = [[AudioDeviceModuleObserver alloc] initWithWebRTCModule:self]; _audioDeviceModule = [[AudioDeviceModule alloc] initWithSource:_peerConnectionFactory.audioDeviceModule delegateObserver:_rtcAudioDeviceModuleObserver]; diff --git a/ios/RCTWebRTC/WebRTCModuleOptions.h b/ios/RCTWebRTC/WebRTCModuleOptions.h index d99cb8200..b363cc4ff 100644 --- a/ios/RCTWebRTC/WebRTCModuleOptions.h +++ b/ios/RCTWebRTC/WebRTCModuleOptions.h @@ -1,6 +1,7 @@ #import #import +@class InAppScreenCapturer; NS_ASSUME_NONNULL_BEGIN @interface WebRTCModuleOptions : NSObject @@ -9,10 +10,22 @@ NS_ASSUME_NONNULL_BEGIN @property(nonatomic, strong, nullable) id videoEncoderFactory; @property(nonatomic, strong, nullable) id audioDevice; @property(nonatomic, strong, nullable) id audioProcessingModule; + @property(nonatomic, strong, nullable) NSDictionary *fieldTrials; @property(nonatomic, assign) RTCLoggingSeverity loggingSeverity; @property(nonatomic, assign) BOOL enableMultitaskingCameraAccess; +/// When YES, the next getDisplayMedia() call will use RPScreenRecorder (in-app capture) +/// instead of the broadcast extension. Auto-cleared after use. +@property(nonatomic, assign) BOOL useInAppScreenCapture; + +/// When YES, in-app screen capture will route .audioApp buffers to the audio mixer. +@property(nonatomic, assign) BOOL includeScreenShareAudio; + +/// Weak reference to the current in-app screen capturer, set during +/// `createScreenCaptureVideoTrack` when in-app mode is used. 
+@property(nonatomic, weak, nullable) InAppScreenCapturer *activeInAppScreenCapturer; + #pragma mark - This class is a singleton + (instancetype _Nonnull)sharedInstance;