Skip to content

Commit 07a1c42

Browse files
authored
Merge pull request #28 from pedroSG94/fix/audio-yt
Fix/audio yt
2 parents eed1744 + 2e03928 commit 07a1c42

8 files changed

Lines changed: 333 additions & 52 deletions

File tree

RootEncoder/Sources/RootEncoder/encoder/audio/AudioEncoder.swift

Lines changed: 72 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@ public class AudioEncoder {
2020
private var codec = AudioCodec.AAC
2121
private var inputFormat: AVAudioFormat? = nil
2222
private var bitrate = 128 * 1000
23+
private var ringBuffer: AudioRingBuffer? = nil
24+
private let audioTime = AudioTime()
2325

2426
public init(callback: GetAacData) {
2527
self.callback = callback
28+
2629
}
2730

2831
public func setCodec(codec: AudioCodec) {
@@ -57,16 +60,17 @@ public class AudioEncoder {
5760
}
5861

5962
public func start() {
60-
self.initTs = UInt64(Date().millisecondsSince1970 * 1000)
6163
running = true
6264
syncQueue.clear()
6365
thread.async {
6466
while (self.running) {
6567
let pcmFrame = self.syncQueue.dequeue()
6668
if let pcmFrame = pcmFrame {
67-
let ts = UInt64(pcmFrame.ts * 1000)
6869
if self.inputFormat == nil {
69-
self.inputFormat = pcmFrame.buffer.format
70+
let format = pcmFrame.buffer.format
71+
self.inputFormat = format
72+
self.ringBuffer = AudioRingBuffer(format)
73+
self.audioTime.reset()
7074
}
7175
if self.converter == nil {
7276
if let inputFormat = self.inputFormat, let outputFormat = self.outputFormat {
@@ -78,28 +82,15 @@ public class AudioEncoder {
7882
}
7983
}
8084
var error: NSError? = nil
85+
guard let outputFormat = self.outputFormat else { continue }
86+
if !self.audioTime.hasAnchor {
87+
self.audioTime.anchor(pcmFrame.time, sampleRate: outputFormat.sampleRate)
88+
}
8189
if self.codec == AudioCodec.AAC {
82-
guard let aacBuffer = self.convertAAC(inputBuffer: pcmFrame.buffer, error: &error) else {
83-
continue
84-
}
85-
if error != nil {
86-
print("Encode error: \(error.debugDescription)")
87-
} else {
88-
let data = Array<UInt8>(UnsafeBufferPointer<UInt8>(start: aacBuffer.data.assumingMemoryBound(to: UInt8.self), count: Int(aacBuffer.byteLength)))
89-
let elapsedMicroSeconds = ts - self.initTs
90-
self.callback?.getAacData(frame: Frame(buffer: data, length: UInt32(data.count), timeStamp: elapsedMicroSeconds))
91-
}
90+
self.ringBuffer?.append(pcmFrame.buffer)
91+
self.convertAAC(error: &error)
9292
} else if self.codec == AudioCodec.G711 {
93-
guard let g711Buffer = self.convertG711(inputBuffer: pcmFrame.buffer, error: &error) else {
94-
continue
95-
}
96-
if error != nil {
97-
print("Encode error: \(error.debugDescription)")
98-
} else {
99-
let data = g711Buffer.audioBufferToBytes()
100-
let elapsedMicroSeconds = ts - self.initTs
101-
self.callback?.getAacData(frame: Frame(buffer: data, length: UInt32(data.count), timeStamp: elapsedMicroSeconds))
102-
}
93+
self.convertG711(inputBuffer: pcmFrame.buffer, error: &error)
10394
}
10495
}
10596
}
@@ -112,43 +103,77 @@ public class AudioEncoder {
112103
outputFormat = nil
113104
initTs = 0
114105
syncQueue.clear()
106+
audioTime.reset()
115107
}
116108

117-
private func convertAAC(inputBuffer: AVAudioPCMBuffer, error: NSErrorPointer) -> AVAudioCompressedBuffer? {
109+
private func convertAAC(error: NSErrorPointer) {
118110
if (running) {
119-
guard let outputFormat = outputFormat else {
120-
return nil
121-
}
122-
let outputBuffer = AVAudioCompressedBuffer(format: outputFormat, packetCapacity: 1, maximumPacketSize: 1024 * Int(outputFormat.channelCount))
111+
guard let inputFormat = inputFormat, let outputFormat = outputFormat else { return }
112+
let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: 1024 * 4)
113+
guard let inputBuffer = inputBuffer else { return }
123114

124-
converter?.convert(to: outputBuffer, error: nil) { _, outStatus in
125-
outStatus.pointee = .haveData
126-
return inputBuffer
127-
}
128-
return outputBuffer
129-
} else {
130-
return nil
115+
let outputBuffer = AVAudioCompressedBuffer(format: outputFormat, packetCapacity: 1, maximumPacketSize: 1024 * Int(outputFormat.channelCount))
116+
convert(inputBuffer: inputBuffer, outputBuffer: outputBuffer, extraTime: 1024)
131117
}
132118
}
133119

134-
private func convertG711(inputBuffer: AVAudioPCMBuffer, error: NSErrorPointer) -> AVAudioPCMBuffer? {
120+
private func convertG711(inputBuffer: AVAudioPCMBuffer, error: NSErrorPointer) {
135121
if (running) {
136-
guard let outputFormat = outputFormat else {
137-
return nil
138-
}
139-
let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: AVAudioFrameCount(outputFormat.sampleRate) * inputBuffer.frameLength / AVAudioFrameCount(inputBuffer.format.sampleRate))!
122+
guard let outputFormat = outputFormat else { return }
123+
let extraTime = AVAudioFrameCount(outputFormat.sampleRate) * inputBuffer.frameLength / AVAudioFrameCount(inputBuffer.format.sampleRate)
124+
let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: extraTime)!
140125
outputBuffer.frameLength = outputBuffer.frameCapacity
141126

142-
converter?.convert(to: outputBuffer, error: nil) { _, outStatus in
143-
outStatus.pointee = .haveData
144-
return inputBuffer
145-
}
146-
return outputBuffer
147-
} else {
148-
return nil
127+
convert(inputBuffer: inputBuffer, outputBuffer: outputBuffer, extraTime: AVAudioFramePosition(extraTime), force: true)
149128
}
150129
}
151130

131+
/// Runs the audio converter and forwards each produced packet to `callback`.
///
/// - Parameters:
///   - inputBuffer: When `force` is `true`, the PCM buffer handed straight to the converter.
///     Otherwise a scratch buffer that is refilled from `ringBuffer` each time the converter
///     asks for input.
///   - outputBuffer: Destination of the conversion. Its bytes are emitted when it is an
///     `AVAudioCompressedBuffer` or an `AVAudioPCMBuffer`; any other type is ignored.
///   - extraTime: Value passed to `audioTime.advanced` after each emitted frame.
///   - force: `true` performs exactly one conversion pass fed from `inputBuffer`;
///     `false` keeps converting for as long as the converter reports `.haveData`.
private func convert(inputBuffer: AVAudioPCMBuffer, outputBuffer: AVAudioBuffer, extraTime: AVAudioFramePosition, force: Bool = false) {
    guard let ringBuffer = ringBuffer else { return }
    var status: AVAudioConverterOutputStatus? = .endOfStream

    repeat {
        // Capture the converter's error so a failed pass can report its cause.
        var conversionError: NSError?
        status = converter?.convert(to: outputBuffer, error: &conversionError) { inNumberFrames, inputStatus in
            if force {
                inputStatus.pointee = .haveData
                return inputBuffer
            } else if inNumberFrames <= ringBuffer.counts {
                // Enough buffered frames: fill the scratch buffer with exactly what was requested.
                _ = ringBuffer.render(inNumberFrames, ioData: inputBuffer.mutableAudioBufferList)
                inputBuffer.frameLength = inNumberFrames
                inputStatus.pointee = .haveData
                return inputBuffer
            } else {
                // Not enough buffered frames yet; the loop below ends and waits for more input.
                inputStatus.pointee = .noDataNow
                return nil
            }
        }
        switch status {
        case .haveData:
            let data: [UInt8]
            switch outputBuffer {
            case let compressed as AVAudioCompressedBuffer:
                data = [UInt8](UnsafeBufferPointer<UInt8>(start: compressed.data.assumingMemoryBound(to: UInt8.self), count: Int(compressed.byteLength)))
            case let pcm as AVAudioPCMBuffer:
                data = pcm.audioBufferToBytes()
            default:
                continue
            }

            let ts = UInt64(self.audioTime.at.makeTime().seconds * 1000000)
            // `initTs == 0` means "not set yet": the first emitted frame becomes the time origin.
            if self.initTs == 0 {
                self.initTs = ts
            }
            // Unsigned subtraction traps on underflow, so clamp to zero instead of crashing.
            let elapsedMicroSeconds = ts >= self.initTs ? ts - self.initTs : 0
            self.callback?.getAacData(frame: Frame(buffer: data, length: UInt32(data.count), timeStamp: elapsedMicroSeconds))
            self.audioTime.advanced(extraTime)
        case .error:
            print("Encode error: \(conversionError.debugDescription)")
        default:
            break
        }
    } while status == .haveData && !force
}
176+
152177
private func getAACFormat(sampleRate: Double, channels: UInt32) -> AVAudioFormat? {
153178
var description = AudioStreamBasicDescription(mSampleRate: sampleRate,
154179
mFormatID: kAudioFormatMPEG4AAC,

RootEncoder/Sources/RootEncoder/encoder/input/audio/MicrophoneManager.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import Foundation
1010
import AVFoundation
11+
import Accelerate
1112

1213
public class MicrophoneManager: NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
1314

@@ -76,6 +77,7 @@ public class MicrophoneManager: NSObject, AVCaptureAudioDataOutputSampleBufferDe
7677

7778
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
7879
let ts = UInt64(Date().millisecondsSince1970)
80+
7981
guard let description = sampleBuffer.formatDescription, let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
8082
return
8183
}
@@ -91,7 +93,7 @@ public class MicrophoneManager: NSObject, AVCaptureAudioDataOutputSampleBufferDe
9193
memcpy(buffer?.int16ChannelData?[0], dataPointer, length)
9294

9395
if let buffer = buffer {
94-
self.callback?.getPcmData(frame: PcmFrame(buffer: buffer.mute(enabled: self.muted), ts: ts, time: sampleBuffer.presentationTimeStamp))
96+
self.callback?.getPcmData(frame: PcmFrame(buffer: buffer.mute(enabled: muted), ts: ts, time: sampleBuffer.presentationTimeStamp))
9597
}
9698
}
9799
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//
2+
// File.swift
3+
//
4+
//
5+
// Created by Pedro on 9/7/24.
6+
//
7+
8+
import Accelerate
9+
import AVFoundation
10+
import CoreMedia
11+
import Foundation
12+
13+
14+
/// A fixed-capacity circular buffer of PCM audio frames.
///
/// Buffers are copied in with `append(_:offset:)` and read back out in arbitrarily sized
/// chunks with `render(_:ioData:offset:)`. All positions are measured in frames.
/// Samples are only transferred for the Int16, Int32 and Float32 common formats; for any
/// other format the read/write positions still move but no bytes are copied.
///
/// The class does no locking of its own and does not detect the writer overtaking the reader.
public final class AudioRingBuffer {
    private static let bufferCounts: UInt32 = 16
    private static let numSamples: UInt32 = 1024

    /// Frames between the read position (`tail`) and the write position (`head`), plus any
    /// pending `skip` frames. Equal positions are reported as an empty buffer.
    var counts: Int {
        if tail <= head {
            return head - tail + skip
        }
        return Int(outputBuffer.frameLength) - tail + head + skip
    }

    private var head = 0
    private var tail = 0
    private var skip = 0
    private var sampleTime: AVAudioFramePosition = 0
    private var inputFormat: AVAudioFormat
    private var inputBuffer: AVAudioPCMBuffer
    private var outputBuffer: AVAudioPCMBuffer

    /// Creates a ring buffer holding `1024 * bufferCounts` frames of `inputFormat`.
    ///
    /// - Parameters:
    ///   - inputFormat: Format of the buffers that will be appended and rendered.
    ///   - bufferCounts: Number of 1024-frame slots to allocate.
    /// - Returns: `nil` when either backing `AVAudioPCMBuffer` cannot be allocated.
    init?(_ inputFormat: AVAudioFormat, bufferCounts: UInt32 = AudioRingBuffer.bufferCounts) {
        guard
            let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: Self.numSamples) else {
            return nil
        }
        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: Self.numSamples * bufferCounts) else {
            return nil
        }
        self.inputFormat = inputFormat
        self.inputBuffer = inputBuffer
        self.outputBuffer = outputBuffer
        // `frameLength` is used as the ring capacity everywhere below, so expose all of it.
        self.outputBuffer.frameLength = self.outputBuffer.frameCapacity
    }

    /// Copies the frames of `audioPCMBuffer`, starting at frame `offset`, into the ring.
    ///
    /// When the write position reaches the end of the storage it wraps to the start and the
    /// rest of the buffer is appended from there.
    ///
    /// - Parameters:
    ///   - audioPCMBuffer: Source buffer.
    ///   - offset: First source frame to copy.
    @inline(__always)
    func append(_ audioPCMBuffer: AVAudioPCMBuffer, offset: Int = 0) {
        let capacity = Int(outputBuffer.frameLength)
        let sourceLength = Int(audioPCMBuffer.frameLength)
        let numSamples = min(sourceLength - offset, capacity - head)
        // Nothing to copy; this also keeps a negative length away from memcpy.
        guard 0 < numSamples else {
            return
        }
        if inputFormat.isInterleaved {
            let channelCount = Int(inputFormat.channelCount)
            switch inputFormat.commonFormat {
            case .pcmFormatInt16:
                memcpy(outputBuffer.int16ChannelData?[0].advanced(by: head * channelCount), audioPCMBuffer.int16ChannelData?[0].advanced(by: offset * channelCount), numSamples * channelCount * 2)
            case .pcmFormatInt32:
                memcpy(outputBuffer.int32ChannelData?[0].advanced(by: head * channelCount), audioPCMBuffer.int32ChannelData?[0].advanced(by: offset * channelCount), numSamples * channelCount * 4)
            case .pcmFormatFloat32:
                memcpy(outputBuffer.floatChannelData?[0].advanced(by: head * channelCount), audioPCMBuffer.floatChannelData?[0].advanced(by: offset * channelCount), numSamples * channelCount * 4)
            default:
                break
            }
        } else {
            for i in 0..<Int(inputFormat.channelCount) {
                switch inputFormat.commonFormat {
                case .pcmFormatInt16:
                    memcpy(outputBuffer.int16ChannelData?[i].advanced(by: head), audioPCMBuffer.int16ChannelData?[i].advanced(by: offset), numSamples * 2)
                case .pcmFormatInt32:
                    memcpy(outputBuffer.int32ChannelData?[i].advanced(by: head), audioPCMBuffer.int32ChannelData?[i].advanced(by: offset), numSamples * 4)
                case .pcmFormatFloat32:
                    memcpy(outputBuffer.floatChannelData?[i].advanced(by: head), audioPCMBuffer.floatChannelData?[i].advanced(by: offset), numSamples * 4)
                default:
                    break
                }
            }
        }
        head += numSamples
        sampleTime += Int64(numSamples)
        if head == capacity {
            head = 0
            // Continue after everything copied so far, not just after this segment.
            let consumed = offset + numSamples
            if consumed < sourceLength {
                append(audioPCMBuffer, offset: consumed)
            }
        }
    }

    /// Fills `ioData` with `inNumberFrames` frames taken from the ring.
    ///
    /// Pending `skip` frames are written as silence first; the remainder is copied from the
    /// read position, wrapping to the start of the storage when the end is reached. If the
    /// ring is empty (`head == tail`) or `ioData` is `nil`, nothing is written.
    ///
    /// - Parameters:
    ///   - inNumberFrames: Number of frames to produce.
    ///   - ioData: Destination buffer list, laid out according to the ring's format.
    ///   - offset: Destination frame at which writing starts.
    /// - Returns: Always `noErr`.
    @discardableResult
    func render(_ inNumberFrames: UInt32, ioData: UnsafeMutablePointer<AudioBufferList>?, offset: Int = 0) -> OSStatus {
        if 0 < skip {
            let numSamples = min(Int(inNumberFrames), skip)
            guard let bufferList = UnsafeMutableAudioBufferListPointer(ioData) else {
                return noErr
            }
            if inputFormat.isInterleaved {
                let channelCount = Int(inputFormat.channelCount)
                // An interleaved frame holds one sample per channel, so scale the count too.
                let sampleCount = numSamples * channelCount
                switch inputFormat.commonFormat {
                case .pcmFormatInt16:
                    bufferList[0].mData?.assumingMemoryBound(to: Int16.self).advanced(by: offset * channelCount).update(repeating: 0, count: sampleCount)
                case .pcmFormatInt32:
                    bufferList[0].mData?.assumingMemoryBound(to: Int32.self).advanced(by: offset * channelCount).update(repeating: 0, count: sampleCount)
                case .pcmFormatFloat32:
                    bufferList[0].mData?.assumingMemoryBound(to: Float32.self).advanced(by: offset * channelCount).update(repeating: 0, count: sampleCount)
                default:
                    break
                }
            } else {
                for i in 0..<Int(inputFormat.channelCount) {
                    switch inputFormat.commonFormat {
                    case .pcmFormatInt16:
                        bufferList[i].mData?.assumingMemoryBound(to: Int16.self).advanced(by: offset).update(repeating: 0, count: numSamples)
                    case .pcmFormatInt32:
                        bufferList[i].mData?.assumingMemoryBound(to: Int32.self).advanced(by: offset).update(repeating: 0, count: numSamples)
                    case .pcmFormatFloat32:
                        bufferList[i].mData?.assumingMemoryBound(to: Float32.self).advanced(by: offset).update(repeating: 0, count: numSamples)
                    default:
                        break
                    }
                }
            }
            skip -= numSamples
            let remaining = inNumberFrames - UInt32(numSamples)
            if 0 < remaining {
                // Keep writing after the frames already produced in this call chain.
                return render(remaining, ioData: ioData, offset: offset + numSamples)
            }
            return noErr
        }
        let capacity = Int(outputBuffer.frameLength)
        let numSamples = min(Int(inNumberFrames), capacity - tail)
        guard let bufferList = UnsafeMutableAudioBufferListPointer(ioData), head != tail else {
            return noErr
        }
        if inputFormat.isInterleaved {
            let channelCount = Int(inputFormat.channelCount)
            switch inputFormat.commonFormat {
            case .pcmFormatInt16:
                memcpy(bufferList[0].mData?.advanced(by: offset * channelCount * 2), outputBuffer.int16ChannelData?[0].advanced(by: tail * channelCount), numSamples * channelCount * 2)
            case .pcmFormatInt32:
                memcpy(bufferList[0].mData?.advanced(by: offset * channelCount * 4), outputBuffer.int32ChannelData?[0].advanced(by: tail * channelCount), numSamples * channelCount * 4)
            case .pcmFormatFloat32:
                memcpy(bufferList[0].mData?.advanced(by: offset * channelCount * 4), outputBuffer.floatChannelData?[0].advanced(by: tail * channelCount), numSamples * channelCount * 4)
            default:
                break
            }
        } else {
            for i in 0..<Int(inputFormat.channelCount) {
                switch inputFormat.commonFormat {
                case .pcmFormatInt16:
                    memcpy(bufferList[i].mData?.advanced(by: offset * 2), outputBuffer.int16ChannelData?[i].advanced(by: tail), numSamples * 2)
                case .pcmFormatInt32:
                    memcpy(bufferList[i].mData?.advanced(by: offset * 4), outputBuffer.int32ChannelData?[i].advanced(by: tail), numSamples * 4)
                case .pcmFormatFloat32:
                    memcpy(bufferList[i].mData?.advanced(by: offset * 4), outputBuffer.floatChannelData?[i].advanced(by: tail), numSamples * 4)
                default:
                    break
                }
            }
        }
        tail += numSamples
        if tail == capacity {
            tail = 0
            let remaining = inNumberFrames - UInt32(numSamples)
            if 0 < remaining {
                // Wrapped: read the rest from the start and write it after what is already there.
                return render(remaining, ioData: ioData, offset: offset + numSamples)
            }
        }
        return noErr
    }
}

0 commit comments

Comments
 (0)