Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/sip/media_port.go
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ func (p *MediaPort) setupInput() {
}
audioHandler := p.conf.Audio.Codec.DecodeRTP(audioWriter, p.conf.Audio.Type)
// Wrap the decoder with silence suppression handler to fill gaps during silence suppression
audioHandler = newSilenceFiller(audioHandler, audioWriter, codecInfo.RTPClockRate, p.log)
audioHandler = newSilenceFiller(audioHandler, audioWriter, codecInfo.RTPClockRate, codecInfo.SampleRate, p.log)
p.audioInHandler = audioHandler

mux := rtp.NewMux(nil)
Expand Down
58 changes: 30 additions & 28 deletions pkg/sip/silence_filler.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,18 @@ import (
// silenceFiller detects RTP timestamp discontinuities (silence suppression)
// and generates silence samples to fill the gaps before passing packets to the decoder.
type silenceFiller struct {
maxGapSize int
encodedSink rtp.Handler
pcmSink msdk.PCM16Writer
samplesPerFrame int
log logger.Logger
lastTS atomic.Uint64
lastSeq atomic.Uint64
packets atomic.Uint64
gapCount atomic.Uint64
gapSizeSum atomic.Uint64
lastPrintTime time.Time
maxGapSize int
encodedSink rtp.Handler
pcmSink msdk.PCM16Writer
rtpSamplesPerFrame int // For gap detection (based on RTP clock rate)
pcmSamplesPerFrame int // For silence generation (based on PCM output rate)
log logger.Logger
lastTS atomic.Uint64
lastSeq atomic.Uint64
packets atomic.Uint64
gapCount atomic.Uint64
gapSizeSum atomic.Uint64
lastPrintTime time.Time
}

type SilenceSuppressionOption func(*silenceFiller)
Expand All @@ -50,22 +51,23 @@ func WithMaxGapSize(maxGapSize int) SilenceSuppressionOption {
}
}

func newSilenceFiller(encodedSink rtp.Handler, pcmSink msdk.PCM16Writer, clockRate int, log logger.Logger, options ...SilenceSuppressionOption) rtp.Handler {
func newSilenceFiller(encodedSink rtp.Handler, pcmSink msdk.PCM16Writer, rtpClockRate int, pcmSampleRate int, log logger.Logger, options ...SilenceSuppressionOption) rtp.Handler {
// TODO: We assume 20ms frame. We would need to adjust this when:
// - When we add support for other frame durations.
// - When we add support for re-INVITE sdp renegotiation (maybe, if we don't destroy this and start over).
h := &silenceFiller{
maxGapSize: 25, // Default max gap size
encodedSink: encodedSink,
pcmSink: pcmSink,
samplesPerFrame: clockRate / rtp.DefFramesPerSec,
log: log,
packets: atomic.Uint64{},
lastSeq: atomic.Uint64{},
lastTS: atomic.Uint64{},
gapCount: atomic.Uint64{},
gapSizeSum: atomic.Uint64{},
lastPrintTime: time.Time{},
maxGapSize: 25, // Default max gap size
encodedSink: encodedSink,
pcmSink: pcmSink,
rtpSamplesPerFrame: rtpClockRate / rtp.DefFramesPerSec,
pcmSamplesPerFrame: pcmSampleRate / rtp.DefFramesPerSec,
log: log,
packets: atomic.Uint64{},
lastSeq: atomic.Uint64{},
lastTS: atomic.Uint64{},
gapCount: atomic.Uint64{},
gapSizeSum: atomic.Uint64{},
lastPrintTime: time.Time{},
}
for _, option := range options {
option(h)
Expand All @@ -89,7 +91,7 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) {
currentSeq := header.SequenceNumber

expectedSeq := lastSeq + 1
expectedTS := lastTS + uint32(h.samplesPerFrame)
expectedTS := lastTS + uint32(h.rtpSamplesPerFrame)

seqDiff := currentSeq - expectedSeq
tsDiff := currentTS - expectedTS
Expand All @@ -100,10 +102,10 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) {
return false, 0
}

missedFrames := int(tsDiff) / int(h.samplesPerFrame)
missedFrames := int(tsDiff) / int(h.rtpSamplesPerFrame)
if missedFrames == 0 {
// Needs to be after / int(h.samplesPerFrame),
// since some RTP resets may result in a 0 < TS diff < h.samplesPerFrame
// Needs to be after / int(h.rtpSamplesPerFrame),
// since some RTP resets may result in a 0 < TS diff < h.rtpSamplesPerFrame
return false, 0
}

Expand All @@ -113,7 +115,7 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) {

func (h *silenceFiller) fillWithSilence(framesToFill int) error {
for ; framesToFill > 0; framesToFill-- {
silence := make(msdk.PCM16Sample, h.samplesPerFrame)
silence := make(msdk.PCM16Sample, h.pcmSamplesPerFrame)
if err := h.pcmSink.WriteSample(silence); err != nil {
return err
}
Expand Down
87 changes: 83 additions & 4 deletions pkg/sip/silence_filler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,35 @@ import (
// Both a rtp.Handler and msdk.PCM16Writer designed to test the silence suppression handler
type SilenceSuppressionTester struct {
audioSampleRate int
clockRate int // RTP clock rate (may differ from sample rate for G722)
framesReceived []bool // true if received a signal frame, false if it was generated silence
receivedSilenceFrames uint64
receivedSignalFrames uint64
sampleCounts []int // number of samples in each silence frame
gapFiller rtp.Handler
}

func newSilenceSuppressionTester(audioSampleRate int, log logger.Logger) *SilenceSuppressionTester {
type silenceSuppressionTesterOption func(*SilenceSuppressionTester)

func WithClockRate(clockRate int) silenceSuppressionTesterOption {
return func(s *SilenceSuppressionTester) {
s.clockRate = clockRate
}
}

func newSilenceSuppressionTester(audioSampleRate int, log logger.Logger, options ...silenceSuppressionTesterOption) *SilenceSuppressionTester {
tester := &SilenceSuppressionTester{
audioSampleRate: audioSampleRate,
clockRate: audioSampleRate, // default to same as sample rate
framesReceived: make([]bool, 0),
sampleCounts: make([]int, 0),
receivedSilenceFrames: 0,
receivedSignalFrames: 0,
}
tester.gapFiller = newSilenceFiller(tester, tester, audioSampleRate, log)
for _, option := range options {
option(tester)
}
tester.gapFiller = newSilenceFiller(tester, tester, tester.clockRate, tester.audioSampleRate, log)
return tester
}

Expand All @@ -58,26 +73,30 @@ func (s *SilenceSuppressionTester) Close() error {

func (s *SilenceSuppressionTester) WriteSample(sample msdk.PCM16Sample) error {
s.framesReceived = append(s.framesReceived, false)
s.sampleCounts = append(s.sampleCounts, len(sample))
s.receivedSilenceFrames++
return nil
}

func (s *SilenceSuppressionTester) HandleRTP(h *rtp.Header, payload []byte) error {
s.framesReceived = append(s.framesReceived, true)
s.sampleCounts = append(s.sampleCounts, len(payload))
s.receivedSignalFrames++
return nil
}

func (s *SilenceSuppressionTester) SendSignalFrames(count int, nextSeq uint16, nextTimestamp uint32) (uint16, uint32, error) {
samplesPerFrame := s.audioSampleRate / rtp.DefFramesPerSec
clockPerFrame := s.clockRate / rtp.DefFramesPerSec
for i := 0; i < count; i++ {
h := &rtp.Header{
SequenceNumber: nextSeq,
Timestamp: nextTimestamp,
}
nextSeq++
nextTimestamp += uint32(samplesPerFrame)
err := s.gapFiller.HandleRTP(h, []byte{0x01, 0x02, 0x03})
nextTimestamp += uint32(clockPerFrame)
payload := make([]byte, samplesPerFrame)
err := s.gapFiller.HandleRTP(h, payload)
if err != nil {
return nextSeq, nextTimestamp, err
}
Expand All @@ -86,7 +105,9 @@ func (s *SilenceSuppressionTester) SendSignalFrames(count int, nextSeq uint16, n
}

func (s *SilenceSuppressionTester) assertSilenceIndexes(t *testing.T, expectedSize int, silenceIndexes []int) {
samplesPerFrame := s.audioSampleRate / rtp.DefFramesPerSec
require.Equal(t, expectedSize, len(s.framesReceived))
require.Equal(t, expectedSize, len(s.sampleCounts))
require.Equal(t, uint64(len(silenceIndexes)), s.receivedSilenceFrames)
require.Equal(t, uint64(expectedSize-len(silenceIndexes)), s.receivedSignalFrames)
for i, isSignal := range s.framesReceived {
Expand All @@ -95,6 +116,7 @@ func (s *SilenceSuppressionTester) assertSilenceIndexes(t *testing.T, expectedSi
} else {
require.True(t, isSignal, "frame %d should be signal", i)
}
require.Equal(t, samplesPerFrame, s.sampleCounts[i], "frame %d should have %d samples instead of %d", i, samplesPerFrame, s.sampleCounts[i])
}
for _, index := range silenceIndexes { // Make sure we're not missing any indexes
if index < 0 || index >= expectedSize {
Expand Down Expand Up @@ -220,3 +242,60 @@ func TestSilenceSuppressionHandling(t *testing.T) {
tester.assertSilenceIndexes(t, 7, []int{2, 3, 4})
})
}
func TestSilenceSuppressionDifferentCodecs(t *testing.T) {
log := logger.GetLogger()

testCases := []struct {
name string
clockRate int
sampleRate int
description string
}{
{
name: "PCMU",
clockRate: 8000,
sampleRate: 8000,
description: "PCMU: 8kHz clock rate / 8kHz sample rate",
},
{
name: "PCMA",
clockRate: 8000,
sampleRate: 8000,
description: "PCMA: 8kHz clock rate / 8kHz sample rate",
},
{
name: "Opus",
clockRate: 48000,
sampleRate: 48000,
description: "Opus: 48kHz clock rate / 48kHz sample rate",
},
{
name: "G722",
clockRate: 8000,
sampleRate: 16000,
description: "G722: 8kHz clock rate / 16kHz sample rate",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tester := newSilenceSuppressionTester(tc.sampleRate, log, WithClockRate(tc.clockRate))
tsPerFrame := uint32(tc.clockRate / rtp.DefFramesPerSec)

// Send 5 frames, then create a gap of 2 frames, then send 5 more frames
nextSeq := uint16(100)
nextTimestamp := uint32(10000)
nextSeq, nextTimestamp, err := tester.SendSignalFrames(5, nextSeq, nextTimestamp)
require.NoError(t, err)

// Create a gap of 2 frames by incrementing timestamp
missingFrameCount := 2
nextTimestamp += uint32(missingFrameCount) * tsPerFrame
nextSeq, nextTimestamp, err = tester.SendSignalFrames(5, nextSeq, nextTimestamp)
require.NoError(t, err)

// Should have 5 signal + 2 silence + 5 signal = 12 total frames
tester.assertSilenceIndexes(t, 12, []int{5, 6})
})
}
}