From 79f7bf9a9e10508891f781b3315b93378dfd301d Mon Sep 17 00:00:00 2001 From: Milos Pesic Date: Thu, 22 Jan 2026 15:36:52 +0100 Subject: [PATCH 1/2] Make sure correct sample rate is used when filling gaps. --- pkg/sip/media_port.go | 2 +- pkg/sip/silence_filler.go | 58 ++++++++++++++++++---------------- pkg/sip/silence_filler_test.go | 2 +- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/pkg/sip/media_port.go b/pkg/sip/media_port.go index f9afd99c..808caca6 100644 --- a/pkg/sip/media_port.go +++ b/pkg/sip/media_port.go @@ -757,7 +757,7 @@ func (p *MediaPort) setupInput() { } audioHandler := p.conf.Audio.Codec.DecodeRTP(audioWriter, p.conf.Audio.Type) // Wrap the decoder with silence suppression handler to fill gaps during silence suppression - audioHandler = newSilenceFiller(audioHandler, audioWriter, codecInfo.RTPClockRate, p.log) + audioHandler = newSilenceFiller(audioHandler, audioWriter, codecInfo.RTPClockRate, codecInfo.SampleRate, p.log) p.audioInHandler = audioHandler mux := rtp.NewMux(nil) diff --git a/pkg/sip/silence_filler.go b/pkg/sip/silence_filler.go index 35ed5984..4c9f5d73 100644 --- a/pkg/sip/silence_filler.go +++ b/pkg/sip/silence_filler.go @@ -27,17 +27,18 @@ import ( // silenceFiller detects RTP timestamp discontinuities (silence suppression) // and generates silence samples to fill the gaps before passing packets to the decoder. type silenceFiller struct { - maxGapSize int - encodedSink rtp.Handler - pcmSink msdk.PCM16Writer - samplesPerFrame int - log logger.Logger - lastTS atomic.Uint64 - lastSeq atomic.Uint64 - packets atomic.Uint64 - gapCount atomic.Uint64 - gapSizeSum atomic.Uint64 - lastPrintTime time.Time + maxGapSize int + encodedSink rtp.Handler + pcmSink msdk.PCM16Writer + rtpSamplesPerFrame int // For gap detection (based on RTP clock rate) + pcmSamplesPerFrame int // For silence generation (based on PCM output rate) + log logger.Logger + lastTS atomic.Uint64 + lastSeq atomic.Uint64 + packets atomic.Uint64 + gapCount atomic.Uint64 + gapSizeSum atomic.Uint64 + lastPrintTime time.Time } type SilenceSuppressionOption func(*silenceFiller) @@ -50,22 +51,23 @@ func WithMaxGapSize(maxGapSize int) SilenceSuppressionOption { } } -func newSilenceFiller(encodedSink rtp.Handler, pcmSink msdk.PCM16Writer, clockRate int, log logger.Logger, options ...SilenceSuppressionOption) rtp.Handler { +func newSilenceFiller(encodedSink rtp.Handler, pcmSink msdk.PCM16Writer, rtpClockRate int, pcmSampleRate int, log logger.Logger, options ...SilenceSuppressionOption) rtp.Handler { // TODO: We assume 20ms frame. We would need to adjust this when: // - When we add support for other frame durations. // - When we add support for re-INVITE sdp renegotiation (maybe, if we don't destroy this and start over). h := &silenceFiller{ - maxGapSize: 25, // Default max gap size - encodedSink: encodedSink, - pcmSink: pcmSink, - samplesPerFrame: clockRate / rtp.DefFramesPerSec, - log: log, - packets: atomic.Uint64{}, - lastSeq: atomic.Uint64{}, - lastTS: atomic.Uint64{}, - gapCount: atomic.Uint64{}, - gapSizeSum: atomic.Uint64{}, - lastPrintTime: time.Time{}, + maxGapSize: 25, // Default max gap size + encodedSink: encodedSink, + pcmSink: pcmSink, + rtpSamplesPerFrame: rtpClockRate / rtp.DefFramesPerSec, + pcmSamplesPerFrame: pcmSampleRate / rtp.DefFramesPerSec, + log: log, + packets: atomic.Uint64{}, + lastSeq: atomic.Uint64{}, + lastTS: atomic.Uint64{}, + gapCount: atomic.Uint64{}, + gapSizeSum: atomic.Uint64{}, + lastPrintTime: time.Time{}, } for _, option := range options { option(h) @@ -89,7 +91,7 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) { currentSeq := header.SequenceNumber expectedSeq := lastSeq + 1 - expectedTS := lastTS + uint32(h.samplesPerFrame) + expectedTS := lastTS + uint32(h.rtpSamplesPerFrame) seqDiff := currentSeq - expectedSeq tsDiff := currentTS - expectedTS @@ -100,10 +102,10 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) { return false, 0 } - missedFrames := int(tsDiff) / int(h.samplesPerFrame) + missedFrames := int(tsDiff) / int(h.rtpSamplesPerFrame) if missedFrames == 0 { - // Needs to be after / int(h.samplesPerFrame), - // since some RTP resets may result in a 0 < TS diff < h.samplesPerFrame + // Needs to be after / int(h.rtpSamplesPerFrame), + // since some RTP resets may result in a 0 < TS diff < h.rtpSamplesPerFrame return false, 0 } @@ -113,7 +115,7 @@ func (h *silenceFiller) isSilenceSuppression(header *rtp.Header) (bool, int) { func (h *silenceFiller) fillWithSilence(framesToFill int) error { for ; framesToFill > 0; framesToFill-- { - silence := make(msdk.PCM16Sample, h.samplesPerFrame) + silence := make(msdk.PCM16Sample, h.pcmSamplesPerFrame) if err := h.pcmSink.WriteSample(silence); err != nil { return err } diff --git a/pkg/sip/silence_filler_test.go b/pkg/sip/silence_filler_test.go index 1c3dcdbf..e1a7255a 100644 --- a/pkg/sip/silence_filler_test.go +++ b/pkg/sip/silence_filler_test.go @@ -40,7 +40,7 @@ func newSilenceSuppressionTester(audioSampleRate int, log logger.Logger) *Silenc receivedSilenceFrames: 0, receivedSignalFrames: 0, } - tester.gapFiller = newSilenceFiller(tester, tester, audioSampleRate, log) + tester.gapFiller = newSilenceFiller(tester, tester, audioSampleRate, audioSampleRate, log) return tester } From 7b082f38e2c5d2d5f09f3f01195d5b4f27882371 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 22 Jan 2026 11:44:00 -0800 Subject: [PATCH 2/2] adding specific test for checking different clock/sample rates --- pkg/sip/silence_filler_test.go | 87 ++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/pkg/sip/silence_filler_test.go b/pkg/sip/silence_filler_test.go index e1a7255a..d63ceaf7 100644 --- a/pkg/sip/silence_filler_test.go +++ b/pkg/sip/silence_filler_test.go @@ -27,20 +27,35 @@ import ( // Both a rtp.Handler and msdk.PCM16Writer designed to test the silence suppression handler type SilenceSuppressionTester struct { audioSampleRate int + clockRate int // RTP clock rate (may differ from sample rate for G722) framesReceived []bool // true if received a signal frame, false if it was generated silence receivedSilenceFrames uint64 receivedSignalFrames uint64 + sampleCounts []int // number of samples in each silence frame gapFiller rtp.Handler } -func newSilenceSuppressionTester(audioSampleRate int, log logger.Logger) *SilenceSuppressionTester { +type silenceSuppressionTesterOption func(*SilenceSuppressionTester) + +func WithClockRate(clockRate int) silenceSuppressionTesterOption { + return func(s *SilenceSuppressionTester) { + s.clockRate = clockRate + } +} + +func newSilenceSuppressionTester(audioSampleRate int, log logger.Logger, options ...silenceSuppressionTesterOption) *SilenceSuppressionTester { tester := &SilenceSuppressionTester{ audioSampleRate: audioSampleRate, + clockRate: audioSampleRate, // default to same as sample rate framesReceived: make([]bool, 0), + sampleCounts: make([]int, 0), receivedSilenceFrames: 0, receivedSignalFrames: 0, } - tester.gapFiller = newSilenceFiller(tester, tester, audioSampleRate, audioSampleRate, log) + for _, option := range options { + option(tester) + } + tester.gapFiller = newSilenceFiller(tester, tester, tester.clockRate, tester.audioSampleRate, log) return tester } @@ -58,26 +73,30 @@ func (s *SilenceSuppressionTester) Close() error { func (s *SilenceSuppressionTester) WriteSample(sample msdk.PCM16Sample) error { s.framesReceived = append(s.framesReceived, false) + s.sampleCounts = append(s.sampleCounts, len(sample)) s.receivedSilenceFrames++ return nil } func (s *SilenceSuppressionTester) HandleRTP(h *rtp.Header, payload []byte) error { s.framesReceived = append(s.framesReceived, true) + s.sampleCounts = append(s.sampleCounts, len(payload)) s.receivedSignalFrames++ return nil } func (s *SilenceSuppressionTester) SendSignalFrames(count int, nextSeq uint16, nextTimestamp uint32) (uint16, uint32, error) { samplesPerFrame := s.audioSampleRate / rtp.DefFramesPerSec + clockPerFrame := s.clockRate / rtp.DefFramesPerSec for i := 0; i < count; i++ { h := &rtp.Header{ SequenceNumber: nextSeq, Timestamp: nextTimestamp, } nextSeq++ - nextTimestamp += uint32(samplesPerFrame) - err := s.gapFiller.HandleRTP(h, []byte{0x01, 0x02, 0x03}) + nextTimestamp += uint32(clockPerFrame) + payload := make([]byte, samplesPerFrame) + err := s.gapFiller.HandleRTP(h, payload) if err != nil { return nextSeq, nextTimestamp, err } @@ -86,7 +105,9 @@ func (s *SilenceSuppressionTester) SendSignalFrames(count int, nextSeq uint16, n } func (s *SilenceSuppressionTester) assertSilenceIndexes(t *testing.T, expectedSize int, silenceIndexes []int) { + samplesPerFrame := s.audioSampleRate / rtp.DefFramesPerSec require.Equal(t, expectedSize, len(s.framesReceived)) + require.Equal(t, expectedSize, len(s.sampleCounts)) require.Equal(t, uint64(len(silenceIndexes)), s.receivedSilenceFrames) require.Equal(t, uint64(expectedSize-len(silenceIndexes)), s.receivedSignalFrames) for i, isSignal := range s.framesReceived { @@ -95,6 +116,7 @@ func (s *SilenceSuppressionTester) assertSilenceIndexes(t *testing.T, expectedSi } else { require.True(t, isSignal, "frame %d should be signal", i) } + require.Equal(t, samplesPerFrame, s.sampleCounts[i], "frame %d should have %d samples instead of %d", i, samplesPerFrame, s.sampleCounts[i]) } for _, index := range silenceIndexes { // Make sure we're not missing any indexes if index < 0 || index >= expectedSize { @@ -220,3 +242,60 @@ func TestSilenceSuppressionHandling(t *testing.T) { tester.assertSilenceIndexes(t, 7, []int{2, 3, 4}) }) } +func TestSilenceSuppressionDifferentCodecs(t *testing.T) { + log := logger.GetLogger() + + testCases := []struct { + name string + clockRate int + sampleRate int + description string + }{ + { + name: "PCMU", + clockRate: 8000, + sampleRate: 8000, + description: "PCMU: 8kHz clock rate / 8kHz sample rate", + }, + { + name: "PCMA", + clockRate: 8000, + sampleRate: 8000, + description: "PCMA: 8kHz clock rate / 8kHz sample rate", + }, + { + name: "Opus", + clockRate: 48000, + sampleRate: 48000, + description: "Opus: 48kHz clock rate / 48kHz sample rate", + }, + { + name: "G722", + clockRate: 8000, + sampleRate: 16000, + description: "G722: 8kHz clock rate / 16kHz sample rate", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + tester := newSilenceSuppressionTester(tc.sampleRate, log, WithClockRate(tc.clockRate)) + tsPerFrame := uint32(tc.clockRate / rtp.DefFramesPerSec) + + // Send 5 frames, then create a gap of 2 frames, then send 5 more frames + nextSeq := uint16(100) + nextTimestamp := uint32(10000) + nextSeq, nextTimestamp, err := tester.SendSignalFrames(5, nextSeq, nextTimestamp) + require.NoError(t, err) + + // Create a gap of 2 frames by incrementing timestamp + missingFrameCount := 2 + nextTimestamp += uint32(missingFrameCount) * tsPerFrame + nextSeq, nextTimestamp, err = tester.SendSignalFrames(5, nextSeq, nextTimestamp) + require.NoError(t, err) + + // Should have 5 signal + 2 silence + 5 signal = 12 total frames + tester.assertSilenceIndexes(t, 12, []int{5, 6}) + }) + } +}