From e27e44e2a665a1d3d3a3f1c36c7f221772485275 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Thu, 23 Apr 2026 00:39:06 -0400 Subject: [PATCH 01/17] sync engine --- pkg/synchronizer/integration_test.go | 253 ++++++++++++ pkg/synchronizer/interfaces.go | 45 +++ pkg/synchronizer/ntpestimator.go | 256 ++++++++++++ pkg/synchronizer/ntpestimator_test.go | 224 +++++++++++ pkg/synchronizer/participantsync.go | 217 ++++++++++ pkg/synchronizer/participantsync_test.go | 185 +++++++++ pkg/synchronizer/sessiontimeline.go | 195 +++++++++ pkg/synchronizer/sessiontimeline_test.go | 223 ++++++++++ pkg/synchronizer/syncengine.go | 492 +++++++++++++++++++++++ pkg/synchronizer/syncengine_test.go | 197 +++++++++ pkg/synchronizer/synchronizer.go | 16 + pkg/synchronizer/synchronizer_test.go | 4 + 12 files changed, 2307 insertions(+) create mode 100644 pkg/synchronizer/integration_test.go create mode 100644 pkg/synchronizer/interfaces.go create mode 100644 pkg/synchronizer/ntpestimator.go create mode 100644 pkg/synchronizer/ntpestimator_test.go create mode 100644 pkg/synchronizer/participantsync.go create mode 100644 pkg/synchronizer/participantsync_test.go create mode 100644 pkg/synchronizer/sessiontimeline.go create mode 100644 pkg/synchronizer/sessiontimeline_test.go create mode 100644 pkg/synchronizer/syncengine.go create mode 100644 pkg/synchronizer/syncengine_test.go diff --git a/pkg/synchronizer/integration_test.go b/pkg/synchronizer/integration_test.go new file mode 100644 index 00000000..9f5484b5 --- /dev/null +++ b/pkg/synchronizer/integration_test.go @@ -0,0 +1,253 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestIntegration_CrossParticipantSync exercises the full SyncEngine stack +// (NtpEstimator -> SessionTimeline -> ParticipantSync -> SyncEngine) to verify +// that two participants producing audio at the same real-world time are aligned +// on the session timeline despite having different NTP clock offsets. +// +// Setup: +// - Alice: audio 48kHz, SSRC=1000, NTP clock offset = 0 (matches real time) +// - Bob: audio 48kHz, SSRC=2000, NTP clock offset = +500ms (ahead) +// - Both have real OWD of 50ms (same SFU -> egress path) +// - Session starts when Alice's first packet arrives +// +// The OWD estimator sees: +// - Alice: receivedAt - senderNTP = (realTime+50ms) - realTime = 50ms +// - Bob: receivedAt - senderNTP = (realTime+50ms) - (realTime+500ms) = -450ms +// +// The formula sessionPTS = ntpTime + OWD - sessionStart normalizes the clock +// offset because ntpTime includes the +500ms and OWD reflects the -500ms. +func TestIntegration_CrossParticipantSync(t *testing.T) { + const ( + clockRate = uint32(48000) + owd = 50 * time.Millisecond + bobNTPOffset = 500 * time.Millisecond + ) + + engine := NewSyncEngine() + + aliceTrack := newMockAudioTrack("audio-alice", 1000) + bobTrack := newMockAudioTrack("audio-bob", 2000) + + aliceTS := engine.AddTrack(aliceTrack, "alice") + bobTS := engine.AddTrack(bobTrack, "bob") + + // Session starts at a fixed base time. 
Both participants' first packets + // arrive at the same instant (same real OWD from the SFU). + baseTime := time.Date(2025, 7, 1, 12, 0, 0, 0, time.UTC) + firstArrival := baseTime.Add(owd) + + // Prime both tracks with their first packets (same arrival time). + alicePkt0 := makeExtPacket(0, 0, firstArrival) + bobPkt0 := makeExtPacket(0, 0, firstArrival) + _, _, aliceDone := aliceTS.PrimeForStart(alicePkt0) + _, _, bobDone := bobTS.PrimeForStart(bobPkt0) + require.True(t, aliceDone) + require.True(t, bobDone) + + // Feed 5 sender reports for each participant, 5 seconds apart. + // Alice's NTP = realTime (no offset), Bob's NTP = realTime + 500ms. + // Both SRs arrive at realTime + OWD. + for i := 0; i < 5; i++ { + realTime := baseTime.Add(time.Duration(i) * 5 * time.Second) + receivedAt := realTime.Add(owd) + rtpTS := uint32(i) * 5 * clockRate + + // Alice SR: NTP = realTime + aliceNTP := ntpToUint64(realTime) + aliceSR := makeSenderReport(1000, aliceNTP, rtpTS) + // Manually set receivedAt by calling OnSenderReport on the timeline directly + // since OnRTCP uses time.Now(). We need deterministic timing. + engine.timeline.OnSenderReport("alice", "audio-alice", clockRate, aliceNTP, rtpTS, receivedAt) + + // Also process through OnRTCP-like path to update ParticipantSync. + // We mimic the SR processing that OnRTCP does for ParticipantSync wiring. 
+ engine.timeline.mu.RLock() + if pc, ok := engine.timeline.participants["alice"]; ok { + if pt, ok := pc.tracks["audio-alice"]; ok { + pc.participantSync.SetTrackEstimator("audio-alice", MediaTypeAudio, pt.estimator) + pc.participantSync.OnSenderReport("audio-alice") + } + } + engine.timeline.mu.RUnlock() + _ = aliceSR // used above indirectly + + // Bob SR: NTP = realTime + 500ms (Bob's NTP clock is 500ms ahead) + bobNTP := ntpToUint64(realTime.Add(bobNTPOffset)) + engine.timeline.OnSenderReport("bob", "audio-bob", clockRate, bobNTP, rtpTS, receivedAt) + + engine.timeline.mu.RLock() + if pc, ok := engine.timeline.participants["bob"]; ok { + if pt, ok := pc.tracks["audio-bob"]; ok { + pc.participantSync.SetTrackEstimator("audio-bob", MediaTypeAudio, pt.estimator) + pc.participantSync.OnSenderReport("audio-bob") + } + } + engine.timeline.mu.RUnlock() + } + + // Get PTS for both participants at "real time + 10s" with corresponding + // RTP timestamps (10s * 48kHz = 480000). + realTimeAt10s := baseTime.Add(10 * time.Second) + receivedAtAt10s := realTimeAt10s.Add(owd) + rtpAt10s := uint32(10) * clockRate + + alicePkt := makeExtPacket(rtpAt10s, 100, receivedAtAt10s) + bobPkt := makeExtPacket(rtpAt10s, 100, receivedAtAt10s) + + alicePTS, err := aliceTS.GetPTS(alicePkt) + require.NoError(t, err) + + bobPTS, err := bobTS.GetPTS(bobPkt) + require.NoError(t, err) + + // The 500ms NTP clock difference should be normalized away by OWD estimation. 
+ diff := alicePTS - bobPTS + if diff < 0 { + diff = -diff + } + + t.Logf("Alice PTS: %v, Bob PTS: %v, diff: %v", alicePTS, bobPTS, diff) + require.Less(t, diff, 50*time.Millisecond, + "cross-participant PTS should be aligned despite 500ms NTP clock offset; alice=%v bob=%v diff=%v", + alicePTS, bobPTS, diff) +} + +// TestIntegration_AVLipSync exercises the full SyncEngine stack to verify that +// a single participant's audio and video tracks are kept in sync despite an +// 80ms video encoder delay (video NTP timestamps lag audio by 80ms in the +// sender's clock domain). +// +// Setup: +// - Audio: 48kHz, SSRC=1000 +// - Video: 90kHz, SSRC=2000 +// - Same participant "alice" +// - OWD = 50ms for both tracks +// - Video has 80ms encoder delay: video NTP = audio NTP + 80ms for same +// real-world instant (video capture is delayed by encoding pipeline) +// +// The ParticipantSync detects the A/V NTP offset and applies a slew-limited +// correction on the video track to bring them into alignment. +func TestIntegration_AVLipSync(t *testing.T) { + const ( + audioClockRate = uint32(48000) + videoClockRate = uint32(90000) + owd = 50 * time.Millisecond + videoEncoderDelay = 80 * time.Millisecond + ) + + engine := NewSyncEngine() + + audioTrack := newMockAudioTrack("audio-alice", 1000) + videoTrack := newMockVideoTrack("video-alice", 2000) + + audioTS := engine.AddTrack(audioTrack, "alice") + videoTS := engine.AddTrack(videoTrack, "alice") + + baseTime := time.Date(2025, 7, 1, 12, 0, 0, 0, time.UTC) + firstArrival := baseTime.Add(owd) + + // Prime both tracks. + audioPkt0 := makeExtPacket(0, 0, firstArrival) + videoPkt0 := makeExtPacket(0, 0, firstArrival) + _, _, audioDone := audioTS.PrimeForStart(audioPkt0) + _, _, videoDone := videoTS.PrimeForStart(videoPkt0) + require.True(t, audioDone) + require.True(t, videoDone) + + // Feed 5 SRs for audio and video, 5 seconds apart. 
+ // Audio: NTP = baseNtp + i*5s, RTP = i * 5 * audioClockRate + // Video: NTP = baseNtp + i*5s + 80ms (encoder delay), RTP = i * 5 * videoClockRate + for i := 0; i < 5; i++ { + srTime := baseTime.Add(time.Duration(i) * 5 * time.Second) + receivedAt := srTime.Add(owd) + + audioRTP := uint32(i) * 5 * audioClockRate + audioNTP := ntpToUint64(srTime) + engine.timeline.OnSenderReport("alice", "audio-alice", audioClockRate, audioNTP, audioRTP, receivedAt) + + videoRTP := uint32(i) * 5 * videoClockRate + videoNTP := ntpToUint64(srTime.Add(videoEncoderDelay)) + engine.timeline.OnSenderReport("alice", "video-alice", videoClockRate, videoNTP, videoRTP, receivedAt) + + // Wire up ParticipantSync with latest estimators and trigger SR processing. + engine.timeline.mu.RLock() + if pc, ok := engine.timeline.participants["alice"]; ok { + if pt, ok := pc.tracks["audio-alice"]; ok { + pc.participantSync.SetTrackEstimator("audio-alice", MediaTypeAudio, pt.estimator) + pc.participantSync.OnSenderReport("audio-alice") + } + if pt, ok := pc.tracks["video-alice"]; ok { + pc.participantSync.SetTrackEstimator("video-alice", MediaTypeVideo, pt.estimator) + pc.participantSync.OnSenderReport("video-alice") + } + + // Drive slew adjustments with elapsed session time. + startedAt := engine.startedAt.Load() + if startedAt > 0 { + elapsed := time.Duration(receivedAt.UnixNano() - startedAt) + pc.participantSync.updateAdjustments(elapsed) + } + } + engine.timeline.mu.RUnlock() + } + + // Push multiple packets through GetPTS to drive the transition slew + // and allow the sync engine's per-call slew to converge. 
+ for i := 1; i <= 200; i++ { + recvAt := firstArrival.Add(time.Duration(i) * 20 * time.Millisecond) + audioRTP := uint32(i) * 960 // 20ms at 48kHz + videoRTP := uint32(i) * 1800 // 20ms at 90kHz + + aPkt := makeExtPacket(audioRTP, uint16(i), recvAt) + vPkt := makeExtPacket(videoRTP, uint16(i), recvAt) + + audioTS.GetPTS(aPkt) + videoTS.GetPTS(vPkt) + } + + // Get PTS for audio at RTP=480000 (10s at 48kHz) and video at RTP=900000 (10s at 90kHz). + recvAt10s := firstArrival.Add(10 * time.Second) + audioPktFinal := makeExtPacket(10*audioClockRate, 500, recvAt10s) + videoPktFinal := makeExtPacket(10*videoClockRate, 500, recvAt10s) + + audioPTS, err := audioTS.GetPTS(audioPktFinal) + require.NoError(t, err) + + videoPTS, err := videoTS.GetPTS(videoPktFinal) + require.NoError(t, err) + + // The 80ms encoder delay should be corrected (or mostly corrected) by + // ParticipantSync's slew-limited adjustment. Allow 100ms tolerance to + // account for slew rate convergence. + diff := audioPTS - videoPTS + if diff < 0 { + diff = -diff + } + + t.Logf("Audio PTS: %v, Video PTS: %v, diff: %v", audioPTS, videoPTS, diff) + require.Less(t, diff, 100*time.Millisecond, + "A/V lip sync should be within 100ms after convergence; audio=%v video=%v diff=%v", + audioPTS, videoPTS, diff) +} diff --git a/pkg/synchronizer/interfaces.go b/pkg/synchronizer/interfaces.go new file mode 100644 index 00000000..ada94dbb --- /dev/null +++ b/pkg/synchronizer/interfaces.go @@ -0,0 +1,45 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "time" + + "github.com/pion/rtcp" + + "github.com/livekit/media-sdk/jitter" +) + +// Sync is the top-level synchronization interface. +// Implemented by both Synchronizer (legacy) and SyncEngine (new). +type Sync interface { + AddTrack(track TrackRemote, identity string) TrackSync + RemoveTrack(trackID string) + OnRTCP(packet rtcp.Packet) + End() + GetStartedAt() int64 + GetEndedAt() int64 + SetMediaRunningTime(mediaRunningTime func() (time.Duration, bool)) +} + +// TrackSync is the per-track synchronization interface. +// Implemented by both TrackSynchronizer (legacy) and syncEngineTrack (new). +type TrackSync interface { + PrimeForStart(pkt jitter.ExtPacket) ([]jitter.ExtPacket, int, bool) + GetPTS(pkt jitter.ExtPacket) (time.Duration, error) + OnSenderReport(f func(drift time.Duration)) + LastPTSAdjusted() time.Duration + Close() +} diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go new file mode 100644 index 00000000..4a489a9a --- /dev/null +++ b/pkg/synchronizer/ntpestimator.go @@ -0,0 +1,256 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "errors" + "math" + "time" +) + +const ( + // maxSRSamples is the sliding window size for sender report pairs. 
+ maxSRSamples = 20 + + // outlierThresholdStdDevs is the number of standard deviations beyond which + // a new SR is considered an outlier and excluded from the regression. + outlierThresholdStdDevs = 3.0 + + // ntpEpochOffset is the number of seconds between the NTP epoch (1900-01-01) + // and the Unix epoch (1970-01-01). + ntpEpochOffset = 2208988800 +) + +var errNotReady = errors.New("NtpEstimator: not enough sender reports for regression (need >= 2)") + +// srSample holds one sender report observation used in the regression. +type srSample struct { + unwrappedRTP int64 // RTP timestamp unwrapped to 64-bit + ntpNanos int64 // NTP wall-clock in nanoseconds since Unix epoch + receivedAt time.Time +} + +// NtpEstimator maintains a linear regression over a sliding window of RTCP +// sender report pairs to map RTP timestamps to NTP time. It is modeled after +// Chrome's RtpToNtpEstimator. +type NtpEstimator struct { + clockRate uint32 + + // Circular buffer of SR samples. These fields are unexported but + // package-accessible so that ParticipantSync (same package) can read + // the most recent sample. + samples [maxSRSamples]srSample + sampleLen int // number of valid samples in the buffer (0..maxSRSamples) + sampleHead int // index of the next write position + + // RTP unwrapping state + lastRTP uint32 + rtpOffset int64 // cumulative offset from wraparounds + hasLastRTP bool + + // Regression results (valid when sampleLen >= 2) + // The internal model is: ntpNanos = slopeNanos * (unwrappedRTP - meanX) + meanY + // where slopeNanos is nanos per RTP tick. + slopeNanos float64 // nanos of NTP time per RTP tick + meanX float64 // mean of unwrapped RTP values in the current window + meanY float64 // mean of NTP nanos values in the current window + residStd float64 // residual standard deviation in NTP nanos + ready bool +} + +// NewNtpEstimator creates an NtpEstimator for a codec with the given clock rate. 
+func NewNtpEstimator(clockRate uint32) *NtpEstimator { + return &NtpEstimator{ + clockRate: clockRate, + } +} + +// OnSenderReport ingests a new RTCP sender report observation. +// ntpTime is the 64-bit NTP timestamp from the SR, rtpTimestamp is the +// corresponding RTP timestamp, and receivedAt is the local wall-clock time +// when the SR was received. +func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { + ntpNanos := ntpTimestampToNanos(ntpTime) + unwrapped := e.unwrapRTP(rtpTimestamp) + + // Outlier rejection: if we already have a valid regression, check whether + // this new sample deviates from the prediction by more than 3 standard + // deviations. + if e.ready && e.residStd > 0 { + predicted := e.slopeNanos*(float64(unwrapped)-e.meanX) + e.meanY + residual := math.Abs(float64(ntpNanos) - predicted) + if residual > outlierThresholdStdDevs*e.residStd { + // Reject this sample as an outlier. + return + } + } + + // Write into circular buffer. + e.samples[e.sampleHead] = srSample{ + unwrappedRTP: unwrapped, + ntpNanos: ntpNanos, + receivedAt: receivedAt, + } + e.sampleHead = (e.sampleHead + 1) % maxSRSamples + if e.sampleLen < maxSRSamples { + e.sampleLen++ + } + + // Recompute regression if we have enough samples. + if e.sampleLen >= 2 { + e.computeRegression() + e.ready = true + } +} + +// IsReady returns true once at least 2 sender reports have been processed +// and the regression is valid. +func (e *NtpEstimator) IsReady() bool { + return e.ready +} + +// RtpToNtp maps an RTP timestamp to wall-clock time using the current regression. +func (e *NtpEstimator) RtpToNtp(rtpTimestamp uint32) (time.Time, error) { + if !e.ready { + return time.Time{}, errNotReady + } + + unwrapped := e.unwrapRTPQuery(rtpTimestamp) + ntpNanos := e.slopeNanos*(float64(unwrapped)-e.meanX) + e.meanY + return nanosToTime(int64(math.Round(ntpNanos))), nil +} + +// Slope returns the regression slope: seconds of NTP time per RTP tick. 
+// For a perfect clock this equals 1/clockRate. +func (e *NtpEstimator) Slope() float64 { + return e.slopeNanos / 1e9 +} + +// computeRegression performs ordinary least squares on the current samples +// using centered data to preserve float64 precision. +// Model: ntpNanos = slopeNanos * (unwrappedRTP - meanX) + meanY +func (e *NtpEstimator) computeRegression() { + n := float64(e.sampleLen) + + // First pass: compute means for centering. + var sumX, sumY float64 + e.iterSamples(func(s srSample) { + sumX += float64(s.unwrappedRTP) + sumY += float64(s.ntpNanos) + }) + mX := sumX / n + mY := sumY / n + + // Second pass: compute centered sums for regression. + var sumDxDx, sumDxDy float64 + e.iterSamples(func(s srSample) { + dx := float64(s.unwrappedRTP) - mX + dy := float64(s.ntpNanos) - mY + sumDxDx += dx * dx + sumDxDy += dx * dy + }) + + if sumDxDx == 0 { + // Degenerate case: all RTP timestamps identical. + return + } + + e.slopeNanos = sumDxDy / sumDxDx + e.meanX = mX + e.meanY = mY + + // Compute residual standard deviation. + var sumResidSq float64 + e.iterSamples(func(s srSample) { + predicted := e.slopeNanos*(float64(s.unwrappedRTP)-mX) + mY + r := float64(s.ntpNanos) - predicted + sumResidSq += r * r + }) + + if e.sampleLen > 2 { + e.residStd = math.Sqrt(sumResidSq / (n - 2)) + } else { + // With exactly 2 points the regression is exact; use a small positive + // value so that the 3-sigma check is not trivially zero. + e.residStd = math.Sqrt(sumResidSq / n) + } +} + +// iterSamples calls fn for each valid sample in the circular buffer. +func (e *NtpEstimator) iterSamples(fn func(srSample)) { + start := 0 + if e.sampleLen == maxSRSamples { + start = e.sampleHead // oldest entry is at head when buffer is full + } + for i := 0; i < e.sampleLen; i++ { + idx := (start + i) % maxSRSamples + fn(e.samples[idx]) + } +} + +// unwrapRTP unwraps a 32-bit RTP timestamp to a 64-bit value, tracking +// forward/backward jumps via signed diff. 
This is used when ingesting SRs +// to maintain the running unwrap state. +func (e *NtpEstimator) unwrapRTP(rtpTS uint32) int64 { + if !e.hasLastRTP { + e.hasLastRTP = true + e.lastRTP = rtpTS + e.rtpOffset = 0 + return int64(rtpTS) + } + + diff := int32(rtpTS - e.lastRTP) + if diff > 0 && rtpTS < e.lastRTP { + // Forward jump that crossed the uint32 boundary. + e.rtpOffset += 1 << 32 + } else if diff < 0 && rtpTS > e.lastRTP { + // Backward jump that crossed the uint32 boundary. + e.rtpOffset -= 1 << 32 + } + + e.lastRTP = rtpTS + return e.rtpOffset + int64(rtpTS) +} + +// unwrapRTPQuery unwraps an RTP timestamp for a query (RtpToNtp) without +// mutating the unwrap state. It uses the current offset tracked from SRs. +func (e *NtpEstimator) unwrapRTPQuery(rtpTS uint32) int64 { + if !e.hasLastRTP { + return int64(rtpTS) + } + + offset := e.rtpOffset + diff := int32(rtpTS - e.lastRTP) + if diff > 0 && rtpTS < e.lastRTP { + offset += 1 << 32 + } else if diff < 0 && rtpTS > e.lastRTP { + offset -= 1 << 32 + } + return offset + int64(rtpTS) +} + +// ntpTimestampToNanos converts a 64-bit NTP timestamp to nanoseconds since +// the Unix epoch. +func ntpTimestampToNanos(ntpTS uint64) int64 { + secs := int64(ntpTS>>32) - ntpEpochOffset + frac := ntpTS & 0xFFFFFFFF + nanos := int64(frac) * 1e9 / (1 << 32) + return secs*1e9 + nanos +} + +// nanosToTime converts nanoseconds since the Unix epoch to a time.Time. +func nanosToTime(nanos int64) time.Time { + return time.Unix(0, nanos) +} diff --git a/pkg/synchronizer/ntpestimator_test.go b/pkg/synchronizer/ntpestimator_test.go new file mode 100644 index 00000000..5b87a4e7 --- /dev/null +++ b/pkg/synchronizer/ntpestimator_test.go @@ -0,0 +1,224 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "math" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// ntpToUint64 converts a time.Time to a 64-bit NTP timestamp. +// Upper 32 bits = seconds since NTP epoch (1900-01-01), +// lower 32 bits = fractional seconds. +func ntpToUint64(t time.Time) uint64 { + const ntpEpochOffset = 2208988800 + secs := uint64(t.Unix()) + ntpEpochOffset + frac := uint64(t.Nanosecond()) * (1 << 32) / 1e9 + return secs<<32 | frac +} + +func TestNtpEstimator_NotReadyBeforeTwoSRs(t *testing.T) { + e := NewNtpEstimator(90000) + + // Zero SRs: not ready + require.False(t, e.IsReady(), "should not be ready with 0 SRs") + + _, err := e.RtpToNtp(1000) + require.Error(t, err, "RtpToNtp should error when not ready") + + // One SR: still not ready + now := time.Now() + e.OnSenderReport(ntpToUint64(now), 90000, now) + require.False(t, e.IsReady(), "should not be ready with 1 SR") + + _, err = e.RtpToNtp(90000) + require.Error(t, err, "RtpToNtp should error with only 1 SR") + + // Two SRs: ready + now2 := now.Add(time.Second) + e.OnSenderReport(ntpToUint64(now2), 180000, now2) + require.True(t, e.IsReady(), "should be ready with 2 SRs") + + _, err = e.RtpToNtp(135000) + require.NoError(t, err, "RtpToNtp should succeed when ready") +} + +func TestNtpEstimator_AccurateMapping(t *testing.T) { + const clockRate = 90000 + e := NewNtpEstimator(clockRate) + + baseTime := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) + + // Feed 10 perfect SRs at 1-second intervals + for i := 0; i < 10; i++ { + wallTime := 
baseTime.Add(time.Duration(i) * time.Second) + rtpTS := uint32(i) * clockRate + e.OnSenderReport(ntpToUint64(wallTime), rtpTS, wallTime) + } + + require.True(t, e.IsReady()) + + // Verify mapping at intermediate points + for _, tc := range []struct { + name string + rtpTS uint32 + wantNTP time.Time + }{ + {"at SR 0", 0, baseTime}, + {"at SR 5", 5 * clockRate, baseTime.Add(5 * time.Second)}, + {"between SR 2 and 3", uint32(2.5 * clockRate), baseTime.Add(2500 * time.Millisecond)}, + {"at SR 9", 9 * clockRate, baseTime.Add(9 * time.Second)}, + } { + t.Run(tc.name, func(t *testing.T) { + got, err := e.RtpToNtp(tc.rtpTS) + require.NoError(t, err) + diff := got.Sub(tc.wantNTP) + if diff < 0 { + diff = -diff + } + require.Less(t, diff, time.Millisecond, + "mapping off by %v; got %v, want %v", diff, got, tc.wantNTP) + }) + } +} + +func TestNtpEstimator_OutlierRejection(t *testing.T) { + const clockRate = 90000 + e := NewNtpEstimator(clockRate) + + baseTime := time.Date(2025, 6, 15, 10, 0, 0, 0, time.UTC) + + // Feed 5 good SRs at 1-second intervals + for i := 0; i < 5; i++ { + wallTime := baseTime.Add(time.Duration(i) * time.Second) + rtpTS := uint32(i) * clockRate + e.OnSenderReport(ntpToUint64(wallTime), rtpTS, wallTime) + } + + require.True(t, e.IsReady()) + + // Feed 1 wildly wrong SR: RTP says 5 seconds but NTP says 50 seconds + badWallTime := baseTime.Add(50 * time.Second) + badRTP := uint32(5) * clockRate + e.OnSenderReport(ntpToUint64(badWallTime), badRTP, badWallTime) + + // Verify mapping is still accurate (outlier should have been rejected) + got, err := e.RtpToNtp(uint32(2.5 * clockRate)) + require.NoError(t, err) + want := baseTime.Add(2500 * time.Millisecond) + diff := got.Sub(want) + if diff < 0 { + diff = -diff + } + require.Less(t, diff, time.Millisecond, + "mapping should be accurate despite outlier; off by %v", diff) +} + +func TestNtpEstimator_Wraparound(t *testing.T) { + const clockRate = 90000 + e := NewNtpEstimator(clockRate) + + baseTime := 
time.Date(2025, 3, 1, 0, 0, 0, 0, time.UTC) + + // Start RTP near uint32 max so wraparound occurs + // math.MaxUint32 - 5*clockRate puts us 5 seconds before wrap + startRTP := uint32(math.MaxUint32 - 5*clockRate) + + for i := 0; i < 10; i++ { + wallTime := baseTime.Add(time.Duration(i) * time.Second) + rtpTS := startRTP + uint32(i)*clockRate // will wrap around uint32 + e.OnSenderReport(ntpToUint64(wallTime), rtpTS, wallTime) + } + + require.True(t, e.IsReady()) + + // Test mapping at points before and after the wraparound + // SR at i=5 is exactly where RTP wraps past 0 + for _, tc := range []struct { + name string + idx int + wantNTP time.Time + }{ + {"before wrap (i=3)", 3, baseTime.Add(3 * time.Second)}, + {"at wrap (i=5)", 5, baseTime.Add(5 * time.Second)}, + {"after wrap (i=8)", 8, baseTime.Add(8 * time.Second)}, + } { + t.Run(tc.name, func(t *testing.T) { + rtpTS := startRTP + uint32(tc.idx)*clockRate + got, err := e.RtpToNtp(rtpTS) + require.NoError(t, err) + diff := got.Sub(tc.wantNTP) + if diff < 0 { + diff = -diff + } + require.Less(t, diff, time.Millisecond, + "mapping off by %v across wraparound; got %v, want %v", diff, got, tc.wantNTP) + }) + } +} + +func TestNtpEstimator_SlidingWindow(t *testing.T) { + const clockRate = 90000 + e := NewNtpEstimator(clockRate) + + baseTime := time.Date(2025, 4, 1, 0, 0, 0, 0, time.UTC) + + // Feed 25 SRs (exceeds window of 20) + for i := 0; i < 25; i++ { + wallTime := baseTime.Add(time.Duration(i) * time.Second) + rtpTS := uint32(i) * clockRate + e.OnSenderReport(ntpToUint64(wallTime), rtpTS, wallTime) + } + + require.True(t, e.IsReady()) + + // Verify mapping still works accurately in the recent window + got, err := e.RtpToNtp(uint32(22) * clockRate) + require.NoError(t, err) + want := baseTime.Add(22 * time.Second) + diff := got.Sub(want) + if diff < 0 { + diff = -diff + } + require.Less(t, diff, time.Millisecond, + "mapping should be accurate after sliding window overflow; off by %v", diff) +} + +func 
TestNtpEstimator_Slope(t *testing.T) { + const clockRate = 90000 + e := NewNtpEstimator(clockRate) + + baseTime := time.Date(2025, 5, 1, 0, 0, 0, 0, time.UTC) + + // Feed 5 perfect SRs + for i := 0; i < 5; i++ { + wallTime := baseTime.Add(time.Duration(i) * time.Second) + rtpTS := uint32(i) * clockRate + e.OnSenderReport(ntpToUint64(wallTime), rtpTS, wallTime) + } + + require.True(t, e.IsReady()) + + // Slope should be close to 1/clockRate (seconds per RTP tick) + expectedSlope := 1.0 / float64(clockRate) + gotSlope := e.Slope() + + relError := math.Abs(gotSlope-expectedSlope) / expectedSlope + require.Less(t, relError, 1e-6, + "slope should be ~%e, got %e (relative error %e)", expectedSlope, gotSlope, relError) +} diff --git a/pkg/synchronizer/participantsync.go b/pkg/synchronizer/participantsync.go new file mode 100644 index 00000000..88f204ae --- /dev/null +++ b/pkg/synchronizer/participantsync.go @@ -0,0 +1,217 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "sync" + "time" +) + +// MediaType distinguishes audio and video tracks for A/V sync purposes. +type MediaType int + +const ( + MediaTypeAudio MediaType = iota + MediaTypeVideo +) + +const ( + // slewRatePerSecond is the maximum adjustment slew: 5ms per second of real time. + slewRatePerSecond = 5 * time.Millisecond + + // deadbandThreshold is the minimum |offset| before any correction is applied. 
+ deadbandThreshold = 5 * time.Millisecond +) + +// trackEntry holds per-track state within a ParticipantSync. +type trackEntry struct { + mediaType MediaType + estimator *NtpEstimator + adjustment time.Duration // current playout delay adjustment +} + +// ParticipantSync compares NTP estimates across a participant's audio and video +// tracks to compute A/V playout delay adjustments with time-based slew rate +// limiting. This is the equivalent of Chrome's StreamSynchronization. +// +// Audio is the reference track; video absorbs the correction. +type ParticipantSync struct { + mu sync.Mutex + tracks map[string]*trackEntry + + // lastSessionTime records the session time from the most recent + // updateAdjustments call, used to compute elapsed time for slew limiting. + lastSessionTime time.Duration + initialized bool // true after first updateAdjustments call + + // targetOffset is the desired total video adjustment (negative means + // video should be delayed relative to audio). + targetOffset time.Duration + + // currentOffset tracks the slew-limited offset applied so far. + currentOffset time.Duration +} + +// NewParticipantSync creates a new ParticipantSync instance. +func NewParticipantSync() *ParticipantSync { + return &ParticipantSync{ + tracks: make(map[string]*trackEntry), + } +} + +// SetTrackEstimator registers or updates the NtpEstimator for a given track. +func (ps *ParticipantSync) SetTrackEstimator(trackID string, mediaType MediaType, estimator *NtpEstimator) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.tracks[trackID] = &trackEntry{ + mediaType: mediaType, + estimator: estimator, + } +} + +// RemoveTrack removes a track and resets its adjustment. +func (ps *ParticipantSync) RemoveTrack(trackID string) { + ps.mu.Lock() + defer ps.mu.Unlock() + delete(ps.tracks, trackID) + // Reset sync state since we may no longer have both audio and video. + ps.targetOffset = 0 + ps.currentOffset = 0 + // Clear adjustments on remaining tracks. 
+ for _, entry := range ps.tracks { + entry.adjustment = 0 + } +} + +// OnSenderReport is called when new SR data arrives for a track. It triggers +// recomputation of the A/V offset target. +func (ps *ParticipantSync) OnSenderReport(trackID string) { + ps.mu.Lock() + defer ps.mu.Unlock() + ps.recomputeTarget() +} + +// GetAdjustment returns the current playout delay adjustment for a track. +// Returns zero if the track is not registered or estimators are not ready. +func (ps *ParticipantSync) GetAdjustment(trackID string) time.Duration { + ps.mu.Lock() + defer ps.mu.Unlock() + entry, ok := ps.tracks[trackID] + if !ok { + return 0 + } + return entry.adjustment +} + +// updateAdjustments is called periodically (by SyncEngine) to drive the +// time-based slew toward the target offset. sessionTime is the elapsed +// session time (monotonically increasing). +func (ps *ParticipantSync) updateAdjustments(sessionTime time.Duration) { + ps.mu.Lock() + defer ps.mu.Unlock() + if !ps.initialized { + ps.lastSessionTime = sessionTime + ps.initialized = true + // Recompute on first call in case SRs arrived before the first tick. + ps.recomputeTarget() + ps.applyAdjustments() + return + } + + elapsed := sessionTime - ps.lastSessionTime + ps.lastSessionTime = sessionTime + + if elapsed <= 0 { + return + } + + // Compute the maximum slew for this interval. + maxSlew := time.Duration(float64(slewRatePerSecond) * float64(elapsed) / float64(time.Second)) + + // Move currentOffset toward targetOffset, bounded by maxSlew. + diff := ps.targetOffset - ps.currentOffset + if diff > 0 { + if diff > maxSlew { + diff = maxSlew + } + ps.currentOffset += diff + } else if diff < 0 { + if -diff > maxSlew { + diff = -maxSlew + } + ps.currentOffset += diff + } + + ps.applyAdjustments() +} + +// recomputeTarget recalculates the target A/V offset from the latest NTP +// samples of the audio and video estimators. 
+func (ps *ParticipantSync) recomputeTarget() { + audioNTP, audioOK := ps.latestNTP(MediaTypeAudio) + videoNTP, videoOK := ps.latestNTP(MediaTypeVideo) + + if !audioOK || !videoOK { + return + } + + // offset = video NTP - audio NTP. + // If positive, video's NTP is ahead of audio's, meaning video needs to be + // delayed (negative adjustment) to align with audio. + offset := videoNTP - audioNTP + + // Apply deadband: if the offset is small enough, treat it as zero. + if offset > -deadbandThreshold && offset < deadbandThreshold { + ps.targetOffset = 0 + return + } + + // Video absorbs the correction: negate the offset so that a positive NTP + // difference becomes a negative (delay) adjustment on video. + ps.targetOffset = -offset +} + +// latestNTP returns the NTP time of the most recent SR sample for the first +// ready estimator of the given media type. The second return value is false if +// no ready estimator of that type exists. +func (ps *ParticipantSync) latestNTP(mt MediaType) (time.Duration, bool) { + for _, entry := range ps.tracks { + if entry.mediaType != mt || entry.estimator == nil || !entry.estimator.IsReady() { + continue + } + if entry.estimator.sampleLen == 0 { + continue + } + + // The most recent sample is at (sampleHead - 1 + maxSRSamples) % maxSRSamples. + idx := (entry.estimator.sampleHead - 1 + maxSRSamples) % maxSRSamples + s := entry.estimator.samples[idx] + return time.Duration(s.ntpNanos), true + } + return 0, false +} + +// applyAdjustments distributes the current slew-limited offset to the +// appropriate tracks. Audio gets zero; video gets the correction. 
+func (ps *ParticipantSync) applyAdjustments() { + for _, entry := range ps.tracks { + switch entry.mediaType { + case MediaTypeAudio: + entry.adjustment = 0 + case MediaTypeVideo: + entry.adjustment = ps.currentOffset + } + } +} diff --git a/pkg/synchronizer/participantsync_test.go b/pkg/synchronizer/participantsync_test.go new file mode 100644 index 00000000..ddd138cf --- /dev/null +++ b/pkg/synchronizer/participantsync_test.go @@ -0,0 +1,185 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// readyEstimator creates an NtpEstimator pre-loaded with `count` sender reports +// so that it is ready for use. The SR samples are spaced 5 seconds apart in both +// NTP and RTP time. +func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count int) *NtpEstimator { + e := NewNtpEstimator(clockRate) + for i := 0; i < count; i++ { + ntpTime := baseNtp.Add(time.Duration(i) * 5 * time.Second) + rtpTS := baseRtp + uint32(i)*uint32(clockRate)*5 + e.OnSenderReport(ntpToUint64(ntpTime), rtpTS, ntpTime.Add(30*time.Millisecond)) + } + return e +} + +func TestParticipantSync_NoAdjustmentBeforeReady(t *testing.T) { + ps := NewParticipantSync() + + // Register a non-ready estimator (only 1 SR, need >= 2). 
// readyEstimator creates an NtpEstimator pre-loaded with `count` sender reports
// so that it is ready for use. The SR samples are spaced 5 seconds apart in both
// NTP and RTP time, and each one is marked as received 30ms after its NTP time.
func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count int) *NtpEstimator {
	e := NewNtpEstimator(clockRate)
	for i := 0; i < count; i++ {
		ntpTime := baseNtp.Add(time.Duration(i) * 5 * time.Second)
		rtpTS := baseRtp + uint32(i)*uint32(clockRate)*5
		e.OnSenderReport(ntpToUint64(ntpTime), rtpTS, ntpTime.Add(30*time.Millisecond))
	}
	return e
}

// TestParticipantSync_NoAdjustmentBeforeReady verifies that no adjustment is
// produced while the registered estimators are not ready.
func TestParticipantSync_NoAdjustmentBeforeReady(t *testing.T) {
	ps := NewParticipantSync()

	// Register a non-ready estimator (only 1 SR, need >= 2).
	e := NewNtpEstimator(90000)
	baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)
	e.OnSenderReport(ntpToUint64(baseNtp), 0, baseNtp.Add(30*time.Millisecond))

	require.False(t, e.IsReady(), "estimator should not be ready with 1 SR")

	ps.SetTrackEstimator("audio-1", MediaTypeAudio, e)
	ps.SetTrackEstimator("video-1", MediaTypeVideo, NewNtpEstimator(90000))

	ps.OnSenderReport("audio-1")
	ps.updateAdjustments(time.Second)

	require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"),
		"audio adjustment should be zero before estimator is ready")
	require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"),
		"video adjustment should be zero before estimator is ready")
}

// TestParticipantSync_DeadbandSuppressesSmallOffset verifies that tracks whose
// NTP samples coincide (offset inside the deadband) get no adjustment.
func TestParticipantSync_DeadbandSuppressesSmallOffset(t *testing.T) {
	ps := NewParticipantSync()

	baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)

	// Audio and video with the same NTP base => perfectly aligned.
	audioEst := readyEstimator(48000, baseNtp, 0, 5)
	videoEst := readyEstimator(90000, baseNtp, 0, 5)

	ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst)
	ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst)

	ps.OnSenderReport("audio-1")
	ps.OnSenderReport("video-1")

	// Drive enough updates to let any adjustment converge.
	for i := 0; i < 100; i++ {
		ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond)
	}

	require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"),
		"audio adjustment should be zero when perfectly aligned")
	require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"),
		"video adjustment should be zero when perfectly aligned")
}

// TestParticipantSync_VideoAdjustsToMatchAudio verifies that, given enough
// session time, video converges to the full correction while audio stays at
// zero.
func TestParticipantSync_VideoAdjustsToMatchAudio(t *testing.T) {
	ps := NewParticipantSync()

	baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)

	// Audio starts at baseNtp, video starts 100ms later in NTP.
	// This means video's NTP clock is 100ms ahead of audio's.
	// The offset (video NTP - audio NTP) = +100ms.
	// Since audio is reference, video must be delayed by -100ms.
	audioEst := readyEstimator(48000, baseNtp, 0, 5)
	videoEst := readyEstimator(90000, baseNtp.Add(100*time.Millisecond), 0, 5)

	ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst)
	ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst)

	ps.OnSenderReport("audio-1")
	ps.OnSenderReport("video-1")

	// Drive enough time to converge: 100ms offset / 5ms per second = 20 seconds.
	// Use more (30 seconds of session time) to ensure full convergence.
	for i := 0; i <= 300; i++ {
		ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond)
	}

	audioAdj := ps.GetAdjustment("audio-1")
	videoAdj := ps.GetAdjustment("video-1")

	// Audio is the reference, should stay near zero.
	require.InDelta(t, 0, float64(audioAdj), float64(time.Millisecond),
		"audio adjustment should be near zero, got %v", audioAdj)

	// Video should get approximately -100ms adjustment.
	require.InDelta(t, float64(-100*time.Millisecond), float64(videoAdj), float64(10*time.Millisecond),
		"video adjustment should be ~-100ms, got %v", videoAdj)
}

// TestParticipantSync_SlewRateIsTimeBased verifies that the applied correction
// is bounded by elapsed session time rather than by the number of updates.
func TestParticipantSync_SlewRateIsTimeBased(t *testing.T) {
	ps := NewParticipantSync()

	baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)

	// 200ms offset: video NTP is 200ms ahead of audio.
	audioEst := readyEstimator(48000, baseNtp, 0, 5)
	videoEst := readyEstimator(90000, baseNtp.Add(200*time.Millisecond), 0, 5)

	ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst)
	ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst)

	ps.OnSenderReport("audio-1")
	ps.OnSenderReport("video-1")

	// Drive exactly 1 second of session time (slew rate = 5ms/s, so max adjustment = 5ms after 1s).
	// The first call only establishes the reference point; the second one slews.
	ps.updateAdjustments(0)
	ps.updateAdjustments(time.Second)

	videoAdj := ps.GetAdjustment("video-1")

	// After 1 second of session time at 5ms/s slew, adjustment magnitude should be <= ~5ms.
	// We add a small tolerance (1ms).
	require.LessOrEqual(t, -videoAdj, 6*time.Millisecond,
		"after 1s, video adjustment magnitude should be <= ~5ms due to slew rate, got %v", videoAdj)
	require.Greater(t, -videoAdj, time.Duration(0),
		"video adjustment should be non-zero after 1s with 200ms offset, got %v", videoAdj)
}

// TestParticipantSync_RemoveTrack verifies that removing a track clears its
// adjustment and leaves the remaining audio track at zero.
func TestParticipantSync_RemoveTrack(t *testing.T) {
	ps := NewParticipantSync()

	baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC)

	audioEst := readyEstimator(48000, baseNtp, 0, 5)
	videoEst := readyEstimator(90000, baseNtp.Add(100*time.Millisecond), 0, 5)

	ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst)
	ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst)

	ps.OnSenderReport("audio-1")
	ps.OnSenderReport("video-1")

	// Drive some updates so adjustments are non-zero.
	for i := 0; i <= 50; i++ {
		ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond)
	}

	// Verify video has some non-zero adjustment.
	require.NotEqual(t, time.Duration(0), ps.GetAdjustment("video-1"),
		"video adjustment should be non-zero before removal")

	// Remove the video track.
	ps.RemoveTrack("video-1")

	// After removal, GetAdjustment should return 0 (unknown tracks report zero).
	require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"),
		"video adjustment should be zero after removal")

	// Audio should also return zero since there's no counterpart to sync against.
	// (But audio adjustment was always zero since audio is the reference.)
	require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"),
		"audio adjustment should be zero (reference track)")
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "errors" + "fmt" + "sync" + "time" + + "github.com/livekit/mediatransportutil/pkg/latency" +) + +var errNoSenderReports = errors.New("SessionTimeline: no sender reports received for track") + +// participantTrack holds per-track state within a ParticipantClock. +type participantTrack struct { + estimator *NtpEstimator + trackID string +} + +// ParticipantClock holds OWD and NTP estimation state for a single participant. +type ParticipantClock struct { + owdEstimator *latency.OWDEstimator + participantSync *ParticipantSync + tracks map[string]*participantTrack + ntpEpoch time.Time // NTP time from first SR + hasEpoch bool +} + +// SessionTimeline establishes a shared recording timeline and maps each +// participant's NTP clock domain onto it using OWD (one-way delay) +// normalization. This is the key component that fixes cross-participant +// misalignment. +// +// Algorithm: +// 1. Each SR provides a pair: (senderNtpTime, receivedAtWallClock). The +// difference is the one-way delay (OWD). +// 2. Using the OWDEstimator, estimate each participant's OWD. The min +// observed OWD approximates true propagation delay. +// 3. 
To map a participant's RTP timestamp to the session timeline: +// sessionPTS = ntpEstimator.RtpToNtp(rtpTS) - participantNtpEpoch + (epochOnReceiverClock - sessionStart) +// Where: +// - participantNtpEpoch = NTP time from first SR for this participant +// - epochOnReceiverClock = participantNtpEpoch + estimatedOWD (maps epoch to receiver clock) +// - sessionStart = wall-clock time first packet of any track arrived +type SessionTimeline struct { + mu sync.RWMutex + participants map[string]*ParticipantClock + sessionStart time.Time + hasStart bool +} + +// NewSessionTimeline creates a new SessionTimeline. +func NewSessionTimeline() *SessionTimeline { + return &SessionTimeline{ + participants: make(map[string]*ParticipantClock), + } +} + +// SetSessionStart sets the session start time (wall-clock time when the first +// packet of any track arrived at the receiver). +func (st *SessionTimeline) SetSessionStart(t time.Time) { + st.mu.Lock() + defer st.mu.Unlock() + st.sessionStart = t + st.hasStart = true +} + +// AddParticipant registers a new participant with the given identity. +func (st *SessionTimeline) AddParticipant(identity string) *ParticipantClock { + st.mu.Lock() + defer st.mu.Unlock() + + pc := &ParticipantClock{ + owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), + participantSync: NewParticipantSync(), + tracks: make(map[string]*participantTrack), + } + st.participants[identity] = pc + return pc +} + +// RemoveParticipant removes the participant with the given identity. +func (st *SessionTimeline) RemoveParticipant(identity string) { + st.mu.Lock() + defer st.mu.Unlock() + delete(st.participants, identity) +} + +// OnSenderReport processes an RTCP sender report for a participant's track. +// It updates the NTP estimator, OWD estimator, and records the NTP epoch. 
+func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { + st.mu.Lock() + defer st.mu.Unlock() + + pc, ok := st.participants[identity] + if !ok { + return + } + + // Get or create the per-track NTP estimator. + pt, ok := pc.tracks[trackID] + if !ok { + pt = &participantTrack{ + estimator: NewNtpEstimator(clockRate), + trackID: trackID, + } + pc.tracks[trackID] = pt + } + + // Feed the SR to the NTP estimator. + pt.estimator.OnSenderReport(ntpTime, rtpTimestamp, receivedAt) + + // Convert NTP timestamp to nanoseconds and update OWD. + senderNtpNanos := ntpTimestampToNanos(ntpTime) + receiverNanos := receivedAt.UnixNano() + _, pathChanged := pc.owdEstimator.Update(senderNtpNanos, receiverNanos) + + // If a path change was detected, re-anchor the NTP epoch to the current SR. + // This handles cases where the network path changes (e.g., server migration). + if pathChanged && pc.hasEpoch { + pc.ntpEpoch = nanosToTime(senderNtpNanos) + } + + // Record the NTP epoch from the first SR for this participant. + if !pc.hasEpoch { + pc.ntpEpoch = nanosToTime(senderNtpNanos) + pc.hasEpoch = true + } +} + +// GetSessionPTS maps an RTP timestamp for a participant's track to a position +// on the shared session timeline. 
+// +// The formula is: +// +// sessionPTS = ntpEstimator.RtpToNtp(rtpTS) - participantNtpEpoch + (epochOnReceiverClock - sessionStart) +// +// Where: +// - participantNtpEpoch = NTP time from first SR for this participant +// - epochOnReceiverClock = participantNtpEpoch + estimatedOWD +// - sessionStart = wall-clock time first packet arrived +func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp uint32) (time.Duration, error) { + st.mu.RLock() + defer st.mu.RUnlock() + + pc, ok := st.participants[identity] + if !ok { + return 0, fmt.Errorf("SessionTimeline: unknown participant %q", identity) + } + + pt, ok := pc.tracks[trackID] + if !ok { + return 0, errNoSenderReports + } + + if !pt.estimator.IsReady() { + return 0, errNotReady + } + + if !pc.hasEpoch { + return 0, errNoSenderReports + } + + // Map RTP to NTP wall-clock time. + ntpTime, err := pt.estimator.RtpToNtp(rtpTimestamp) + if err != nil { + return 0, err + } + + // Compute offset from participant's NTP epoch. + sinceEpoch := ntpTime.Sub(pc.ntpEpoch) + + // Map the participant's NTP epoch to the receiver's clock. + estimatedOWD := time.Duration(pc.owdEstimator.EstimatedPropagationDelay()) + epochOnReceiverClock := pc.ntpEpoch.Add(estimatedOWD) + + // Compute the session PTS. + sessionPTS := sinceEpoch + epochOnReceiverClock.Sub(st.sessionStart) + + return sessionPTS, nil +} diff --git a/pkg/synchronizer/sessiontimeline_test.go b/pkg/synchronizer/sessiontimeline_test.go new file mode 100644 index 00000000..6e083cba --- /dev/null +++ b/pkg/synchronizer/sessiontimeline_test.go @@ -0,0 +1,223 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// TestSessionTimeline_SingleParticipant feeds SRs for one participant and
// checks that an RTP timestamp 10s into the stream maps to ~10s of session
// time.
func TestSessionTimeline_SingleParticipant(t *testing.T) {
	// One participant with 50ms OWD, feed 5 SRs, verify PTS at 10s is ~10s.
	const (
		clockRate = 90000
		owd       = 50 * time.Millisecond
		identity  = "alice"
		trackID   = "audio-1"
	)

	st := NewSessionTimeline()

	// Session starts at a fixed wall-clock time.
	sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC)
	st.SetSessionStart(sessionStart)

	st.AddParticipant(identity)

	// Each SR is received exactly OWD after its sender NTP time:
	// receivedAt = senderNtpTime + OWD.
	baseNTP := sessionStart // participant's NTP epoch starts at sessionStart
	for i := 0; i < 5; i++ {
		senderNTP := baseNTP.Add(time.Duration(i) * 2 * time.Second)
		rtpTS := uint32(i) * 2 * clockRate
		receivedAt := senderNTP.Add(owd)
		st.OnSenderReport(identity, trackID, clockRate, ntpToUint64(senderNTP), rtpTS, receivedAt)
	}

	// Query PTS at RTP timestamp corresponding to 10s into the stream.
	rtpAt10s := uint32(10 * clockRate)
	pts, err := st.GetSessionPTS(identity, trackID, rtpAt10s)
	require.NoError(t, err)

	// Expected: ~10s on the session timeline (within 100ms, which also covers the OWD).
	diff := pts - 10*time.Second
	if diff < 0 {
		diff = -diff
	}
	require.Less(t, diff, 100*time.Millisecond,
		"PTS at 10s should be ~10s, got %v (diff %v)", pts, diff)
}

// TestSessionTimeline_CrossParticipantAlignment checks that two participants
// whose NTP clocks disagree still land on the same session PTS.
func TestSessionTimeline_CrossParticipantAlignment(t *testing.T) {
	// Two participants with the SAME real network delay (50ms each), both
	// producing media at the same real-world time, but with NTP clocks that
	// disagree by 500ms. The SessionTimeline maps each participant's NTP
	// clock domain onto the receiver's clock using OWD.
	//
	// The NTP clock offset between the participants must be normalized by
	// the OWD mapping: bob's NTP clock is +500ms relative to alice's, the
	// observed OWD for bob is correspondingly 500ms smaller, and the two
	// effects cancel so that the session PTS reflects receiver-clock time.
	const (
		clockRate = 90000
		owd1      = 50 * time.Millisecond // alice's real network delay
		owd2      = 50 * time.Millisecond // bob's real network delay (same)
	)

	// Bob's NTP clock is offset by 500ms relative to alice's (different NTP servers).
	bobNTPOffset := 500 * time.Millisecond

	st := NewSessionTimeline()
	sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC)
	st.SetSessionStart(sessionStart)

	st.AddParticipant("alice")
	st.AddParticipant("bob")

	// Both participants start producing at the same real-world time.
	// Alice's NTP clock = real-world time.
	// Bob's NTP clock = real-world time + 500ms (NTP offset).
	// Both have the same real OWD of 50ms, so:
	//   receivedAt = realWorldTime + owd
	//   aliceNTP = realWorldTime
	//   bobNTP = realWorldTime + 500ms
	// OWD as seen by the estimator is receivedAt - senderNTP:
	//   alice: (realWorldTime + 50ms) - realWorldTime = 50ms
	//   bob:   (realWorldTime + 50ms) - (realWorldTime + 500ms) = -450ms
	//
	// This negative OWD is fine - it just means bob's NTP clock is ahead of
	// the receiver's clock by more than the real OWD. The formula still works
	// because: ntpTime + OWD - sessionStart = (realWorldTime + 500ms) + (-450ms) - sessionStart
	//                                       = realWorldTime + 50ms - sessionStart
	// Which matches alice's: realWorldTime + 50ms - sessionStart

	for i := 0; i < 5; i++ {
		realTime := sessionStart.Add(time.Duration(i) * 2 * time.Second)
		rtpTS := uint32(i) * 2 * clockRate
		receivedAt := realTime.Add(owd1)

		aliceNTP := realTime // alice NTP = real time
		st.OnSenderReport("alice", "audio-a", clockRate, ntpToUint64(aliceNTP), rtpTS, receivedAt)

		bobNTP := realTime.Add(bobNTPOffset) // bob NTP = real time + offset
		bobRecv := realTime.Add(owd2)
		st.OnSenderReport("bob", "audio-b", clockRate, ntpToUint64(bobNTP), rtpTS, bobRecv)
	}

	// Both participants produce a frame at RTP timestamp corresponding to 5s.
	rtpAt5s := uint32(5 * clockRate)

	alicePTS, err := st.GetSessionPTS("alice", "audio-a", rtpAt5s)
	require.NoError(t, err)

	bobPTS, err := st.GetSessionPTS("bob", "audio-b", rtpAt5s)
	require.NoError(t, err)

	// Despite bob's NTP clock being 500ms offset, the OWD-based mapping
	// normalizes both to the receiver's clock domain. Their PTS values
	// should be within a small tolerance.
	diff := alicePTS - bobPTS
	if diff < 0 {
		diff = -diff
	}
	require.Less(t, diff, 50*time.Millisecond,
		"cross-participant PTS should be aligned despite NTP clock offset; alice=%v bob=%v diff=%v", alicePTS, bobPTS, diff)
}

// TestSessionTimeline_LateJoiner checks that a participant whose stream starts
// 30s after session start is placed at ~30s, while the first participant
// stays at ~0s.
func TestSessionTimeline_LateJoiner(t *testing.T) {
	// One participant starts, 30s later another joins.
	// Verify the late joiner's first frame maps to ~30s on the session timeline.
	const (
		clockRate = 90000
		owd       = 50 * time.Millisecond
	)

	st := NewSessionTimeline()
	sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC)
	st.SetSessionStart(sessionStart)

	// Alice joins at session start.
	st.AddParticipant("alice")
	aliceBaseNTP := sessionStart
	for i := 0; i < 5; i++ {
		aliceNTP := aliceBaseNTP.Add(time.Duration(i) * 2 * time.Second)
		aliceRTP := uint32(i) * 2 * clockRate
		aliceRecv := aliceNTP.Add(owd)
		st.OnSenderReport("alice", "audio-a", clockRate, ntpToUint64(aliceNTP), aliceRTP, aliceRecv)
	}

	// Bob joins 30s later.
	st.AddParticipant("bob")
	bobBaseNTP := sessionStart.Add(30 * time.Second)
	for i := 0; i < 5; i++ {
		bobNTP := bobBaseNTP.Add(time.Duration(i) * 2 * time.Second)
		bobRTP := uint32(i) * 2 * clockRate
		bobRecv := bobNTP.Add(owd)
		st.OnSenderReport("bob", "audio-b", clockRate, ntpToUint64(bobNTP), bobRTP, bobRecv)
	}

	// Bob's first frame (RTP=0) should map to ~30s on session timeline.
	bobPTS, err := st.GetSessionPTS("bob", "audio-b", 0)
	require.NoError(t, err)

	diff := bobPTS - 30*time.Second
	if diff < 0 {
		diff = -diff
	}
	require.Less(t, diff, 100*time.Millisecond,
		"late joiner's first frame should be at ~30s; got %v (diff %v)", bobPTS, diff)

	// Alice's first frame should be at ~0s.
	alicePTS, err := st.GetSessionPTS("alice", "audio-a", 0)
	require.NoError(t, err)

	diff = alicePTS
	if diff < 0 {
		diff = -diff
	}
	require.Less(t, diff, 100*time.Millisecond,
		"first participant's first frame should be at ~0s; got %v", alicePTS)
}

// TestSessionTimeline_FallbackBeforeSRs checks the error paths callers rely on
// to fall back: a known participant without SRs, and an unknown participant.
func TestSessionTimeline_FallbackBeforeSRs(t *testing.T) {
	// Verify error when no SRs received.
	st := NewSessionTimeline()
	sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC)
	st.SetSessionStart(sessionStart)

	st.AddParticipant("alice")

	// No SRs have been received: should return errNoSenderReports.
	_, err := st.GetSessionPTS("alice", "audio-a", 1000)
	require.Error(t, err)
	require.ErrorIs(t, err, errNoSenderReports)

	// Unknown participant should also error.
	_, err = st.GetSessionPTS("unknown", "track-x", 1000)
	require.Error(t, err)
}
+ _, err := st.GetSessionPTS("alice", "audio-a", 1000) + require.Error(t, err) + require.ErrorIs(t, err, errNoSenderReports) + + // Unknown participant should also error. + _, err = st.GetSessionPTS("unknown", "track-x", 1000) + require.Error(t, err) +} diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go new file mode 100644 index 00000000..4665e631 --- /dev/null +++ b/pkg/synchronizer/syncengine.go @@ -0,0 +1,492 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "io" + "sync" + "sync/atomic" + "time" + + "github.com/pion/rtcp" + "github.com/pion/webrtc/v4" + + "github.com/livekit/media-sdk/jitter" + + "github.com/livekit/protocol/utils/rtputil" +) + +const ( + // transitionSlewRatePerSecond is the rate at which the wall-clock→NTP + // transition correction is absorbed: 5ms per second of real time. + transitionSlewRatePerSecond = 5 * time.Millisecond + + // wallClockSanityThreshold is the maximum divergence between RTP-derived PTS + // and wall-clock PTS before falling back to wall clock. + wallClockSanityThreshold = 5 * time.Second +) + +// SyncEngineOption configures a SyncEngine. +type SyncEngineOption func(*SyncEngine) + +// WithSyncEngineOnStarted sets a callback invoked once the first track is initialized. 
+func WithSyncEngineOnStarted(f func()) SyncEngineOption { + return func(e *SyncEngine) { + e.onStarted = f + } +} + +// WithSyncEngineStartGate enables the burst-estimation start gate on all tracks. +func WithSyncEngineStartGate() SyncEngineOption { + return func(e *SyncEngine) { + e.enableStartGate = true + } +} + +// SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline +// to provide cross-participant alignment and per-participant A/V lip sync. +// It implements the Sync interface. +type SyncEngine struct { + mu sync.Mutex + timeline *SessionTimeline + tracks map[uint32]*syncEngineTrack // keyed by SSRC + trackIDs map[string]*syncEngineTrack // keyed by track ID + + startedAt atomic.Int64 + endedAt atomic.Int64 + + enableStartGate bool + onStarted func() + + mediaRunningTime func() (time.Duration, bool) + mediaRunningTimeLock sync.RWMutex +} + +// NewSyncEngine creates a new SyncEngine with the given options. +func NewSyncEngine(opts ...SyncEngineOption) *SyncEngine { + e := &SyncEngine{ + timeline: NewSessionTimeline(), + tracks: make(map[uint32]*syncEngineTrack), + trackIDs: make(map[string]*syncEngineTrack), + } + for _, opt := range opts { + opt(e) + } + return e +} + +// AddTrack registers a new track and returns a TrackSync handle. +func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { + ssrc := uint32(track.SSRC()) + clockRate := track.Codec().ClockRate + + e.mu.Lock() + defer e.mu.Unlock() + + // Ensure the participant exists in the timeline. + e.timeline.mu.Lock() + pc, ok := e.timeline.participants[identity] + if !ok { + e.timeline.mu.Unlock() + pc = e.timeline.AddParticipant(identity) + e.timeline.mu.Lock() + } + + // Auto-register the track with ParticipantSync using a placeholder estimator. 
+ mt := MediaTypeAudio + if track.Kind() == webrtc.RTPCodecTypeVideo { + mt = MediaTypeVideo + } + placeholder := NewNtpEstimator(clockRate) + pc.participantSync.SetTrackEstimator(track.ID(), mt, placeholder) + e.timeline.mu.Unlock() + + st := &syncEngineTrack{ + engine: e, + track: track, + identity: identity, + converter: rtputil.NewRTPConverter(int64(clockRate)), + } + + if e.enableStartGate { + st.startGate = newStartGate(clockRate, track.Kind(), nil) + } + + e.tracks[ssrc] = st + e.trackIDs[track.ID()] = st + + return st +} + +// RemoveTrack removes a track by track ID. +func (e *SyncEngine) RemoveTrack(trackID string) { + e.mu.Lock() + st, ok := e.trackIDs[trackID] + if !ok { + e.mu.Unlock() + return + } + ssrc := uint32(st.track.SSRC()) + delete(e.tracks, ssrc) + delete(e.trackIDs, trackID) + e.mu.Unlock() + + st.Close() +} + +// OnRTCP processes an RTCP packet, dispatching sender reports to the appropriate +// track's NTP estimator and ParticipantSync. +func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { + sr, ok := packet.(*rtcp.SenderReport) + if !ok { + return + } + + e.mu.Lock() + st, ok := e.tracks[sr.SSRC] + if !ok { + e.mu.Unlock() + return + } + identity := st.identity + trackID := st.track.ID() + clockRate := st.track.Codec().ClockRate + e.mu.Unlock() + + now := time.Now() + + // Feed the SR to the session timeline (updates NTP estimator + OWD). + e.timeline.OnSenderReport(identity, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) + + // Wire up ParticipantSync: get the track's estimator from timeline and update it. + e.timeline.mu.RLock() + pc, pcOK := e.timeline.participants[identity] + if pcOK { + pt, ptOK := pc.tracks[trackID] + if ptOK { + mt := MediaTypeAudio + if st.track.Kind() == webrtc.RTPCodecTypeVideo { + mt = MediaTypeVideo + } + pc.participantSync.SetTrackEstimator(trackID, mt, pt.estimator) + pc.participantSync.OnSenderReport(trackID) + + // Compute elapsed session time for slew limiting. 
+ startedAt := e.startedAt.Load() + if startedAt > 0 { + elapsed := time.Duration(now.UnixNano() - startedAt) + pc.participantSync.updateAdjustments(elapsed) + } + } + } + e.timeline.mu.RUnlock() + + // Call onSR callback if set. + st.mu.Lock() + onSR := st.onSR + st.mu.Unlock() + + if onSR != nil { + // Compute drift as the difference between NTP-derived time and wall clock elapsed. + ntpNanos := ntpTimestampToNanos(sr.NTPTime) + ntpTime := nanosToTime(ntpNanos) + startedAt := e.startedAt.Load() + if startedAt > 0 { + sessionStart := time.Unix(0, startedAt) + expectedElapsed := now.Sub(sessionStart) + ntpElapsed := ntpTime.Sub(sessionStart) + drift := ntpElapsed - expectedElapsed + onSR(drift) + } + } +} + +// End signals the end of the session and sets drain ceilings on all tracks. +func (e *SyncEngine) End() { + e.mu.Lock() + defer e.mu.Unlock() + + // Find the maximum adjusted PTS across all tracks. + var maxPTS time.Duration + for _, st := range e.tracks { + st.mu.Lock() + if st.lastPTSAdjusted > maxPTS { + maxPTS = st.lastPTSAdjusted + } + st.mu.Unlock() + } + + startedAt := e.startedAt.Load() + if startedAt > 0 { + e.endedAt.Store(startedAt + int64(maxPTS)) + } else { + e.endedAt.Store(time.Now().UnixNano()) + } + + // Set drain ceiling on all tracks. + for _, st := range e.tracks { + st.mu.Lock() + st.maxPTS = maxPTS + st.maxPTSSet = true + st.mu.Unlock() + } +} + +// GetStartedAt returns the start timestamp in nanoseconds, or 0 if not started. +func (e *SyncEngine) GetStartedAt() int64 { + return e.startedAt.Load() +} + +// GetEndedAt returns the end timestamp in nanoseconds, or 0 if not ended. +func (e *SyncEngine) GetEndedAt() int64 { + return e.endedAt.Load() +} + +// SetMediaRunningTime sets the external media running time provider. 
+func (e *SyncEngine) SetMediaRunningTime(mediaRunningTime func() (time.Duration, bool)) { + e.mediaRunningTimeLock.Lock() + e.mediaRunningTime = mediaRunningTime + e.mediaRunningTimeLock.Unlock() +} + +// initializeIfNeeded sets the session start time and fires the onStarted callback +// on the first track initialization. Returns the startedAt value. +func (e *SyncEngine) initializeIfNeeded(receivedAt time.Time) int64 { + nano := receivedAt.UnixNano() + if e.startedAt.CompareAndSwap(0, nano) { + e.timeline.SetSessionStart(receivedAt) + if e.onStarted != nil { + e.onStarted() + } + } + return e.startedAt.Load() +} + +// --- syncEngineTrack --- + +// syncEngineTrack implements TrackSync for a single track within a SyncEngine. +type syncEngineTrack struct { + engine *SyncEngine + track TrackRemote + identity string + converter *rtputil.RTPConverter + startGate startGate // from start_gate.go, nil if not enabled + + mu sync.Mutex + startTime time.Time + lastTS uint32 + lastPTS time.Duration + lastPTSAdjusted time.Duration + initialized bool + closed bool + + // NTP transition + ntpTransitioned bool + transitionSlew time.Duration + lastSlewTime time.Time // wall-clock time of last slew step + + // drain + maxPTS time.Duration + maxPTSSet bool + + onSR func(drift time.Duration) +} + +// PrimeForStart implements TrackSync. It buffers packets through the optional +// start gate and initializes the track on the first valid packet. 
+func (st *syncEngineTrack) PrimeForStart(pkt jitter.ExtPacket) ([]jitter.ExtPacket, int, bool) {
+	st.mu.Lock()
+	defer st.mu.Unlock()
+
+	// Fast path: nothing to gate. Initialize from this packet if that has not
+	// happened yet and hand it straight back.
+	if st.initialized || st.startGate == nil {
+		if !st.initialized {
+			st.initializeLocked(pkt)
+		}
+		return []jitter.ExtPacket{pkt}, 0, true
+	}
+
+	// The gate is still collecting packets: report only what it dropped.
+	ready, dropped, done := st.startGate.Push(pkt)
+	if !done {
+		return nil, dropped, false
+	}
+
+	// The gate finished without releasing anything; fall back to the packet
+	// that was just pushed so the caller always gets at least one packet.
+	if len(ready) == 0 {
+		ready = []jitter.ExtPacket{pkt}
+	}
+
+	// The track's start time comes from the first released packet, not from
+	// the packet that completed the gate.
+	if !st.initialized {
+		st.initializeLocked(ready[0])
+	}
+
+	return ready, dropped, true
+}
+
+// initializeLocked sets the track's start time and registers with the engine.
+// Caller must hold st.mu.
+//
+// The start time is pkt.ReceivedAt, or the current time when ReceivedAt is the
+// zero time. lastTS is seeded with the packet's RTP timestamp.
+func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) {
+	receivedAt := pkt.ReceivedAt
+	if receivedAt.IsZero() {
+		receivedAt = time.Now()
+	}
+
+	st.startTime = receivedAt
+	st.lastTS = pkt.Timestamp
+	st.initialized = true
+
+	// Initialize the engine's session start time.
+	st.engine.initializeIfNeeded(receivedAt)
+}
+
+// GetPTS implements TrackSync. It computes the presentation timestamp for a packet
+// using the NTP-grounded timeline when available, falling back to wall clock otherwise.
+//
+// The base PTS is then offset by the remaining transition slew and by the
+// participant's A/V adjustment, forced to advance by at least 1ms over the
+// previously returned PTS, and checked against the drain ceiling set by End.
+//
+// It returns io.EOF when the track is closed or when the computed PTS exceeds
+// the drain ceiling; on the ceiling path lastTS, lastPTS and lastPTSAdjusted
+// are left untouched. A track that was never primed is initialized from pkt.
+func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) {
+	st.mu.Lock()
+	defer st.mu.Unlock()
+
+	if st.closed {
+		return 0, io.EOF
+	}
+
+	if !st.initialized {
+		st.initializeLocked(pkt)
+	}
+
+	ts := pkt.Timestamp
+
+	// Step 1: Try NTP-grounded PTS from SessionTimeline.
+	ntpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts)
+
+	var pts time.Duration
+	if ntpErr != nil {
+		// Step 2: Fall back to wall-clock PTS.
+		pts = st.wallClockPTS(pkt)
+	} else {
+		// Step 3: On first successful NTP PTS, compute transition correction.
+		// The slew starts as the full gap between the two clocks, so adding it
+		// back in step 4 makes this packet's PTS equal the wall-clock PTS and
+		// the switch itself introduces no jump.
+		if !st.ntpTransitioned {
+			wallPTS := st.wallClockPTS(pkt)
+			st.transitionSlew = wallPTS - ntpPTS
+			st.ntpTransitioned = true
+		}
+		pts = ntpPTS
+	}
+
+	// Step 4: Apply transition slew (absorb gradually toward zero, time-based).
+	// The current slew is applied to this packet first; the decay computed
+	// below only affects later packets. On the first pass lastSlewTime is zero,
+	// so nothing decays and only the reference time is recorded.
+	if st.transitionSlew != 0 {
+		pts += st.transitionSlew
+
+		now := pkt.ReceivedAt
+		if now.IsZero() {
+			now = time.Now()
+		}
+		if !st.lastSlewTime.IsZero() {
+			elapsed := now.Sub(st.lastSlewTime)
+			maxStep := time.Duration(float64(transitionSlewRatePerSecond) * elapsed.Seconds())
+			// A non-positive step (receive time did not advance) leaves the
+			// slew unchanged.
+			if maxStep > 0 {
+				// Move toward zero by maxStep without crossing it.
+				if st.transitionSlew > 0 {
+					st.transitionSlew -= maxStep
+					if st.transitionSlew < 0 {
+						st.transitionSlew = 0
+					}
+				} else {
+					st.transitionSlew += maxStep
+					if st.transitionSlew > 0 {
+						st.transitionSlew = 0
+					}
+				}
+			}
+		}
+		st.lastSlewTime = now
+	}
+
+	// Step 5: Apply ParticipantSync A/V adjustment.
+	// The timeline's read lock is taken while st.mu is already held.
+	st.engine.timeline.mu.RLock()
+	if pc, ok := st.engine.timeline.participants[st.identity]; ok {
+		adj := pc.participantSync.GetAdjustment(st.track.ID())
+		pts += adj
+	}
+	st.engine.timeline.mu.RUnlock()
+
+	// Step 6: Enforce monotonicity.
+	// Each PTS must exceed the previous one by at least 1ms. The check is
+	// skipped while lastPTSAdjusted is still 0.
+	if pts < st.lastPTSAdjusted+time.Millisecond && st.lastPTSAdjusted > 0 {
+		pts = st.lastPTSAdjusted + time.Millisecond
+	}
+
+	// Step 7: Enforce drain ceiling.
+	if st.maxPTSSet && pts > st.maxPTS {
+		return 0, io.EOF
+	}
+
+	// Update state.
+	// NOTE(review): lastPTS receives the same fully adjusted value as
+	// lastPTSAdjusted (slew, A/V adjustment and the monotonic bump included),
+	// and wallClockPTS extends it by the RTP delta on the next fallback packet.
+	// If a pre-adjustment PTS was intended here, the adjustments appear to be
+	// re-applied on top of themselves on each fallback packet. Confirm which
+	// value is meant.
+	st.lastTS = ts
+	st.lastPTS = pts // base for wallClockPTS's RTP-delta extrapolation
+	st.lastPTSAdjusted = pts
+
+	return pts, nil
+}
+
+// wallClockPTS computes a PTS based on wall-clock timing and RTP deltas.
+//
+// It does not lock or modify the track; GetPTS calls it with st.mu held. A
+// lastPTS of 0 is treated as "no previous packet", in which case the result is
+// the receive time elapsed since startTime, clamped at 0.
+func (st *syncEngineTrack) wallClockPTS(pkt jitter.ExtPacket) time.Duration {
+	ts := pkt.Timestamp
+
+	// Same RTP timestamp as last packet: same frame.
+	if st.lastTS == ts && st.lastPTS > 0 {
+		return st.lastPTS
+	}
+
+	// Wall-clock elapsed.
+	// NOTE(review): initializeLocked substitutes time.Now() for a zero
+	// ReceivedAt, but this subtraction uses pkt.ReceivedAt as-is. A zero value
+	// would give a very large negative wallElapsed, which ends up clamped to 0
+	// below. Confirm packets always carry a receive time.
+	wallElapsed := pkt.ReceivedAt.Sub(st.startTime)
+
+	// If we have a previous timestamp, use RTP delta for more precision.
+	if st.lastPTS > 0 {
+		// uint32 subtraction wraps modulo 2^32, so a forward step across an
+		// RTP timestamp rollover still yields the right delta. A packet older
+		// than lastTS yields a very large delta instead, presumably rejected by
+		// the sanity check below.
+		rtpDelta := ts - st.lastTS
+		rtpDerived := st.lastPTS + st.converter.ToDuration(rtpDelta)
+
+		// Sanity check: if RTP-derived PTS diverges from wall-clock by > 5s, use wall clock.
+		diff := rtpDerived - wallElapsed
+		if diff < 0 {
+			diff = -diff
+		}
+		if diff <= wallClockSanityThreshold {
+			return rtpDerived
+		}
+	}
+
+	// Use wall-clock elapsed, ensuring non-negative.
+	if wallElapsed < 0 {
+		wallElapsed = 0
+	}
+	return wallElapsed
+}
+
+// OnSenderReport implements TrackSync. It stores a callback invoked on sender reports.
+// A later call replaces the previously stored callback.
+func (st *syncEngineTrack) OnSenderReport(f func(drift time.Duration)) {
+	st.mu.Lock()
+	defer st.mu.Unlock()
+	st.onSR = f
+}
+
+// LastPTSAdjusted implements TrackSync. It returns the PTS most recently
+// returned by GetPTS, or 0 if GetPTS has not produced one yet.
+func (st *syncEngineTrack) LastPTSAdjusted() time.Duration {
+	st.mu.Lock()
+	defer st.mu.Unlock()
+	return st.lastPTSAdjusted
+}
+
+// Close implements TrackSync. It marks the track closed, after which GetPTS
+// returns io.EOF. No other state is changed here.
+func (st *syncEngineTrack) Close() {
+	st.mu.Lock()
+	defer st.mu.Unlock()
+	st.closed = true
+}
diff --git a/pkg/synchronizer/syncengine_test.go b/pkg/synchronizer/syncengine_test.go
new file mode 100644
index 00000000..7cf317c7
--- /dev/null
+++ b/pkg/synchronizer/syncengine_test.go
@@ -0,0 +1,197 @@
+// Copyright 2026 LiveKit, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package synchronizer
+
+import (
+	"testing"
+	"time"
+
+	"github.com/pion/rtcp"
+	"github.com/pion/rtp"
+	"github.com/pion/webrtc/v4"
+	"github.com/stretchr/testify/require"
+
+	"github.com/livekit/media-sdk/jitter"
+)
+
+// --- Test helpers ---
+
+// mockTrackRemote is a fixed-value track stand-in that the tests pass to
+// SyncEngine.AddTrack. Each accessor returns the corresponding field.
+type mockTrackRemote struct {
+	id    string
+	codec webrtc.RTPCodecParameters
+	kind  webrtc.RTPCodecType
+	ssrc  webrtc.SSRC
+}
+
+func (m *mockTrackRemote) ID() string                       { return m.id }
+func (m *mockTrackRemote) Codec() webrtc.RTPCodecParameters { return m.codec }
+func (m *mockTrackRemote) Kind() webrtc.RTPCodecType        { return m.kind }
+func (m *mockTrackRemote) SSRC() webrtc.SSRC                { return m.ssrc }
+
+// newMockAudioTrack returns an audio mock track with a 48 kHz clock rate.
+func newMockAudioTrack(id string, ssrc uint32) *mockTrackRemote {
+	return &mockTrackRemote{
+		id:    id,
+		codec: webrtc.RTPCodecParameters{RTPCodecCapability: webrtc.RTPCodecCapability{ClockRate: 48000}},
+		kind:  webrtc.RTPCodecTypeAudio,
+		ssrc:  webrtc.SSRC(ssrc),
+	}
+}
+
+// newMockVideoTrack returns a video mock track with a 90 kHz clock rate.
+func newMockVideoTrack(id string, ssrc uint32) *mockTrackRemote {
+	return &mockTrackRemote{
+		id:    id,
+		codec: webrtc.RTPCodecParameters{RTPCodecCapability: webrtc.RTPCodecCapability{ClockRate: 90000}},
+		kind:  webrtc.RTPCodecTypeVideo,
+		ssrc:  webrtc.SSRC(ssrc),
+	}
+}
+
+// makeExtPacket builds a jitter.ExtPacket carrying only an RTP timestamp, a
+// sequence number and a receive time; every other field is left at its zero
+// value.
+func makeExtPacket(ts uint32, sn uint16, receivedAt time.Time) jitter.ExtPacket {
+	return jitter.ExtPacket{
+		ReceivedAt: receivedAt,
+		Packet:     &rtp.Packet{Header: rtp.Header{Timestamp: ts, SequenceNumber: sn}},
+	}
+}
+
+// --- Tests ---
+
+// TestSyncEngine_ImplementsSyncInterface fails to compile, rather than fail at
+// run time, if *SyncEngine stops satisfying Sync.
+func TestSyncEngine_ImplementsSyncInterface(t *testing.T) {
+	// Compile-time check that SyncEngine implements Sync.
+	var _ Sync = (*SyncEngine)(nil)
+}
+
+// TestSyncEngine_FallbackToWallClockBeforeSRs checks that, with no sender
+// reports fed to the engine, a track without a start gate is ready after one
+// packet and GetPTS returns a non-negative PTS that advances with the next
+// packet.
+func TestSyncEngine_FallbackToWallClockBeforeSRs(t *testing.T) {
+	engine := NewSyncEngine()
+
+	track := newMockAudioTrack("audio-1", 1000)
+	ts := engine.AddTrack(track, "alice")
+
+	now := time.Now()
+
+	// Prime the track with the first packet.
+	pkt0 := makeExtPacket(0, 0, now)
+	_, _, done := ts.PrimeForStart(pkt0)
+	require.True(t, done, "without start gate, track should be ready immediately")
+
+	// Get PTS for first packet (same as prime packet).
+	pts0, err := ts.GetPTS(pkt0)
+	require.NoError(t, err)
+	require.GreaterOrEqual(t, int64(pts0), int64(0), "first PTS should be >= 0")
+
+	// Second packet 100ms later.
+	// 4800 ticks at 48 kHz matches the 100ms receive-time step.
+	pkt1 := makeExtPacket(4800, 1, now.Add(100*time.Millisecond))
+	pts1, err := ts.GetPTS(pkt1)
+	require.NoError(t, err)
+	require.Greater(t, int64(pts1), int64(0), "second packet PTS should be > 0")
+	require.Greater(t, int64(pts1), int64(pts0), "PTS should advance")
+}
+
+// TestSyncEngine_TransitionsToNTPAfterSRs checks that PTS values keep
+// advancing when sender reports arrive between packets.
+//
+// NOTE(review): the assertions only require pts2 > pts1; nothing here verifies
+// that the NTP path was actually taken for pkt2.
+func TestSyncEngine_TransitionsToNTPAfterSRs(t *testing.T) {
+	engine := NewSyncEngine()
+
+	track := newMockAudioTrack("audio-1", 1000)
+	ts := engine.AddTrack(track, "alice")
+
+	now := time.Now()
+
+	// Prime and get initial wall-clock PTS.
+	pkt0 := makeExtPacket(0, 0, now)
+	ts.PrimeForStart(pkt0)
+	pts0, err := ts.GetPTS(pkt0)
+	require.NoError(t, err)
+
+	// Get a wall-clock PTS at 500ms.
+	pkt1 := makeExtPacket(24000, 1, now.Add(500*time.Millisecond))
+	pts1, err := ts.GetPTS(pkt1)
+	require.NoError(t, err)
+	require.Greater(t, int64(pts1), int64(pts0))
+
+	// Feed 3 sender reports to make NTP estimator ready.
+	// The reports are one second apart in NTP time and 48000 ticks apart in
+	// RTP time, and use the audio track's SSRC (1000).
+	for i := 0; i < 3; i++ {
+		srTime := now.Add(time.Duration(i) * time.Second)
+		rtpTS := uint32(i) * 48000
+		ntpTime := ntpToUint64(srTime)
+		sr := makeSenderReport(1000, ntpTime, rtpTS)
+		engine.OnRTCP(sr)
+	}
+
+	// Get PTS after NTP transition - should still be valid and advancing.
+	pkt2 := makeExtPacket(48000, 2, now.Add(time.Second))
+	pts2, err := ts.GetPTS(pkt2)
+	require.NoError(t, err)
+	require.Greater(t, int64(pts2), int64(pts1), "PTS should continue to advance after NTP transition")
+}
+
+// TestSyncEngine_MonotonicPTS feeds 100 evenly spaced audio packets and checks
+// that the returned PTS never decreases.
+func TestSyncEngine_MonotonicPTS(t *testing.T) {
+	engine := NewSyncEngine()
+
+	track := newMockAudioTrack("audio-1", 1000)
+	ts := engine.AddTrack(track, "alice")
+
+	now := time.Now()
+
+	// Prime with first packet.
+	pkt0 := makeExtPacket(0, 0, now)
+	ts.PrimeForStart(pkt0)
+
+	// The i == 0 iteration repeats the prime packet's timestamp and receive time.
+	var lastPTS time.Duration
+	for i := 0; i < 100; i++ {
+		recvAt := now.Add(time.Duration(i) * 20 * time.Millisecond)
+		rtpTS := uint32(i) * 960 // 20ms at 48kHz
+		pkt := makeExtPacket(rtpTS, uint16(i), recvAt)
+		pts, err := ts.GetPTS(pkt)
+		require.NoError(t, err)
+		require.GreaterOrEqual(t, int64(pts), int64(lastPTS),
+			"PTS must be monotonically non-decreasing: packet %d got %v, last was %v", i, pts, lastPTS)
+		lastPTS = pts
+	}
+}
+
+// TestSyncEngine_EndDrain checks that endedAt is 0 before End and positive
+// after it, and that startedAt was set by the packets pushed beforehand.
+//
+// NOTE(review): the GetPTS results are discarded and no packet is pushed after
+// End, so the drain ceiling itself (io.EOF past maxPTS) is not exercised here.
+func TestSyncEngine_EndDrain(t *testing.T) {
+	engine := NewSyncEngine()
+
+	track := newMockAudioTrack("audio-1", 1000)
+	ts := engine.AddTrack(track, "alice")
+
+	now := time.Now()
+
+	// Prime and push some packets.
+	pkt0 := makeExtPacket(0, 0, now)
+	ts.PrimeForStart(pkt0)
+	ts.GetPTS(pkt0)
+
+	for i := 1; i <= 10; i++ {
+		recvAt := now.Add(time.Duration(i) * 20 * time.Millisecond)
+		rtpTS := uint32(i) * 960
+		pkt := makeExtPacket(rtpTS, uint16(i), recvAt)
+		ts.GetPTS(pkt)
+	}
+
+	require.Equal(t, int64(0), engine.GetEndedAt(), "endedAt should be 0 before End()")
+
+	engine.End()
+
+	require.Greater(t, engine.GetEndedAt(), int64(0), "endedAt should be > 0 after End()")
+	require.Greater(t, engine.GetStartedAt(), int64(0), "startedAt should be > 0")
+}
+
+// makeSenderReport creates an rtcp.SenderReport with the given fields.
+func makeSenderReport(ssrc uint32, ntpTime uint64, rtpTime uint32) *rtcp.SenderReport { + return &rtcp.SenderReport{ + SSRC: ssrc, + NTPTime: ntpTime, + RTPTime: rtpTime, + } +} diff --git a/pkg/synchronizer/synchronizer.go b/pkg/synchronizer/synchronizer.go index 9227aa7d..3661391a 100644 --- a/pkg/synchronizer/synchronizer.go +++ b/pkg/synchronizer/synchronizer.go @@ -386,6 +386,22 @@ func (s *Synchronizer) GetEndedAt() int64 { return s.endedAt } +// SynchronizerAdapter wraps the legacy Synchronizer to implement the Sync interface. +// The Synchronizer's own AddTrack returns *TrackSynchronizer (concrete type); this +// adapter's AddTrack returns TrackSync so that *SynchronizerAdapter satisfies Sync. +type SynchronizerAdapter struct { + *Synchronizer +} + +func (a *SynchronizerAdapter) AddTrack(track TrackRemote, identity string) TrackSync { + return a.Synchronizer.AddTrack(track, identity) +} + +// AsSyncInterface returns a Sync-compatible wrapper around this Synchronizer. +func (s *Synchronizer) AsSyncInterface() Sync { + return &SynchronizerAdapter{Synchronizer: s} +} + func (s *Synchronizer) getExternalMediaDeadline() (time.Duration, bool) { s.RLock() startTime := s.externalMediaStartTime diff --git a/pkg/synchronizer/synchronizer_test.go b/pkg/synchronizer/synchronizer_test.go index a485989e..c5f40b6f 100644 --- a/pkg/synchronizer/synchronizer_test.go +++ b/pkg/synchronizer/synchronizer_test.go @@ -15,6 +15,10 @@ import ( "github.com/livekit/server-sdk-go/v2/pkg/synchronizer/synchronizerfakes" ) +// Compile-time interface checks +var _ synchronizer.Sync = (*synchronizer.SynchronizerAdapter)(nil) +var _ synchronizer.TrackSync = (*synchronizer.TrackSynchronizer)(nil) + const timeTolerance = time.Millisecond * 10 const fakeAudioTrackID = "audio-1" From d0be90809fe63ce36abe4b719aa982f5368c80de Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 01:15:13 -0400 Subject: [PATCH 02/17] session offset for late joining tracks --- 
pkg/synchronizer/syncengine.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 4665e631..f3d937ba 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -286,6 +286,7 @@ type syncEngineTrack struct { mu sync.Mutex startTime time.Time + sessionOffset time.Duration // offset from session start to this track's start lastTS uint32 lastPTS time.Duration lastPTSAdjusted time.Duration @@ -346,7 +347,8 @@ func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) { st.initialized = true // Initialize the engine's session start time. - st.engine.initializeIfNeeded(receivedAt) + sessionStart := st.engine.initializeIfNeeded(receivedAt) + st.sessionOffset = time.Duration(receivedAt.UnixNano() - sessionStart) } // GetPTS implements TrackSync. It computes the presentation timestamp for a packet @@ -445,8 +447,8 @@ func (st *syncEngineTrack) wallClockPTS(pkt jitter.ExtPacket) time.Duration { return st.lastPTS } - // Wall-clock elapsed. - wallElapsed := pkt.ReceivedAt.Sub(st.startTime) + // Wall-clock elapsed since this track started, plus session offset + wallElapsed := pkt.ReceivedAt.Sub(st.startTime) + st.sessionOffset // If we have a previous timestamp, use RTP delta for more precision. if st.lastPTS > 0 { From 3b6d0d229c6e54fe167d040bffe7da82720acbfa Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 13:09:11 -0400 Subject: [PATCH 03/17] same frame pts fix --- pkg/synchronizer/syncengine.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index f3d937ba..8eb16e80 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -367,6 +367,11 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { ts := pkt.Timestamp + // Same RTP timestamp as last packet: return same PTS (same frame). 
+ if ts == st.lastTS && st.lastPTSAdjusted > 0 { + return st.lastPTSAdjusted, nil + } + // Step 1: Try NTP-grounded PTS from SessionTimeline. ntpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) From 1e8a672918693d4531108c2e18b1b77c15ce43e6 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 13:10:46 -0400 Subject: [PATCH 04/17] track max pts of removed tracks --- pkg/synchronizer/syncengine.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 8eb16e80..8d1214e4 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -67,6 +67,9 @@ type SyncEngine struct { startedAt atomic.Int64 endedAt atomic.Int64 + // high-water mark for removed tracks, so End() includes their PTS + maxRemovedPTS time.Duration + enableStartGate bool onStarted func() @@ -138,6 +141,14 @@ func (e *SyncEngine) RemoveTrack(trackID string) { e.mu.Unlock() return } + + // Preserve removed track's PTS high-water mark so End() includes it. + st.mu.Lock() + if st.lastPTSAdjusted > e.maxRemovedPTS { + e.maxRemovedPTS = st.lastPTSAdjusted + } + st.mu.Unlock() + ssrc := uint32(st.track.SSRC()) delete(e.tracks, ssrc) delete(e.trackIDs, trackID) @@ -218,8 +229,8 @@ func (e *SyncEngine) End() { e.mu.Lock() defer e.mu.Unlock() - // Find the maximum adjusted PTS across all tracks. - var maxPTS time.Duration + // Start from the high-water mark of removed tracks. 
+ maxPTS := e.maxRemovedPTS for _, st := range e.tracks { st.mu.Lock() if st.lastPTSAdjusted > maxPTS { From 9520f290dc12dfbd1ce08bf8b939cfe3cc92a2fe Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 13:17:53 -0400 Subject: [PATCH 05/17] add old packet threshold --- pkg/synchronizer/syncengine.go | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 8d1214e4..51f7186b 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -36,6 +36,9 @@ const ( // wallClockSanityThreshold is the maximum divergence between RTP-derived PTS // and wall-clock PTS before falling back to wall clock. wallClockSanityThreshold = 5 * time.Second + + // defaultOldPacketThreshold is the default age after which packets are dropped. + defaultOldPacketThreshold = 500 * time.Millisecond ) // SyncEngineOption configures a SyncEngine. @@ -55,6 +58,14 @@ func WithSyncEngineStartGate() SyncEngineOption { } } +// WithSyncEngineOldPacketThreshold sets the age after which packets are dropped. +// Zero disables the check. +func WithSyncEngineOldPacketThreshold(d time.Duration) SyncEngineOption { + return func(e *SyncEngine) { + e.oldPacketThreshold = d + } +} + // SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline // to provide cross-participant alignment and per-participant A/V lip sync. // It implements the Sync interface. @@ -70,8 +81,9 @@ type SyncEngine struct { // high-water mark for removed tracks, so End() includes their PTS maxRemovedPTS time.Duration - enableStartGate bool - onStarted func() + enableStartGate bool + oldPacketThreshold time.Duration + onStarted func() mediaRunningTime func() (time.Duration, bool) mediaRunningTimeLock sync.RWMutex @@ -80,9 +92,10 @@ type SyncEngine struct { // NewSyncEngine creates a new SyncEngine with the given options. 
func NewSyncEngine(opts ...SyncEngineOption) *SyncEngine { e := &SyncEngine{ - timeline: NewSessionTimeline(), - tracks: make(map[uint32]*syncEngineTrack), - trackIDs: make(map[string]*syncEngineTrack), + timeline: NewSessionTimeline(), + tracks: make(map[uint32]*syncEngineTrack), + trackIDs: make(map[string]*syncEngineTrack), + oldPacketThreshold: defaultOldPacketThreshold, } for _, opt := range opts { opt(e) @@ -383,6 +396,13 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { return st.lastPTSAdjusted, nil } + // Drop packets older than threshold. + if st.engine.oldPacketThreshold > 0 && !pkt.ReceivedAt.IsZero() { + if time.Since(pkt.ReceivedAt) > st.engine.oldPacketThreshold { + return 0, ErrPacketTooOld + } + } + // Step 1: Try NTP-grounded PTS from SessionTimeline. ntpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) From 9d6ea8645f5874c451eb4cc38c12f0af73cca8a1 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 13:24:28 -0400 Subject: [PATCH 06/17] pts-based slew --- pkg/synchronizer/syncengine.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 51f7186b..83fa8df3 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -320,7 +320,7 @@ type syncEngineTrack struct { // NTP transition ntpTransitioned bool transitionSlew time.Duration - lastSlewTime time.Time // wall-clock time of last slew step + lastSlewPTS time.Duration // PTS at which slew was last updated // drain maxPTS time.Duration @@ -420,18 +420,14 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { pts = ntpPTS } - // Step 4: Apply transition slew (absorb gradually toward zero, time-based). + // Step 4: Apply transition slew (absorb gradually toward zero, pts-based). 
if st.transitionSlew != 0 { pts += st.transitionSlew - now := pkt.ReceivedAt - if now.IsZero() { - now = time.Now() - } - if !st.lastSlewTime.IsZero() { - elapsed := now.Sub(st.lastSlewTime) - maxStep := time.Duration(float64(transitionSlewRatePerSecond) * elapsed.Seconds()) - if maxStep > 0 { + if st.lastSlewPTS > 0 { + ptsDelta := pts - st.lastSlewPTS + if ptsDelta > 0 { + maxStep := time.Duration(float64(transitionSlewRatePerSecond) * ptsDelta.Seconds()) if st.transitionSlew > 0 { st.transitionSlew -= maxStep if st.transitionSlew < 0 { @@ -445,7 +441,7 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } } } - st.lastSlewTime = now + st.lastSlewPTS = pts } // Step 5: Apply ParticipantSync A/V adjustment. From bd4b2e5d162a4024af3c34103c1deb3632b3ae1b Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 13:30:42 -0400 Subject: [PATCH 07/17] add media running time option --- pkg/synchronizer/integration_test.go | 4 +-- pkg/synchronizer/syncengine.go | 48 ++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/pkg/synchronizer/integration_test.go b/pkg/synchronizer/integration_test.go index 9f5484b5..02207fba 100644 --- a/pkg/synchronizer/integration_test.go +++ b/pkg/synchronizer/integration_test.go @@ -45,7 +45,7 @@ func TestIntegration_CrossParticipantSync(t *testing.T) { bobNTPOffset = 500 * time.Millisecond ) - engine := NewSyncEngine() + engine := NewSyncEngine(WithSyncEngineOldPacketThreshold(0)) aliceTrack := newMockAudioTrack("audio-alice", 1000) bobTrack := newMockAudioTrack("audio-bob", 2000) @@ -157,7 +157,7 @@ func TestIntegration_AVLipSync(t *testing.T) { videoEncoderDelay = 80 * time.Millisecond ) - engine := NewSyncEngine() + engine := NewSyncEngine(WithSyncEngineOldPacketThreshold(0)) audioTrack := newMockAudioTrack("audio-alice", 1000) videoTrack := newMockVideoTrack("video-alice", 2000) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go 
index 83fa8df3..89a557e4 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -37,6 +37,10 @@ const ( // and wall-clock PTS before falling back to wall clock. wallClockSanityThreshold = 5 * time.Second + // maxTimelyPacketAge is how long a track can be behind the pipeline deadline + // before its PTS is force-corrected forward. + maxTimelyPacketAge = 10 * time.Second + // defaultOldPacketThreshold is the default age after which packets are dropped. defaultOldPacketThreshold = 500 * time.Millisecond ) @@ -66,6 +70,15 @@ func WithSyncEngineOldPacketThreshold(d time.Duration) SyncEngineOption { } } +// WithSyncEngineMediaRunningTime sets the initial media running time provider and max delay. +// If a track's PTS falls behind the deadline by more than maxDelay for >10s, PTS is force-corrected. +func WithSyncEngineMediaRunningTime(mediaRunningTime func() (time.Duration, bool), maxDelay time.Duration) SyncEngineOption { + return func(e *SyncEngine) { + e.mediaRunningTime = mediaRunningTime + e.maxMediaRunningTimeDelay = maxDelay + } +} + // SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline // to provide cross-participant alignment and per-participant A/V lip sync. // It implements the Sync interface. @@ -85,8 +98,9 @@ type SyncEngine struct { oldPacketThreshold time.Duration onStarted func() - mediaRunningTime func() (time.Duration, bool) - mediaRunningTimeLock sync.RWMutex + mediaRunningTime func() (time.Duration, bool) + maxMediaRunningTimeDelay time.Duration + mediaRunningTimeLock sync.RWMutex } // NewSyncEngine creates a new SyncEngine with the given options. @@ -285,6 +299,17 @@ func (e *SyncEngine) SetMediaRunningTime(mediaRunningTime func() (time.Duration, e.mediaRunningTimeLock.Unlock() } +// getMediaDeadline returns the current pipeline deadline, or false if unavailable. 
+func (e *SyncEngine) getMediaDeadline() (time.Duration, bool) { + e.mediaRunningTimeLock.RLock() + fn := e.mediaRunningTime + e.mediaRunningTimeLock.RUnlock() + if fn == nil { + return 0, false + } + return fn() +} + // initializeIfNeeded sets the session start time and fires the onStarted callback // on the first track initialization. Returns the startedAt value. func (e *SyncEngine) initializeIfNeeded(receivedAt time.Time) int64 { @@ -322,6 +347,9 @@ type syncEngineTrack struct { transitionSlew time.Duration lastSlewPTS time.Duration // PTS at which slew was last updated + // pipeline time feedback + lastTimelyPacket time.Time + // drain maxPTS time.Duration maxPTSSet bool @@ -368,6 +396,7 @@ func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) { st.startTime = receivedAt st.lastTS = pkt.Timestamp + st.lastTimelyPacket = receivedAt st.initialized = true // Initialize the engine's session start time. @@ -452,7 +481,20 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } st.engine.timeline.mu.RUnlock() - // Step 6: Enforce monotonicity. + // Step 6: Pipeline time feedback — if the track has fallen behind the + // pipeline's deadline for too long, force-correct PTS forward. + if deadline, ok := st.engine.getMediaDeadline(); ok && st.engine.maxMediaRunningTimeDelay > 0 { + limit := deadline - st.engine.maxMediaRunningTimeDelay + if pts < limit { + if time.Since(st.lastTimelyPacket) > maxTimelyPacketAge { + pts = deadline - st.engine.maxMediaRunningTimeDelay/2 + } + } else { + st.lastTimelyPacket = time.Now() + } + } + + // Step 7: Enforce monotonicity. 
if pts < st.lastPTSAdjusted+time.Millisecond && st.lastPTSAdjusted > 0 { pts = st.lastPTSAdjusted + time.Millisecond } From cda9da0b38c42c4657cc8fefd0bb75f417b04934 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Fri, 24 Apr 2026 16:34:17 -0400 Subject: [PATCH 08/17] more fixes --- pkg/synchronizer/integration_test.go | 46 ++--- pkg/synchronizer/ntpestimator.go | 32 +++- pkg/synchronizer/participantsync.go | 166 ++---------------- pkg/synchronizer/participantsync_test.go | 149 ++-------------- pkg/synchronizer/sessiontimeline.go | 75 +++++++- pkg/synchronizer/syncengine.go | 209 ++++++++++++++++------- 6 files changed, 292 insertions(+), 385 deletions(-) diff --git a/pkg/synchronizer/integration_test.go b/pkg/synchronizer/integration_test.go index 02207fba..42889dab 100644 --- a/pkg/synchronizer/integration_test.go +++ b/pkg/synchronizer/integration_test.go @@ -81,30 +81,25 @@ func TestIntegration_CrossParticipantSync(t *testing.T) { // since OnRTCP uses time.Now(). We need deterministic timing. engine.timeline.OnSenderReport("alice", "audio-alice", clockRate, aliceNTP, rtpTS, receivedAt) - // Also process through OnRTCP-like path to update ParticipantSync. - // We mimic the SR processing that OnRTCP does for ParticipantSync wiring. - engine.timeline.mu.RLock() - if pc, ok := engine.timeline.participants["alice"]; ok { - if pt, ok := pc.tracks["audio-alice"]; ok { - pc.participantSync.SetTrackEstimator("audio-alice", MediaTypeAudio, pt.estimator) - pc.participantSync.OnSenderReport("audio-alice") + // Wire up ParticipantSync with the track's estimator. 
+ if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { + if ps := engine.timeline.GetParticipantSync("alice"); ps != nil { + ps.SetTrackEstimator("audio-alice", MediaTypeAudio, est) + ps.OnSenderReport("audio-alice") } } - engine.timeline.mu.RUnlock() _ = aliceSR // used above indirectly // Bob SR: NTP = realTime + 500ms (Bob's NTP clock is 500ms ahead) bobNTP := ntpToUint64(realTime.Add(bobNTPOffset)) engine.timeline.OnSenderReport("bob", "audio-bob", clockRate, bobNTP, rtpTS, receivedAt) - engine.timeline.mu.RLock() - if pc, ok := engine.timeline.participants["bob"]; ok { - if pt, ok := pc.tracks["audio-bob"]; ok { - pc.participantSync.SetTrackEstimator("audio-bob", MediaTypeAudio, pt.estimator) - pc.participantSync.OnSenderReport("audio-bob") + if est := engine.timeline.GetTrackEstimator("bob", "audio-bob"); est != nil { + if ps := engine.timeline.GetParticipantSync("bob"); ps != nil { + ps.SetTrackEstimator("audio-bob", MediaTypeAudio, est) + ps.OnSenderReport("audio-bob") } } - engine.timeline.mu.RUnlock() } // Get PTS for both participants at "real time + 10s" with corresponding @@ -191,26 +186,15 @@ func TestIntegration_AVLipSync(t *testing.T) { videoNTP := ntpToUint64(srTime.Add(videoEncoderDelay)) engine.timeline.OnSenderReport("alice", "video-alice", videoClockRate, videoNTP, videoRTP, receivedAt) - // Wire up ParticipantSync with latest estimators and trigger SR processing. - engine.timeline.mu.RLock() - if pc, ok := engine.timeline.participants["alice"]; ok { - if pt, ok := pc.tracks["audio-alice"]; ok { - pc.participantSync.SetTrackEstimator("audio-alice", MediaTypeAudio, pt.estimator) - pc.participantSync.OnSenderReport("audio-alice") + // Wire up ParticipantSync with latest estimators. 
+ if ps := engine.timeline.GetParticipantSync("alice"); ps != nil { + if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { + ps.SetTrackEstimator("audio-alice", MediaTypeAudio, est) } - if pt, ok := pc.tracks["video-alice"]; ok { - pc.participantSync.SetTrackEstimator("video-alice", MediaTypeVideo, pt.estimator) - pc.participantSync.OnSenderReport("video-alice") - } - - // Drive slew adjustments with elapsed session time. - startedAt := engine.startedAt.Load() - if startedAt > 0 { - elapsed := time.Duration(receivedAt.UnixNano() - startedAt) - pc.participantSync.updateAdjustments(elapsed) + if est := engine.timeline.GetTrackEstimator("alice", "video-alice"); est != nil { + ps.SetTrackEstimator("video-alice", MediaTypeVideo, est) } } - engine.timeline.mu.RUnlock() } // Push multiple packets through GetPTS to drive the transition slew diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index 4a489a9a..753dd27d 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -17,6 +17,7 @@ package synchronizer import ( "errors" "math" + "sync" "time" ) @@ -46,11 +47,9 @@ type srSample struct { // sender report pairs to map RTP timestamps to NTP time. It is modeled after // Chrome's RtpToNtpEstimator. type NtpEstimator struct { + mu sync.Mutex clockRate uint32 - // Circular buffer of SR samples. These fields are unexported but - // package-accessible so that ParticipantSync (same package) can read - // the most recent sample. samples [maxSRSamples]srSample sampleLen int // number of valid samples in the buffer (0..maxSRSamples) sampleHead int // index of the next write position @@ -77,11 +76,32 @@ func NewNtpEstimator(clockRate uint32) *NtpEstimator { } } +// Reset clears all state, returning the estimator to its initial condition. +// Used when a stream discontinuity is detected (e.g., stream restart with a new +// RTP offset) and the old regression is no longer valid. 
+func (e *NtpEstimator) Reset() { + e.mu.Lock() + defer e.mu.Unlock() + e.samples = [maxSRSamples]srSample{} + e.sampleLen = 0 + e.sampleHead = 0 + e.lastRTP = 0 + e.rtpOffset = 0 + e.hasLastRTP = false + e.slopeNanos = 0 + e.meanX = 0 + e.meanY = 0 + e.residStd = 0 + e.ready = false +} + // OnSenderReport ingests a new RTCP sender report observation. // ntpTime is the 64-bit NTP timestamp from the SR, rtpTimestamp is the // corresponding RTP timestamp, and receivedAt is the local wall-clock time // when the SR was received. func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { + e.mu.Lock() + defer e.mu.Unlock() ntpNanos := ntpTimestampToNanos(ntpTime) unwrapped := e.unwrapRTP(rtpTimestamp) @@ -118,11 +138,15 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei // IsReady returns true once at least 2 sender reports have been processed // and the regression is valid. func (e *NtpEstimator) IsReady() bool { + e.mu.Lock() + defer e.mu.Unlock() return e.ready } // RtpToNtp maps an RTP timestamp to wall-clock time using the current regression. func (e *NtpEstimator) RtpToNtp(rtpTimestamp uint32) (time.Time, error) { + e.mu.Lock() + defer e.mu.Unlock() if !e.ready { return time.Time{}, errNotReady } @@ -135,6 +159,8 @@ func (e *NtpEstimator) RtpToNtp(rtpTimestamp uint32) (time.Time, error) { // Slope returns the regression slope: seconds of NTP time per RTP tick. // For a perfect clock this equals 1/clockRate. func (e *NtpEstimator) Slope() float64 { + e.mu.Lock() + defer e.mu.Unlock() return e.slopeNanos / 1e9 } diff --git a/pkg/synchronizer/participantsync.go b/pkg/synchronizer/participantsync.go index 88f204ae..0b9e3fe1 100644 --- a/pkg/synchronizer/participantsync.go +++ b/pkg/synchronizer/participantsync.go @@ -28,40 +28,25 @@ const ( ) const ( - // slewRatePerSecond is the maximum adjustment slew: 5ms per second of real time. 
+ // slewRatePerSecond is the maximum rate at which PTS corrections are absorbed. slewRatePerSecond = 5 * time.Millisecond - // deadbandThreshold is the minimum |offset| before any correction is applied. + // deadbandThreshold is the minimum |correction| before slew smoothing kicks in. deadbandThreshold = 5 * time.Millisecond ) // trackEntry holds per-track state within a ParticipantSync. type trackEntry struct { - mediaType MediaType - estimator *NtpEstimator - adjustment time.Duration // current playout delay adjustment + mediaType MediaType + estimator *NtpEstimator } -// ParticipantSync compares NTP estimates across a participant's audio and video -// tracks to compute A/V playout delay adjustments with time-based slew rate -// limiting. This is the equivalent of Chrome's StreamSynchronization. -// -// Audio is the reference track; video absorbs the correction. +// ParticipantSync holds per-participant sender report state and track metadata. +// PTS jump smoothing is handled directly in syncEngineTrack.GetPTS using a +// per-track correction that decays at the slew rate. type ParticipantSync struct { mu sync.Mutex tracks map[string]*trackEntry - - // lastSessionTime records the session time from the most recent - // updateAdjustments call, used to compute elapsed time for slew limiting. - lastSessionTime time.Duration - initialized bool // true after first updateAdjustments call - - // targetOffset is the desired total video adjustment (negative means - // video should be delayed relative to audio). - targetOffset time.Duration - - // currentOffset tracks the slew-limited offset applied so far. - currentOffset time.Duration } // NewParticipantSync creates a new ParticipantSync instance. 
@@ -75,143 +60,28 @@ func NewParticipantSync() *ParticipantSync { func (ps *ParticipantSync) SetTrackEstimator(trackID string, mediaType MediaType, estimator *NtpEstimator) { ps.mu.Lock() defer ps.mu.Unlock() + + if existing, ok := ps.tracks[trackID]; ok { + existing.estimator = estimator + existing.mediaType = mediaType + return + } + ps.tracks[trackID] = &trackEntry{ mediaType: mediaType, estimator: estimator, } } -// RemoveTrack removes a track and resets its adjustment. +// RemoveTrack removes a track. func (ps *ParticipantSync) RemoveTrack(trackID string) { ps.mu.Lock() defer ps.mu.Unlock() delete(ps.tracks, trackID) - // Reset sync state since we may no longer have both audio and video. - ps.targetOffset = 0 - ps.currentOffset = 0 - // Clear adjustments on remaining tracks. - for _, entry := range ps.tracks { - entry.adjustment = 0 - } } -// OnSenderReport is called when new SR data arrives for a track. It triggers -// recomputation of the A/V offset target. +// OnSenderReport is called when new SR data arrives for a track. func (ps *ParticipantSync) OnSenderReport(trackID string) { - ps.mu.Lock() - defer ps.mu.Unlock() - ps.recomputeTarget() -} - -// GetAdjustment returns the current playout delay adjustment for a track. -// Returns zero if the track is not registered or estimators are not ready. -func (ps *ParticipantSync) GetAdjustment(trackID string) time.Duration { - ps.mu.Lock() - defer ps.mu.Unlock() - entry, ok := ps.tracks[trackID] - if !ok { - return 0 - } - return entry.adjustment -} - -// updateAdjustments is called periodically (by SyncEngine) to drive the -// time-based slew toward the target offset. sessionTime is the elapsed -// session time (monotonically increasing). -func (ps *ParticipantSync) updateAdjustments(sessionTime time.Duration) { - ps.mu.Lock() - defer ps.mu.Unlock() - if !ps.initialized { - ps.lastSessionTime = sessionTime - ps.initialized = true - // Recompute on first call in case SRs arrived before the first tick. 
- ps.recomputeTarget() - ps.applyAdjustments() - return - } - - elapsed := sessionTime - ps.lastSessionTime - ps.lastSessionTime = sessionTime - - if elapsed <= 0 { - return - } - - // Compute the maximum slew for this interval. - maxSlew := time.Duration(float64(slewRatePerSecond) * float64(elapsed) / float64(time.Second)) - - // Move currentOffset toward targetOffset, bounded by maxSlew. - diff := ps.targetOffset - ps.currentOffset - if diff > 0 { - if diff > maxSlew { - diff = maxSlew - } - ps.currentOffset += diff - } else if diff < 0 { - if -diff > maxSlew { - diff = -maxSlew - } - ps.currentOffset += diff - } - - ps.applyAdjustments() -} - -// recomputeTarget recalculates the target A/V offset from the latest NTP -// samples of the audio and video estimators. -func (ps *ParticipantSync) recomputeTarget() { - audioNTP, audioOK := ps.latestNTP(MediaTypeAudio) - videoNTP, videoOK := ps.latestNTP(MediaTypeVideo) - - if !audioOK || !videoOK { - return - } - - // offset = video NTP - audio NTP. - // If positive, video's NTP is ahead of audio's, meaning video needs to be - // delayed (negative adjustment) to align with audio. - offset := videoNTP - audioNTP - - // Apply deadband: if the offset is small enough, treat it as zero. - if offset > -deadbandThreshold && offset < deadbandThreshold { - ps.targetOffset = 0 - return - } - - // Video absorbs the correction: negate the offset so that a positive NTP - // difference becomes a negative (delay) adjustment on video. - ps.targetOffset = -offset -} - -// latestNTP returns the NTP time of the most recent SR sample for the first -// ready estimator of the given media type. The second return value is false if -// no ready estimator of that type exists. 
-func (ps *ParticipantSync) latestNTP(mt MediaType) (time.Duration, bool) { - for _, entry := range ps.tracks { - if entry.mediaType != mt || entry.estimator == nil || !entry.estimator.IsReady() { - continue - } - if entry.estimator.sampleLen == 0 { - continue - } - - // The most recent sample is at (sampleHead - 1 + maxSRSamples) % maxSRSamples. - idx := (entry.estimator.sampleHead - 1 + maxSRSamples) % maxSRSamples - s := entry.estimator.samples[idx] - return time.Duration(s.ntpNanos), true - } - return 0, false -} - -// applyAdjustments distributes the current slew-limited offset to the -// appropriate tracks. Audio gets zero; video gets the correction. -func (ps *ParticipantSync) applyAdjustments() { - for _, entry := range ps.tracks { - switch entry.mediaType { - case MediaTypeAudio: - entry.adjustment = 0 - case MediaTypeVideo: - entry.adjustment = ps.currentOffset - } - } + // SR data is processed by SessionTimeline's NtpEstimator. + // Jump detection and smoothing happen in syncEngineTrack.GetPTS. } diff --git a/pkg/synchronizer/participantsync_test.go b/pkg/synchronizer/participantsync_test.go index ddd138cf..be8107bf 100644 --- a/pkg/synchronizer/participantsync_test.go +++ b/pkg/synchronizer/participantsync_test.go @@ -34,152 +34,31 @@ func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count i return e } -func TestParticipantSync_NoAdjustmentBeforeReady(t *testing.T) { +func TestParticipantSync_SetAndRemoveTrack(t *testing.T) { ps := NewParticipantSync() - // Register a non-ready estimator (only 1 SR, need >= 2). 
- e := NewNtpEstimator(90000) - baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) - e.OnSenderReport(ntpToUint64(baseNtp), 0, baseNtp.Add(30*time.Millisecond)) - - require.False(t, e.IsReady(), "estimator should not be ready with 1 SR") - + e := readyEstimator(48000, time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC), 0, 5) ps.SetTrackEstimator("audio-1", MediaTypeAudio, e) - ps.SetTrackEstimator("video-1", MediaTypeVideo, NewNtpEstimator(90000)) - ps.OnSenderReport("audio-1") - ps.updateAdjustments(time.Second) - - require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"), - "audio adjustment should be zero before estimator is ready") - require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"), - "video adjustment should be zero before estimator is ready") -} - -func TestParticipantSync_DeadbandSuppressesSmallOffset(t *testing.T) { - ps := NewParticipantSync() - - baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) - - // Audio and video with the same NTP base => perfectly aligned. - audioEst := readyEstimator(48000, baseNtp, 0, 5) - videoEst := readyEstimator(90000, baseNtp, 0, 5) - - ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst) - ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst) + ps.RemoveTrack("audio-1") + // Should not panic or error after removal. ps.OnSenderReport("audio-1") - ps.OnSenderReport("video-1") - - // Drive enough updates to let any adjustment converge. 
- for i := 0; i < 100; i++ { - ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond) - } - - require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"), - "audio adjustment should be zero when perfectly aligned") - require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"), - "video adjustment should be zero when perfectly aligned") } -func TestParticipantSync_VideoAdjustsToMatchAudio(t *testing.T) { +func TestParticipantSync_UpdateEstimator(t *testing.T) { ps := NewParticipantSync() baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) + e1 := readyEstimator(48000, baseNtp, 0, 5) + e2 := readyEstimator(48000, baseNtp.Add(time.Second), 0, 5) - // Audio starts at baseNtp, video starts 100ms later in NTP. - // This means video's NTP clock is 100ms ahead of audio's. - // The offset (video NTP - audio NTP) = +100ms. - // Since audio is reference, video must be delayed by -100ms. - audioEst := readyEstimator(48000, baseNtp, 0, 5) - videoEst := readyEstimator(90000, baseNtp.Add(100*time.Millisecond), 0, 5) - - ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst) - ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst) - - ps.OnSenderReport("audio-1") - ps.OnSenderReport("video-1") - - // Drive enough time to converge: 100ms offset / 5ms per second = 20 seconds. - // Use more to ensure full convergence. - for i := 0; i <= 300; i++ { - ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond) - } - - audioAdj := ps.GetAdjustment("audio-1") - videoAdj := ps.GetAdjustment("video-1") - - // Audio is the reference, should stay near zero. - require.InDelta(t, 0, float64(audioAdj), float64(time.Millisecond), - "audio adjustment should be near zero, got %v", audioAdj) - - // Video should get approximately -100ms adjustment. 
- require.InDelta(t, float64(-100*time.Millisecond), float64(videoAdj), float64(10*time.Millisecond), - "video adjustment should be ~-100ms, got %v", videoAdj) -} - -func TestParticipantSync_SlewRateIsTimeBased(t *testing.T) { - ps := NewParticipantSync() - - baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) - - // 200ms offset: video NTP is 200ms ahead of audio. - audioEst := readyEstimator(48000, baseNtp, 0, 5) - videoEst := readyEstimator(90000, baseNtp.Add(200*time.Millisecond), 0, 5) - - ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst) - ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst) - - ps.OnSenderReport("audio-1") - ps.OnSenderReport("video-1") - - // Drive exactly 1 second of session time (slew rate = 5ms/s, so max adjustment = 5ms after 1s). - // Start from 0 to 1 second. - ps.updateAdjustments(0) - ps.updateAdjustments(time.Second) - - videoAdj := ps.GetAdjustment("video-1") - - // After 1 second of real time at 5ms/s slew, adjustment magnitude should be <= ~5ms. - // We add a small tolerance (1ms). - require.LessOrEqual(t, -videoAdj, 6*time.Millisecond, - "after 1s, video adjustment magnitude should be <= ~5ms due to slew rate, got %v", videoAdj) - require.Greater(t, -videoAdj, time.Duration(0), - "video adjustment should be non-zero after 1s with 200ms offset, got %v", videoAdj) -} - -func TestParticipantSync_RemoveTrack(t *testing.T) { - ps := NewParticipantSync() - - baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) - - audioEst := readyEstimator(48000, baseNtp, 0, 5) - videoEst := readyEstimator(90000, baseNtp.Add(100*time.Millisecond), 0, 5) - - ps.SetTrackEstimator("audio-1", MediaTypeAudio, audioEst) - ps.SetTrackEstimator("video-1", MediaTypeVideo, videoEst) - - ps.OnSenderReport("audio-1") - ps.OnSenderReport("video-1") - - // Drive some updates so adjustments are non-zero. 
- for i := 0; i <= 50; i++ { - ps.updateAdjustments(time.Duration(i) * 100 * time.Millisecond) - } - - // Verify video has some non-zero adjustment. - require.NotEqual(t, time.Duration(0), ps.GetAdjustment("video-1"), - "video adjustment should be non-zero before removal") - - // Remove the video track. - ps.RemoveTrack("video-1") - - // After removal, GetAdjustment should return 0. - require.Equal(t, time.Duration(0), ps.GetAdjustment("video-1"), - "video adjustment should be zero after removal") + ps.SetTrackEstimator("audio-1", MediaTypeAudio, e1) + ps.SetTrackEstimator("audio-1", MediaTypeAudio, e2) - // Audio should also return zero since there's no counterpart to sync against. - // (But audio adjustment was always zero since audio is the reference.) - require.Equal(t, time.Duration(0), ps.GetAdjustment("audio-1"), - "audio adjustment should be zero (reference track)") + // Should use e2, not e1. + ps.mu.Lock() + entry := ps.tracks["audio-1"] + require.Same(t, e2, entry.estimator) + ps.mu.Unlock() } diff --git a/pkg/synchronizer/sessiontimeline.go b/pkg/synchronizer/sessiontimeline.go index d4d679d5..a3cb5d1e 100644 --- a/pkg/synchronizer/sessiontimeline.go +++ b/pkg/synchronizer/sessiontimeline.go @@ -93,6 +93,53 @@ func (st *SessionTimeline) AddParticipant(identity string) *ParticipantClock { return pc } +// GetOrAddParticipant returns the ParticipantClock for the given identity, +// creating one if it doesn't exist. This is safe for concurrent use. +func (st *SessionTimeline) GetOrAddParticipant(identity string) *ParticipantClock { + st.mu.Lock() + defer st.mu.Unlock() + + if pc, ok := st.participants[identity]; ok { + return pc + } + + pc := &ParticipantClock{ + owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), + participantSync: NewParticipantSync(), + tracks: make(map[string]*participantTrack), + } + st.participants[identity] = pc + return pc +} + +// GetTrackEstimator returns the NTP estimator for a participant's track, or nil. 
+func (st *SessionTimeline) GetTrackEstimator(identity, trackID string) *NtpEstimator { + st.mu.RLock() + defer st.mu.RUnlock() + + pc, ok := st.participants[identity] + if !ok { + return nil + } + pt, ok := pc.tracks[trackID] + if !ok { + return nil + } + return pt.estimator +} + +// GetParticipantSync returns the ParticipantSync for a participant, or nil. +func (st *SessionTimeline) GetParticipantSync(identity string) *ParticipantSync { + st.mu.RLock() + defer st.mu.RUnlock() + + pc, ok := st.participants[identity] + if !ok { + return nil + } + return pc.participantSync +} + // RemoveParticipant removes the participant with the given identity. func (st *SessionTimeline) RemoveParticipant(identity string) { st.mu.Lock() @@ -102,6 +149,23 @@ func (st *SessionTimeline) RemoveParticipant(identity string) { // OnSenderReport processes an RTCP sender report for a participant's track. // It updates the NTP estimator, OWD estimator, and records the NTP epoch. +// ResetTrack clears the NTP estimator for a track, forcing it to rebuild from +// new sender reports. Used when a stream discontinuity is detected. +func (st *SessionTimeline) ResetTrack(identity, trackID string) { + st.mu.Lock() + defer st.mu.Unlock() + + pc, ok := st.participants[identity] + if !ok { + return + } + pt, ok := pc.tracks[trackID] + if !ok { + return + } + pt.estimator.Reset() +} + func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { st.mu.Lock() defer st.mu.Unlock() @@ -127,15 +191,12 @@ func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate ui // Convert NTP timestamp to nanoseconds and update OWD. senderNtpNanos := ntpTimestampToNanos(ntpTime) receiverNanos := receivedAt.UnixNano() - _, pathChanged := pc.owdEstimator.Update(senderNtpNanos, receiverNanos) - - // If a path change was detected, re-anchor the NTP epoch to the current SR. 
- // This handles cases where the network path changes (e.g., server migration). - if pathChanged && pc.hasEpoch { - pc.ntpEpoch = nanosToTime(senderNtpNanos) - } + pc.owdEstimator.Update(senderNtpNanos, receiverNanos) // Record the NTP epoch from the first SR for this participant. + // Note: ntpEpoch cancels out in the GetSessionPTS formula + // (sessionPTS = ntpTime + OWD - sessionStart), so its exact value + // doesn't affect the output. It's kept for readability of the formula. if !pc.hasEpoch { pc.ntpEpoch = nanosToTime(senderNtpNanos) pc.hasEpoch = true diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 89a557e4..7fe95b6a 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -25,6 +25,7 @@ import ( "github.com/livekit/media-sdk/jitter" + "github.com/livekit/protocol/logger" "github.com/livekit/protocol/utils/rtputil" ) @@ -34,9 +35,16 @@ const ( transitionSlewRatePerSecond = 5 * time.Millisecond // wallClockSanityThreshold is the maximum divergence between RTP-derived PTS - // and wall-clock PTS before falling back to wall clock. + // and wall-clock PTS before falling back to wall clock in wallClockPTS(). wallClockSanityThreshold = 5 * time.Second + // ntpTrustThreshold is the maximum allowed divergence between NTP-derived PTS + // and wall-clock PTS. If NTP disagrees with wall clock by more than this, + // the NTP data is suspect (bad SRs, clock jumps, nonsensical timing) and + // we clamp to wall clock. This prevents bad publishers from dragging PTS far + // from reality. + ntpTrustThreshold = 500 * time.Millisecond + // maxTimelyPacketAge is how long a track can be behind the pipeline deadline // before its PTS is force-corrected forward. maxTimelyPacketAge = 10 * time.Second @@ -126,13 +134,7 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { defer e.mu.Unlock() // Ensure the participant exists in the timeline. 
- e.timeline.mu.Lock() - pc, ok := e.timeline.participants[identity] - if !ok { - e.timeline.mu.Unlock() - pc = e.timeline.AddParticipant(identity) - e.timeline.mu.Lock() - } + pc := e.timeline.GetOrAddParticipant(identity) // Auto-register the track with ParticipantSync using a placeholder estimator. mt := MediaTypeAudio @@ -141,12 +143,12 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { } placeholder := NewNtpEstimator(clockRate) pc.participantSync.SetTrackEstimator(track.ID(), mt, placeholder) - e.timeline.mu.Unlock() st := &syncEngineTrack{ engine: e, track: track, identity: identity, + logger: logger.GetLogger().WithValues("trackID", track.ID(), "kind", track.Kind().String(), "syncEngine", true), converter: rtputil.NewRTPConverter(int64(clockRate)), } @@ -181,6 +183,7 @@ func (e *SyncEngine) RemoveTrack(trackID string) { delete(e.trackIDs, trackID) e.mu.Unlock() + st.logger.Infow("track removed", "lastPTS", st.lastPTSAdjusted) st.Close() } @@ -209,27 +212,16 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { e.timeline.OnSenderReport(identity, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) // Wire up ParticipantSync: get the track's estimator from timeline and update it. - e.timeline.mu.RLock() - pc, pcOK := e.timeline.participants[identity] - if pcOK { - pt, ptOK := pc.tracks[trackID] - if ptOK { - mt := MediaTypeAudio - if st.track.Kind() == webrtc.RTPCodecTypeVideo { - mt = MediaTypeVideo - } - pc.participantSync.SetTrackEstimator(trackID, mt, pt.estimator) - pc.participantSync.OnSenderReport(trackID) - - // Compute elapsed session time for slew limiting. 
- startedAt := e.startedAt.Load() - if startedAt > 0 { - elapsed := time.Duration(now.UnixNano() - startedAt) - pc.participantSync.updateAdjustments(elapsed) - } + if estimator := e.timeline.GetTrackEstimator(identity, trackID); estimator != nil { + mt := MediaTypeAudio + if st.track.Kind() == webrtc.RTPCodecTypeVideo { + mt = MediaTypeVideo + } + if ps := e.timeline.GetParticipantSync(identity); ps != nil { + ps.SetTrackEstimator(trackID, mt, estimator) + ps.OnSenderReport(trackID) } } - e.timeline.mu.RUnlock() // Call onSR callback if set. st.mu.Lock() @@ -237,16 +229,23 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { st.mu.Unlock() if onSR != nil { - // Compute drift as the difference between NTP-derived time and wall clock elapsed. - ntpNanos := ntpTimestampToNanos(sr.NTPTime) - ntpTime := nanosToTime(ntpNanos) + // Compute drift using OWD-normalized session PTS (not raw NTP, which + // includes the sender's clock offset and would produce phantom drift + // if the sender's NTP clock adjusts during the recording). 
startedAt := e.startedAt.Load() if startedAt > 0 { - sessionStart := time.Unix(0, startedAt) - expectedElapsed := now.Sub(sessionStart) - ntpElapsed := ntpTime.Sub(sessionStart) - drift := ntpElapsed - expectedElapsed - onSR(drift) + sessionPTS, err := e.timeline.GetSessionPTS(identity, trackID, sr.RTPTime) + if err == nil { + sessionStart := time.Unix(0, startedAt) + expectedElapsed := now.Sub(sessionStart) + drift := sessionPTS - expectedElapsed + st.logger.Debugw("sender report", + "drift", drift, + "sessionPTS", sessionPTS, + "expectedElapsed", expectedElapsed, + ) + onSR(drift) + } } } } @@ -330,6 +329,7 @@ type syncEngineTrack struct { engine *SyncEngine track TrackRemote identity string + logger logger.Logger converter *rtputil.RTPConverter startGate startGate // from start_gate.go, nil if not enabled @@ -342,10 +342,12 @@ type syncEngineTrack struct { initialized bool closed bool - // NTP transition + // NTP transition and smoothing ntpTransitioned bool transitionSlew time.Duration lastSlewPTS time.Duration // PTS at which slew was last updated + lastNtpPTS time.Duration // last raw NTP PTS (before corrections), for jump detection + ntpCorrection time.Duration // smoothing correction for SR-induced NTP jumps // pipeline time feedback lastTimelyPacket time.Time @@ -402,6 +404,12 @@ func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) { // Initialize the engine's session start time. sessionStart := st.engine.initializeIfNeeded(receivedAt) st.sessionOffset = time.Duration(receivedAt.UnixNano() - sessionStart) + + st.logger.Infow("initialized track", + "startTime", st.startTime, + "sessionOffset", st.sessionOffset, + "rtpTS", pkt.Timestamp, + ) } // GetPTS implements TrackSync. It computes the presentation timestamp for a packet @@ -435,51 +443,123 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { // Step 1: Try NTP-grounded PTS from SessionTimeline. 
ntpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) + wallPTS := st.wallClockPTS(pkt) + var pts time.Duration if ntpErr != nil { // Step 2: Fall back to wall-clock PTS. - pts = st.wallClockPTS(pkt) + pts = wallPTS } else { + // Step 2b: Clamp NTP PTS to within ntpTrustThreshold of wall clock. + // Prevents bad publishers (wrong SRs, clock jumps) from dragging PTS far from reality. + diff := ntpPTS - wallPTS + if diff > ntpTrustThreshold || diff < -ntpTrustThreshold { + st.logger.Warnw("NTP PTS exceeds trust threshold, clamping to wall clock", nil, + "ntpPTS", ntpPTS, + "wallPTS", wallPTS, + "diff", diff, + ) + pts = wallPTS + } else { + pts = ntpPTS + } + // Step 3: On first successful NTP PTS, compute transition correction. if !st.ntpTransitioned { - wallPTS := st.wallClockPTS(pkt) - st.transitionSlew = wallPTS - ntpPTS + st.transitionSlew = wallPTS - pts st.ntpTransitioned = true + st.logger.Infow("NTP transition", + "wallPTS", wallPTS, + "ntpPTS", ntpPTS, + "transitionSlew", st.transitionSlew, + ) } - pts = ntpPTS + } + + // Compute PTS delta for slew rate calculations (used by both transition slew and NTP correction). + // Must be computed before either adjustment modifies pts. + var slewPTSDelta time.Duration + if st.lastSlewPTS > 0 { + slewPTSDelta = pts - st.lastSlewPTS } // Step 4: Apply transition slew (absorb gradually toward zero, pts-based). 
if st.transitionSlew != 0 { pts += st.transitionSlew - if st.lastSlewPTS > 0 { - ptsDelta := pts - st.lastSlewPTS - if ptsDelta > 0 { - maxStep := time.Duration(float64(transitionSlewRatePerSecond) * ptsDelta.Seconds()) + if slewPTSDelta > 0 { + maxStep := time.Duration(float64(transitionSlewRatePerSecond) * slewPTSDelta.Seconds()) + if st.transitionSlew > 0 { + st.transitionSlew -= maxStep + if st.transitionSlew < 0 { + st.transitionSlew = 0 + } + } else { + st.transitionSlew += maxStep if st.transitionSlew > 0 { - st.transitionSlew -= maxStep - if st.transitionSlew < 0 { - st.transitionSlew = 0 - } - } else { - st.transitionSlew += maxStep - if st.transitionSlew > 0 { - st.transitionSlew = 0 - } + st.transitionSlew = 0 } } } - st.lastSlewPTS = pts } - // Step 5: Apply ParticipantSync A/V adjustment. - st.engine.timeline.mu.RLock() - if pc, ok := st.engine.timeline.participants[st.identity]; ok { - adj := pc.participantSync.GetAdjustment(st.track.ID()) - pts += adj + // Step 5: Detect discontinuities and smooth NTP regression jumps. + rtpDelta := ts - st.lastTS // uint32 subtraction, wraps correctly for forward deltas + rtpDeltaDuration := st.converter.ToDuration(rtpDelta) + + if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { + // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap. + // Reset NTP state — the old regression is no longer valid. + st.engine.timeline.ResetTrack(st.identity, st.track.ID()) + st.lastNtpPTS = 0 + st.ntpCorrection = 0 + st.ntpTransitioned = false + st.transitionSlew = 0 + st.lastSlewPTS = 0 + st.logger.Warnw("stream discontinuity detected, resetting NTP state", nil, + "rtpDelta", rtpDelta, + "rtpDeltaDuration", rtpDeltaDuration, + ) + } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { + // Normal forward packet: detect NTP regression jumps. + // When a new SR shifts the regression, the NTP-derived PTS can jump + // relative to where the previous PTS + RTP delta would predict. 
+ // Absorb the jump into ntpCorrection and decay it via slew. + expectedNtpPTS := st.lastNtpPTS + rtpDeltaDuration + jump := pts - expectedNtpPTS + if jump > deadbandThreshold || jump < -deadbandThreshold { + st.ntpCorrection += -jump + pts += -jump + st.logger.Debugw("NTP regression jump detected", + "jump", jump, + "ntpCorrection", st.ntpCorrection, + ) + } + } + if ntpErr == nil { + st.lastNtpPTS = pts + } + + // Decay ntpCorrection toward zero via slew. + if st.ntpCorrection != 0 { + if slewPTSDelta > 0 { + maxStep := time.Duration(float64(slewRatePerSecond) * slewPTSDelta.Seconds()) + if st.ntpCorrection > 0 { + st.ntpCorrection -= maxStep + if st.ntpCorrection < 0 { + st.ntpCorrection = 0 + } + } else { + st.ntpCorrection += maxStep + if st.ntpCorrection > 0 { + st.ntpCorrection = 0 + } + } + } + pts += st.ntpCorrection } - st.engine.timeline.mu.RUnlock() + + st.lastSlewPTS = pts // Step 6: Pipeline time feedback — if the track has fallen behind the // pipeline's deadline for too long, force-correct PTS forward. 
@@ -487,7 +567,14 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { limit := deadline - st.engine.maxMediaRunningTimeDelay if pts < limit { if time.Since(st.lastTimelyPacket) > maxTimelyPacketAge { + oldPTS := pts pts = deadline - st.engine.maxMediaRunningTimeDelay/2 + st.logger.Warnw("force-correcting PTS forward, track behind pipeline deadline", nil, + "oldPTS", oldPTS, + "newPTS", pts, + "deadline", deadline, + "behindBy", limit-oldPTS, + ) } } else { st.lastTimelyPacket = time.Now() From 85963772a0c5d6d47f3119631ea9654e6b7d842d Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 12:54:33 -0400 Subject: [PATCH 09/17] more logging --- pkg/synchronizer/ntpestimator.go | 24 +++++++++++++++++++++++- pkg/synchronizer/ntpestimator_test.go | 12 ++++++------ pkg/synchronizer/participantsync_test.go | 2 +- pkg/synchronizer/sessiontimeline.go | 23 +++++++++++++++++++++-- pkg/synchronizer/sessiontimeline_test.go | 8 ++++---- pkg/synchronizer/syncengine.go | 21 ++++++++++++++++++--- 6 files changed, 73 insertions(+), 17 deletions(-) diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index 753dd27d..338918c3 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -19,6 +19,8 @@ import ( "math" "sync" "time" + + "github.com/livekit/protocol/logger" ) const ( @@ -48,6 +50,7 @@ type srSample struct { // Chrome's RtpToNtpEstimator. type NtpEstimator struct { mu sync.Mutex + logger logger.Logger clockRate uint32 samples [maxSRSamples]srSample @@ -70,8 +73,9 @@ type NtpEstimator struct { } // NewNtpEstimator creates an NtpEstimator for a codec with the given clock rate. 
-func NewNtpEstimator(clockRate uint32) *NtpEstimator { +func NewNtpEstimator(clockRate uint32, l logger.Logger) *NtpEstimator { return &NtpEstimator{ + logger: l, clockRate: clockRate, } } @@ -133,6 +137,24 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei e.computeRegression() e.ready = true } + + if e.logger == nil { + return + } + e.logger.Debugw("NtpEstimator: SR ingested", + "clockRate", e.clockRate, + "rtpTS", rtpTimestamp, + "unwrappedRTP", unwrapped, + "ntpTimeRaw", ntpTime, + "ntpNanos", ntpNanos, + "ntpAsTime", nanosToTime(ntpNanos), + "sampleLen", e.sampleLen, + "ready", e.ready, + "slopeNanos", e.slopeNanos, + "meanX", e.meanX, + "meanY", e.meanY, + "residStd", e.residStd, + ) } // IsReady returns true once at least 2 sender reports have been processed diff --git a/pkg/synchronizer/ntpestimator_test.go b/pkg/synchronizer/ntpestimator_test.go index 5b87a4e7..e9409d8a 100644 --- a/pkg/synchronizer/ntpestimator_test.go +++ b/pkg/synchronizer/ntpestimator_test.go @@ -33,7 +33,7 @@ func ntpToUint64(t time.Time) uint64 { } func TestNtpEstimator_NotReadyBeforeTwoSRs(t *testing.T) { - e := NewNtpEstimator(90000) + e := NewNtpEstimator(90000, nil) // Zero SRs: not ready require.False(t, e.IsReady(), "should not be ready with 0 SRs") @@ -60,7 +60,7 @@ func TestNtpEstimator_NotReadyBeforeTwoSRs(t *testing.T) { func TestNtpEstimator_AccurateMapping(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) baseTime := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) @@ -99,7 +99,7 @@ func TestNtpEstimator_AccurateMapping(t *testing.T) { func TestNtpEstimator_OutlierRejection(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) baseTime := time.Date(2025, 6, 15, 10, 0, 0, 0, time.UTC) @@ -131,7 +131,7 @@ func TestNtpEstimator_OutlierRejection(t *testing.T) { func TestNtpEstimator_Wraparound(t *testing.T) { const clockRate = 
90000 - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) baseTime := time.Date(2025, 3, 1, 0, 0, 0, 0, time.UTC) @@ -174,7 +174,7 @@ func TestNtpEstimator_Wraparound(t *testing.T) { func TestNtpEstimator_SlidingWindow(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) baseTime := time.Date(2025, 4, 1, 0, 0, 0, 0, time.UTC) @@ -201,7 +201,7 @@ func TestNtpEstimator_SlidingWindow(t *testing.T) { func TestNtpEstimator_Slope(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) baseTime := time.Date(2025, 5, 1, 0, 0, 0, 0, time.UTC) diff --git a/pkg/synchronizer/participantsync_test.go b/pkg/synchronizer/participantsync_test.go index be8107bf..9bfad021 100644 --- a/pkg/synchronizer/participantsync_test.go +++ b/pkg/synchronizer/participantsync_test.go @@ -25,7 +25,7 @@ import ( // so that it is ready for use. The SR samples are spaced 5 seconds apart in both // NTP and RTP time. 
func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count int) *NtpEstimator { - e := NewNtpEstimator(clockRate) + e := NewNtpEstimator(clockRate, nil) for i := 0; i < count; i++ { ntpTime := baseNtp.Add(time.Duration(i) * 5 * time.Second) rtpTS := baseRtp + uint32(i)*uint32(clockRate)*5 diff --git a/pkg/synchronizer/sessiontimeline.go b/pkg/synchronizer/sessiontimeline.go index a3cb5d1e..3e216d00 100644 --- a/pkg/synchronizer/sessiontimeline.go +++ b/pkg/synchronizer/sessiontimeline.go @@ -21,6 +21,7 @@ import ( "time" "github.com/livekit/mediatransportutil/pkg/latency" + "github.com/livekit/protocol/logger" ) var errNoSenderReports = errors.New("SessionTimeline: no sender reports received for track") @@ -58,14 +59,16 @@ type ParticipantClock struct { // - sessionStart = wall-clock time first packet of any track arrived type SessionTimeline struct { mu sync.RWMutex + logger logger.Logger participants map[string]*ParticipantClock sessionStart time.Time hasStart bool } // NewSessionTimeline creates a new SessionTimeline. -func NewSessionTimeline() *SessionTimeline { +func NewSessionTimeline(l logger.Logger) *SessionTimeline { return &SessionTimeline{ + logger: l, participants: make(map[string]*ParticipantClock), } } @@ -179,7 +182,7 @@ func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate ui pt, ok := pc.tracks[trackID] if !ok { pt = &participantTrack{ - estimator: NewNtpEstimator(clockRate), + estimator: NewNtpEstimator(clockRate, st.logger), trackID: trackID, } pc.tracks[trackID] = pt @@ -252,5 +255,21 @@ func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp // Compute the session PTS. 
sessionPTS := sinceEpoch + epochOnReceiverClock.Sub(st.sessionStart) + if (sessionPTS < 0 || sessionPTS > 24*time.Hour) && st.logger != nil { + st.logger.Warnw("GetSessionPTS: abnormal result", + nil, + "identity", identity, + "trackID", trackID, + "rtpTimestamp", rtpTimestamp, + "ntpTime", ntpTime, + "ntpEpoch", pc.ntpEpoch, + "sinceEpoch", sinceEpoch, + "estimatedOWD", estimatedOWD, + "epochOnReceiverClock", epochOnReceiverClock, + "sessionStart", st.sessionStart, + "sessionPTS", sessionPTS, + ) + } + return sessionPTS, nil } diff --git a/pkg/synchronizer/sessiontimeline_test.go b/pkg/synchronizer/sessiontimeline_test.go index 6e083cba..415825c1 100644 --- a/pkg/synchronizer/sessiontimeline_test.go +++ b/pkg/synchronizer/sessiontimeline_test.go @@ -30,7 +30,7 @@ func TestSessionTimeline_SingleParticipant(t *testing.T) { trackID = "audio-1" ) - st := NewSessionTimeline() + st := NewSessionTimeline(nil) // Session starts at a fixed wall-clock time. sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) @@ -87,7 +87,7 @@ func TestSessionTimeline_CrossParticipantAlignment(t *testing.T) { // Bob's NTP clock is offset by 500ms relative to alice's (different NTP servers). bobNTPOffset := 500 * time.Millisecond - st := NewSessionTimeline() + st := NewSessionTimeline(nil) sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) st.SetSessionStart(sessionStart) @@ -157,7 +157,7 @@ func TestSessionTimeline_LateJoiner(t *testing.T) { owd = 50 * time.Millisecond ) - st := NewSessionTimeline() + st := NewSessionTimeline(nil) sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) st.SetSessionStart(sessionStart) @@ -206,7 +206,7 @@ func TestSessionTimeline_LateJoiner(t *testing.T) { func TestSessionTimeline_FallbackBeforeSRs(t *testing.T) { // Verify error when no SRs received. 
- st := NewSessionTimeline() + st := NewSessionTimeline(nil) sessionStart := time.Date(2025, 6, 1, 12, 0, 0, 0, time.UTC) st.SetSessionStart(sessionStart) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 7fe95b6a..da8f0e36 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -87,6 +87,13 @@ func WithSyncEngineMediaRunningTime(mediaRunningTime func() (time.Duration, bool } } +// WithSyncEngineLogger sets the logger for the sync engine and all sub-components. +func WithSyncEngineLogger(l logger.Logger) SyncEngineOption { + return func(e *SyncEngine) { + e.logger = l + } +} + // SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline // to provide cross-participant alignment and per-participant A/V lip sync. // It implements the Sync interface. @@ -102,6 +109,7 @@ type SyncEngine struct { // high-water mark for removed tracks, so End() includes their PTS maxRemovedPTS time.Duration + logger logger.Logger enableStartGate bool oldPacketThreshold time.Duration onStarted func() @@ -114,7 +122,6 @@ type SyncEngine struct { // NewSyncEngine creates a new SyncEngine with the given options. 
func NewSyncEngine(opts ...SyncEngineOption) *SyncEngine { e := &SyncEngine{ - timeline: NewSessionTimeline(), tracks: make(map[uint32]*syncEngineTrack), trackIDs: make(map[string]*syncEngineTrack), oldPacketThreshold: defaultOldPacketThreshold, @@ -122,6 +129,7 @@ func NewSyncEngine(opts ...SyncEngineOption) *SyncEngine { for _, opt := range opts { opt(e) } + e.timeline = NewSessionTimeline(e.logger) return e } @@ -141,14 +149,14 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { if track.Kind() == webrtc.RTPCodecTypeVideo { mt = MediaTypeVideo } - placeholder := NewNtpEstimator(clockRate) + placeholder := NewNtpEstimator(clockRate, e.logger) pc.participantSync.SetTrackEstimator(track.ID(), mt, placeholder) st := &syncEngineTrack{ engine: e, track: track, identity: identity, - logger: logger.GetLogger().WithValues("trackID", track.ID(), "kind", track.Kind().String(), "syncEngine", true), + logger: e.getTrackLogger(track), converter: rtputil.NewRTPConverter(int64(clockRate)), } @@ -322,6 +330,13 @@ func (e *SyncEngine) initializeIfNeeded(receivedAt time.Time) int64 { return e.startedAt.Load() } +func (e *SyncEngine) getTrackLogger(track TrackRemote) logger.Logger { + if e.logger != nil { + return e.logger.WithValues("trackID", track.ID(), "kind", track.Kind().String()) + } + return logger.GetLogger().WithValues("trackID", track.ID(), "kind", track.Kind().String(), "syncEngine", true) +} + // --- syncEngineTrack --- // syncEngineTrack implements TrackSync for a single track within a SyncEngine. 
From f6974581bb5a5629c73618c21d2f2aa0425dbee5 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 13:04:17 -0400 Subject: [PATCH 10/17] ignore duplicate SRs --- pkg/synchronizer/ntpestimator.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index 338918c3..34d25fa1 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -109,6 +109,17 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei ntpNanos := ntpTimestampToNanos(ntpTime) unwrapped := e.unwrapRTP(rtpTimestamp) + // Skip duplicate SRs (same NTP/RTP pair as the most recent sample). + // This happens when the same SR is dispatched multiple times via + // per-publication RTCP callbacks. + if e.sampleLen > 0 { + lastIdx := (e.sampleHead - 1 + maxSRSamples) % maxSRSamples + last := e.samples[lastIdx] + if last.unwrappedRTP == unwrapped && last.ntpNanos == ntpNanos { + return + } + } + // Outlier rejection: if we already have a valid regression, check whether // this new sample deviates from the prediction by more than 3 standard // deviations. From 9caa8de8df07c7ec41e3290bf19cb37594465f13 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 13:13:58 -0400 Subject: [PATCH 11/17] disable ntp jump detection --- pkg/synchronizer/syncengine.go | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index da8f0e36..4cc8b9cf 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -535,22 +535,16 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { "rtpDelta", rtpDelta, "rtpDeltaDuration", rtpDeltaDuration, ) - } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { - // Normal forward packet: detect NTP regression jumps. 
- // When a new SR shifts the regression, the NTP-derived PTS can jump - // relative to where the previous PTS + RTP delta would predict. - // Absorb the jump into ntpCorrection and decay it via slew. - expectedNtpPTS := st.lastNtpPTS + rtpDeltaDuration - jump := pts - expectedNtpPTS - if jump > deadbandThreshold || jump < -deadbandThreshold { - st.ntpCorrection += -jump - pts += -jump - st.logger.Debugw("NTP regression jump detected", - "jump", jump, - "ntpCorrection", st.ntpCorrection, - ) - } } + // TODO: NTP regression jump detection disabled for debugging audio pop. + // } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { + // expectedNtpPTS := st.lastNtpPTS + rtpDeltaDuration + // jump := pts - expectedNtpPTS + // if jump > deadbandThreshold || jump < -deadbandThreshold { + // st.ntpCorrection += -jump + // pts += -jump + // } + // } if ntpErr == nil { st.lastNtpPTS = pts } From 5092acb20ab1b70a6a260132f515ee4fcf871111 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 13:33:22 -0400 Subject: [PATCH 12/17] fix ntp jump detection --- pkg/synchronizer/syncengine.go | 91 +++++++++++++++++----------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 4cc8b9cf..b72ef58d 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -456,49 +456,82 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } // Step 1: Try NTP-grounded PTS from SessionTimeline. - ntpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) + rawNtpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) wallPTS := st.wallClockPTS(pkt) + // Step 2: Detect discontinuities and NTP regression jumps on RAW NTP PTS. + // This operates before any corrections to avoid feedback loops. 
+ rtpDelta := ts - st.lastTS + rtpDeltaDuration := st.converter.ToDuration(rtpDelta) + + if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { + // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap. + st.engine.timeline.ResetTrack(st.identity, st.track.ID()) + st.lastNtpPTS = 0 + st.ntpCorrection = 0 + st.ntpTransitioned = false + st.transitionSlew = 0 + st.lastSlewPTS = 0 + st.logger.Warnw("stream discontinuity detected, resetting NTP state", nil, + "rtpDelta", rtpDelta, + "rtpDeltaDuration", rtpDeltaDuration, + ) + } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { + // Detect regression jumps: compare raw NTP PTS against expected. + expectedRawNtpPTS := st.lastNtpPTS + rtpDeltaDuration + jump := rawNtpPTS - expectedRawNtpPTS + if jump > deadbandThreshold || jump < -deadbandThreshold { + st.ntpCorrection -= jump + st.logger.Debugw("NTP regression jump detected", + "jump", jump, + "ntpCorrection", st.ntpCorrection, + ) + } + } + if ntpErr == nil { + st.lastNtpPTS = rawNtpPTS // Always track raw NTP PTS, never corrected + } + + // Step 3: Compute final PTS with corrections. var pts time.Duration if ntpErr != nil { - // Step 2: Fall back to wall-clock PTS. pts = wallPTS } else { - // Step 2b: Clamp NTP PTS to within ntpTrustThreshold of wall clock. - // Prevents bad publishers (wrong SRs, clock jumps) from dragging PTS far from reality. - diff := ntpPTS - wallPTS + // Apply NTP jump correction. + pts = rawNtpPTS + st.ntpCorrection + + // Clamp corrected PTS to within trust threshold of wall clock. + diff := pts - wallPTS if diff > ntpTrustThreshold || diff < -ntpTrustThreshold { st.logger.Warnw("NTP PTS exceeds trust threshold, clamping to wall clock", nil, - "ntpPTS", ntpPTS, + "rawNtpPTS", rawNtpPTS, + "ntpCorrection", st.ntpCorrection, "wallPTS", wallPTS, "diff", diff, ) pts = wallPTS - } else { - pts = ntpPTS } - // Step 3: On first successful NTP PTS, compute transition correction. 
+ // On first successful NTP PTS, compute transition correction. if !st.ntpTransitioned { st.transitionSlew = wallPTS - pts st.ntpTransitioned = true st.logger.Infow("NTP transition", "wallPTS", wallPTS, - "ntpPTS", ntpPTS, + "ntpPTS", rawNtpPTS, "transitionSlew", st.transitionSlew, ) } } - // Compute PTS delta for slew rate calculations (used by both transition slew and NTP correction). - // Must be computed before either adjustment modifies pts. + // Compute PTS delta for slew rate calculations. var slewPTSDelta time.Duration if st.lastSlewPTS > 0 { slewPTSDelta = pts - st.lastSlewPTS } - // Step 4: Apply transition slew (absorb gradually toward zero, pts-based). + // Step 4: Apply transition slew (absorb gradually toward zero). if st.transitionSlew != 0 { pts += st.transitionSlew @@ -518,37 +551,6 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } } - // Step 5: Detect discontinuities and smooth NTP regression jumps. - rtpDelta := ts - st.lastTS // uint32 subtraction, wraps correctly for forward deltas - rtpDeltaDuration := st.converter.ToDuration(rtpDelta) - - if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { - // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap. - // Reset NTP state — the old regression is no longer valid. - st.engine.timeline.ResetTrack(st.identity, st.track.ID()) - st.lastNtpPTS = 0 - st.ntpCorrection = 0 - st.ntpTransitioned = false - st.transitionSlew = 0 - st.lastSlewPTS = 0 - st.logger.Warnw("stream discontinuity detected, resetting NTP state", nil, - "rtpDelta", rtpDelta, - "rtpDeltaDuration", rtpDeltaDuration, - ) - } - // TODO: NTP regression jump detection disabled for debugging audio pop. 
- // } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { - // expectedNtpPTS := st.lastNtpPTS + rtpDeltaDuration - // jump := pts - expectedNtpPTS - // if jump > deadbandThreshold || jump < -deadbandThreshold { - // st.ntpCorrection += -jump - // pts += -jump - // } - // } - if ntpErr == nil { - st.lastNtpPTS = pts - } - // Decay ntpCorrection toward zero via slew. if st.ntpCorrection != 0 { if slewPTSDelta > 0 { @@ -565,7 +567,6 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } } } - pts += st.ntpCorrection } st.lastSlewPTS = pts From 2e738d99b620b405b2cdd31812bf39f9715bd7e9 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 13:49:00 -0400 Subject: [PATCH 13/17] 4 minSamples for ntp estimator --- pkg/synchronizer/ntpestimator.go | 9 +++++++-- pkg/synchronizer/ntpestimator_test.go | 25 +++++++++++++------------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index 34d25fa1..cd235d1a 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -27,6 +27,11 @@ const ( // maxSRSamples is the sliding window size for sender report pairs. maxSRSamples = 20 + // minSamplesReady is the minimum number of SR pairs needed before the + // regression is considered ready. With only 2 points the slope is entirely + // determined by SR timing jitter; 4 gives a much more stable fit. + minSamplesReady = 4 + // outlierThresholdStdDevs is the number of standard deviations beyond which // a new SR is considered an outlier and excluded from the regression. 
outlierThresholdStdDevs = 3.0 @@ -62,7 +67,7 @@ type NtpEstimator struct { rtpOffset int64 // cumulative offset from wraparounds hasLastRTP bool - // Regression results (valid when sampleLen >= 2) + // Regression results (valid when sampleLen >= minSamplesReady) // The internal model is: ntpNanos = slopeNanos * (unwrappedRTP - meanX) + meanY // where slopeNanos is nanos per RTP tick. slopeNanos float64 // nanos of NTP time per RTP tick @@ -144,7 +149,7 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei } // Recompute regression if we have enough samples. - if e.sampleLen >= 2 { + if e.sampleLen >= minSamplesReady { e.computeRegression() e.ready = true } diff --git a/pkg/synchronizer/ntpestimator_test.go b/pkg/synchronizer/ntpestimator_test.go index e9409d8a..37388b42 100644 --- a/pkg/synchronizer/ntpestimator_test.go +++ b/pkg/synchronizer/ntpestimator_test.go @@ -32,27 +32,28 @@ func ntpToUint64(t time.Time) uint64 { return secs<<32 | frac } -func TestNtpEstimator_NotReadyBeforeTwoSRs(t *testing.T) { +func TestNtpEstimator_NotReadyBeforeEnoughSRs(t *testing.T) { e := NewNtpEstimator(90000, nil) - // Zero SRs: not ready require.False(t, e.IsReady(), "should not be ready with 0 SRs") _, err := e.RtpToNtp(1000) require.Error(t, err, "RtpToNtp should error when not ready") - // One SR: still not ready + // Feed SRs one at a time, checking readiness now := time.Now() - e.OnSenderReport(ntpToUint64(now), 90000, now) - require.False(t, e.IsReady(), "should not be ready with 1 SR") - - _, err = e.RtpToNtp(90000) - require.Error(t, err, "RtpToNtp should error with only 1 SR") + for i := 0; i < minSamplesReady-1; i++ { + srTime := now.Add(time.Duration(i) * time.Second) + rtpTS := uint32(i+1) * 90000 + e.OnSenderReport(ntpToUint64(srTime), rtpTS, srTime) + require.False(t, e.IsReady(), "should not be ready with %d SRs", i+1) + } - // Two SRs: ready - now2 := now.Add(time.Second) - e.OnSenderReport(ntpToUint64(now2), 180000, now2) - 
require.True(t, e.IsReady(), "should be ready with 2 SRs") + // One more SR makes it ready + srTime := now.Add(time.Duration(minSamplesReady-1) * time.Second) + rtpTS := uint32(minSamplesReady) * 90000 + e.OnSenderReport(ntpToUint64(srTime), rtpTS, srTime) + require.True(t, e.IsReady(), "should be ready with %d SRs", minSamplesReady) _, err = e.RtpToNtp(135000) require.NoError(t, err, "RtpToNtp should succeed when ready") From e8f3d9f0dfced00799eaf3a836f5475fe6c2f5af Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 14:10:52 -0400 Subject: [PATCH 14/17] add option to use audio tempo controller --- pkg/synchronizer/syncengine.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index b72ef58d..8b358c57 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -94,6 +94,16 @@ func WithSyncEngineLogger(l logger.Logger) SyncEngineOption { } } +// WithSyncEngineAudioDriftCompensated signals that audio drift is handled +// externally (e.g., by a tempo controller) and the sync engine should not +// apply NTP PTS corrections to audio tracks. NTP regression still runs for +// drift measurement and reporting. +func WithSyncEngineAudioDriftCompensated() SyncEngineOption { + return func(e *SyncEngine) { + e.audioDriftCompensated = true + } +} + // SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline // to provide cross-participant alignment and per-participant A/V lip sync. // It implements the Sync interface. 
@@ -109,10 +119,11 @@ type SyncEngine struct { // high-water mark for removed tracks, so End() includes their PTS maxRemovedPTS time.Duration - logger logger.Logger - enableStartGate bool - oldPacketThreshold time.Duration - onStarted func() + logger logger.Logger + enableStartGate bool + oldPacketThreshold time.Duration + audioDriftCompensated bool // audio drift handled externally (e.g., tempo controller) + onStarted func() mediaRunningTime func() (time.Duration, bool) maxMediaRunningTimeDelay time.Duration @@ -460,6 +471,11 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { wallPTS := st.wallClockPTS(pkt) + // Audio tracks with external drift compensation (e.g., tempo controller) skip + // NTP PTS corrections — drift is handled by resampling, not PTS adjustment. + // NTP regression still runs (via OnSenderReport) for drift measurement. + useWallClockOnly := st.engine.audioDriftCompensated && st.track.Kind() == webrtc.RTPCodecTypeAudio + // Step 2: Detect discontinuities and NTP regression jumps on RAW NTP PTS. // This operates before any corrections to avoid feedback loops. rtpDelta := ts - st.lastTS @@ -477,7 +493,7 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { "rtpDelta", rtpDelta, "rtpDeltaDuration", rtpDeltaDuration, ) - } else if ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { + } else if !useWallClockOnly && ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { // Detect regression jumps: compare raw NTP PTS against expected. expectedRawNtpPTS := st.lastNtpPTS + rtpDeltaDuration jump := rawNtpPTS - expectedRawNtpPTS @@ -495,7 +511,7 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { // Step 3: Compute final PTS with corrections. var pts time.Duration - if ntpErr != nil { + if ntpErr != nil || useWallClockOnly { pts = wallPTS } else { // Apply NTP jump correction. 
From c6637680dd6f3fbba9d65cff8f93b74f9b27bc10 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 14:44:42 -0400 Subject: [PATCH 15/17] clean up and reorganize --- pkg/synchronizer/integration_test.go | 30 +- pkg/synchronizer/ntpestimator.go | 23 +- pkg/synchronizer/ntpestimator_test.go | 12 +- pkg/synchronizer/participantclock.go | 45 ++ ...tsync_test.go => participantclock_test.go} | 30 +- pkg/synchronizer/participantsync.go | 87 ---- pkg/synchronizer/sessiontimeline.go | 64 +-- pkg/synchronizer/syncengine.go | 390 ++---------------- pkg/synchronizer/syncenginetrack.go | 358 ++++++++++++++++ 9 files changed, 492 insertions(+), 547 deletions(-) create mode 100644 pkg/synchronizer/participantclock.go rename pkg/synchronizer/{participantsync_test.go => participantclock_test.go} (72%) delete mode 100644 pkg/synchronizer/participantsync.go create mode 100644 pkg/synchronizer/syncenginetrack.go diff --git a/pkg/synchronizer/integration_test.go b/pkg/synchronizer/integration_test.go index 42889dab..edeb0eed 100644 --- a/pkg/synchronizer/integration_test.go +++ b/pkg/synchronizer/integration_test.go @@ -21,8 +21,8 @@ import ( "github.com/stretchr/testify/require" ) -// TestIntegration_CrossParticipantSync exercises the full SyncEngine stack -// (NtpEstimator -> SessionTimeline -> ParticipantSync -> SyncEngine) to verify +// TestIntegration_CrossParticipantClock exercises the full SyncEngine stack +// (NtpEstimator -> SessionTimeline -> ParticipantClock -> SyncEngine) to verify // that two participants producing audio at the same real-world time are aligned // on the session timeline despite having different NTP clock offsets. // @@ -38,7 +38,7 @@ import ( // // The formula sessionPTS = ntpTime + OWD - sessionStart normalizes the clock // offset because ntpTime includes the +500ms and OWD reflects the -500ms. 
-func TestIntegration_CrossParticipantSync(t *testing.T) { +func TestIntegration_CrossParticipantClock(t *testing.T) { const ( clockRate = uint32(48000) owd = 50 * time.Millisecond @@ -81,11 +81,10 @@ func TestIntegration_CrossParticipantSync(t *testing.T) { // since OnRTCP uses time.Now(). We need deterministic timing. engine.timeline.OnSenderReport("alice", "audio-alice", clockRate, aliceNTP, rtpTS, receivedAt) - // Wire up ParticipantSync with the track's estimator. + // Wire up ParticipantClock with the track's estimator. if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { - if ps := engine.timeline.GetParticipantSync("alice"); ps != nil { - ps.SetTrackEstimator("audio-alice", MediaTypeAudio, est) - ps.OnSenderReport("audio-alice") + if ps := engine.timeline.GetParticipantClock("alice"); ps != nil { + ps.SetTrackEstimator("audio-alice", est) } } _ = aliceSR // used above indirectly @@ -95,9 +94,8 @@ func TestIntegration_CrossParticipantSync(t *testing.T) { engine.timeline.OnSenderReport("bob", "audio-bob", clockRate, bobNTP, rtpTS, receivedAt) if est := engine.timeline.GetTrackEstimator("bob", "audio-bob"); est != nil { - if ps := engine.timeline.GetParticipantSync("bob"); ps != nil { - ps.SetTrackEstimator("audio-bob", MediaTypeAudio, est) - ps.OnSenderReport("audio-bob") + if ps := engine.timeline.GetParticipantClock("bob"); ps != nil { + ps.SetTrackEstimator("audio-bob", est) } } } @@ -142,7 +140,7 @@ func TestIntegration_CrossParticipantSync(t *testing.T) { // - Video has 80ms encoder delay: video NTP = audio NTP + 80ms for same // real-world instant (video capture is delayed by encoding pipeline) // -// The ParticipantSync detects the A/V NTP offset and applies a slew-limited +// The ParticipantClock detects the A/V NTP offset and applies a slew-limited // correction on the video track to bring them into alignment. 
func TestIntegration_AVLipSync(t *testing.T) { const ( @@ -186,13 +184,13 @@ func TestIntegration_AVLipSync(t *testing.T) { videoNTP := ntpToUint64(srTime.Add(videoEncoderDelay)) engine.timeline.OnSenderReport("alice", "video-alice", videoClockRate, videoNTP, videoRTP, receivedAt) - // Wire up ParticipantSync with latest estimators. - if ps := engine.timeline.GetParticipantSync("alice"); ps != nil { + // Wire up ParticipantClock with latest estimators. + if ps := engine.timeline.GetParticipantClock("alice"); ps != nil { if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { - ps.SetTrackEstimator("audio-alice", MediaTypeAudio, est) + ps.SetTrackEstimator("audio-alice", est) } if est := engine.timeline.GetTrackEstimator("alice", "video-alice"); est != nil { - ps.SetTrackEstimator("video-alice", MediaTypeVideo, est) + ps.SetTrackEstimator("video-alice", est) } } } @@ -223,7 +221,7 @@ func TestIntegration_AVLipSync(t *testing.T) { require.NoError(t, err) // The 80ms encoder delay should be corrected (or mostly corrected) by - // ParticipantSync's slew-limited adjustment. Allow 100ms tolerance to + // ParticipantClock's slew-limited adjustment. Allow 100ms tolerance to // account for slew rate convergence. diff := audioPTS - videoPTS if diff < 0 { diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index cd235d1a..c17104fe 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -19,8 +19,6 @@ import ( "math" "sync" "time" - - "github.com/livekit/protocol/logger" ) const ( @@ -55,7 +53,6 @@ type srSample struct { // Chrome's RtpToNtpEstimator. type NtpEstimator struct { mu sync.Mutex - logger logger.Logger clockRate uint32 samples [maxSRSamples]srSample @@ -78,9 +75,8 @@ type NtpEstimator struct { } // NewNtpEstimator creates an NtpEstimator for a codec with the given clock rate. 
-func NewNtpEstimator(clockRate uint32, l logger.Logger) *NtpEstimator { +func NewNtpEstimator(clockRate uint32) *NtpEstimator { return &NtpEstimator{ - logger: l, clockRate: clockRate, } } @@ -154,23 +150,6 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei e.ready = true } - if e.logger == nil { - return - } - e.logger.Debugw("NtpEstimator: SR ingested", - "clockRate", e.clockRate, - "rtpTS", rtpTimestamp, - "unwrappedRTP", unwrapped, - "ntpTimeRaw", ntpTime, - "ntpNanos", ntpNanos, - "ntpAsTime", nanosToTime(ntpNanos), - "sampleLen", e.sampleLen, - "ready", e.ready, - "slopeNanos", e.slopeNanos, - "meanX", e.meanX, - "meanY", e.meanY, - "residStd", e.residStd, - ) } // IsReady returns true once at least 2 sender reports have been processed diff --git a/pkg/synchronizer/ntpestimator_test.go b/pkg/synchronizer/ntpestimator_test.go index 37388b42..e01f425d 100644 --- a/pkg/synchronizer/ntpestimator_test.go +++ b/pkg/synchronizer/ntpestimator_test.go @@ -33,7 +33,7 @@ func ntpToUint64(t time.Time) uint64 { } func TestNtpEstimator_NotReadyBeforeEnoughSRs(t *testing.T) { - e := NewNtpEstimator(90000, nil) + e := NewNtpEstimator(90000) require.False(t, e.IsReady(), "should not be ready with 0 SRs") @@ -61,7 +61,7 @@ func TestNtpEstimator_NotReadyBeforeEnoughSRs(t *testing.T) { func TestNtpEstimator_AccurateMapping(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) baseTime := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) @@ -100,7 +100,7 @@ func TestNtpEstimator_AccurateMapping(t *testing.T) { func TestNtpEstimator_OutlierRejection(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) baseTime := time.Date(2025, 6, 15, 10, 0, 0, 0, time.UTC) @@ -132,7 +132,7 @@ func TestNtpEstimator_OutlierRejection(t *testing.T) { func TestNtpEstimator_Wraparound(t *testing.T) { const clockRate = 90000 - e := 
NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) baseTime := time.Date(2025, 3, 1, 0, 0, 0, 0, time.UTC) @@ -175,7 +175,7 @@ func TestNtpEstimator_Wraparound(t *testing.T) { func TestNtpEstimator_SlidingWindow(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) baseTime := time.Date(2025, 4, 1, 0, 0, 0, 0, time.UTC) @@ -202,7 +202,7 @@ func TestNtpEstimator_SlidingWindow(t *testing.T) { func TestNtpEstimator_Slope(t *testing.T) { const clockRate = 90000 - e := NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) baseTime := time.Date(2025, 5, 1, 0, 0, 0, 0, time.UTC) diff --git a/pkg/synchronizer/participantclock.go b/pkg/synchronizer/participantclock.go new file mode 100644 index 00000000..48732559 --- /dev/null +++ b/pkg/synchronizer/participantclock.go @@ -0,0 +1,45 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "sync" + "time" + + "github.com/livekit/mediatransportutil/pkg/latency" +) + +// ParticipantClock holds OWD and NTP estimation state for a single participant. +type ParticipantClock struct { + mu sync.Mutex + owdEstimator *latency.OWDEstimator + tracks map[string]*NtpEstimator + ntpEpoch time.Time // NTP time from first SR + hasEpoch bool +} + +// SetTrackEstimator registers or updates the NtpEstimator for a given track. 
+func (pc *ParticipantClock) SetTrackEstimator(trackID string, estimator *NtpEstimator) { + pc.mu.Lock() + defer pc.mu.Unlock() + pc.tracks[trackID] = estimator +} + +// RemoveTrack removes a track. +func (pc *ParticipantClock) RemoveTrack(trackID string) { + pc.mu.Lock() + defer pc.mu.Unlock() + delete(pc.tracks, trackID) +} diff --git a/pkg/synchronizer/participantsync_test.go b/pkg/synchronizer/participantclock_test.go similarity index 72% rename from pkg/synchronizer/participantsync_test.go rename to pkg/synchronizer/participantclock_test.go index 9bfad021..8ecf2362 100644 --- a/pkg/synchronizer/participantsync_test.go +++ b/pkg/synchronizer/participantclock_test.go @@ -25,7 +25,7 @@ import ( // so that it is ready for use. The SR samples are spaced 5 seconds apart in both // NTP and RTP time. func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count int) *NtpEstimator { - e := NewNtpEstimator(clockRate, nil) + e := NewNtpEstimator(clockRate) for i := 0; i < count; i++ { ntpTime := baseNtp.Add(time.Duration(i) * 5 * time.Second) rtpTS := baseRtp + uint32(i)*uint32(clockRate)*5 @@ -34,31 +34,29 @@ func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count i return e } -func TestParticipantSync_SetAndRemoveTrack(t *testing.T) { - ps := NewParticipantSync() +func TestParticipantClock_SetAndRemoveTrack(t *testing.T) { + st := NewSessionTimeline(nil) + pc := st.AddParticipant("alice") e := readyEstimator(48000, time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC), 0, 5) - ps.SetTrackEstimator("audio-1", MediaTypeAudio, e) + pc.SetTrackEstimator("audio-1", e) - ps.RemoveTrack("audio-1") - - // Should not panic or error after removal. 
- ps.OnSenderReport("audio-1") + pc.RemoveTrack("audio-1") } -func TestParticipantSync_UpdateEstimator(t *testing.T) { - ps := NewParticipantSync() +func TestParticipantClock_UpdateEstimator(t *testing.T) { + st := NewSessionTimeline(nil) + pc := st.AddParticipant("alice") baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) e1 := readyEstimator(48000, baseNtp, 0, 5) e2 := readyEstimator(48000, baseNtp.Add(time.Second), 0, 5) - ps.SetTrackEstimator("audio-1", MediaTypeAudio, e1) - ps.SetTrackEstimator("audio-1", MediaTypeAudio, e2) + pc.SetTrackEstimator("audio-1", e1) + pc.SetTrackEstimator("audio-1", e2) // Should use e2, not e1. - ps.mu.Lock() - entry := ps.tracks["audio-1"] - require.Same(t, e2, entry.estimator) - ps.mu.Unlock() + pc.mu.Lock() + require.Same(t, e2, pc.tracks["audio-1"]) + pc.mu.Unlock() } diff --git a/pkg/synchronizer/participantsync.go b/pkg/synchronizer/participantsync.go deleted file mode 100644 index 0b9e3fe1..00000000 --- a/pkg/synchronizer/participantsync.go +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package synchronizer - -import ( - "sync" - "time" -) - -// MediaType distinguishes audio and video tracks for A/V sync purposes. -type MediaType int - -const ( - MediaTypeAudio MediaType = iota - MediaTypeVideo -) - -const ( - // slewRatePerSecond is the maximum rate at which PTS corrections are absorbed. 
- slewRatePerSecond = 5 * time.Millisecond - - // deadbandThreshold is the minimum |correction| before slew smoothing kicks in. - deadbandThreshold = 5 * time.Millisecond -) - -// trackEntry holds per-track state within a ParticipantSync. -type trackEntry struct { - mediaType MediaType - estimator *NtpEstimator -} - -// ParticipantSync holds per-participant sender report state and track metadata. -// PTS jump smoothing is handled directly in syncEngineTrack.GetPTS using a -// per-track correction that decays at the slew rate. -type ParticipantSync struct { - mu sync.Mutex - tracks map[string]*trackEntry -} - -// NewParticipantSync creates a new ParticipantSync instance. -func NewParticipantSync() *ParticipantSync { - return &ParticipantSync{ - tracks: make(map[string]*trackEntry), - } -} - -// SetTrackEstimator registers or updates the NtpEstimator for a given track. -func (ps *ParticipantSync) SetTrackEstimator(trackID string, mediaType MediaType, estimator *NtpEstimator) { - ps.mu.Lock() - defer ps.mu.Unlock() - - if existing, ok := ps.tracks[trackID]; ok { - existing.estimator = estimator - existing.mediaType = mediaType - return - } - - ps.tracks[trackID] = &trackEntry{ - mediaType: mediaType, - estimator: estimator, - } -} - -// RemoveTrack removes a track. -func (ps *ParticipantSync) RemoveTrack(trackID string) { - ps.mu.Lock() - defer ps.mu.Unlock() - delete(ps.tracks, trackID) -} - -// OnSenderReport is called when new SR data arrives for a track. -func (ps *ParticipantSync) OnSenderReport(trackID string) { - // SR data is processed by SessionTimeline's NtpEstimator. - // Jump detection and smoothing happen in syncEngineTrack.GetPTS. 
-} diff --git a/pkg/synchronizer/sessiontimeline.go b/pkg/synchronizer/sessiontimeline.go index 3e216d00..29d45fe5 100644 --- a/pkg/synchronizer/sessiontimeline.go +++ b/pkg/synchronizer/sessiontimeline.go @@ -26,21 +26,6 @@ import ( var errNoSenderReports = errors.New("SessionTimeline: no sender reports received for track") -// participantTrack holds per-track state within a ParticipantClock. -type participantTrack struct { - estimator *NtpEstimator - trackID string -} - -// ParticipantClock holds OWD and NTP estimation state for a single participant. -type ParticipantClock struct { - owdEstimator *latency.OWDEstimator - participantSync *ParticipantSync - tracks map[string]*participantTrack - ntpEpoch time.Time // NTP time from first SR - hasEpoch bool -} - // SessionTimeline establishes a shared recording timeline and maps each // participant's NTP clock domain onto it using OWD (one-way delay) // normalization. This is the key component that fixes cross-participant @@ -88,9 +73,8 @@ func (st *SessionTimeline) AddParticipant(identity string) *ParticipantClock { defer st.mu.Unlock() pc := &ParticipantClock{ - owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), - participantSync: NewParticipantSync(), - tracks: make(map[string]*participantTrack), + owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), + tracks: make(map[string]*NtpEstimator), } st.participants[identity] = pc return pc @@ -107,9 +91,8 @@ func (st *SessionTimeline) GetOrAddParticipant(identity string) *ParticipantCloc } pc := &ParticipantClock{ - owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), - participantSync: NewParticipantSync(), - tracks: make(map[string]*participantTrack), + owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), + tracks: make(map[string]*NtpEstimator), } st.participants[identity] = pc return pc @@ -124,23 +107,15 @@ func (st *SessionTimeline) GetTrackEstimator(identity, trackID string) 
*NtpEstim if !ok { return nil } - pt, ok := pc.tracks[trackID] - if !ok { - return nil - } - return pt.estimator + return pc.tracks[trackID] } -// GetParticipantSync returns the ParticipantSync for a participant, or nil. -func (st *SessionTimeline) GetParticipantSync(identity string) *ParticipantSync { +// GetParticipantClock returns the ParticipantClock for a participant, or nil. +func (st *SessionTimeline) GetParticipantClock(identity string) *ParticipantClock { st.mu.RLock() defer st.mu.RUnlock() - pc, ok := st.participants[identity] - if !ok { - return nil - } - return pc.participantSync + return st.participants[identity] } // RemoveParticipant removes the participant with the given identity. @@ -162,11 +137,9 @@ func (st *SessionTimeline) ResetTrack(identity, trackID string) { if !ok { return } - pt, ok := pc.tracks[trackID] - if !ok { - return + if est, ok := pc.tracks[trackID]; ok { + est.Reset() } - pt.estimator.Reset() } func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { @@ -179,17 +152,14 @@ func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate ui } // Get or create the per-track NTP estimator. - pt, ok := pc.tracks[trackID] + est, ok := pc.tracks[trackID] if !ok { - pt = &participantTrack{ - estimator: NewNtpEstimator(clockRate, st.logger), - trackID: trackID, - } - pc.tracks[trackID] = pt + est = NewNtpEstimator(clockRate) + pc.tracks[trackID] = est } // Feed the SR to the NTP estimator. - pt.estimator.OnSenderReport(ntpTime, rtpTimestamp, receivedAt) + est.OnSenderReport(ntpTime, rtpTimestamp, receivedAt) // Convert NTP timestamp to nanoseconds and update OWD. 
senderNtpNanos := ntpTimestampToNanos(ntpTime) @@ -226,12 +196,12 @@ func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp return 0, fmt.Errorf("SessionTimeline: unknown participant %q", identity) } - pt, ok := pc.tracks[trackID] + est, ok := pc.tracks[trackID] if !ok { return 0, errNoSenderReports } - if !pt.estimator.IsReady() { + if !est.IsReady() { return 0, errNotReady } @@ -240,7 +210,7 @@ func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp } // Map RTP to NTP wall-clock time. - ntpTime, err := pt.estimator.RtpToNtp(rtpTimestamp) + ntpTime, err := est.RtpToNtp(rtpTimestamp) if err != nil { return 0, err } diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 8b358c57..a38f7f7b 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -15,15 +15,11 @@ package synchronizer import ( - "io" "sync" "sync/atomic" "time" "github.com/pion/rtcp" - "github.com/pion/webrtc/v4" - - "github.com/livekit/media-sdk/jitter" "github.com/livekit/protocol/logger" "github.com/livekit/protocol/utils/rtputil" @@ -51,6 +47,12 @@ const ( // defaultOldPacketThreshold is the default age after which packets are dropped. defaultOldPacketThreshold = 500 * time.Millisecond + + // slewRatePerSecond is the maximum rate at which PTS corrections are absorbed. + slewRatePerSecond = 5 * time.Millisecond + + // deadbandThreshold is the minimum |correction| before slew smoothing kicks in. + deadbandThreshold = 5 * time.Millisecond ) // SyncEngineOption configures a SyncEngine. @@ -104,7 +106,7 @@ func WithSyncEngineAudioDriftCompensated() SyncEngineOption { } } -// SyncEngine orchestrates NtpEstimator, ParticipantSync, and SessionTimeline +// SyncEngine orchestrates NtpEstimator, ParticipantClock, and SessionTimeline // to provide cross-participant alignment and per-participant A/V lip sync. // It implements the Sync interface. 
type SyncEngine struct { @@ -155,13 +157,9 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { // Ensure the participant exists in the timeline. pc := e.timeline.GetOrAddParticipant(identity) - // Auto-register the track with ParticipantSync using a placeholder estimator. - mt := MediaTypeAudio - if track.Kind() == webrtc.RTPCodecTypeVideo { - mt = MediaTypeVideo - } - placeholder := NewNtpEstimator(clockRate, e.logger) - pc.participantSync.SetTrackEstimator(track.ID(), mt, placeholder) + // Auto-register the track with a placeholder estimator. + placeholder := NewNtpEstimator(clockRate) + pc.SetTrackEstimator(track.ID(), placeholder) st := &syncEngineTrack{ engine: e, @@ -202,12 +200,22 @@ func (e *SyncEngine) RemoveTrack(trackID string) { delete(e.trackIDs, trackID) e.mu.Unlock() + // Clean up track from participant, and remove the participant from the + // timeline if this was their last track. + identity := st.identity + if pc := e.timeline.GetParticipantClock(identity); pc != nil { + pc.RemoveTrack(trackID) + } + if !e.hasTracksForParticipant(identity) { + e.timeline.RemoveParticipant(identity) + } + st.logger.Infow("track removed", "lastPTS", st.lastPTSAdjusted) st.Close() } // OnRTCP processes an RTCP packet, dispatching sender reports to the appropriate -// track's NTP estimator and ParticipantSync. +// track's NTP estimator and ParticipantClock. func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { sr, ok := packet.(*rtcp.SenderReport) if !ok { @@ -230,15 +238,10 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { // Feed the SR to the session timeline (updates NTP estimator + OWD). e.timeline.OnSenderReport(identity, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) - // Wire up ParticipantSync: get the track's estimator from timeline and update it. + // Update the participant's track estimator from the timeline. 
if estimator := e.timeline.GetTrackEstimator(identity, trackID); estimator != nil { - mt := MediaTypeAudio - if st.track.Kind() == webrtc.RTPCodecTypeVideo { - mt = MediaTypeVideo - } - if ps := e.timeline.GetParticipantSync(identity); ps != nil { - ps.SetTrackEstimator(trackID, mt, estimator) - ps.OnSenderReport(trackID) + if pc := e.timeline.GetParticipantClock(identity); pc != nil { + pc.SetTrackEstimator(trackID, estimator) } } @@ -341,341 +344,22 @@ func (e *SyncEngine) initializeIfNeeded(receivedAt time.Time) int64 { return e.startedAt.Load() } -func (e *SyncEngine) getTrackLogger(track TrackRemote) logger.Logger { - if e.logger != nil { - return e.logger.WithValues("trackID", track.ID(), "kind", track.Kind().String()) - } - return logger.GetLogger().WithValues("trackID", track.ID(), "kind", track.Kind().String(), "syncEngine", true) -} - -// --- syncEngineTrack --- - -// syncEngineTrack implements TrackSync for a single track within a SyncEngine. -type syncEngineTrack struct { - engine *SyncEngine - track TrackRemote - identity string - logger logger.Logger - converter *rtputil.RTPConverter - startGate startGate // from start_gate.go, nil if not enabled - - mu sync.Mutex - startTime time.Time - sessionOffset time.Duration // offset from session start to this track's start - lastTS uint32 - lastPTS time.Duration - lastPTSAdjusted time.Duration - initialized bool - closed bool - - // NTP transition and smoothing - ntpTransitioned bool - transitionSlew time.Duration - lastSlewPTS time.Duration // PTS at which slew was last updated - lastNtpPTS time.Duration // last raw NTP PTS (before corrections), for jump detection - ntpCorrection time.Duration // smoothing correction for SR-induced NTP jumps - - // pipeline time feedback - lastTimelyPacket time.Time - - // drain - maxPTS time.Duration - maxPTSSet bool - - onSR func(drift time.Duration) -} - -// PrimeForStart implements TrackSync. 
It buffers packets through the optional -// start gate and initializes the track on the first valid packet. -func (st *syncEngineTrack) PrimeForStart(pkt jitter.ExtPacket) ([]jitter.ExtPacket, int, bool) { - st.mu.Lock() - defer st.mu.Unlock() - - if st.initialized || st.startGate == nil { - if !st.initialized { - st.initializeLocked(pkt) - } - return []jitter.ExtPacket{pkt}, 0, true - } - - ready, dropped, done := st.startGate.Push(pkt) - if !done { - return nil, dropped, false - } - - if len(ready) == 0 { - ready = []jitter.ExtPacket{pkt} - } - - if !st.initialized { - st.initializeLocked(ready[0]) - } - - return ready, dropped, true -} - -// initializeLocked sets the track's start time and registers with the engine. -// Caller must hold st.mu. -func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) { - receivedAt := pkt.ReceivedAt - if receivedAt.IsZero() { - receivedAt = time.Now() - } - - st.startTime = receivedAt - st.lastTS = pkt.Timestamp - st.lastTimelyPacket = receivedAt - st.initialized = true - - // Initialize the engine's session start time. - sessionStart := st.engine.initializeIfNeeded(receivedAt) - st.sessionOffset = time.Duration(receivedAt.UnixNano() - sessionStart) - - st.logger.Infow("initialized track", - "startTime", st.startTime, - "sessionOffset", st.sessionOffset, - "rtpTS", pkt.Timestamp, - ) -} - -// GetPTS implements TrackSync. It computes the presentation timestamp for a packet -// using the NTP-grounded timeline when available, falling back to wall clock otherwise. -func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { - st.mu.Lock() - defer st.mu.Unlock() - - if st.closed { - return 0, io.EOF - } - - if !st.initialized { - st.initializeLocked(pkt) - } - - ts := pkt.Timestamp - - // Same RTP timestamp as last packet: return same PTS (same frame). - if ts == st.lastTS && st.lastPTSAdjusted > 0 { - return st.lastPTSAdjusted, nil - } - - // Drop packets older than threshold. 
- if st.engine.oldPacketThreshold > 0 && !pkt.ReceivedAt.IsZero() { - if time.Since(pkt.ReceivedAt) > st.engine.oldPacketThreshold { - return 0, ErrPacketTooOld - } - } - - // Step 1: Try NTP-grounded PTS from SessionTimeline. - rawNtpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) - - wallPTS := st.wallClockPTS(pkt) - - // Audio tracks with external drift compensation (e.g., tempo controller) skip - // NTP PTS corrections — drift is handled by resampling, not PTS adjustment. - // NTP regression still runs (via OnSenderReport) for drift measurement. - useWallClockOnly := st.engine.audioDriftCompensated && st.track.Kind() == webrtc.RTPCodecTypeAudio - - // Step 2: Detect discontinuities and NTP regression jumps on RAW NTP PTS. - // This operates before any corrections to avoid feedback loops. - rtpDelta := ts - st.lastTS - rtpDeltaDuration := st.converter.ToDuration(rtpDelta) - - if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { - // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap. - st.engine.timeline.ResetTrack(st.identity, st.track.ID()) - st.lastNtpPTS = 0 - st.ntpCorrection = 0 - st.ntpTransitioned = false - st.transitionSlew = 0 - st.lastSlewPTS = 0 - st.logger.Warnw("stream discontinuity detected, resetting NTP state", nil, - "rtpDelta", rtpDelta, - "rtpDeltaDuration", rtpDeltaDuration, - ) - } else if !useWallClockOnly && ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { - // Detect regression jumps: compare raw NTP PTS against expected. - expectedRawNtpPTS := st.lastNtpPTS + rtpDeltaDuration - jump := rawNtpPTS - expectedRawNtpPTS - if jump > deadbandThreshold || jump < -deadbandThreshold { - st.ntpCorrection -= jump - st.logger.Debugw("NTP regression jump detected", - "jump", jump, - "ntpCorrection", st.ntpCorrection, - ) - } - } - if ntpErr == nil { - st.lastNtpPTS = rawNtpPTS // Always track raw NTP PTS, never corrected - } - - // Step 3: Compute final PTS with corrections. 
- var pts time.Duration - if ntpErr != nil || useWallClockOnly { - pts = wallPTS - } else { - // Apply NTP jump correction. - pts = rawNtpPTS + st.ntpCorrection - - // Clamp corrected PTS to within trust threshold of wall clock. - diff := pts - wallPTS - if diff > ntpTrustThreshold || diff < -ntpTrustThreshold { - st.logger.Warnw("NTP PTS exceeds trust threshold, clamping to wall clock", nil, - "rawNtpPTS", rawNtpPTS, - "ntpCorrection", st.ntpCorrection, - "wallPTS", wallPTS, - "diff", diff, - ) - pts = wallPTS - } - - // On first successful NTP PTS, compute transition correction. - if !st.ntpTransitioned { - st.transitionSlew = wallPTS - pts - st.ntpTransitioned = true - st.logger.Infow("NTP transition", - "wallPTS", wallPTS, - "ntpPTS", rawNtpPTS, - "transitionSlew", st.transitionSlew, - ) - } - } - - // Compute PTS delta for slew rate calculations. - var slewPTSDelta time.Duration - if st.lastSlewPTS > 0 { - slewPTSDelta = pts - st.lastSlewPTS - } - - // Step 4: Apply transition slew (absorb gradually toward zero). - if st.transitionSlew != 0 { - pts += st.transitionSlew - - if slewPTSDelta > 0 { - maxStep := time.Duration(float64(transitionSlewRatePerSecond) * slewPTSDelta.Seconds()) - if st.transitionSlew > 0 { - st.transitionSlew -= maxStep - if st.transitionSlew < 0 { - st.transitionSlew = 0 - } - } else { - st.transitionSlew += maxStep - if st.transitionSlew > 0 { - st.transitionSlew = 0 - } - } - } - } - - // Decay ntpCorrection toward zero via slew. 
- if st.ntpCorrection != 0 { - if slewPTSDelta > 0 { - maxStep := time.Duration(float64(slewRatePerSecond) * slewPTSDelta.Seconds()) - if st.ntpCorrection > 0 { - st.ntpCorrection -= maxStep - if st.ntpCorrection < 0 { - st.ntpCorrection = 0 - } - } else { - st.ntpCorrection += maxStep - if st.ntpCorrection > 0 { - st.ntpCorrection = 0 - } - } - } - } - - st.lastSlewPTS = pts - - // Step 6: Pipeline time feedback — if the track has fallen behind the - // pipeline's deadline for too long, force-correct PTS forward. - if deadline, ok := st.engine.getMediaDeadline(); ok && st.engine.maxMediaRunningTimeDelay > 0 { - limit := deadline - st.engine.maxMediaRunningTimeDelay - if pts < limit { - if time.Since(st.lastTimelyPacket) > maxTimelyPacketAge { - oldPTS := pts - pts = deadline - st.engine.maxMediaRunningTimeDelay/2 - st.logger.Warnw("force-correcting PTS forward, track behind pipeline deadline", nil, - "oldPTS", oldPTS, - "newPTS", pts, - "deadline", deadline, - "behindBy", limit-oldPTS, - ) - } - } else { - st.lastTimelyPacket = time.Now() +// hasTracksForParticipant returns true if any remaining track belongs to the +// given participant identity. Caller must NOT hold e.mu. +func (e *SyncEngine) hasTracksForParticipant(identity string) bool { + e.mu.Lock() + defer e.mu.Unlock() + for _, st := range e.tracks { + if st.identity == identity { + return true } } - - // Step 7: Enforce monotonicity. - if pts < st.lastPTSAdjusted+time.Millisecond && st.lastPTSAdjusted > 0 { - pts = st.lastPTSAdjusted + time.Millisecond - } - - // Step 7: Enforce drain ceiling. - if st.maxPTSSet && pts > st.maxPTS { - return 0, io.EOF - } - - // Update state. - st.lastTS = ts - st.lastPTS = pts // the raw PTS before adjustment (for wall clock computation) - st.lastPTSAdjusted = pts - - return pts, nil + return false } -// wallClockPTS computes a PTS based on wall-clock timing and RTP deltas. 
-func (st *syncEngineTrack) wallClockPTS(pkt jitter.ExtPacket) time.Duration { - ts := pkt.Timestamp - - // Same RTP timestamp as last packet: same frame. - if st.lastTS == ts && st.lastPTS > 0 { - return st.lastPTS - } - - // Wall-clock elapsed since this track started, plus session offset - wallElapsed := pkt.ReceivedAt.Sub(st.startTime) + st.sessionOffset - - // If we have a previous timestamp, use RTP delta for more precision. - if st.lastPTS > 0 { - rtpDelta := ts - st.lastTS - rtpDerived := st.lastPTS + st.converter.ToDuration(rtpDelta) - - // Sanity check: if RTP-derived PTS diverges from wall-clock by > 5s, use wall clock. - diff := rtpDerived - wallElapsed - if diff < 0 { - diff = -diff - } - if diff <= wallClockSanityThreshold { - return rtpDerived - } - } - - // Use wall-clock elapsed, ensuring non-negative. - if wallElapsed < 0 { - wallElapsed = 0 +func (e *SyncEngine) getTrackLogger(track TrackRemote) logger.Logger { + if e.logger != nil { + return e.logger.WithValues("trackID", track.ID(), "kind", track.Kind().String()) } - return wallElapsed -} - -// OnSenderReport implements TrackSync. It stores a callback invoked on sender reports. -func (st *syncEngineTrack) OnSenderReport(f func(drift time.Duration)) { - st.mu.Lock() - defer st.mu.Unlock() - st.onSR = f -} - -// LastPTSAdjusted implements TrackSync. -func (st *syncEngineTrack) LastPTSAdjusted() time.Duration { - st.mu.Lock() - defer st.mu.Unlock() - return st.lastPTSAdjusted -} - -// Close implements TrackSync. -func (st *syncEngineTrack) Close() { - st.mu.Lock() - defer st.mu.Unlock() - st.closed = true + return logger.GetLogger().WithValues("trackID", track.ID(), "kind", track.Kind().String(), "syncEngine", true) } diff --git a/pkg/synchronizer/syncenginetrack.go b/pkg/synchronizer/syncenginetrack.go new file mode 100644 index 00000000..a351bd88 --- /dev/null +++ b/pkg/synchronizer/syncenginetrack.go @@ -0,0 +1,358 @@ +// Copyright 2026 LiveKit, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package synchronizer + +import ( + "io" + "sync" + "time" + + "github.com/pion/webrtc/v4" + + "github.com/livekit/media-sdk/jitter" + + "github.com/livekit/protocol/logger" + "github.com/livekit/protocol/utils/rtputil" +) + +// syncEngineTrack implements TrackSync for a single track within a SyncEngine. +type syncEngineTrack struct { + engine *SyncEngine + track TrackRemote + identity string + logger logger.Logger + converter *rtputil.RTPConverter + startGate startGate // from start_gate.go, nil if not enabled + + mu sync.Mutex + startTime time.Time + sessionOffset time.Duration // offset from session start to this track's start + lastTS uint32 + lastPTS time.Duration + lastPTSAdjusted time.Duration + initialized bool + closed bool + + // NTP transition and smoothing + ntpTransitioned bool + transitionSlew time.Duration + lastSlewPTS time.Duration // PTS at which slew was last updated + lastNtpPTS time.Duration // last raw NTP PTS (before corrections), for jump detection + ntpCorrection time.Duration // smoothing correction for SR-induced NTP jumps + + // pipeline time feedback + lastTimelyPacket time.Time + + // drain + maxPTS time.Duration + maxPTSSet bool + + onSR func(drift time.Duration) +} + +// PrimeForStart implements TrackSync. It buffers packets through the optional +// start gate and initializes the track on the first valid packet. 
+func (st *syncEngineTrack) PrimeForStart(pkt jitter.ExtPacket) ([]jitter.ExtPacket, int, bool) { + st.mu.Lock() + defer st.mu.Unlock() + + if st.initialized || st.startGate == nil { + if !st.initialized { + st.initializeLocked(pkt) + } + return []jitter.ExtPacket{pkt}, 0, true + } + + ready, dropped, done := st.startGate.Push(pkt) + if !done { + return nil, dropped, false + } + + if len(ready) == 0 { + ready = []jitter.ExtPacket{pkt} + } + + if !st.initialized { + st.initializeLocked(ready[0]) + } + + return ready, dropped, true +} + +// initializeLocked sets the track's start time and registers with the engine. +// Caller must hold st.mu. +func (st *syncEngineTrack) initializeLocked(pkt jitter.ExtPacket) { + receivedAt := pkt.ReceivedAt + if receivedAt.IsZero() { + receivedAt = time.Now() + } + + st.startTime = receivedAt + st.lastTS = pkt.Timestamp + st.lastTimelyPacket = receivedAt + st.initialized = true + + // Initialize the engine's session start time. + sessionStart := st.engine.initializeIfNeeded(receivedAt) + st.sessionOffset = time.Duration(receivedAt.UnixNano() - sessionStart) + + st.logger.Infow("initialized track", + "startTime", st.startTime, + "sessionOffset", st.sessionOffset, + "rtpTS", pkt.Timestamp, + ) +} + +// GetPTS implements TrackSync. It computes the presentation timestamp for a packet +// using the NTP-grounded timeline when available, falling back to wall clock otherwise. +func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { + st.mu.Lock() + defer st.mu.Unlock() + + if st.closed { + return 0, io.EOF + } + + if !st.initialized { + st.initializeLocked(pkt) + } + + ts := pkt.Timestamp + + // Same RTP timestamp as last packet: return same PTS (same frame). + if ts == st.lastTS && st.lastPTSAdjusted > 0 { + return st.lastPTSAdjusted, nil + } + + // Drop packets older than threshold. 
+ if st.engine.oldPacketThreshold > 0 && !pkt.ReceivedAt.IsZero() { + if time.Since(pkt.ReceivedAt) > st.engine.oldPacketThreshold { + return 0, ErrPacketTooOld + } + } + + // Step 1: Try NTP-grounded PTS from SessionTimeline. + rawNtpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) + + wallPTS := st.wallClockPTS(pkt) + + // Audio tracks with external drift compensation (e.g., tempo controller) skip + // NTP PTS corrections — drift is handled by resampling, not PTS adjustment. + // NTP regression still runs (via OnSenderReport) for drift measurement. + useWallClockOnly := st.engine.audioDriftCompensated && st.track.Kind() == webrtc.RTPCodecTypeAudio + + // Step 2: Detect discontinuities and NTP regression jumps on RAW NTP PTS. + // This operates before any corrections to avoid feedback loops. + rtpDelta := ts - st.lastTS + rtpDeltaDuration := st.converter.ToDuration(rtpDelta) + + if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { + // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap. + st.engine.timeline.ResetTrack(st.identity, st.track.ID()) + st.lastNtpPTS = 0 + st.ntpCorrection = 0 + st.ntpTransitioned = false + st.transitionSlew = 0 + st.lastSlewPTS = 0 + st.logger.Warnw("stream discontinuity detected, resetting NTP state", nil, + "rtpDelta", rtpDelta, + "rtpDeltaDuration", rtpDeltaDuration, + ) + } else if !useWallClockOnly && ntpErr == nil && st.lastNtpPTS > 0 && rtpDelta > 0 { + // Detect regression jumps: compare raw NTP PTS against expected. + expectedRawNtpPTS := st.lastNtpPTS + rtpDeltaDuration + jump := rawNtpPTS - expectedRawNtpPTS + if jump > deadbandThreshold || jump < -deadbandThreshold { + st.ntpCorrection -= jump + st.logger.Debugw("NTP regression jump detected", + "jump", jump, + "ntpCorrection", st.ntpCorrection, + ) + } + } + if ntpErr == nil { + st.lastNtpPTS = rawNtpPTS // Always track raw NTP PTS, never corrected + } + + // Step 3: Compute final PTS with corrections. 
+ var pts time.Duration + if ntpErr != nil || useWallClockOnly { + pts = wallPTS + } else { + // Apply NTP jump correction. + pts = rawNtpPTS + st.ntpCorrection + + // Clamp corrected PTS to within trust threshold of wall clock. + diff := pts - wallPTS + if diff > ntpTrustThreshold || diff < -ntpTrustThreshold { + st.logger.Warnw("NTP PTS exceeds trust threshold, clamping to wall clock", nil, + "rawNtpPTS", rawNtpPTS, + "ntpCorrection", st.ntpCorrection, + "wallPTS", wallPTS, + "diff", diff, + ) + pts = wallPTS + } + + // On first successful NTP PTS, compute transition correction. + if !st.ntpTransitioned { + st.transitionSlew = wallPTS - pts + st.ntpTransitioned = true + st.logger.Infow("NTP transition", + "wallPTS", wallPTS, + "ntpPTS", rawNtpPTS, + "transitionSlew", st.transitionSlew, + ) + } + } + + // Compute PTS delta for slew rate calculations. + var slewPTSDelta time.Duration + if st.lastSlewPTS > 0 { + slewPTSDelta = pts - st.lastSlewPTS + } + + // Step 4: Apply transition slew (absorb gradually toward zero). + if st.transitionSlew != 0 { + pts += st.transitionSlew + + if slewPTSDelta > 0 { + maxStep := time.Duration(float64(transitionSlewRatePerSecond) * slewPTSDelta.Seconds()) + if st.transitionSlew > 0 { + st.transitionSlew -= maxStep + if st.transitionSlew < 0 { + st.transitionSlew = 0 + } + } else { + st.transitionSlew += maxStep + if st.transitionSlew > 0 { + st.transitionSlew = 0 + } + } + } + } + + // Decay ntpCorrection toward zero via slew. 
+ if st.ntpCorrection != 0 { + if slewPTSDelta > 0 { + maxStep := time.Duration(float64(slewRatePerSecond) * slewPTSDelta.Seconds()) + if st.ntpCorrection > 0 { + st.ntpCorrection -= maxStep + if st.ntpCorrection < 0 { + st.ntpCorrection = 0 + } + } else { + st.ntpCorrection += maxStep + if st.ntpCorrection > 0 { + st.ntpCorrection = 0 + } + } + } + } + + st.lastSlewPTS = pts + + // Step 6: Pipeline time feedback — if the track has fallen behind the + // pipeline's deadline for too long, force-correct PTS forward. + if deadline, ok := st.engine.getMediaDeadline(); ok && st.engine.maxMediaRunningTimeDelay > 0 { + limit := deadline - st.engine.maxMediaRunningTimeDelay + if pts < limit { + if time.Since(st.lastTimelyPacket) > maxTimelyPacketAge { + oldPTS := pts + pts = deadline - st.engine.maxMediaRunningTimeDelay/2 + st.logger.Warnw("force-correcting PTS forward, track behind pipeline deadline", nil, + "oldPTS", oldPTS, + "newPTS", pts, + "deadline", deadline, + "behindBy", limit-oldPTS, + ) + } + } else { + st.lastTimelyPacket = time.Now() + } + } + + // Step 7: Enforce monotonicity. + if pts < st.lastPTSAdjusted+time.Millisecond && st.lastPTSAdjusted > 0 { + pts = st.lastPTSAdjusted + time.Millisecond + } + + // Step 7: Enforce drain ceiling. + if st.maxPTSSet && pts > st.maxPTS { + return 0, io.EOF + } + + // Update state. + st.lastTS = ts + st.lastPTS = pts // the raw PTS before adjustment (for wall clock computation) + st.lastPTSAdjusted = pts + + return pts, nil +} + +// wallClockPTS computes a PTS based on wall-clock timing and RTP deltas. +func (st *syncEngineTrack) wallClockPTS(pkt jitter.ExtPacket) time.Duration { + ts := pkt.Timestamp + + // Same RTP timestamp as last packet: same frame. 
+ if st.lastTS == ts && st.lastPTS > 0 { + return st.lastPTS + } + + // Wall-clock elapsed since this track started, plus session offset + wallElapsed := pkt.ReceivedAt.Sub(st.startTime) + st.sessionOffset + + // If we have a previous timestamp, use RTP delta for more precision. + if st.lastPTS > 0 { + rtpDelta := ts - st.lastTS + rtpDerived := st.lastPTS + st.converter.ToDuration(rtpDelta) + + // Sanity check: if RTP-derived PTS diverges from wall-clock by > 5s, use wall clock. + diff := rtpDerived - wallElapsed + if diff < 0 { + diff = -diff + } + if diff <= wallClockSanityThreshold { + return rtpDerived + } + } + + // Use wall-clock elapsed, ensuring non-negative. + if wallElapsed < 0 { + wallElapsed = 0 + } + return wallElapsed +} + +// OnSenderReport implements TrackSync. It stores a callback invoked on sender reports. +func (st *syncEngineTrack) OnSenderReport(f func(drift time.Duration)) { + st.mu.Lock() + defer st.mu.Unlock() + st.onSR = f +} + +// LastPTSAdjusted implements TrackSync. +func (st *syncEngineTrack) LastPTSAdjusted() time.Duration { + st.mu.Lock() + defer st.mu.Unlock() + return st.lastPTSAdjusted +} + +// Close implements TrackSync. 
+func (st *syncEngineTrack) Close() { + st.mu.Lock() + defer st.mu.Unlock() + st.closed = true +} From 94bf64b147954f824150c59120b5c35082b69bbf Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 15:01:25 -0400 Subject: [PATCH 16/17] identity -> participantID, check st.hasStart --- pkg/synchronizer/interfaces.go | 2 +- pkg/synchronizer/sessiontimeline.go | 57 ++++++++++++++++------------- pkg/synchronizer/syncengine.go | 30 +++++++-------- pkg/synchronizer/syncenginetrack.go | 6 +-- pkg/synchronizer/synchronizer.go | 10 ++--- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/pkg/synchronizer/interfaces.go b/pkg/synchronizer/interfaces.go index ada94dbb..8dd30e9b 100644 --- a/pkg/synchronizer/interfaces.go +++ b/pkg/synchronizer/interfaces.go @@ -25,7 +25,7 @@ import ( // Sync is the top-level synchronization interface. // Implemented by both Synchronizer (legacy) and SyncEngine (new). type Sync interface { - AddTrack(track TrackRemote, identity string) TrackSync + AddTrack(track TrackRemote, participantID string) TrackSync RemoveTrack(trackID string) OnRTCP(packet rtcp.Packet) End() diff --git a/pkg/synchronizer/sessiontimeline.go b/pkg/synchronizer/sessiontimeline.go index 29d45fe5..7abc1573 100644 --- a/pkg/synchronizer/sessiontimeline.go +++ b/pkg/synchronizer/sessiontimeline.go @@ -24,7 +24,10 @@ import ( "github.com/livekit/protocol/logger" ) -var errNoSenderReports = errors.New("SessionTimeline: no sender reports received for track") +var ( + errNoSenderReports = errors.New("SessionTimeline: no sender reports received for track") + errNoSessionStart = errors.New("SessionTimeline: session start time not set") +) // SessionTimeline establishes a shared recording timeline and maps each // participant's NTP clock domain onto it using OWD (one-way delay) @@ -67,8 +70,8 @@ func (st *SessionTimeline) SetSessionStart(t time.Time) { st.hasStart = true } -// AddParticipant registers a new participant with the given identity. 
-func (st *SessionTimeline) AddParticipant(identity string) *ParticipantClock { +// AddParticipant registers a new participant with the given participantID. +func (st *SessionTimeline) AddParticipant(participantID string) *ParticipantClock { st.mu.Lock() defer st.mu.Unlock() @@ -76,17 +79,17 @@ func (st *SessionTimeline) AddParticipant(identity string) *ParticipantClock { owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), tracks: make(map[string]*NtpEstimator), } - st.participants[identity] = pc + st.participants[participantID] = pc return pc } -// GetOrAddParticipant returns the ParticipantClock for the given identity, +// GetOrAddParticipant returns the ParticipantClock for the given participantID, // creating one if it doesn't exist. This is safe for concurrent use. -func (st *SessionTimeline) GetOrAddParticipant(identity string) *ParticipantClock { +func (st *SessionTimeline) GetOrAddParticipant(participantID string) *ParticipantClock { st.mu.Lock() defer st.mu.Unlock() - if pc, ok := st.participants[identity]; ok { + if pc, ok := st.participants[participantID]; ok { return pc } @@ -94,16 +97,16 @@ func (st *SessionTimeline) GetOrAddParticipant(identity string) *ParticipantCloc owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), tracks: make(map[string]*NtpEstimator), } - st.participants[identity] = pc + st.participants[participantID] = pc return pc } // GetTrackEstimator returns the NTP estimator for a participant's track, or nil. 
-func (st *SessionTimeline) GetTrackEstimator(identity, trackID string) *NtpEstimator { +func (st *SessionTimeline) GetTrackEstimator(participantID, trackID string) *NtpEstimator { st.mu.RLock() defer st.mu.RUnlock() - pc, ok := st.participants[identity] + pc, ok := st.participants[participantID] if !ok { return nil } @@ -111,29 +114,27 @@ func (st *SessionTimeline) GetTrackEstimator(identity, trackID string) *NtpEstim } // GetParticipantClock returns the ParticipantClock for a participant, or nil. -func (st *SessionTimeline) GetParticipantClock(identity string) *ParticipantClock { +func (st *SessionTimeline) GetParticipantClock(participantID string) *ParticipantClock { st.mu.RLock() defer st.mu.RUnlock() - return st.participants[identity] + return st.participants[participantID] } -// RemoveParticipant removes the participant with the given identity. -func (st *SessionTimeline) RemoveParticipant(identity string) { +// RemoveParticipant removes the participant with the given participantID. +func (st *SessionTimeline) RemoveParticipant(participantID string) { st.mu.Lock() defer st.mu.Unlock() - delete(st.participants, identity) + delete(st.participants, participantID) } -// OnSenderReport processes an RTCP sender report for a participant's track. -// It updates the NTP estimator, OWD estimator, and records the NTP epoch. // ResetTrack clears the NTP estimator for a track, forcing it to rebuild from // new sender reports. Used when a stream discontinuity is detected. 
-func (st *SessionTimeline) ResetTrack(identity, trackID string) { +func (st *SessionTimeline) ResetTrack(participantID, trackID string) { st.mu.Lock() defer st.mu.Unlock() - pc, ok := st.participants[identity] + pc, ok := st.participants[participantID] if !ok { return } @@ -142,11 +143,13 @@ func (st *SessionTimeline) ResetTrack(identity, trackID string) { } } -func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { +// OnSenderReport processes an RTCP sender report for a participant's track. +// It updates the NTP estimator, OWD estimator, and records the NTP epoch. +func (st *SessionTimeline) OnSenderReport(participantID, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { st.mu.Lock() defer st.mu.Unlock() - pc, ok := st.participants[identity] + pc, ok := st.participants[participantID] if !ok { return } @@ -187,13 +190,17 @@ func (st *SessionTimeline) OnSenderReport(identity, trackID string, clockRate ui // - participantNtpEpoch = NTP time from first SR for this participant // - epochOnReceiverClock = participantNtpEpoch + estimatedOWD // - sessionStart = wall-clock time first packet arrived -func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp uint32) (time.Duration, error) { +func (st *SessionTimeline) GetSessionPTS(participantID, trackID string, rtpTimestamp uint32) (time.Duration, error) { st.mu.RLock() defer st.mu.RUnlock() - pc, ok := st.participants[identity] + if !st.hasStart { + return 0, errNoSessionStart + } + + pc, ok := st.participants[participantID] if !ok { - return 0, fmt.Errorf("SessionTimeline: unknown participant %q", identity) + return 0, fmt.Errorf("SessionTimeline: unknown participant %q", participantID) } est, ok := pc.tracks[trackID] @@ -228,7 +235,7 @@ func (st *SessionTimeline) GetSessionPTS(identity, trackID string, rtpTimestamp if (sessionPTS < 0 || sessionPTS > 24*time.Hour) 
&& st.logger != nil { st.logger.Warnw("GetSessionPTS: abnormal result", nil, - "identity", identity, + "participantID", participantID, "trackID", trackID, "rtpTimestamp", rtpTimestamp, "ntpTime", ntpTime, diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index a38f7f7b..2e253dfe 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -147,7 +147,7 @@ func NewSyncEngine(opts ...SyncEngineOption) *SyncEngine { } // AddTrack registers a new track and returns a TrackSync handle. -func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { +func (e *SyncEngine) AddTrack(track TrackRemote, participantID string) TrackSync { ssrc := uint32(track.SSRC()) clockRate := track.Codec().ClockRate @@ -155,7 +155,7 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { defer e.mu.Unlock() // Ensure the participant exists in the timeline. - pc := e.timeline.GetOrAddParticipant(identity) + pc := e.timeline.GetOrAddParticipant(participantID) // Auto-register the track with a placeholder estimator. placeholder := NewNtpEstimator(clockRate) @@ -164,7 +164,7 @@ func (e *SyncEngine) AddTrack(track TrackRemote, identity string) TrackSync { st := &syncEngineTrack{ engine: e, track: track, - identity: identity, + participantID: participantID, logger: e.getTrackLogger(track), converter: rtputil.NewRTPConverter(int64(clockRate)), } @@ -202,12 +202,12 @@ func (e *SyncEngine) RemoveTrack(trackID string) { // Clean up track from participant, and remove the participant from the // timeline if this was their last track. 
- identity := st.identity - if pc := e.timeline.GetParticipantClock(identity); pc != nil { + participantID := st.participantID + if pc := e.timeline.GetParticipantClock(participantID); pc != nil { pc.RemoveTrack(trackID) } - if !e.hasTracksForParticipant(identity) { - e.timeline.RemoveParticipant(identity) + if !e.hasTracksForParticipant(participantID) { + e.timeline.RemoveParticipant(participantID) } st.logger.Infow("track removed", "lastPTS", st.lastPTSAdjusted) @@ -228,7 +228,7 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { e.mu.Unlock() return } - identity := st.identity + participantID := st.participantID trackID := st.track.ID() clockRate := st.track.Codec().ClockRate e.mu.Unlock() @@ -236,11 +236,11 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { now := time.Now() // Feed the SR to the session timeline (updates NTP estimator + OWD). - e.timeline.OnSenderReport(identity, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) + e.timeline.OnSenderReport(participantID, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) // Update the participant's track estimator from the timeline. - if estimator := e.timeline.GetTrackEstimator(identity, trackID); estimator != nil { - if pc := e.timeline.GetParticipantClock(identity); pc != nil { + if estimator := e.timeline.GetTrackEstimator(participantID, trackID); estimator != nil { + if pc := e.timeline.GetParticipantClock(participantID); pc != nil { pc.SetTrackEstimator(trackID, estimator) } } @@ -256,7 +256,7 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { // if the sender's NTP clock adjusts during the recording). 
startedAt := e.startedAt.Load() if startedAt > 0 { - sessionPTS, err := e.timeline.GetSessionPTS(identity, trackID, sr.RTPTime) + sessionPTS, err := e.timeline.GetSessionPTS(participantID, trackID, sr.RTPTime) if err == nil { sessionStart := time.Unix(0, startedAt) expectedElapsed := now.Sub(sessionStart) @@ -345,12 +345,12 @@ func (e *SyncEngine) initializeIfNeeded(receivedAt time.Time) int64 { } // hasTracksForParticipant returns true if any remaining track belongs to the -// given participant identity. Caller must NOT hold e.mu. -func (e *SyncEngine) hasTracksForParticipant(identity string) bool { +// given participant. Caller must NOT hold e.mu. +func (e *SyncEngine) hasTracksForParticipant(participantID string) bool { e.mu.Lock() defer e.mu.Unlock() for _, st := range e.tracks { - if st.identity == identity { + if st.participantID == participantID { return true } } diff --git a/pkg/synchronizer/syncenginetrack.go b/pkg/synchronizer/syncenginetrack.go index a351bd88..01c62788 100644 --- a/pkg/synchronizer/syncenginetrack.go +++ b/pkg/synchronizer/syncenginetrack.go @@ -31,7 +31,7 @@ import ( type syncEngineTrack struct { engine *SyncEngine track TrackRemote - identity string + participantID string logger logger.Logger converter *rtputil.RTPConverter startGate startGate // from start_gate.go, nil if not enabled @@ -144,7 +144,7 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { } // Step 1: Try NTP-grounded PTS from SessionTimeline. - rawNtpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.identity, st.track.ID(), ts) + rawNtpPTS, ntpErr := st.engine.timeline.GetSessionPTS(st.participantID, st.track.ID(), ts) wallPTS := st.wallClockPTS(pkt) @@ -160,7 +160,7 @@ func (st *syncEngineTrack) GetPTS(pkt jitter.ExtPacket) (time.Duration, error) { if st.lastTS != 0 && rtpDeltaDuration >= 30*time.Second { // Discontinuity: stream restart, SSRC reuse with new RTP offset, or massive gap.
- st.engine.timeline.ResetTrack(st.identity, st.track.ID()) + st.engine.timeline.ResetTrack(st.participantID, st.track.ID()) st.lastNtpPTS = 0 st.ntpCorrection = 0 st.ntpTransitioned = false diff --git a/pkg/synchronizer/synchronizer.go b/pkg/synchronizer/synchronizer.go index 3661391a..673f72c1 100644 --- a/pkg/synchronizer/synchronizer.go +++ b/pkg/synchronizer/synchronizer.go @@ -275,14 +275,14 @@ func NewSynchronizerWithOptions(opts ...SynchronizerOption) *Synchronizer { } } -func (s *Synchronizer) AddTrack(track TrackRemote, identity string) *TrackSynchronizer { +func (s *Synchronizer) AddTrack(track TrackRemote, participantID string) *TrackSynchronizer { t := newTrackSynchronizer(s, track) s.Lock() - p := s.psByIdentity[identity] + p := s.psByIdentity[participantID] if p == nil { p = newParticipantSynchronizer() - s.psByIdentity[identity] = p + s.psByIdentity[participantID] = p } ssrc := uint32(track.SSRC()) s.ssrcByID[track.ID()] = ssrc @@ -393,8 +393,8 @@ type SynchronizerAdapter struct { *Synchronizer } -func (a *SynchronizerAdapter) AddTrack(track TrackRemote, identity string) TrackSync { - return a.Synchronizer.AddTrack(track, identity) +func (a *SynchronizerAdapter) AddTrack(track TrackRemote, participantID string) TrackSync { + return a.Synchronizer.AddTrack(track, participantID) } // AsSyncInterface returns a Sync-compatible wrapper around this Synchronizer. 
From 56923d368aeeadf81432f1a32c5ab3b94c23d576 Mon Sep 17 00:00:00 2001 From: David Colburn Date: Mon, 27 Apr 2026 15:17:54 -0400 Subject: [PATCH 17/17] more cleaning --- pkg/synchronizer/integration_test.go | 23 ---- pkg/synchronizer/ntpestimator.go | 18 ++- pkg/synchronizer/participantclock.go | 93 +++++++++++++++- pkg/synchronizer/participantclock_test.go | 33 +++--- pkg/synchronizer/sessiontimeline.go | 127 +++++----------------- pkg/synchronizer/syncengine.go | 38 +------ pkg/synchronizer/syncenginetrack.go | 27 +++++ 7 files changed, 170 insertions(+), 189 deletions(-) diff --git a/pkg/synchronizer/integration_test.go b/pkg/synchronizer/integration_test.go index edeb0eed..05fe5bb8 100644 --- a/pkg/synchronizer/integration_test.go +++ b/pkg/synchronizer/integration_test.go @@ -80,24 +80,11 @@ func TestIntegration_CrossParticipantClock(t *testing.T) { // Manually set receivedAt by calling OnSenderReport on the timeline directly // since OnRTCP uses time.Now(). We need deterministic timing. engine.timeline.OnSenderReport("alice", "audio-alice", clockRate, aliceNTP, rtpTS, receivedAt) - - // Wire up ParticipantClock with the track's estimator. 
- if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { - if ps := engine.timeline.GetParticipantClock("alice"); ps != nil { - ps.SetTrackEstimator("audio-alice", est) - } - } _ = aliceSR // used above indirectly // Bob SR: NTP = realTime + 500ms (Bob's NTP clock is 500ms ahead) bobNTP := ntpToUint64(realTime.Add(bobNTPOffset)) engine.timeline.OnSenderReport("bob", "audio-bob", clockRate, bobNTP, rtpTS, receivedAt) - - if est := engine.timeline.GetTrackEstimator("bob", "audio-bob"); est != nil { - if ps := engine.timeline.GetParticipantClock("bob"); ps != nil { - ps.SetTrackEstimator("audio-bob", est) - } - } } // Get PTS for both participants at "real time + 10s" with corresponding @@ -183,16 +170,6 @@ func TestIntegration_AVLipSync(t *testing.T) { videoRTP := uint32(i) * 5 * videoClockRate videoNTP := ntpToUint64(srTime.Add(videoEncoderDelay)) engine.timeline.OnSenderReport("alice", "video-alice", videoClockRate, videoNTP, videoRTP, receivedAt) - - // Wire up ParticipantClock with latest estimators. - if ps := engine.timeline.GetParticipantClock("alice"); ps != nil { - if est := engine.timeline.GetTrackEstimator("alice", "audio-alice"); est != nil { - ps.SetTrackEstimator("audio-alice", est) - } - if est := engine.timeline.GetTrackEstimator("alice", "video-alice"); est != nil { - ps.SetTrackEstimator("video-alice", est) - } - } } // Push multiple packets through GetPTS to drive the transition slew diff --git a/pkg/synchronizer/ntpestimator.go b/pkg/synchronizer/ntpestimator.go index c17104fe..c1e80da6 100644 --- a/pkg/synchronizer/ntpestimator.go +++ b/pkg/synchronizer/ntpestimator.go @@ -100,13 +100,23 @@ func (e *NtpEstimator) Reset() { e.ready = false } +// SRResult indicates the outcome of processing a sender report. +type SRResult int + +const ( + SRAccepted SRResult = iota + SRDuplicate + SROutlier +) + // OnSenderReport ingests a new RTCP sender report observation. 
// ntpTime is the 64-bit NTP timestamp from the SR, rtpTimestamp is the // corresponding RTP timestamp, and receivedAt is the local wall-clock time // when the SR was received. -func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { +func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) SRResult { e.mu.Lock() defer e.mu.Unlock() + ntpNanos := ntpTimestampToNanos(ntpTime) unwrapped := e.unwrapRTP(rtpTimestamp) @@ -117,7 +127,7 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei lastIdx := (e.sampleHead - 1 + maxSRSamples) % maxSRSamples last := e.samples[lastIdx] if last.unwrappedRTP == unwrapped && last.ntpNanos == ntpNanos { - return + return SRDuplicate } } @@ -128,8 +138,7 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei predicted := e.slopeNanos*(float64(unwrapped)-e.meanX) + e.meanY residual := math.Abs(float64(ntpNanos) - predicted) if residual > outlierThresholdStdDevs*e.residStd { - // Reject this sample as an outlier. - return + return SROutlier } } @@ -150,6 +159,7 @@ func (e *NtpEstimator) OnSenderReport(ntpTime uint64, rtpTimestamp uint32, recei e.ready = true } + return SRAccepted } // IsReady returns true once at least 2 sender reports have been processed diff --git a/pkg/synchronizer/participantclock.go b/pkg/synchronizer/participantclock.go index 48732559..2c067cf4 100644 --- a/pkg/synchronizer/participantclock.go +++ b/pkg/synchronizer/participantclock.go @@ -19,27 +19,114 @@ import ( "time" "github.com/livekit/mediatransportutil/pkg/latency" + "github.com/livekit/protocol/logger" ) // ParticipantClock holds OWD and NTP estimation state for a single participant. 
type ParticipantClock struct { mu sync.Mutex + logger logger.Logger owdEstimator *latency.OWDEstimator tracks map[string]*NtpEstimator ntpEpoch time.Time // NTP time from first SR hasEpoch bool } -// SetTrackEstimator registers or updates the NtpEstimator for a given track. -func (pc *ParticipantClock) SetTrackEstimator(trackID string, estimator *NtpEstimator) { +// NewParticipantClock creates a new ParticipantClock. +func NewParticipantClock(l logger.Logger) *ParticipantClock { + return &ParticipantClock{ + logger: l, + owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), + tracks: make(map[string]*NtpEstimator), + } +} + +// OnSenderReport processes an RTCP sender report for a track. +// It updates the NTP estimator, OWD estimator, and records the NTP epoch. +func (pc *ParticipantClock) OnSenderReport(trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { + pc.mu.Lock() + defer pc.mu.Unlock() + + est, ok := pc.tracks[trackID] + if !ok { + est = NewNtpEstimator(clockRate) + pc.tracks[trackID] = est + } + + result := est.OnSenderReport(ntpTime, rtpTimestamp, receivedAt) + if result == SROutlier && pc.logger != nil { + pc.logger.Warnw("sender report rejected as outlier", nil, + "trackID", trackID, + "rtpTimestamp", rtpTimestamp, + "ntpTime", ntpTime, + ) + } + if result != SRAccepted { + return + } + + senderNtpNanos := ntpTimestampToNanos(ntpTime) + pc.owdEstimator.Update(senderNtpNanos, receivedAt.UnixNano()) + + if !pc.hasEpoch { + pc.ntpEpoch = nanosToTime(senderNtpNanos) + pc.hasEpoch = true + } +} + +// RtpToReceiverClock maps an RTP timestamp to a time on the receiver's clock. +// The result is ntpTime + estimatedOWD, which places the sender's NTP time +// into the receiver's clock domain. 
+func (pc *ParticipantClock) RtpToReceiverClock(trackID string, rtpTimestamp uint32) (time.Time, error) { + pc.mu.Lock() + defer pc.mu.Unlock() + + est, ok := pc.tracks[trackID] + if !ok { + return time.Time{}, errNoSenderReports + } + + if !est.IsReady() { + return time.Time{}, errNotReady + } + + if !pc.hasEpoch { + return time.Time{}, errNoSenderReports + } + + ntpTime, err := est.RtpToNtp(rtpTimestamp) + if err != nil { + return time.Time{}, err + } + + estimatedOWD := time.Duration(pc.owdEstimator.EstimatedPropagationDelay()) + return ntpTime.Add(estimatedOWD), nil +} + +// ResetTrack clears the NTP estimator for a track, forcing it to rebuild +// from new sender reports. Used when a stream discontinuity is detected. +func (pc *ParticipantClock) ResetTrack(trackID string) { pc.mu.Lock() defer pc.mu.Unlock() - pc.tracks[trackID] = estimator + + if est, ok := pc.tracks[trackID]; ok { + est.Reset() + } } // RemoveTrack removes a track. func (pc *ParticipantClock) RemoveTrack(trackID string) { pc.mu.Lock() defer pc.mu.Unlock() + delete(pc.tracks, trackID) } + +// HasTrack returns true if the participant has a track with the given ID. 
+func (pc *ParticipantClock) HasTrack(trackID string) bool { + pc.mu.Lock() + defer pc.mu.Unlock() + + _, ok := pc.tracks[trackID] + return ok +} diff --git a/pkg/synchronizer/participantclock_test.go b/pkg/synchronizer/participantclock_test.go index 8ecf2362..31df7318 100644 --- a/pkg/synchronizer/participantclock_test.go +++ b/pkg/synchronizer/participantclock_test.go @@ -34,29 +34,22 @@ func readyEstimator(clockRate uint32, baseNtp time.Time, baseRtp uint32, count i return e } -func TestParticipantClock_SetAndRemoveTrack(t *testing.T) { +func TestParticipantClock_RemoveTrack(t *testing.T) { st := NewSessionTimeline(nil) - pc := st.AddParticipant("alice") - - e := readyEstimator(48000, time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC), 0, 5) - pc.SetTrackEstimator("audio-1", e) - - pc.RemoveTrack("audio-1") -} - -func TestParticipantClock_UpdateEstimator(t *testing.T) { - st := NewSessionTimeline(nil) - pc := st.AddParticipant("alice") + st.AddParticipant("alice") baseNtp := time.Date(2025, 1, 1, 12, 0, 0, 0, time.UTC) - e1 := readyEstimator(48000, baseNtp, 0, 5) - e2 := readyEstimator(48000, baseNtp.Add(time.Second), 0, 5) + // Feed SRs to create the track estimator via the timeline. + for i := 0; i < 5; i++ { + ntpTime := baseNtp.Add(time.Duration(i) * 5 * time.Second) + rtpTS := uint32(i) * 5 * 48000 + st.OnSenderReport("alice", "audio-1", 48000, ntpToUint64(ntpTime), rtpTS, ntpTime.Add(30*time.Millisecond)) + } - pc.SetTrackEstimator("audio-1", e1) - pc.SetTrackEstimator("audio-1", e2) + pc := st.GetParticipantClock("alice") + require.NotNil(t, pc) + require.True(t, pc.HasTrack("audio-1")) - // Should use e2, not e1. 
- pc.mu.Lock() - require.Same(t, e2, pc.tracks["audio-1"]) - pc.mu.Unlock() + pc.RemoveTrack("audio-1") + require.False(t, pc.HasTrack("audio-1")) } diff --git a/pkg/synchronizer/sessiontimeline.go b/pkg/synchronizer/sessiontimeline.go index 7abc1573..fea25f5b 100644 --- a/pkg/synchronizer/sessiontimeline.go +++ b/pkg/synchronizer/sessiontimeline.go @@ -20,7 +20,6 @@ import ( "sync" "time" - "github.com/livekit/mediatransportutil/pkg/latency" "github.com/livekit/protocol/logger" ) @@ -40,11 +39,7 @@ var ( // 2. Using the OWDEstimator, estimate each participant's OWD. The min // observed OWD approximates true propagation delay. // 3. To map a participant's RTP timestamp to the session timeline: -// sessionPTS = ntpEstimator.RtpToNtp(rtpTS) - participantNtpEpoch + (epochOnReceiverClock - sessionStart) -// Where: -// - participantNtpEpoch = NTP time from first SR for this participant -// - epochOnReceiverClock = participantNtpEpoch + estimatedOWD (maps epoch to receiver clock) -// - sessionStart = wall-clock time first packet of any track arrived +// sessionPTS = ntpTime + estimatedOWD - sessionStart type SessionTimeline struct { mu sync.RWMutex logger logger.Logger @@ -66,6 +61,7 @@ func NewSessionTimeline(l logger.Logger) *SessionTimeline { func (st *SessionTimeline) SetSessionStart(t time.Time) { st.mu.Lock() defer st.mu.Unlock() + st.sessionStart = t st.hasStart = true } @@ -75,10 +71,7 @@ func (st *SessionTimeline) AddParticipant(participantID string) *ParticipantCloc st.mu.Lock() defer st.mu.Unlock() - pc := &ParticipantClock{ - owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), - tracks: make(map[string]*NtpEstimator), - } + pc := NewParticipantClock(st.logger) st.participants[participantID] = pc return pc } @@ -93,26 +86,11 @@ func (st *SessionTimeline) GetOrAddParticipant(participantID string) *Participan return pc } - pc := &ParticipantClock{ - owdEstimator: latency.NewOWDEstimator(latency.OWDEstimatorParamsDefault), - tracks: 
make(map[string]*NtpEstimator), - } + pc := NewParticipantClock(st.logger) st.participants[participantID] = pc return pc } -// GetTrackEstimator returns the NTP estimator for a participant's track, or nil. -func (st *SessionTimeline) GetTrackEstimator(participantID, trackID string) *NtpEstimator { - st.mu.RLock() - defer st.mu.RUnlock() - - pc, ok := st.participants[participantID] - if !ok { - return nil - } - return pc.tracks[trackID] -} - // GetParticipantClock returns the ParticipantClock for a participant, or nil. func (st *SessionTimeline) GetParticipantClock(participantID string) *ParticipantClock { st.mu.RLock() @@ -125,112 +103,61 @@ func (st *SessionTimeline) GetParticipantClock(participantID string) *Participan func (st *SessionTimeline) RemoveParticipant(participantID string) { st.mu.Lock() defer st.mu.Unlock() + delete(st.participants, participantID) } // ResetTrack clears the NTP estimator for a track, forcing it to rebuild from // new sender reports. Used when a stream discontinuity is detected. func (st *SessionTimeline) ResetTrack(participantID, trackID string) { - st.mu.Lock() - defer st.mu.Unlock() - + st.mu.RLock() pc, ok := st.participants[participantID] - if !ok { - return - } - if est, ok := pc.tracks[trackID]; ok { - est.Reset() + st.mu.RUnlock() + + if ok { + pc.ResetTrack(trackID) } } // OnSenderReport processes an RTCP sender report for a participant's track. -// It updates the NTP estimator, OWD estimator, and records the NTP epoch. +// It delegates to the ParticipantClock to update the NTP estimator, OWD +// estimator, and NTP epoch. func (st *SessionTimeline) OnSenderReport(participantID, trackID string, clockRate uint32, ntpTime uint64, rtpTimestamp uint32, receivedAt time.Time) { - st.mu.Lock() - defer st.mu.Unlock() - + st.mu.RLock() pc, ok := st.participants[participantID] - if !ok { - return - } + st.mu.RUnlock() - // Get or create the per-track NTP estimator. 
- est, ok := pc.tracks[trackID] if !ok { - est = NewNtpEstimator(clockRate) - pc.tracks[trackID] = est + return } - // Feed the SR to the NTP estimator. - est.OnSenderReport(ntpTime, rtpTimestamp, receivedAt) - - // Convert NTP timestamp to nanoseconds and update OWD. - senderNtpNanos := ntpTimestampToNanos(ntpTime) - receiverNanos := receivedAt.UnixNano() - pc.owdEstimator.Update(senderNtpNanos, receiverNanos) - - // Record the NTP epoch from the first SR for this participant. - // Note: ntpEpoch cancels out in the GetSessionPTS formula - // (sessionPTS = ntpTime + OWD - sessionStart), so its exact value - // doesn't affect the output. It's kept for readability of the formula. - if !pc.hasEpoch { - pc.ntpEpoch = nanosToTime(senderNtpNanos) - pc.hasEpoch = true - } + pc.OnSenderReport(trackID, clockRate, ntpTime, rtpTimestamp, receivedAt) } // GetSessionPTS maps an RTP timestamp for a participant's track to a position // on the shared session timeline. // -// The formula is: -// -// sessionPTS = ntpEstimator.RtpToNtp(rtpTS) - participantNtpEpoch + (epochOnReceiverClock - sessionStart) -// -// Where: -// - participantNtpEpoch = NTP time from first SR for this participant -// - epochOnReceiverClock = participantNtpEpoch + estimatedOWD -// - sessionStart = wall-clock time first packet arrived +// The formula is: sessionPTS = ntpTime + estimatedOWD - sessionStart func (st *SessionTimeline) GetSessionPTS(participantID, trackID string, rtpTimestamp uint32) (time.Duration, error) { st.mu.RLock() - defer st.mu.RUnlock() - if !st.hasStart { + st.mu.RUnlock() return 0, errNoSessionStart } - pc, ok := st.participants[participantID] - if !ok { - return 0, fmt.Errorf("SessionTimeline: unknown participant %q", participantID) - } + sessionStart := st.sessionStart + st.mu.RUnlock() - est, ok := pc.tracks[trackID] if !ok { - return 0, errNoSenderReports - } - - if !est.IsReady() { - return 0, errNotReady - } - - if !pc.hasEpoch { - return 0, errNoSenderReports + return 0, 
fmt.Errorf("SessionTimeline: unknown participant %q", participantID) } - // Map RTP to NTP wall-clock time. - ntpTime, err := est.RtpToNtp(rtpTimestamp) + receiverTime, err := pc.RtpToReceiverClock(trackID, rtpTimestamp) if err != nil { return 0, err } - // Compute offset from participant's NTP epoch. - sinceEpoch := ntpTime.Sub(pc.ntpEpoch) - - // Map the participant's NTP epoch to the receiver's clock. - estimatedOWD := time.Duration(pc.owdEstimator.EstimatedPropagationDelay()) - epochOnReceiverClock := pc.ntpEpoch.Add(estimatedOWD) - - // Compute the session PTS. - sessionPTS := sinceEpoch + epochOnReceiverClock.Sub(st.sessionStart) + sessionPTS := receiverTime.Sub(sessionStart) if (sessionPTS < 0 || sessionPTS > 24*time.Hour) && st.logger != nil { st.logger.Warnw("GetSessionPTS: abnormal result", @@ -238,12 +165,8 @@ func (st *SessionTimeline) GetSessionPTS(participantID, trackID string, rtpTimes "participantID", participantID, "trackID", trackID, "rtpTimestamp", rtpTimestamp, - "ntpTime", ntpTime, - "ntpEpoch", pc.ntpEpoch, - "sinceEpoch", sinceEpoch, - "estimatedOWD", estimatedOWD, - "epochOnReceiverClock", epochOnReceiverClock, - "sessionStart", st.sessionStart, + "receiverTime", receiverTime, + "sessionStart", sessionStart, "sessionPTS", sessionPTS, ) } diff --git a/pkg/synchronizer/syncengine.go b/pkg/synchronizer/syncengine.go index 2e253dfe..0b9849c8 100644 --- a/pkg/synchronizer/syncengine.go +++ b/pkg/synchronizer/syncengine.go @@ -26,33 +26,8 @@ import ( ) const ( - // transitionSlewRatePerSecond is the rate at which the wall-clock→NTP - // transition correction is absorbed: 5ms per second of real time. - transitionSlewRatePerSecond = 5 * time.Millisecond - - // wallClockSanityThreshold is the maximum divergence between RTP-derived PTS - // and wall-clock PTS before falling back to wall clock in wallClockPTS(). 
- wallClockSanityThreshold = 5 * time.Second - - // ntpTrustThreshold is the maximum allowed divergence between NTP-derived PTS - // and wall-clock PTS. If NTP disagrees with wall clock by more than this, - // the NTP data is suspect (bad SRs, clock jumps, nonsensical timing) and - // we clamp to wall clock. This prevents bad publishers from dragging PTS far - // from reality. - ntpTrustThreshold = 500 * time.Millisecond - - // maxTimelyPacketAge is how long a track can be behind the pipeline deadline - // before its PTS is force-corrected forward. - maxTimelyPacketAge = 10 * time.Second - // defaultOldPacketThreshold is the default age after which packets are dropped. defaultOldPacketThreshold = 500 * time.Millisecond - - // slewRatePerSecond is the maximum rate at which PTS corrections are absorbed. - slewRatePerSecond = 5 * time.Millisecond - - // deadbandThreshold is the minimum |correction| before slew smoothing kicks in. - deadbandThreshold = 5 * time.Millisecond ) // SyncEngineOption configures a SyncEngine. @@ -155,11 +130,7 @@ func (e *SyncEngine) AddTrack(track TrackRemote, participantID string) TrackSync defer e.mu.Unlock() // Ensure the participant exists in the timeline. - pc := e.timeline.GetOrAddParticipant(participantID) - - // Auto-register the track with a placeholder estimator. - placeholder := NewNtpEstimator(clockRate) - pc.SetTrackEstimator(track.ID(), placeholder) + e.timeline.GetOrAddParticipant(participantID) st := &syncEngineTrack{ engine: e, @@ -238,13 +209,6 @@ func (e *SyncEngine) OnRTCP(packet rtcp.Packet) { // Feed the SR to the session timeline (updates NTP estimator + OWD). e.timeline.OnSenderReport(participantID, trackID, clockRate, sr.NTPTime, sr.RTPTime, now) - // Update the participant's track estimator from the timeline. 
- if estimator := e.timeline.GetTrackEstimator(participantID, trackID); estimator != nil { - if pc := e.timeline.GetParticipantClock(participantID); pc != nil { - pc.SetTrackEstimator(trackID, estimator) - } - } - // Call onSR callback if set. st.mu.Lock() onSR := st.onSR diff --git a/pkg/synchronizer/syncenginetrack.go b/pkg/synchronizer/syncenginetrack.go index 01c62788..3a339a73 100644 --- a/pkg/synchronizer/syncenginetrack.go +++ b/pkg/synchronizer/syncenginetrack.go @@ -27,6 +27,33 @@ import ( "github.com/livekit/protocol/utils/rtputil" ) +const ( + // transitionSlewRatePerSecond is the rate at which the wall-clock→NTP + // transition correction is absorbed: 5ms per second of real time. + transitionSlewRatePerSecond = 5 * time.Millisecond + + // wallClockSanityThreshold is the maximum divergence between RTP-derived PTS + // and wall-clock PTS before falling back to wall clock in wallClockPTS(). + wallClockSanityThreshold = 5 * time.Second + + // ntpTrustThreshold is the maximum allowed divergence between NTP-derived PTS + // and wall-clock PTS. If NTP disagrees with wall clock by more than this, + // the NTP data is suspect (bad SRs, clock jumps, nonsensical timing) and + // we clamp to wall clock. This prevents bad publishers from dragging PTS far + // from reality. + ntpTrustThreshold = 500 * time.Millisecond + + // maxTimelyPacketAge is how long a track can be behind the pipeline deadline + // before its PTS is force-corrected forward. + maxTimelyPacketAge = 10 * time.Second + + // slewRatePerSecond is the maximum rate at which PTS corrections are absorbed. + slewRatePerSecond = 5 * time.Millisecond + + // deadbandThreshold is the minimum |correction| before slew smoothing kicks in. + deadbandThreshold = 5 * time.Millisecond +) + // syncEngineTrack implements TrackSync for a single track within a SyncEngine. type syncEngineTrack struct { engine *SyncEngine