diff --git a/pkg/media/getusermedia_integration_test.go b/pkg/media/getusermedia_integration_test.go new file mode 100644 index 0000000..dd485fd --- /dev/null +++ b/pkg/media/getusermedia_integration_test.go @@ -0,0 +1,437 @@ +package media + +import ( + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/pion/webrtc/v4" + + "github.com/thesyncim/libgowebrtc/internal/ffi" + "github.com/thesyncim/libgowebrtc/internal/testutil" + "github.com/thesyncim/libgowebrtc/pkg/codec" +) + +type syntheticVideoCapture struct { + deviceID string + width int + height int + fps int + + mu sync.Mutex + stopCh chan struct{} + stopped bool +} + +func newSyntheticVideoCapture(deviceID string, width, height, fps int) *syntheticVideoCapture { + return &syntheticVideoCapture{ + deviceID: deviceID, + width: width, + height: height, + fps: fps, + stopCh: make(chan struct{}), + } +} + +func (c *syntheticVideoCapture) Start(callback ffi.VideoCaptureCallback) error { + interval := time.Second / 30 + if c.fps > 0 { + interval = time.Second / time.Duration(c.fps) + } + + go func() { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for frameIndex := int64(0); ; frameIndex++ { + frame := testutil.CreateTestVideoFrame(c.width, c.height) + if len(frame.Data) > 0 && len(frame.Data[0]) > 0 { + frame.Data[0][0] = byte(len(c.deviceID) % 255) + } + callback(&ffi.CapturedVideoFrame{ + YPlane: frame.Data[0], + UPlane: frame.Data[1], + VPlane: frame.Data[2], + Width: int32(c.width), + Height: int32(c.height), + YStride: int32(frame.Stride[0]), + UStride: int32(frame.Stride[1]), + VStride: int32(frame.Stride[2]), + TimestampUs: frameIndex * int64(interval/time.Microsecond), + }) + + select { + case <-ticker.C: + case <-c.stopCh: + return + } + } + }() + + return nil +} + +func (c *syntheticVideoCapture) Close() { + c.mu.Lock() + defer c.mu.Unlock() + if c.stopped { + return + } + c.stopped = true + close(c.stopCh) +} + +type syntheticAudioCapture struct { + deviceID string + sampleRate 
int + channels int + + mu sync.Mutex + stopCh chan struct{} + stopped bool +} + +func newSyntheticAudioCapture(deviceID string, sampleRate, channels int) *syntheticAudioCapture { + return &syntheticAudioCapture{ + deviceID: deviceID, + sampleRate: sampleRate, + channels: channels, + stopCh: make(chan struct{}), + } +} + +func (c *syntheticAudioCapture) Start(callback ffi.AudioCaptureCallback) error { + const interval = 20 * time.Millisecond + samplesPerChannel := c.sampleRate / 50 + + go func() { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for frameIndex := int64(0); ; frameIndex++ { + frame := testutil.CreateTestAudioFrame(c.sampleRate, c.channels, samplesPerChannel) + callback(&ffi.CapturedAudioFrame{ + Samples: frame.SamplesS16(), + NumChannels: int32(c.channels), + SampleRate: int32(c.sampleRate), + TimestampUs: frameIndex * int64(interval/time.Microsecond), + }) + + select { + case <-ticker.C: + case <-c.stopCh: + return + } + } + }() + + return nil +} + +func (c *syntheticAudioCapture) Close() { + c.mu.Lock() + defer c.mu.Unlock() + if c.stopped { + return + } + c.stopped = true + close(c.stopCh) +} + +func TestGetUserMediaFacingModeSelectsMatchingCameraAndFlowsToPion(t *testing.T) { + testutil.RequireShim(t) + + installMediaFFIStubs(t, mediaFFIStubs{ + loadLibrary: func() error { return nil }, + enumerateDevice: func() ([]ffi.DeviceInfo, error) { + return []ffi.DeviceInfo{ + {DeviceID: "camera-front", Label: "Front Camera", Kind: ffi.DeviceKindVideoInput}, + {DeviceID: "camera-rear", Label: "Rear Camera", Kind: ffi.DeviceKindVideoInput}, + }, nil + }, + newVideo: func(deviceID string, width, height, fps int) (videoCaptureHandle, error) { + return newSyntheticVideoCapture(deviceID, width, height, fps), nil + }, + }) + + stream, err := GetUserMedia(Constraints{ + Video: &VideoConstraints{ + FacingMode: FacingModeEnvironment, + Width: IdealInt(320), + Height: IdealInt(240), + FrameRate: ExactFloat(10), + Codec: codec.VP8, + }, + }) + if err 
!= nil { + t.Fatalf("GetUserMedia() error = %v", err) + } + defer stopSyntheticMediaStreamTracks(stream) + + videoTracks := stream.GetVideoTracks() + if len(videoTracks) != 1 { + t.Fatalf("GetVideoTracks() len = %d, want 1", len(videoTracks)) + } + + settings := videoTracks[0].GetSettings() + if settings.DeviceID != "camera-rear" { + t.Fatalf("GetSettings().DeviceID = %q, want %q", settings.DeviceID, "camera-rear") + } + if settings.FacingMode != FacingModeEnvironment { + t.Fatalf("GetSettings().FacingMode = %q, want %q", settings.FacingMode, FacingModeEnvironment) + } + if videoTracks[0].Label() != "Rear Camera" { + t.Fatalf("Label() = %q, want %q", videoTracks[0].Label(), "Rear Camera") + } + + remoteTrack, packetCount := requireSyntheticMediaStreamInterop(t, stream) + if got := remoteTrack.Kind(); got != webrtc.RTPCodecTypeVideo { + t.Fatalf("remote track kind = %v, want %v", got, webrtc.RTPCodecTypeVideo) + } + if got := remoteTrack.StreamID(); got != stream.ID() { + t.Fatalf("remote StreamID() = %q, want %q", got, stream.ID()) + } + if packetCount.Load() == 0 { + t.Fatal("remote RTP packet count = 0, want synthetic capture to flow end to end") + } +} + +func TestGetUserMediaFallsBackToDefaultDevicesWhenEnumerationIsEmpty(t *testing.T) { + testutil.RequireShim(t) + + var videoDeviceID string + var audioDeviceID string + + installMediaFFIStubs(t, mediaFFIStubs{ + loadLibrary: func() error { return nil }, + enumerateDevice: func() ([]ffi.DeviceInfo, error) { + return nil, nil + }, + newVideo: func(deviceID string, width, height, fps int) (videoCaptureHandle, error) { + videoDeviceID = deviceID + return newSyntheticVideoCapture(deviceID, width, height, fps), nil + }, + newAudio: func(deviceID string, sampleRate, channels int) (audioCaptureHandle, error) { + audioDeviceID = deviceID + return newSyntheticAudioCapture(deviceID, sampleRate, channels), nil + }, + }) + + stream, err := GetUserMedia(Constraints{ + Video: &VideoConstraints{ + Width: ExactInt(320), + 
Height: ExactInt(240), + FrameRate: ExactFloat(10), + Codec: codec.VP8, + }, + Audio: &AudioConstraints{ + SampleRate: ExactInt(48_000), + ChannelCount: ExactInt(2), + }, + }) + if err != nil { + t.Fatalf("GetUserMedia() error = %v", err) + } + defer stopSyntheticMediaStreamTracks(stream) + + if videoDeviceID != "" { + t.Fatalf("newVideoCapture deviceID = %q, want empty default-device selection", videoDeviceID) + } + if audioDeviceID != "" { + t.Fatalf("newAudioCapture deviceID = %q, want empty default-device selection", audioDeviceID) + } + + videoTracks := stream.GetVideoTracks() + audioTracks := stream.GetAudioTracks() + if len(videoTracks) != 1 || len(audioTracks) != 1 { + t.Fatalf("track counts = (%d video, %d audio), want (1, 1)", len(videoTracks), len(audioTracks)) + } + if got := videoTracks[0].Label(); got != "camera" { + t.Fatalf("video Label() = %q, want %q", got, "camera") + } + if got := audioTracks[0].Label(); got != "microphone" { + t.Fatalf("audio Label() = %q, want %q", got, "microphone") + } + if got := videoTracks[0].GetSettings().DeviceID; got != "" { + t.Fatalf("video DeviceID = %q, want empty default-device ID", got) + } + if got := audioTracks[0].GetSettings().DeviceID; got != "" { + t.Fatalf("audio DeviceID = %q, want empty default-device ID", got) + } + + remoteTracks, packetCounts := requireSyntheticMediaStreamInteropKinds( + t, + stream, + []webrtc.RTPCodecType{webrtc.RTPCodecTypeAudio, webrtc.RTPCodecTypeVideo}, + ) + for _, kind := range []webrtc.RTPCodecType{webrtc.RTPCodecTypeAudio, webrtc.RTPCodecTypeVideo} { + if packetCounts[kind].Load() == 0 { + t.Fatalf("remote %v RTP packet count = 0, want fallback capture to flow end to end", kind) + } + if got := remoteTracks[kind].StreamID(); got != stream.ID() { + t.Fatalf("remote %v StreamID() = %q, want %q", kind, got, stream.ID()) + } + } +} + +func TestGetUserMediaDoesNotReportRequestedFacingModeWhenEnumerationIsEmpty(t *testing.T) { + testutil.RequireShim(t) + + installMediaFFIStubs(t, 
mediaFFIStubs{ + loadLibrary: func() error { return nil }, + enumerateDevice: func() ([]ffi.DeviceInfo, error) { + return nil, nil + }, + newVideo: func(deviceID string, width, height, fps int) (videoCaptureHandle, error) { + return newSyntheticVideoCapture(deviceID, width, height, fps), nil + }, + }) + + stream, err := GetUserMedia(Constraints{ + Video: &VideoConstraints{ + FacingMode: FacingModeEnvironment, + Width: ExactInt(320), + Height: ExactInt(240), + FrameRate: ExactFloat(10), + Codec: codec.VP8, + }, + }) + if err != nil { + t.Fatalf("GetUserMedia() error = %v", err) + } + defer stopSyntheticMediaStreamTracks(stream) + + videoTracks := stream.GetVideoTracks() + if len(videoTracks) != 1 { + t.Fatalf("GetVideoTracks() len = %d, want 1", len(videoTracks)) + } + + settings := videoTracks[0].GetSettings() + if got := settings.FacingMode; got != "" { + t.Fatalf("GetSettings().FacingMode = %q, want empty when fallback device direction is unknown", got) + } + if got := videoTracks[0].GetCapabilities().FacingMode; len(got) != 0 { + t.Fatalf("GetCapabilities().FacingMode = %v, want empty when fallback device direction is unknown", got) + } + + remoteTrack, packetCount := requireSyntheticMediaStreamInterop(t, stream) + if got := remoteTrack.Kind(); got != webrtc.RTPCodecTypeVideo { + t.Fatalf("remote track kind = %v, want %v", got, webrtc.RTPCodecTypeVideo) + } + if got := remoteTrack.StreamID(); got != stream.ID() { + t.Fatalf("remote StreamID() = %q, want %q", got, stream.ID()) + } + if packetCount.Load() == 0 { + t.Fatal("remote RTP packet count = 0, want fallback capture to flow end to end") + } +} + +func requireSyntheticMediaStreamInterop(t *testing.T, stream *MediaStream) (*webrtc.TrackRemote, *atomic.Int64) { + t.Helper() + + kind := singleSyntheticTrackKind(t, stream) + remoteTracks, packetCounts := requireSyntheticMediaStreamInteropKinds(t, stream, []webrtc.RTPCodecType{kind}) + return remoteTracks[kind], packetCounts[kind] +} + +func 
requireSyntheticMediaStreamInteropKinds(t *testing.T, stream *MediaStream, expectedKinds []webrtc.RTPCodecType) (map[webrtc.RTPCodecType]*webrtc.TrackRemote, map[webrtc.RTPCodecType]*atomic.Int64) { + t.Helper() + + sender := newLoopbackPionPeerConnection(t) + defer func() { _ = sender.Close() }() + + receiver := newLoopbackPionPeerConnection(t) + defer func() { _ = receiver.Close() }() + + remoteTracks := make(map[webrtc.RTPCodecType]*webrtc.TrackRemote, len(expectedKinds)) + packetCounts := make(map[webrtc.RTPCodecType]*atomic.Int64, len(expectedKinds)) + packetCh := make(chan webrtc.RTPCodecType, len(expectedKinds)) + var mu sync.Mutex + + receiver.OnTrack(func(track *webrtc.TrackRemote, rtpReceiver *webrtc.RTPReceiver) { + kind := track.Kind() + mu.Lock() + packetCount, ok := packetCounts[kind] + if !ok { + packetCount = &atomic.Int64{} + packetCounts[kind] = packetCount + } + if _, ok := remoteTracks[kind]; !ok { + remoteTracks[kind] = track + } + mu.Unlock() + + go func() { + for { + _, _, err := track.ReadRTP() + if err != nil { + return + } + packetCount.Add(1) + select { + case packetCh <- kind: + default: + } + } + }() + }) + + senders, err := AddTracksToPionPeerConnection(sender, stream) + if err != nil { + t.Fatalf("AddTracksToPionPeerConnection() error = %v", err) + } + if len(senders) != len(expectedKinds) { + t.Fatalf("AddTracksToPionPeerConnection() senders len = %d, want %d", len(senders), len(expectedKinds)) + } + for _, rtpSender := range senders { + go drainRemoteRegistryRTCP(rtpSender) + } + + connectRemoteRegistryPionPeers(t, sender, receiver) + + for _, kind := range expectedKinds { + deadline := time.After(10 * time.Second) + for { + mu.Lock() + remoteTrack := remoteTracks[kind] + packetCount := packetCounts[kind] + mu.Unlock() + if remoteTrack != nil && packetCount != nil && packetCount.Load() > 0 { + break + } + select { + case <-packetCh: + case <-deadline: + t.Fatalf("timed out waiting for remote %v track from GetUserMedia stream", kind) 
+ } + } + } + + return remoteTracks, packetCounts +} + +func singleSyntheticTrackKind(t *testing.T, stream *MediaStream) webrtc.RTPCodecType { + t.Helper() + + videoTracks := stream.GetVideoTracks() + audioTracks := stream.GetAudioTracks() + switch { + case len(videoTracks) == 1 && len(audioTracks) == 0: + return webrtc.RTPCodecTypeVideo + case len(videoTracks) == 0 && len(audioTracks) == 1: + return webrtc.RTPCodecTypeAudio + default: + t.Fatalf("stream has %d video and %d audio tracks, want exactly one total track", len(videoTracks), len(audioTracks)) + return webrtc.RTPCodecType(0) + } +} + +func stopSyntheticMediaStreamTracks(stream *MediaStream) { + if stream == nil { + return + } + for _, mediaTrack := range stream.GetTracks() { + mediaTrack.Stop() + } +} diff --git a/pkg/media/media.go b/pkg/media/media.go index e083387..92a58ed 100644 --- a/pkg/media/media.go +++ b/pkg/media/media.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "strconv" + "strings" "sync" "sync/atomic" @@ -778,7 +779,7 @@ func resolveVideoCaptureRequest(request VideoConstraints, devices []MediaDeviceI if err := validateVideoConstraints(request); err != nil { return VideoTrackSettings{}, VideoConstraints{}, "", err } - device, err := selectDevice(devices, MediaDeviceKindVideoInput, request.DeviceID) + device, actualFacingMode, err := selectVideoDevice(devices, request.DeviceID, request.FacingMode) if err != nil { return VideoTrackSettings{}, VideoConstraints{}, "", err } @@ -795,7 +796,7 @@ func resolveVideoCaptureRequest(request VideoConstraints, devices []MediaDeviceI Height: height, FrameRate: frameRate, DeviceID: device.DeviceID, - FacingMode: request.FacingMode, + FacingMode: actualFacingMode, } resolved := request @@ -818,11 +819,82 @@ func resolveVideoCaptureRequest(request VideoConstraints, devices []MediaDeviceI return settings, resolved, label, nil } +func selectVideoDevice(devices []MediaDeviceInfo, requestedDeviceID StringConstraint, requestedFacingMode FacingMode) (MediaDeviceInfo, 
FacingMode, error) { + device, enumerated, err := selectDevice(devices, MediaDeviceKindVideoInput, requestedDeviceID) + if err != nil { + return MediaDeviceInfo{}, "", err + } + + // deviceId constraints take precedence over facingMode preferences. + if requestedDeviceID.Exact != nil || requestedDeviceID.Ideal != nil { + return device, resolvedFacingModeForDevice(device, requestedFacingMode, enumerated), nil + } + + if requestedFacingMode != "" { + for _, candidate := range filterDevicesByKind(devices, MediaDeviceKindVideoInput) { + if inferFacingMode(candidate) == requestedFacingMode { + return candidate, requestedFacingMode, nil + } + } + } + + return device, resolvedFacingModeForDevice(device, requestedFacingMode, enumerated), nil +} + +func filterDevicesByKind(devices []MediaDeviceInfo, kind MediaDeviceKind) []MediaDeviceInfo { + filtered := make([]MediaDeviceInfo, 0, len(devices)) + for _, device := range devices { + if device.Kind == kind { + filtered = append(filtered, device) + } + } + return filtered +} + +func resolvedFacingModeForDevice(device MediaDeviceInfo, fallback FacingMode, enumerated bool) FacingMode { + if inferred := inferFacingMode(device); inferred != "" { + return inferred + } + if !enumerated { + return "" + } + return fallback +} + +func inferFacingMode(device MediaDeviceInfo) FacingMode { + normalized := normalizeCaptureDescriptor(device.Label + " " + device.DeviceID) + + switch { + case strings.Contains(normalized, "front"), + strings.Contains(normalized, "facetime"), + strings.Contains(normalized, "selfie"), + strings.Contains(normalized, "user"): + return FacingModeUser + case strings.Contains(normalized, "rear"), + strings.Contains(normalized, "back"), + strings.Contains(normalized, "environment"), + strings.Contains(normalized, "world"): + return FacingModeEnvironment + case strings.Contains(normalized, "left"): + return FacingModeLeft + case strings.Contains(normalized, "right"): + return FacingModeRight + default: + return "" + } 
// captureDescriptorSeparators maps the separator characters "-", "_", ".",
// "/" and "\" to a single space. It is built once at package initialisation
// instead of on every call; a strings.Replacer is safe for concurrent use.
var captureDescriptorSeparators = strings.NewReplacer("-", " ", "_", " ", ".", " ", "/", " ", "\\", " ")

// normalizeCaptureDescriptor lower-cases value and replaces each of the
// separator characters "-", "_", ".", "/" and "\" with a space, so that
// device labels and IDs can be searched for keywords uniformly.
func normalizeCaptureDescriptor(value string) string {
	return captureDescriptorSeparators.Replace(strings.ToLower(value))
}
-1094,11 +1175,24 @@ func selectDevice(devices []MediaDeviceInfo, kind MediaDeviceKind, requested Str if requested.Ideal != nil { for _, device := range candidates { if device.DeviceID == *requested.Ideal { - return device, nil + return device, true, nil } } } - return candidates[0], nil + return candidates[0], true, nil +} + +func defaultDeviceLabel(kind MediaDeviceKind) string { + switch kind { + case MediaDeviceKindVideoInput: + return "camera" + case MediaDeviceKindAudioInput: + return "microphone" + case MediaDeviceKindAudioOutput: + return "speaker" + default: + return "device" + } } func selectDisplayTarget(screens []ScreenInfo, request DisplayVideoConstraints) (ScreenInfo, DisplaySurface, error) { @@ -1119,6 +1213,20 @@ func selectDisplayTarget(screens []ScreenInfo, request DisplayVideoConstraints) return screen, DisplaySurfaceMonitor, nil } } + onlyWindows := true + for _, screen := range screens { + if !screen.IsWindow { + onlyWindows = false + break + } + } + if onlyWindows { + return ScreenInfo{ + ID: request.ScreenID, + Title: fmt.Sprintf("screen-%d", request.ScreenID), + IsWindow: false, + }, DisplaySurfaceMonitor, nil + } return ScreenInfo{}, "", ErrDeviceNotFound } @@ -1413,12 +1521,6 @@ func (t *videoStreamTrack) ApplyConstraints(vc VideoConstraints) error { return err } - if merged.Bitrate > 0 && merged.Bitrate != t.constraints.Bitrate { - if err := t.track.SetBitrate(merged.Bitrate); err != nil { - return err - } - } - nextFrameRate, err := resolveVideoFrameRateConstraint( merged.FrameRate, t.settings.FrameRate, @@ -1427,7 +1529,17 @@ func (t *videoStreamTrack) ApplyConstraints(vc VideoConstraints) error { if err != nil { return err } + + if merged.Bitrate > 0 && merged.Bitrate != t.constraints.Bitrate { + if err := t.track.SetBitrate(merged.Bitrate); err != nil { + return err + } + } + if nextFrameRate != t.settings.FrameRate { + if err := t.restartVideoCapture(nextFrameRate); err != nil { + return err + } if err := 
t.track.SetFramerate(nextFrameRate); err != nil { return err } @@ -1446,37 +1558,112 @@ var _ VideoStreamTrack = (*videoStreamTrack)(nil) func (t *videoStreamTrack) pionTrack() webrtc.TrackLocal { return t.track } -func (t *videoStreamTrack) startVideoCapture() error { +func (t *videoStreamTrack) restartVideoCapture(frameRate float64) error { + if t.source != sourceDevice && t.source != sourceDisplay { + return nil + } + t.mu.Lock() defer t.mu.Unlock() - if t.videoCapture != nil { + if t.readyState.Load().(string) != "live" { return nil } - capture, err := newVideoCapture( - t.settings.DeviceID, - t.settings.Width, - t.settings.Height, - int(t.settings.FrameRate), + var ( + nextCapture videoCaptureHandle + nextScreen screenCaptureHandle + err error ) + + switch t.source { + case sourceDevice: + nextCapture, err = newVideoCapture( + t.settings.DeviceID, + t.settings.Width, + t.settings.Height, + int(frameRate), + ) + if err != nil { + return err + } + err = nextCapture.Start(t.videoCaptureCallback()) + case sourceDisplay: + if t.displayConstraints == nil { + return ErrInvalidConstraints + } + screenID := t.displayConstraints.ScreenID + isWindow := false + if t.displayConstraints.WindowID != 0 { + screenID = t.displayConstraints.WindowID + isWindow = true + } + nextScreen, err = newScreenCapture(screenID, isWindow, int(frameRate)) + if err != nil { + return err + } + err = nextScreen.Start(t.videoCaptureCallback()) + } if err != nil { + if nextCapture != nil { + nextCapture.Close() + } + if nextScreen != nil { + nextScreen.Close() + } return err } - err = capture.Start(func(captured *ffi.CapturedVideoFrame) { + if t.videoCapture != nil { + t.videoCapture.Close() + t.videoCapture = nil + } + if t.screenCapture != nil { + t.screenCapture.Close() + t.screenCapture = nil + } + + t.videoCapture = nextCapture + t.screenCapture = nextScreen + return nil +} + +func (t *videoStreamTrack) videoCaptureCallback() ffi.VideoCaptureCallback { + return func(captured 
*ffi.CapturedVideoFrame) { if !t.enabled.Load() || t.muted.Load() || t.readyState.Load().(string) != "live" { return } videoFrame := &frame.VideoFrame{ Width: int(captured.Width), Height: int(captured.Height), + PTS: ptsFromTimestampUs(captured.TimestampUs, 90000), Format: frame.PixelFormatI420, Data: [][]byte{captured.YPlane, captured.UPlane, captured.VPlane}, Stride: []int{int(captured.YStride), int(captured.UStride), int(captured.VStride)}, } _ = t.track.WriteFrame(videoFrame, false) - }) + } +} + +func (t *videoStreamTrack) startVideoCapture() error { + t.mu.Lock() + defer t.mu.Unlock() + + if t.videoCapture != nil { + return nil + } + + capture, err := newVideoCapture( + t.settings.DeviceID, + t.settings.Width, + t.settings.Height, + int(t.settings.FrameRate), + ) + if err != nil { + return err + } + + err = capture.Start(t.videoCaptureCallback()) if err != nil { capture.Close() return err @@ -1509,19 +1696,7 @@ func (t *videoStreamTrack) startScreenCapture() error { return err } - err = capture.Start(func(captured *ffi.CapturedVideoFrame) { - if !t.enabled.Load() || t.muted.Load() || t.readyState.Load().(string) != "live" { - return - } - videoFrame := &frame.VideoFrame{ - Width: int(captured.Width), - Height: int(captured.Height), - Format: frame.PixelFormatI420, - Data: [][]byte{captured.YPlane, captured.UPlane, captured.VPlane}, - Stride: []int{int(captured.YStride), int(captured.UStride), int(captured.VStride)}, - } - _ = t.track.WriteFrame(videoFrame, false) - }) + err = capture.Start(t.videoCaptureCallback()) if err != nil { capture.Close() return err @@ -1651,6 +1826,7 @@ func (t *audioStreamTrack) startAudioCapture() error { int(captured.SampleRate), int(captured.NumChannels), ) + audioFrame.PTS = ptsFromTimestampUs(captured.TimestampUs, int64(captured.SampleRate)) _ = t.track.WriteFrame(audioFrame) }) if err != nil { @@ -1669,3 +1845,10 @@ var idCounter atomic.Uint64 func generateID() string { return "libwebrtc-" + 
// ptsFromTimestampUs converts a capture timestamp in microseconds into a
// presentation timestamp counted in clockRate ticks per second. The result is
// truncated to its low 32 bits, so large tick counts wrap around.
//
// A non-positive timestampUs or clockRate yields 0.
func ptsFromTimestampUs(timestampUs int64, clockRate int64) uint32 {
	if timestampUs <= 0 || clockRate <= 0 {
		return 0
	}

	const usPerSecond = 1_000_000

	// Convert whole seconds and the sub-second remainder separately.
	// Multiplying the full microsecond value by clockRate first overflows
	// int64 for large timestamps (e.g. epoch-based microseconds at 90 kHz).
	// The split form is exact: floor((s*M + r) * c / M) == s*c + floor(r*c/M).
	seconds := timestampUs / usPerSecond
	remainderUs := timestampUs % usPerSecond
	return uint32(seconds*clockRate + remainderUs*clockRate/usPerSecond)
}
= %d, want 1", len(videoTracks)) + } + + video := videoTracks[0] + if got := video.GetSettings().FacingMode; got != "" { + t.Fatalf("video FacingMode = %q, want empty when fallback device direction is unknown", got) + } + if got := video.GetCapabilities().FacingMode; len(got) != 0 { + t.Fatalf("video FacingMode capability = %v, want empty when fallback device direction is unknown", got) + } +} + func TestGetDisplayMediaResolvesRequestedWindowAndOptionalAudio(t *testing.T) { installMediaFFIStubs(t, mediaFFIStubs{ loadLibrary: func() error { return nil }, @@ -340,6 +401,54 @@ func TestGetDisplayMediaRejectsConflictingTargets(t *testing.T) { } } +func TestGetDisplayMediaAllowsExplicitScreenIDWhenEnumerationOnlyReturnsWindows(t *testing.T) { + installMediaFFIStubs(t, mediaFFIStubs{ + loadLibrary: func() error { return nil }, + enumerateScreen: func() ([]ffi.ScreenInfo, error) { + return []ffi.ScreenInfo{ + {ID: 7, Title: "Slides", IsWindow: true}, + }, nil + }, + newScreen: func(id int64, isWindow bool, fps int) (screenCaptureHandle, error) { + if isWindow { + t.Fatal("explicit screen fallback should not create a window capture") + } + if id != 1 { + t.Fatalf("newScreen id = %d, want 1", id) + } + return stubScreenCapture{}, nil + }, + }) + + stream, err := GetDisplayMedia(DisplayConstraints{ + Video: &DisplayVideoConstraints{ + ScreenID: 1, + Width: IdealInt(1920), + Height: IdealInt(1080), + FrameRate: IdealFloat(30), + }, + }) + if err != nil { + t.Fatalf("GetDisplayMedia() error = %v", err) + } + if stream == nil { + t.Fatal("GetDisplayMedia() stream = nil, want non-nil") + } + + videoTracks := stream.GetVideoTracks() + if len(videoTracks) != 1 { + t.Fatalf("GetVideoTracks() len = %d, want 1", len(videoTracks)) + } + + video := videoTracks[0].(*videoStreamTrack) + if got := video.Label(); got != "screen-capture" { + t.Fatalf("video Label() = %q, want %q", got, "screen-capture") + } + if video.displayConstraints == nil || video.displayConstraints.ScreenID != 1 { + 
t.Fatalf("screen id = %+v, want 1", video.displayConstraints) + } +} + func TestNewVideoStreamTrackCodecPreferencesOverrideCodec(t *testing.T) { video, err := newVideoStreamTrack( VideoConstraints{ diff --git a/pkg/media/remote_integration_test.go b/pkg/media/remote_integration_test.go index edda79d..3b89cca 100644 --- a/pkg/media/remote_integration_test.go +++ b/pkg/media/remote_integration_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/pion/ice/v4" "github.com/pion/rtp" pioncodecs "github.com/pion/rtp/codecs" "github.com/pion/webrtc/v4" @@ -16,17 +17,24 @@ import ( "github.com/thesyncim/libgowebrtc/pkg/pionrecv" ) -func TestRemoteStreamRegistryBindPionTrackIntegration(t *testing.T) { - sender, err := webrtc.NewPeerConnection(webrtc.Configuration{}) +func newLoopbackPionPeerConnection(t testing.TB) *webrtc.PeerConnection { + t.Helper() + + settingEngine := webrtc.SettingEngine{} + settingEngine.SetICEMulticastDNSMode(ice.MulticastDNSModeDisabled) + + pc, err := webrtc.NewAPI(webrtc.WithSettingEngine(settingEngine)).NewPeerConnection(webrtc.Configuration{}) if err != nil { - t.Fatalf("NewPeerConnection(sender): %v", err) + t.Fatalf("NewPeerConnection(): %v", err) } + return pc +} + +func TestRemoteStreamRegistryBindPionTrackIntegration(t *testing.T) { + sender := newLoopbackPionPeerConnection(t) defer func() { _ = sender.Close() }() - receiver, err := webrtc.NewPeerConnection(webrtc.Configuration{}) - if err != nil { - t.Fatalf("NewPeerConnection(receiver): %v", err) - } + receiver := newLoopbackPionPeerConnection(t) defer func() { _ = receiver.Close() }() videoTrack, err := webrtc.NewTrackLocalStaticRTP( diff --git a/pkg/media/timestamp_test.go b/pkg/media/timestamp_test.go new file mode 100644 index 0000000..397e570 --- /dev/null +++ b/pkg/media/timestamp_test.go @@ -0,0 +1,27 @@ +package media + +import "testing" + +func TestPTSFromTimestampUs(t *testing.T) { + tests := []struct { + name string + timestampUs int64 + clockRate int64 + want uint32 
+ }{ + {name: "zero timestamp", timestampUs: 0, clockRate: 90000, want: 0}, + {name: "negative timestamp", timestampUs: -1, clockRate: 90000, want: 0}, + {name: "zero clock", timestampUs: 1_000_000, clockRate: 0, want: 0}, + {name: "video one second", timestampUs: 1_000_000, clockRate: 90000, want: 90000}, + {name: "video thirty fps frame", timestampUs: 33_333, clockRate: 90000, want: 2999}, + {name: "audio twenty ms opus", timestampUs: 20_000, clockRate: 48000, want: 960}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := ptsFromTimestampUs(tc.timestampUs, tc.clockRate); got != tc.want { + t.Fatalf("ptsFromTimestampUs(%d, %d) = %d, want %d", tc.timestampUs, tc.clockRate, got, tc.want) + } + }) + } +} diff --git a/shim/BUILD.bazel b/shim/BUILD.bazel index 37c52b2..807f60e 100644 --- a/shim/BUILD.bazel +++ b/shim/BUILD.bazel @@ -197,7 +197,10 @@ cc_library( }), defines = _COMMON_DEFINES + select({ "@platforms//os:macos": _UNIX_DEFINES + ["WEBRTC_MAC"], - "@platforms//os:linux": _UNIX_DEFINES + ["WEBRTC_LINUX"], + "@platforms//os:linux": _UNIX_DEFINES + [ + "WEBRTC_LINUX", + "WEBRTC_USE_X11", + ], "@platforms//os:windows": ["WEBRTC_WIN", "NOMINMAX", "WIN32_LEAN_AND_MEAN"], "//conditions:default": _UNIX_DEFINES, }), diff --git a/shim/shim_capture.cc b/shim/shim_capture.cc index a8ee764..d2c275e 100644 --- a/shim/shim_capture.cc +++ b/shim/shim_capture.cc @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -29,16 +30,67 @@ #if defined(SHIM_ENABLE_DEVICE_CAPTURE) namespace { +bool EnvHasValue(const char* name) { + const char* value = std::getenv(name); + return value && value[0] != '\0'; +} + +bool EnvTruthy(const char* name, bool default_value) { + const char* value = std::getenv(name); + if (!value || value[0] == '\0') { + return default_value; + } + + if (strcmp(value, "1") == 0 || + strcmp(value, "true") == 0 || + strcmp(value, "TRUE") == 0 || + strcmp(value, "yes") == 0 || + strcmp(value, "YES") == 0 || + 
strcmp(value, "on") == 0 || + strcmp(value, "ON") == 0) { + return true; + } + + if (strcmp(value, "0") == 0 || + strcmp(value, "false") == 0 || + strcmp(value, "FALSE") == 0 || + strcmp(value, "no") == 0 || + strcmp(value, "NO") == 0 || + strcmp(value, "off") == 0 || + strcmp(value, "OFF") == 0) { + return false; + } + + return default_value; +} + webrtc::DesktopCaptureOptions CreateDesktopCaptureOptions() { - webrtc::DesktopCaptureOptions options; + webrtc::DesktopCaptureOptions options = + webrtc::DesktopCaptureOptions::CreateDefault(); #if defined(WEBRTC_USE_X11) - options.set_x_display(webrtc::SharedXDisplay::CreateDefault()); + if (EnvHasValue("DISPLAY")) { + options.set_x_display(webrtc::SharedXDisplay::CreateDefault()); + } #endif #if defined(WEBRTC_USE_PIPEWIRE) - options.set_allow_pipewire(true); - options.set_screencast_stream(webrtc::SharedScreenCastStream::CreateDefault()); + // Prefer X11 capture whenever a DISPLAY is available. PipeWire remains + // available for Wayland/headless portal flows and can be forced on with + // LIBWEBRTC_ENABLE_PIPEWIRE=1 when desired. + bool allow_pipewire = EnvTruthy( + "LIBWEBRTC_ENABLE_PIPEWIRE", + EnvHasValue("WAYLAND_DISPLAY") && !EnvHasValue("DISPLAY") + ); + options.set_allow_pipewire(allow_pipewire); + if (allow_pipewire) { + options.set_screencast_stream(webrtc::SharedScreenCastStream::CreateDefault()); + } +#endif + +#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) + options.set_allow_iosurface(true); + options.set_allow_sck_capturer(true); #endif return options;