diff --git a/Packages/RelayInterface/Sources/RelayInterface/Models/RelayError.swift b/Packages/RelayInterface/Sources/RelayInterface/Models/RelayError.swift index 8ccaccd..0e1303e 100644 --- a/Packages/RelayInterface/Sources/RelayInterface/Models/RelayError.swift +++ b/Packages/RelayInterface/Sources/RelayInterface/Models/RelayError.swift @@ -108,6 +108,11 @@ public enum RelayError: LocalizedError, Sendable { /// A direct message room could not be opened or created. case dmCreationFailed(String) + // MARK: Calls + + /// A call could not be started. + case callFailed(String) + // MARK: LocalizedError public var errorDescription: String? { @@ -160,6 +165,8 @@ public enum RelayError: LocalizedError, Sendable { "Could Not Update Display Name" case .dmCreationFailed: "Could Not Open Conversation" + case .callFailed: + "Call Failed" } } @@ -213,6 +220,8 @@ public enum RelayError: LocalizedError, Sendable { reason case .dmCreationFailed(let reason): reason + case .callFailed(let reason): + reason } } } diff --git a/Packages/RelayInterface/Sources/RelayInterface/Models/TimelineMessage.swift b/Packages/RelayInterface/Sources/RelayInterface/Models/TimelineMessage.swift index d8b49fc..0d4d078 100644 --- a/Packages/RelayInterface/Sources/RelayInterface/Models/TimelineMessage.swift +++ b/Packages/RelayInterface/Sources/RelayInterface/Models/TimelineMessage.swift @@ -66,6 +66,8 @@ public struct TimelineMessage: Identifiable, Sendable, Equatable { case profileChange /// A room state change (room name, topic, avatar, encryption, join rules, etc.). case stateEvent + /// A call-related event (user started, joined, or left a call). + case callEvent } /// A group of emoji reactions attached to a message, aggregated by reaction key. 
@@ -357,7 +359,7 @@ public struct TimelineMessage: Identifiable, Sendable, Equatable { nonisolated public var isSpecialType: Bool { switch kind { case .text, .emote, .notice: false - case .membership, .profileChange, .stateEvent: false + case .membership, .profileChange, .stateEvent, .callEvent: false default: true } } @@ -366,7 +368,7 @@ public struct TimelineMessage: Identifiable, Sendable, Equatable { /// rather than a user-authored message. nonisolated public var isSystemEvent: Bool { switch kind { - case .membership, .profileChange, .stateEvent: true + case .membership, .profileChange, .stateEvent, .callEvent: true default: false } } diff --git a/Packages/RelayInterface/Sources/RelayInterface/Protocols/CallViewModelProtocol.swift b/Packages/RelayInterface/Sources/RelayInterface/Protocols/CallViewModelProtocol.swift new file mode 100644 index 0000000..20c72b3 --- /dev/null +++ b/Packages/RelayInterface/Sources/RelayInterface/Protocols/CallViewModelProtocol.swift @@ -0,0 +1,121 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import SwiftUI + +/// The connection state of a call. +public enum CallState: Sendable, Equatable { + /// No active call. + case idle + /// Establishing connection to the call server. + case connecting + /// Successfully connected; media is flowing. + case connected + /// The call ended cleanly. + case disconnected + /// The call failed with an error message. 
+ case failed(String) +} + +/// A snapshot of a single call participant. +public struct CallParticipant: Identifiable, Sendable, Equatable { + /// The participant's identity string (typically their Matrix user ID). + public let id: String + /// The participant's display name, if available. + public let displayName: String? + /// Whether the participant has their camera enabled. + public let isCameraEnabled: Bool + /// Whether the participant has their microphone enabled. + public let isMicrophoneEnabled: Bool + /// Whether the participant is currently speaking. + public let isSpeaking: Bool + + public init( + id: String, + displayName: String?, + isCameraEnabled: Bool, + isMicrophoneEnabled: Bool, + isSpeaking: Bool + ) { + self.id = id + self.displayName = displayName + self.isCameraEnabled = isCameraEnabled + self.isMicrophoneEnabled = isMicrophoneEnabled + self.isSpeaking = isSpeaking + } +} + +/// The view model protocol for a LiveKit-backed audio/video call in a Matrix room. +/// +/// ``CallViewModelProtocol`` defines the observable state and actions needed by ``CallView`` +/// to render the call UI, control local media, and display remote participants. Concrete +/// implementations include ``CallViewModel`` (backed by the LiveKit Swift SDK) and +/// ``PreviewCallViewModel`` (for SwiftUI previews). +/// +/// Video rendering is intentionally opaque: callers request a type-erased `AnyView` via +/// ``makeVideoView(for:)`` to avoid exposing LiveKit types outside of RelayKit. +@MainActor +public protocol CallViewModelProtocol: AnyObject, Observable { + /// The current connection state of the call. + var state: CallState { get } + + /// All remote participants currently in the call. + var participants: [CallParticipant] { get } + + /// Whether the local user's camera is active. + var isLocalCameraEnabled: Bool { get } + + /// Whether the local user's microphone is active.
+ var isLocalMicrophoneEnabled: Bool { get } + + /// The identity of the local participant, set after connection. + var localParticipantID: String? { get } + + /// A monotonically increasing counter that is bumped whenever video tracks change + /// (publish, unpublish, camera toggle, etc.). SwiftUI views should read this value + /// to ensure ``NSViewRepresentable`` bridges receive `updateNSView` calls when the + /// underlying video track becomes available. + var videoTrackRevision: UInt { get } + + /// Connects to the call using the provided LiveKit server URL, JWT token, and SFU service URL. + /// + /// - Parameters: + /// - url: The WebSocket URL of the LiveKit server (e.g. `"wss://livekit.example.com"`). + /// - token: A signed JWT granting access to the room. + func connect(url: String, token: String, sfuServiceURL: String) async throws + + /// Disconnects from the call and cleans up media resources. + func disconnect() async + + /// Toggles the local camera on or off. + func toggleCamera() async throws + + /// Toggles the local microphone on or off. + func toggleMicrophone() async throws + + /// Returns a SwiftUI view that renders the video track of the given participant, + /// or `nil` if the participant has no active video track or is not found. + /// + /// - Parameter participantID: The ``CallParticipant/id`` of the participant to render. + func makeVideoView(for participantID: String) -> AnyView? + + /// Returns the aspect ratio (width / height) of the participant's currently + /// publishing video track, or `nil` if no track is available or its + /// dimensions haven't been negotiated yet. Tile-based UIs use this to + /// avoid stretching video — each tile can size itself to the source aspect. + /// + /// - Parameter participantID: The ``CallParticipant/id`` of the participant. + func videoAspectRatio(for participantID: String) -> CGFloat?
+} diff --git a/Packages/RelayInterface/Sources/RelayInterface/Protocols/MatrixServiceProtocol.swift b/Packages/RelayInterface/Sources/RelayInterface/Protocols/MatrixServiceProtocol.swift index 0d65e72..9c44257 100644 --- a/Packages/RelayInterface/Sources/RelayInterface/Protocols/MatrixServiceProtocol.swift +++ b/Packages/RelayInterface/Sources/RelayInterface/Protocols/MatrixServiceProtocol.swift @@ -480,6 +480,25 @@ public protocol MatrixServiceProtocol: AnyObject, Observable { /// verification controller is not available. func makeSessionVerificationViewModel() async throws -> (any SessionVerificationViewModelProtocol)? + /// Creates a view model for joining or managing a LiveKit audio/video call in a Matrix room. + /// + /// - Parameter roomId: The Matrix room identifier for the call. + /// - Returns: A ``CallViewModelProtocol`` instance ready to be connected with a LiveKit + /// URL and token, or `nil` if calling is not supported. + func makeCallViewModel(roomId: String) async -> (any CallViewModelProtocol)? + + /// Fetches LiveKit credentials for a Matrix room using the MatrixRTC flow (MSC4143). + /// + /// Discovers the SFU URL from the homeserver, obtains an OpenID token, and + /// exchanges it with the SFU's JWT service. The returned values can be + /// passed directly to ``CallViewModelProtocol/connect(url:token:sfuServiceURL:)``. + /// + /// - Parameter roomId: The Matrix room identifier. + /// - Returns: A tuple of `(livekitURL, token, sfuServiceURL)` where `livekitURL` is the LiveKit + /// WebSocket URL, `token` is the JWT access token, and `sfuServiceURL` is the SFU service URL. + /// - Throws: If the homeserver doesn't support MatrixRTC or credential exchange fails. + func callCredentials(for roomId: String) async throws -> (livekitURL: String, token: String, sfuServiceURL: String) + // MARK: Notification Settings (synced via push rules) /// Returns the default notification mode for rooms of the given type.
@@ -749,6 +768,8 @@ public extension EnvironmentValues { } } +private struct PlaceholderError: Error {} + @Observable private final class PlaceholderMatrixService: MatrixServiceProtocol { let activityLog: any ActivityLogProtocol = PlaceholderActivityLog() @@ -809,6 +830,10 @@ private final class PlaceholderMatrixService: MatrixServiceProtocol { func isCurrentSessionVerified() async -> Bool { false } func encryptionState() async -> EncryptionStatus { EncryptionStatus() } func makeSessionVerificationViewModel() async throws -> (any SessionVerificationViewModelProtocol)? { nil } + func makeCallViewModel(roomId: String) async -> (any CallViewModelProtocol)? { nil } + func callCredentials(for roomId: String) async throws -> (livekitURL: String, token: String, sfuServiceURL: String) { + throw PlaceholderError() + } func getDefaultNotificationMode( isOneToOne: Bool ) async throws -> DefaultNotificationMode { .mentionsAndKeywordsOnly } diff --git a/Relay.xcodeproj/project.pbxproj b/Relay.xcodeproj/project.pbxproj index a941a3a..2727f8d 100644 --- a/Relay.xcodeproj/project.pbxproj +++ b/Relay.xcodeproj/project.pbxproj @@ -12,6 +12,8 @@ 3BRI00032FC10003001F0EA1 /* RelayInterface in Frameworks */ = {isa = PBXBuildFile; productRef = 3BRI00042FC10004001F0EA1 /* RelayInterface */; }; 3BRK00012FC00001001F0EA1 /* RelayKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BRK00002FC00000001F0EA1 /* RelayKit.framework */; }; 3BRK00022FC00002001F0EA1 /* RelayKit.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 3BRK00002FC00000001F0EA1 /* RelayKit.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; + DF41154C2F82F7A30028241B /* LiveKit in Frameworks */ = {isa = PBXBuildFile; productRef = 3BLK00022FD10002001F0EA1 /* LiveKit */; }; + DFDAFCB22FA2C56400A27353 /* LiveKit in Frameworks */ = {isa = PBXBuildFile; productRef = DFDAFCB12FA2C56400A27353 /* LiveKit */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy 
section */ @@ -96,6 +98,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + DFDAFCB22FA2C56400A27353 /* LiveKit in Frameworks */, 3BRK00012FC00001001F0EA1 /* RelayKit.framework in Frameworks */, 3BRI00032FC10003001F0EA1 /* RelayInterface in Frameworks */, ); @@ -105,6 +108,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + DF41154C2F82F7A30028241B /* LiveKit in Frameworks */, 3BMRST012FB10001001F0EA1 /* MatrixRustSDK in Frameworks */, 3BRI00012FC10001001F0EA1 /* RelayInterface in Frameworks */, ); @@ -121,6 +125,7 @@ 3B4AFD8B2F638A35001F0EA1 /* Relay */, 3BRK00112FC00011001F0EA1 /* RelayKit */, 3B11E0892F9A3F600051F7B3 /* RelayTests */, + DFDAFCAE2FA2C4F700A27353 /* Frameworks */, 3B4AFD8A2F638A35001F0EA1 /* Products */, ); sourceTree = ""; @@ -135,6 +140,13 @@ name = Products; sourceTree = ""; }; + DFDAFCAE2FA2C4F700A27353 /* Frameworks */ = { + isa = PBXGroup; + children = ( + ); + name = Frameworks; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ -192,6 +204,7 @@ name = Relay; packageProductDependencies = ( 3BRI00042FC10004001F0EA1 /* RelayInterface */, + DFDAFCB12FA2C56400A27353 /* LiveKit */, ); productName = Relay; productReference = 3B4AFD892F638A35001F0EA1 /* Relay.app */; @@ -217,6 +230,7 @@ packageProductDependencies = ( 3BMRST022FB10002001F0EA1 /* MatrixRustSDK */, 3BRI00022FC10002001F0EA1 /* RelayInterface */, + 3BLK00022FD10002001F0EA1 /* LiveKit */, ); productName = RelayKit; productReference = 3BRK00002FC00000001F0EA1 /* RelayKit.framework */; @@ -256,6 +270,7 @@ packageReferences = ( 3BRI00052FC10005001F0EA1 /* XCLocalSwiftPackageReference "Packages/RelayInterface" */, 3BMRST032FB10003001F0EA1 /* XCRemoteSwiftPackageReference "matrix-rust-components-swift" */, + 3BLK00032FD10003001F0EA1 /* XCRemoteSwiftPackageReference "client-sdk-swift" */, ); preferredProjectObjectVersion = 77; productRefGroup = 3B4AFD8A2F638A35001F0EA1 /* Products */; @@ -528,10 
+543,13 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIconDev; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColorDev; CODE_SIGN_ENTITLEMENTS = Relay/Relay.entitlements; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 13; DEAD_CODE_STRIPPING = YES; + DEVELOPMENT_TEAM = ""; ENABLE_APP_SANDBOX = YES; ENABLE_HARDENED_RUNTIME = YES; ENABLE_INCOMING_NETWORK_CONNECTIONS = YES; @@ -558,6 +576,7 @@ MARKETING_VERSION = 0.4.4; PRODUCT_BUNDLE_IDENTIFIER = app.subpop.Relay; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; REGISTER_APP_GROUPS = YES; STRING_CATALOG_GENERATE_SYMBOLS = YES; SWIFT_APPROACHABLE_CONCURRENCY = YES; @@ -574,10 +593,13 @@ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; CODE_SIGN_ENTITLEMENTS = Relay/Relay.entitlements; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 13; DEAD_CODE_STRIPPING = YES; + DEVELOPMENT_TEAM = ""; ENABLE_APP_SANDBOX = YES; ENABLE_HARDENED_RUNTIME = YES; ENABLE_INCOMING_NETWORK_CONNECTIONS = YES; @@ -604,6 +626,7 @@ MARKETING_VERSION = 0.4.4; PRODUCT_BUNDLE_IDENTIFIER = app.subpop.Relay; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; REGISTER_APP_GROUPS = YES; STRING_CATALOG_GENERATE_SYMBOLS = YES; SWIFT_APPROACHABLE_CONCURRENCY = YES; @@ -730,6 +753,14 @@ /* End XCLocalSwiftPackageReference section */ /* Begin XCRemoteSwiftPackageReference section */ + 3BLK00032FD10003001F0EA1 /* XCRemoteSwiftPackageReference "client-sdk-swift" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/livekit/client-sdk-swift"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 2.0.0; + }; + }; 3BMRST032FB10003001F0EA1 /* XCRemoteSwiftPackageReference 
"matrix-rust-components-swift" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/matrix-org/matrix-rust-components-swift"; @@ -741,6 +772,11 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ + 3BLK00022FD10002001F0EA1 /* LiveKit */ = { + isa = XCSwiftPackageProductDependency; + package = 3BLK00032FD10003001F0EA1 /* XCRemoteSwiftPackageReference "client-sdk-swift" */; + productName = LiveKit; + }; 3BMRST022FB10002001F0EA1 /* MatrixRustSDK */ = { isa = XCSwiftPackageProductDependency; package = 3BMRST032FB10003001F0EA1 /* XCRemoteSwiftPackageReference "matrix-rust-components-swift" */; @@ -754,6 +790,11 @@ isa = XCSwiftPackageProductDependency; productName = RelayInterface; }; + DFDAFCB12FA2C56400A27353 /* LiveKit */ = { + isa = XCSwiftPackageProductDependency; + package = 3BLK00032FD10003001F0EA1 /* XCRemoteSwiftPackageReference "client-sdk-swift" */; + productName = LiveKit; + }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 3B4AFD812F638A35001F0EA1 /* Project object */; diff --git a/Relay.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Relay.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index eecca9c..f2b2069 100644 --- a/Relay.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Relay.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,6 +1,24 @@ { - "originHash" : "6c542309bc3e806f061c41054f96023ef09c12c16fd5c288ab6028ddfe261e92", + "originHash" : "beed1a9e4d2e17ac1e82e835ac2cc90809f53ecb8556bc36ab04903d7e3cf291", "pins" : [ + { + "identity" : "client-sdk-swift", + "kind" : "remoteSourceControl", + "location" : "https://github.com/livekit/client-sdk-swift", + "state" : { + "revision" : "4e930e856e3b076c2aacce98c77cc81fd2db498b", + "version" : "2.13.0" + } + }, + { + "identity" : "livekit-uniffi-xcframework", + "kind" : "remoteSourceControl", + "location" : 
"https://github.com/livekit/livekit-uniffi-xcframework.git", + "state" : { + "revision" : "61229f4032131311b997ddb1bc1cb8f5afbe30c8", + "version" : "0.0.5" + } + }, { "identity" : "matrix-rust-components-swift", "kind" : "remoteSourceControl", @@ -9,6 +27,24 @@ "revision" : "2916f3f9cc2aea86ba3a820cb6a8389e13e0284a", "version" : "26.4.1" } + }, + { + "identity" : "swift-protobuf", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-protobuf.git", + "state" : { + "revision" : "a008af1a102ff3dd6cc3764bb69bf63226d0f5f6", + "version" : "1.36.1" + } + }, + { + "identity" : "webrtc-xcframework", + "kind" : "remoteSourceControl", + "location" : "https://github.com/livekit/webrtc-xcframework.git", + "state" : { + "revision" : "e2a0ab3be155475ad60f845813f2088847e584f7", + "version" : "144.7559.3" + } } ], "version" : 3 diff --git a/Relay/Info.plist b/Relay/Info.plist index f50b658..37386eb 100644 --- a/Relay/Info.plist +++ b/Relay/Info.plist @@ -17,6 +17,10 @@ + NSCameraUsageDescription + Relay uses the camera for video calls in Matrix rooms. + NSMicrophoneUsageDescription + Relay uses the microphone for audio and video calls in Matrix rooms. CFBundleURLTypes diff --git a/Relay/Relay.entitlements b/Relay/Relay.entitlements index ee95ab7..de227ff 100644 --- a/Relay/Relay.entitlements +++ b/Relay/Relay.entitlements @@ -6,5 +6,11 @@ com.apple.security.network.client + com.apple.security.network.server + + com.apple.security.device.camera + + com.apple.security.device.microphone + diff --git a/Relay/RelayApp.swift b/Relay/RelayApp.swift index 1f439e7..989c55a 100644 --- a/Relay/RelayApp.swift +++ b/Relay/RelayApp.swift @@ -29,6 +29,7 @@ private let logger = Logger(subsystem: "Relay", category: "DeepLink") struct RelayApp: App { @State private var matrixService = MatrixService() @State private var gifSearchService = GiphyService(apiKey: Secrets.giphyAPIKey ?? 
"") + @State private var callManager = CallManager() @State private var notificationDelegate = NotificationDelegate() @State private var appActions = AppActions() @State private var composeDraftStore = ComposeDraftStore() @@ -43,6 +44,7 @@ struct RelayApp: App { ContentView() .environment(\.matrixService, matrixService) .environment(\.gifSearchService, gifSearchService) + .environment(\.callManager, callManager) .environment(\.errorReporter, matrixService.errorReporter) .environment(\.composeDraftStore, composeDraftStore) .environment(appActions) @@ -116,6 +118,17 @@ struct RelayApp: App { } .defaultSize(width: 900, height: 600) .keyboardShortcut("a", modifiers: [.option, .command]) + + Window("Call", id: "call") { + CallWindowView() + .environment(\.matrixService, matrixService) + .environment(\.callManager, callManager) + } + .windowStyle(.hiddenTitleBar) + .windowResizability(.contentMinSize) + .defaultSize(width: 360, height: 540) + .defaultPosition(.topTrailing) + .defaultLaunchBehavior(.suppressed) } // MARK: - Notifications diff --git a/Relay/Services/PreviewMatrixService.swift b/Relay/Services/PreviewMatrixService.swift index 632fb11..d3b8500 100644 --- a/Relay/Services/PreviewMatrixService.swift +++ b/Relay/Services/PreviewMatrixService.swift @@ -224,6 +224,16 @@ final class PreviewMatrixService: MatrixServiceProtocol { PreviewSessionVerificationViewModel() } + func makeCallViewModel(roomId: String) async -> (any CallViewModelProtocol)? { + PreviewCallViewModel() + } + + func callCredentials(for roomId: String) async throws -> (livekitURL: String, token: String, sfuServiceURL: String) { + // Simulate a brief credential fetch; previews never actually connect. + try? 
await Task.sleep(for: .milliseconds(500)) + return (livekitURL: "wss://preview.livekit.example.com", token: "preview-jwt-token", sfuServiceURL: "https://preview.livekit.example.com") + } + func declinePendingVerificationRequest() async { pendingVerificationRequest = nil } diff --git a/Relay/Utilities/MatrixLink.swift b/Relay/Utilities/MatrixLink.swift new file mode 100644 index 0000000..424c419 --- /dev/null +++ b/Relay/Utilities/MatrixLink.swift @@ -0,0 +1,86 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// A parsed Matrix deep link, derived from either a `https://matrix.to` URL or a `matrix:` URI. +/// +/// **matrix.to format:** +/// - User: `https://matrix.to/#/@user:server` +/// - Room: `https://matrix.to/#/#room:server` or `https://matrix.to/#/!roomId:server` +/// +/// **matrix: URI format (MSC2312):** +/// - User: `matrix:u/user:server` +/// - Room: `matrix:r/room:server` or `matrix:roomid/roomId:server` +enum MatrixLink { + /// A Matrix user ID (e.g. `@alice:matrix.org`). + case user(String) + /// A room alias or room ID (e.g. `#general:matrix.org` or `!abc123:matrix.org`). + case room(String) + + /// Parses a URL into a ``MatrixLink``, returning `nil` if the URL is not a recognised Matrix link. 
+ init?(url: URL) { + if url.host?.lowercased() == "matrix.to" { + guard let link = Self(matrixToURL: url) else { return nil } + self = link + } else if url.scheme?.lowercased() == "matrix" { + guard let link = Self(matrixURI: url) else { return nil } + self = link + } else { + return nil + } + } + + // MARK: - Private parsers + + private init?(matrixToURL url: URL) { + // Fragment is everything after `#`, e.g. `/@alice:matrix.org` or `/#general:matrix.org` + guard let fragment = url.fragment, fragment.hasPrefix("/") else { return nil } + // The fragment may contain additional path components (e.g. an event ID after a second `/`). + // Extract only the first component as the entity identifier. + guard let entity = String(fragment.dropFirst()) + .components(separatedBy: "/").first? + .removingPercentEncoding else { return nil } + + if entity.hasPrefix("@") { + self = .user(entity) + } else if entity.hasPrefix("#") || entity.hasPrefix("!") { + self = .room(entity) + } else { + return nil + } + } + + private init?(matrixURI url: URL) { + // matrix: URIs encode the entity type and identifier in the path: + // `u/user:server`, `r/room:server`, `roomid/roomId:server` (without sigils) + let path = url.path + let parts = path.components(separatedBy: "/") + guard parts.count >= 2 else { return nil } + let type = parts[0] + let identifier = parts[1] + guard !identifier.isEmpty else { return nil } + + switch type { + case "u": + self = .user("@\(identifier)") + case "r": + self = .room("#\(identifier)") + case "roomid": + self = .room("!\(identifier)") + default: + return nil + } + } +} diff --git a/Relay/ViewModels/PreviewCallViewModel.swift b/Relay/ViewModels/PreviewCallViewModel.swift new file mode 100644 index 0000000..cd2b277 --- /dev/null +++ b/Relay/ViewModels/PreviewCallViewModel.swift @@ -0,0 +1,77 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import RelayInterface +import SwiftUI + +/// A mock ``CallViewModelProtocol`` for SwiftUI previews. +/// +/// Simulates a connected call with sample participants after a short delay. +/// All methods are safe to call from the main actor, and no real media or +/// network connections are established. +@Observable +@MainActor +final class PreviewCallViewModel: CallViewModelProtocol { + var state: CallState = .idle + var participants: [CallParticipant] = [] + var isLocalCameraEnabled: Bool = false + var isLocalMicrophoneEnabled: Bool = false + var localParticipantID: String? = nil + var videoTrackRevision: UInt = 0 + + func connect(url: String, token: String, sfuServiceURL: String) async throws { + state = .connecting + try? 
await Task.sleep(for: .milliseconds(800)) + isLocalCameraEnabled = true + isLocalMicrophoneEnabled = true + localParticipantID = "@preview:matrix.org" + participants = [ + CallParticipant( + id: "@alice:matrix.org", + displayName: "Alice Smith", + isCameraEnabled: true, + isMicrophoneEnabled: true, + isSpeaking: true + ), + CallParticipant( + id: "@bob:matrix.org", + displayName: "Bob Chen", + isCameraEnabled: false, + isMicrophoneEnabled: true, + isSpeaking: false + ) + ] + state = .connected + } + + func disconnect() async { + state = .disconnected + participants = [] + isLocalCameraEnabled = false + isLocalMicrophoneEnabled = false + localParticipantID = nil + } + + func toggleCamera() async throws { + isLocalCameraEnabled.toggle() + } + + func toggleMicrophone() async throws { + isLocalMicrophoneEnabled.toggle() + } + + func makeVideoView(for participantID: String) -> AnyView? { nil } + func videoAspectRatio(for participantID: String) -> CGFloat? { 16.0 / 9.0 } +} diff --git a/Relay/Views/CallView.swift b/Relay/Views/CallView.swift new file mode 100644 index 0000000..f9cddc6 --- /dev/null +++ b/Relay/Views/CallView.swift @@ -0,0 +1,671 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import RelayInterface +import SwiftUI + +/// Renders a LiveKit audio/video call with a FaceTime-inspired design. +/// +/// The call opens in its own window with a hidden title bar (`.windowStyle(.hiddenTitleBar)`).
+/// When connected, the remote participant's video fills the window with +/// a small self-view PiP overlay and a translucent floating control bar. +struct CallView: View { + // `let` — not `@State`. The view model is a reference-typed + // `@Observable` class owned by `CallManager`; wrapping it in `@State` + // caused SwiftUI's `StoredLocationBase` to reinitialise the storage on + // every parent re-render of `CallWindowView`, which surfaces as + // recursive `StoredLocationBase.beginUpdate` calls during the layout + // transaction and eventually the "more Update Constraints in Window + // passes than there are views" fault. + let viewModel: any CallViewModelProtocol + var isPreparingCredentials: Bool = false + var onDismiss: () -> Void + + @State private var serverURL: String = "" + @State private var accessToken: String = "" + @State private var isJoining: Bool = false + // NOTE: The earlier implementation auto-hid the control bar after a + // timeout using a `controlsVisible` @State + `.animation(.easeInOut(..), + // value: controlsVisible)` on the control bar's opacity, plus a + // `.onHover` toggle. That produced the "more Update Constraints in + // Window passes than there are views" crash: the implicit animation + // pushed the AppKit `NSAnimationContext.runAnimationGroup` path which + // invalidated `StoredLocationBase` during an already-running layout + // pass on the hosting window. The control bar is now always visible. + + var body: some View { + ZStack { + Color.black.ignoresSafeArea() + + switch viewModel.state { + case .idle: + if isPreparingCredentials { + preparingView + } else { + joinForm + } + + case .connecting: + connectingView + + case .connected: + connectedView + + case .disconnected: + // Clean ending — close the window immediately. No overlay, + // no "Dismiss" button. Background cleanup (leave event, + // LiveKit teardown) continues in disconnect()'s task. 
+ Color.clear + .task { onDismiss() } + + case .failed(let message): + // Errors still show the overlay so the user can read what + // went wrong before dismissing. + endedOverlay( + title: "Call Failed", + systemImage: "exclamationmark.triangle.fill", + isError: true, + detail: message + ) + } + } + .frame(minWidth: 320, minHeight: 480) + .clipShape(RoundedRectangle(cornerRadius: 16, style: .continuous)) + } + + // MARK: - Connected View (FaceTime-style) + + @ViewBuilder + private var connectedView: some View { + ZStack { + // Background gradient gives tiles something nicer to float on + // than pure black; keeps the FaceTime-on-Mac feel. + LinearGradient( + colors: [Color(white: 0.08), Color(white: 0.02)], + startPoint: .top, + endPoint: .bottom + ) + .ignoresSafeArea() + + // 1 remote → primary video fills. + // 2+ remotes → polished tile grid of remotes only. + if viewModel.participants.count >= 2 { + remoteTilesGrid + .padding(.horizontal, 12) + .padding(.top, 12) + .padding(.bottom, 96) // leave room for control bar + PiP + } else { + primaryVideo + .ignoresSafeArea() + + // Participant name at top (1:1 only — tiles label themselves) + VStack { + participantNameBar + Spacer() + } + } + + // Self-view PiP — always present, always bottom-right. + if let localID = viewModel.localParticipantID { + VStack { + Spacer() + HStack { + Spacer() + selfViewPiP(id: localID) + } + } + .padding(12) + .padding(.bottom, 72) + } + + // Floating control bar at bottom (always visible). + VStack { + Spacer() + controlBar + } + .padding(.bottom, 16) + } + // Disable ALL implicit animations within the connected view subtree. 
+ // + // Removing the `.animation(...)` modifier on `controlBar` was not + // enough to stop the "more Update Constraints in Window passes than + // there are views" crash — SwiftUI still wraps structural changes + // (`if let firstRemote = ...`, `if let localID = ...`, + // `if viewModel.isLocalCameraEnabled`, `if first.isSpeaking`) in + // implicit transition animations during the connect sequence. Each + // of those animations runs through `NSAnimationContext.runAnimationGroup` + // inside `NSHostingView.layout`, writing back into the SwiftUI graph + // and queueing another constraint pass on the same frame — eventually + // exceeding the view-count budget and tripping the AppKit fault. + // + // `.transaction { $0.animation = nil }` strips the animation off + // every transaction propagated through this subtree, so structural + // changes happen instantly with no animator running during layout. + .transaction { $0.animation = nil } + } + + // MARK: - Primary Video + + @ViewBuilder + private var primaryVideo: some View { + if let firstRemote = viewModel.participants.first { + VideoRendererView(viewModel: viewModel, participantID: firstRemote.id) { + participantPlaceholder(firstRemote) + } + .id(firstRemote.id) + } else { + // No remote participants yet — waiting + VStack(spacing: 12) { + ProgressView() + .controlSize(.large) + .tint(.white) + Text("Waiting for others to join…") + .font(.headline) + .foregroundStyle(.white.opacity(0.7)) + } + } + } + + // MARK: - Remote Tiles Grid (2+ remotes) + + /// Polished grid of every remote participant. The local view always + /// stays in the PiP overlay; remotes tile across the main area. + @ViewBuilder + private var remoteTilesGrid: some View { + GeometryReader { geo in + let remotes = viewModel.participants + let layout = Self.gridLayout(count: remotes.count, in: geo.size) + VStack(spacing: 8) { + ForEach(0.. 
(rows: Int, cols: Int) {
+        guard count > 0 else { return (1, 1) }
+        let isLandscape = size.width >= size.height
+        switch count {
+        case 1: return (1, 1)
+        case 2: return isLandscape ? (1, 2) : (2, 1)
+        case 3, 4: return (2, 2)
+        case 5, 6: return isLandscape ? (2, 3) : (3, 2)
+        case 7, 8, 9: return (3, 3)
+        default:
+            let cols = Int(ceil(Double(count).squareRoot()))
+            let rows = Int(ceil(Double(count) / Double(cols)))
+            return (rows, cols)
+        }
+    }
+
+    // MARK: - Self-View PiP
+
+    /// Fixed-size (120×90) rounded thumbnail for the local participant.
+    ///
+    /// Shows the live camera feed through `VideoRendererView` while
+    /// `viewModel.isLocalCameraEnabled` is true, and a silhouette icon
+    /// otherwise. The same icon is handed to the renderer as its placeholder.
+    ///
+    /// - Parameter id: Participant identifier passed to the video renderer and
+    ///   used as the renderer's SwiftUI identity.
+    @ViewBuilder
+    private func selfViewPiP(id: String) -> some View {
+        ZStack {
+            RoundedRectangle(cornerRadius: 10, style: .continuous)
+                .fill(Color(nsColor: .darkGray))
+
+            if viewModel.isLocalCameraEnabled {
+                VideoRendererView(viewModel: viewModel, participantID: id) {
+                    selfViewPlaceholderIcon
+                }
+                .id(id)
+            } else {
+                selfViewPlaceholderIcon
+            }
+        }
+        .frame(width: 120, height: 90)
+        .clipShape(RoundedRectangle(cornerRadius: 10, style: .continuous))
+        .shadow(color: .black.opacity(0.4), radius: 6, y: 2)
+    }
+
+    /// Silhouette used by the self-view both when the camera is off and as the
+    /// renderer's placeholder, so the two states cannot drift apart visually.
+    private var selfViewPlaceholderIcon: some View {
+        Image(systemName: "person.fill")
+            .font(.title2)
+            .foregroundStyle(.white.opacity(0.5))
+    }
+
+    // MARK: - Participant Name Bar
+
+    /// Name capsule for the first entry of `viewModel.participants`, preceded
+    /// by a green waveform glyph while that participant is speaking. Renders
+    /// nothing when the participant list is empty.
+    ///
+    /// The label is resolved through `ParticipantTile.displayName(for:)` so
+    /// this bar and the grid tiles present the same name for a participant,
+    /// rather than this bar falling back to the raw identity string.
+    @ViewBuilder
+    private var participantNameBar: some View {
+        if let first = viewModel.participants.first {
+            HStack {
+                if first.isSpeaking {
+                    Image(systemName: "waveform")
+                        .font(.caption)
+                        .foregroundStyle(.green)
+                }
+                Text(ParticipantTile.displayName(for: first))
+                    .font(.callout.weight(.medium))
+                    .foregroundStyle(.white)
+                    .lineLimit(1)
+            }
+            .padding(.horizontal, 12)
+            .padding(.vertical, 6)
+            .background(.ultraThinMaterial.opacity(0.8), in: Capsule())
+            .padding(.top, 12)
+        }
+    }
+
+    // MARK: - Control Bar
+
+    /// Floating capsule with the microphone toggle, camera toggle and the
+    /// end-call button.
+    @ViewBuilder
+    private var controlBar: some View {
+        // Read each flag once so icon, tint and tooltip of a button are always
+        // derived from the same value.
+        let micOn = viewModel.isLocalMicrophoneEnabled
+        let cameraOn = viewModel.isLocalCameraEnabled
+
+        HStack(spacing: 20) {
+            // Microphone toggle
+            controlButton(
+                icon: micOn ? "mic.fill" : "mic.slash.fill",
+                isActive: micOn,
+                help: micOn ? "Mute" : "Unmute"
+            ) {
+                // Errors are discarded here (`try?`); the button simply keeps
+                // reflecting whatever state the view model reports.
+                Task { try? await viewModel.toggleMicrophone() }
+            }
+
+            // Camera toggle
+            controlButton(
+                icon: cameraOn ? "video.fill" : "video.slash.fill",
+                isActive: cameraOn,
+                help: cameraOn ? "Camera Off" : "Camera On"
+            ) {
+                Task { try? await viewModel.toggleCamera() }
+            }
+
+            // End call
+            Button {
+                // Disconnect — the .disconnected case in `body` calls
+                // onDismiss() immediately so the window closes.
+                Task { await viewModel.disconnect() }
+            } label: {
+                Image(systemName: "phone.down.fill")
+                    .font(.title3)
+                    .foregroundStyle(.white)
+                    .frame(width: 48, height: 48)
+                    .background(Color.red, in: Circle())
+            }
+            .buttonStyle(.plain)
+            .help("End Call")
+        }
+        .padding(.horizontal, 24)
+        .padding(.vertical, 12)
+        .background(.ultraThinMaterial, in: Capsule())
+    }
+
+    /// Builds one circular 44×44 toggle button for the control bar.
+    ///
+    /// - Parameters:
+    ///   - icon: SF Symbol name for the glyph.
+    ///   - isActive: `true` draws a translucent white disc, `false` a red one.
+    ///   - help: Tooltip text.
+    ///   - action: Invoked when the button is pressed.
+    @ViewBuilder
+    private func controlButton(icon: String, isActive: Bool, help: String, action: @escaping () -> Void) -> some View {
+        Button(action: action) {
+            Image(systemName: icon)
+                .font(.title3)
+                .foregroundStyle(.white)
+                .frame(width: 44, height: 44)
+                .background(
+                    isActive ? Color.white.opacity(0.15) : Color.red.opacity(0.8),
+                    in: Circle()
+                )
+        }
+        .buttonStyle(.plain)
+        .help(help)
+    }
+
+    // MARK: - Participant Placeholder
+
+    @ViewBuilder
+    private func participantPlaceholder(_ participant: CallParticipant) -> some View {
+        VStack(spacing: 16) {
+            Image(systemName: "person.fill")
+                .font(.system(size: 64))
+                .foregroundStyle(.white.opacity(0.3))
+            Text(participant.displayName ??
participant.id)
+                .font(.title2.weight(.medium))
+                .foregroundStyle(.white.opacity(0.6))
+        }
+    }
+
+    // MARK: - Preparing View
+
+    /// Spinner screen with the "Contacting call server…" caption. Cancel
+    /// calls `onDismiss()` directly; it does not touch the view model.
+    @ViewBuilder
+    private var preparingView: some View {
+        VStack(spacing: 16) {
+            Spacer()
+            ProgressView()
+                .controlSize(.large)
+                .tint(.white)
+            Text("Contacting call server…")
+                .font(.headline)
+                .foregroundStyle(.white.opacity(0.7))
+            Button("Cancel") { onDismiss() }
+                .buttonStyle(.bordered)
+                .foregroundStyle(.white)
+            Spacer()
+        }
+    }
+
+    // MARK: - Connecting View
+
+    /// Spinner screen with the "Joining call…" caption. Unlike
+    /// `preparingView`, Cancel asks the view model to disconnect rather than
+    /// calling `onDismiss()` itself.
+    @ViewBuilder
+    private var connectingView: some View {
+        VStack(spacing: 16) {
+            Spacer()
+            ProgressView()
+                .controlSize(.large)
+                .tint(.white)
+            Text("Joining call…")
+                .font(.headline)
+                .foregroundStyle(.white.opacity(0.7))
+            Button("Cancel") {
+                Task { await viewModel.disconnect() }
+            }
+            .buttonStyle(.bordered)
+            .foregroundStyle(.white)
+            Spacer()
+        }
+    }
+
+    // MARK: - Failed Overlay
+    //
+    // Clean endings auto-close via `.disconnected` in `body`. This overlay
+    // is only used for failures so the user sees the error before dismissing.
+
+    /// Centered icon, title, optional detail text and a "Dismiss" button.
+    ///
+    /// - Parameters:
+    ///   - title: Headline text.
+    ///   - systemImage: SF Symbol name for the icon.
+    ///   - isError: `true` tints the icon and the button red; `false` uses a
+    ///     dimmed white icon and the accent colour for the button.
+    ///   - detail: Optional secondary text; omitted from the layout when `nil`.
+    @ViewBuilder
+    private func endedOverlay(title: String, systemImage: String, isError: Bool, detail: String? = nil) -> some View {
+        VStack(spacing: 16) {
+            Spacer()
+            Image(systemName: systemImage)
+                .font(.system(size: 40))
+                .foregroundStyle(isError ? .red : .white.opacity(0.6))
+            Text(title)
+                .font(.title3.weight(.semibold))
+                .foregroundStyle(.white)
+            if let detail {
+                Text(detail)
+                    .font(.subheadline)
+                    .foregroundStyle(.white.opacity(0.6))
+                    .multilineTextAlignment(.center)
+                    .padding(.horizontal, 32)
+            }
+            Button("Dismiss") { onDismiss() }
+                .buttonStyle(.borderedProminent)
+                .tint(isError ? .red : .accentColor)
+                .padding(.top, 4)
+            Spacer()
+        }
+    }
+
+    // MARK: - Join Form (manual entry fallback)
+
+    /// Manual-entry form for a LiveKit server URL and access token.
+    ///
+    /// "Join" is enabled only when both fields contain something other than
+    /// whitespace and no join attempt is in flight. The values handed to
+    /// `viewModel.connect` are whitespace-trimmed.
+    @ViewBuilder
+    private var joinForm: some View {
+        // Pasted URLs and tokens routinely carry a trailing newline or stray
+        // spaces; trim once here so the enabled state and the values actually
+        // sent agree with each other.
+        let trimmedURL = serverURL.trimmingCharacters(in: .whitespacesAndNewlines)
+        let trimmedToken = accessToken.trimmingCharacters(in: .whitespacesAndNewlines)
+        let canJoin = !trimmedURL.isEmpty && !trimmedToken.isEmpty && !isJoining
+
+        VStack(spacing: 0) {
+            Spacer()
+
+            VStack(spacing: 20) {
+                Image(systemName: "phone.fill")
+                    .font(.system(size: 44))
+                    .foregroundStyle(.white)
+
+                Text("Join Call")
+                    .font(.title2.bold())
+                    .foregroundStyle(.white)
+
+                Text("Enter the LiveKit server URL and access token.")
+                    .font(.subheadline)
+                    .foregroundStyle(.white.opacity(0.7))
+                    .multilineTextAlignment(.center)
+
+                VStack(alignment: .leading, spacing: 8) {
+                    Text("Server URL")
+                        .font(.caption)
+                        .foregroundStyle(.white.opacity(0.7))
+                    TextField("wss://livekit.example.com", text: $serverURL)
+                        .textFieldStyle(.roundedBorder)
+                        .autocorrectionDisabled()
+
+                    Text("Access Token")
+                        .font(.caption)
+                        .foregroundStyle(.white.opacity(0.7))
+                        .padding(.top, 4)
+                    TextField("JWT token", text: $accessToken)
+                        .textFieldStyle(.roundedBorder)
+                        .autocorrectionDisabled()
+                }
+                .frame(maxWidth: 320)
+
+                HStack(spacing: 16) {
+                    Button("Cancel") { onDismiss() }
+                        .buttonStyle(.bordered)
+                        .foregroundStyle(.white)
+
+                    Button("Join") {
+                        // `isJoining` is re-read live in case a second click
+                        // lands before the view has re-rendered.
+                        guard canJoin, !isJoining else { return }
+                        // Raise the flag before the Task is scheduled so the
+                        // button is disabled from the moment of the click.
+                        isJoining = true
+                        Task {
+                            defer { isJoining = false }
+                            // NOTE(review): the thrown error is discarded; this
+                            // presumably relies on the view model moving to
+                            // its failed state on its own — confirm.
+                            try? await viewModel.connect(url: trimmedURL, token: trimmedToken, sfuServiceURL: "")
+                        }
+                    }
+                    .buttonStyle(.borderedProminent)
+                    .disabled(!canJoin)
+                }
+            }
+            .padding(40)
+
+            Spacer()
+        }
+    }
+}
+
+// MARK: - Video Renderer
+
+/// Isolates video-track observation into its own SwiftUI view so that
+/// `videoTrackRevision` changes only invalidate THIS subtree rather than
+/// the entire ``CallView`` hierarchy.
+///
+/// Previously `primaryVideo` and `selfViewPiP` both read
+/// `viewModel.videoTrackRevision` directly in their view bodies, which
+/// registered dependencies on the whole containing ZStack. Each track
+/// update (camera publish, subscribe, etc.)
then re-laid-out every +/// sibling view — and because the rendered video is an `NSViewRepresentable` +/// wrapping an AppKit `VideoView`, that triggered recursive +/// `setNeedsUpdateConstraints` calls on the hosting window, producing the +/// "more Update Constraints in Window passes than there are views" hang. +/// +/// The `.id(participantID)` modifier on each usage site gives SwiftUI a +/// stable identity key so the renderer is reused across parent re-renders +/// instead of being torn down and recreated. +private struct VideoRendererView: View { + let viewModel: any CallViewModelProtocol + let participantID: String + @ViewBuilder let placeholder: () -> Placeholder + + var body: some View { + // Reading videoTrackRevision here registers observation *only* on + // this subtree — it is not read in any enclosing view. + let _ = viewModel.videoTrackRevision + if let videoView = viewModel.makeVideoView(for: participantID) { + videoView + } else { + placeholder() + } + } +} + +// MARK: - Participant Tile + +/// A single tile in the remote-participants grid. Video (cropped to fill) +/// inside a rounded rect with a soft shadow, a name pill bottom-left, and +/// a faint outer glow when the participant is speaking. Mirrors the +/// FaceTime-on-Mac aesthetic: clean cards, no hard borders. +private struct ParticipantTile: View { + let viewModel: any CallViewModelProtocol + let participant: CallParticipant + + private static let cornerRadius: CGFloat = 14 + /// Aspect used for camera-off tiles or before the first frame arrives. + private static let placeholderAspect: CGFloat = 16.0 / 9.0 + + var body: some View { + let shape = RoundedRectangle(cornerRadius: Self.cornerRadius, style: .continuous) + // Re-evaluate when video tracks change so we pick up the real + // dimensions after the first frame (RoomDelegate bumps + // videoTrackRevision on streamState transitions). 
+ let _ = viewModel.videoTrackRevision + let aspect: CGFloat = { + if participant.isCameraEnabled, + let live = viewModel.videoAspectRatio(for: participant.id) { + return live + } + return Self.placeholderAspect + }() + + ZStack(alignment: .bottomLeading) { + // Card background — neutral so video looks at home. + shape.fill( + LinearGradient( + colors: [Color(white: 0.18), Color(white: 0.10)], + startPoint: .top, + endPoint: .bottom + ) + ) + + if participant.isCameraEnabled { + VideoRendererView(viewModel: viewModel, participantID: participant.id) { + placeholder + } + .clipShape(shape) + } else { + placeholder + } + + nameLabel + .padding(10) + } + // Tile sizes itself to the source video aspect, centered in the + // grid cell. Surrounding cell area is transparent so the + // background gradient shows through (no harsh letterbox). + // Modifier order matters: shadow + overlay must apply to the + // aspect-fitted shape, then the outer frame centers it in the cell. + .aspectRatio(aspect, contentMode: .fit) + .shadow(color: .black.opacity(0.35), radius: 8, y: 2) + .overlay(speakingGlow.allowsHitTesting(false)) + .frame(maxWidth: .infinity, maxHeight: .infinity) + } + + // MARK: Subviews + + @ViewBuilder + private var placeholder: some View { + VStack(spacing: 8) { + Image(systemName: "person.fill") + .font(.system(size: 44)) + .foregroundStyle(.white.opacity(0.35)) + Text(Self.displayName(for: participant)) + .font(.callout.weight(.medium)) + .foregroundStyle(.white.opacity(0.6)) + .lineLimit(1) + .padding(.horizontal, 12) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } + + @ViewBuilder + private var nameLabel: some View { + // Always show mic state next to the name. Filled mic icon when on, + // slashed (red-tinted) when muted — mirrors FaceTime / Zoom badges. + // Solid dark capsule for guaranteed contrast over any video frame — + // .ultraThinMaterial blends into bright frames and the name vanishes. 
+ HStack(spacing: 6) { + Image(systemName: participant.isMicrophoneEnabled ? "mic.fill" : "mic.slash.fill") + .font(.caption.weight(.semibold)) + .foregroundStyle(participant.isMicrophoneEnabled ? .white : .red) + Text(Self.displayName(for: participant)) + .font(.caption.weight(.semibold)) + .foregroundStyle(.white) + .lineLimit(1) + .truncationMode(.tail) + } + .padding(.horizontal, 10) + .padding(.vertical, 5) + .background(Color.black.opacity(0.55), in: Capsule()) + .overlay( + Capsule().strokeBorder(Color.white.opacity(0.12), lineWidth: 0.5) + ) + .shadow(color: .black.opacity(0.4), radius: 3, y: 1) + } + + /// Pulls a friendly name out of the participant: `displayName` if the + /// SFU/JWT supplied one, otherwise the localpart of the Matrix user ID + /// (`@andrew:matrix.example.com:DEVICE` → `andrew`). Falls back to the + /// raw id if neither pattern matches. + static func displayName(for p: CallParticipant) -> String { + if let dn = p.displayName, !dn.isEmpty { return dn } + let id = p.id + // LiveKit identity layout used by Element Call: + // `@::` — strip server + device. + if id.hasPrefix("@") { + let body = id.dropFirst() + if let colon = body.firstIndex(of: ":") { + let localpart = body[.. NSView { StylerView() } + func updateNSView(_ nsView: NSView, context: Context) {} + + private class StylerView: NSView { + override func viewDidMoveToWindow() { + super.viewDidMoveToWindow() + guard let window else { return } + window.titlebarAppearsTransparent = true + window.isMovableByWindowBackground = true + // Hide the traffic light buttons. 
+ window.standardWindowButton(.closeButton)?.isHidden = true + window.standardWindowButton(.miniaturizeButton)?.isHidden = true + window.standardWindowButton(.zoomButton)?.isHidden = true + } + } +} diff --git a/Relay/Views/MainView.swift b/Relay/Views/MainView.swift index b42bd84..4cd3271 100644 --- a/Relay/Views/MainView.swift +++ b/Relay/Views/MainView.swift @@ -31,6 +31,8 @@ struct MainView: View { // swiftlint:disable:this type_body_length @Environment(\.matrixService) private var matrixService @Environment(\.errorReporter) private var errorReporter @Environment(AppActions.self) private var appActions + @Environment(\.callManager) private var callManager + @Environment(\.openWindow) private var openWindow @AppStorage("selectedRoomId") private var selectedRoomId: String? @State private var selectedSpaceId: String? @State private var leaveSpaceItem: LeaveSpaceItem? @@ -46,6 +48,7 @@ struct MainView: View { // swiftlint:disable:this type_body_length @State private var isJoiningLinkedRoom = false @State private var inspectorSelectedProfile: UserProfile? @State private var inspectorInitialTab: InspectorTab? 
+ @State private var isPreparingCall = false private func scrollToMessage(_ eventId: String) { showingPinnedMessages = false @@ -297,6 +300,11 @@ struct MainView: View { // swiftlint:disable:this type_body_length } if !appActions.showRoomDirectory && previewingInvite == nil { + if let selectedRoomId, currentRoom != nil { + ToolbarItem(placement: .primaryAction) { + startCallButton(roomId: selectedRoomId) + } + } ToolbarItem(placement: .primaryAction) { showInspectorButton } @@ -304,6 +312,16 @@ struct MainView: View { // swiftlint:disable:this type_body_length } + private func startCallButton(roomId: String) -> some View { + Button { + startCall(roomId: roomId) + } label: { + Label("Start Call", systemImage: "phone.fill") + } + .help("Start Call") + .disabled(callManager.hasActiveCall) + } + private var toolbarTitleCapsule: some View { HStack(spacing: 0) { if let currentRoom { @@ -392,6 +410,41 @@ struct MainView: View { // swiftlint:disable:this type_body_length .disabled(selectedRoomId == nil && selectedSpaceId == nil) } + // MARK: - Call Handling + + private func startCall(roomId: String) { + guard !callManager.hasActiveCall else { return } + callManager.isPreparingCredentials = true + callManager.callRoomId = roomId + + Task { + guard let viewModel = await matrixService.makeCallViewModel(roomId: roomId) else { + callManager.isPreparingCredentials = false + callManager.callRoomId = nil + return + } + + // Defer the observable state change + window open to the next + // run-loop iteration. Setting activeCallViewModel invalidates + // the CallWindowView body across window boundaries; if that + // fires during an active layout pass the recursive constraint + // update crash occurs. 
+ let openWindowAction = openWindow + DispatchQueue.main.async { + callManager.activeCallViewModel = viewModel + openWindowAction(id: "call") + } + + do { + let creds = try await matrixService.callCredentials(for: roomId) + try await viewModel.connect(url: creds.livekitURL, token: creds.token, sfuServiceURL: creds.sfuServiceURL) + } catch { + errorReporter.report(.callFailed(error.localizedDescription)) + } + callManager.isPreparingCredentials = false + } + } + // MARK: - Deep Link Handling /// Handles an incoming ``MatrixURI`` deep link by navigating to the referenced entity. diff --git a/Relay/Views/Message/SystemEventView.swift b/Relay/Views/Message/SystemEventView.swift index 15427da..d67b14e 100644 --- a/Relay/Views/Message/SystemEventView.swift +++ b/Relay/Views/Message/SystemEventView.swift @@ -42,6 +42,8 @@ struct SystemEventView: View { "person.2" case .profileChange: "person.text.rectangle" + case .callEvent: + "phone.fill" case .stateEvent: "gearshape" default: diff --git a/RelayKit/Call/CallEncryptionService.swift b/RelayKit/Call/CallEncryptionService.swift new file mode 100644 index 0000000..d3e359b --- /dev/null +++ b/RelayKit/Call/CallEncryptionService.swift @@ -0,0 +1,390 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import CryptoKit +import Foundation +import LiveKit +import MatrixRustSDK +import OSLog + +private let logger = Logger(subsystem: "RelayKit", category: "CallEncryption") + +/// Helpers for MatrixRTC call-member state signaling, power-level bootstrap, +/// and LiveKit key provider plumbing. +/// +/// Key distribution for `io.element.call.encryption_keys` is handled by +/// ``CallWidgetBridge``, which speaks the Widget API directly to the +/// Matrix Rust SDK's `WidgetDriver`. The SDK handles Olm encryption of the +/// to-device payloads transparently, which the previous raw-REST path could +/// not do — Element-X rejected the plaintext keys and the call failed to +/// negotiate. +/// +/// What remains in this type: +/// - ``sendCallMemberEvent(sfuServiceURL:)`` / ``removeCallMemberEvent()`` — +/// MatrixRTC member state via `sendStateEventRaw` on the SDK room. +/// Rooms should be created with the correct power levels via +/// `powerLevelContentOverride` (see `MatrixService.callPowerLevels`); we +/// no longer try to mutate them at join time, matching Element Call. +/// - ``generateKey()`` / ``setRawKey(_:on:participantId:index:)`` — +/// LiveKit `BaseKeyProvider` plumbing that bypasses the String-based +/// `setKey(...)` API so raw AES bytes are installed unmangled. +struct CallEncryptionService { + + let homeserver: String + let accessToken: String + let userID: String + let deviceID: String + let roomID: String + /// The Matrix SDK room, used for `sendStateEventRaw` which goes through + /// the SDK's authenticated client instead of raw REST API calls. + let sdkRoom: MatrixRustSDK.Room? + + /// The to-device event type used by Element Call for key exchange. + static let encryptionKeysEventType = "io.element.call.encryption_keys" + + /// The state event type for MatrixRTC call membership (MSC3401). + /// Element-X uses this to discover active calls in a room. 
+    static let callMemberEventType = "org.matrix.msc3401.call.member"
+
+    // MARK: - Call Membership Signaling
+
+    /// State key of this device's call-member event:
+    /// `_<userID>_<deviceID>_m.call`. Shared by the send and remove paths so
+    /// both always address the same state event.
+    private var callMemberStateKey: String {
+        "_\(userID)_\(deviceID)_m.call"
+    }
+
+    /// Sends the MatrixRTC call membership state event so that Element-X and
+    /// other MatrixRTC clients can discover our participation in the call.
+    ///
+    /// Uses the modern MSC4143 per-device format matching Element-X:
+    /// - State key: `_@userId:server_deviceId_m.call`
+    /// - `focus_active`: `{"type": "livekit", "focus_selection": "oldest_membership"}`
+    /// - `foci_preferred`: array with the SFU service URL and room alias
+    ///
+    /// - Parameters:
+    ///   - sfuServiceURL: The SFU service URL from MatrixRTC discovery
+    ///     (e.g. `https://livekit.example.com/livekit/jwt`).
+    ///   - membershipId: The per-call membership UUID. Must match the
+    ///     `member.id` field in outbound encryption_keys to-device payloads
+    ///     so peers can correlate our key with our membership event. When
+    ///     `nil`, falls back to `userID:deviceID`.
+    /// - Throws: `CallEncryptionError.callMemberEventFailed` when `sdkRoom`
+    ///   is `nil`; otherwise rethrows errors from JSON serialization or from
+    ///   `sendStateEventRaw`.
+    func sendCallMemberEvent(sfuServiceURL: String, membershipId: String? = nil) async throws {
+        guard let sdkRoom else {
+            throw CallEncryptionError.callMemberEventFailed
+        }
+
+        let stateKey = callMemberStateKey
+        let serviceURL = sfuServiceURL.trimmingCharacters(in: .init(charactersIn: "/"))
+        let membership = membershipId ?? "\(userID):\(deviceID)"
+        // `created_ts` makes each heartbeat a distinct event (Synapse can
+        // dedupe identical state-event content). It also gives peers a
+        // monotonic origin time for liveness tracking; matches the field
+        // matrix-js-sdk's `MatrixRTCSession` writes.
+        let createdTs = Int64(Date().timeIntervalSince1970 * 1000)
+
+        // Match Element-X's exact format.
+        let body: [String: Any] = [
+            "application": "m.call",
+            "call_id": "",
+            "created_ts": createdTs,
+            "device_id": deviceID,
+            "expires": 14400000,
+            "focus_active": [
+                "type": "livekit",
+                "focus_selection": "oldest_membership"
+            ] as [String: Any],
+            "foci_preferred": [
+                [
+                    "type": "livekit",
+                    "livekit_service_url": serviceURL,
+                    "livekit_alias": roomID
+                ] as [String: Any]
+            ],
+            "m.call.intent": "video",
+            "membershipID": membership,
+            "scope": "m.room"
+        ]
+
+        let jsonData = try JSONSerialization.data(withJSONObject: body, options: [.sortedKeys])
+        // Non-failable decode: there is deliberately no "{}" fallback here,
+        // because empty content is exactly what `removeCallMemberEvent()`
+        // sends to announce that we have LEFT the call.
+        let jsonString = String(decoding: jsonData, as: UTF8.self)
+        // Body + state key contain device IDs and per-call membership UUIDs;
+        // not raw secrets but routing data we don't need leaking to Console.
+        logger.debug("[RTC]Call member event body: \(jsonString, privacy: .private)")
+        logger.debug("[RTC]Call member state key: \(stateKey, privacy: .private)")
+
+        _ = try await sdkRoom.sendStateEventRaw(
+            eventType: Self.callMemberEventType,
+            stateKey: stateKey,
+            content: jsonString
+        )
+        logger.info("[RTC]Sent call membership state event")
+    }
+
+    /// Removes the call membership state event (sets content to empty object)
+    /// so Element-X knows we've left the call.
+    ///
+    /// - Throws: `CallEncryptionError.callMemberEventFailed` when `sdkRoom`
+    ///   is `nil`; otherwise rethrows errors from `sendStateEventRaw`.
+    func removeCallMemberEvent() async throws {
+        guard let sdkRoom else {
+            throw CallEncryptionError.callMemberEventFailed
+        }
+        _ = try await sdkRoom.sendStateEventRaw(
+            eventType: Self.callMemberEventType,
+            stateKey: callMemberStateKey,
+            content: "{}"
+        )
+        logger.info("[RTC]Removed call membership state event")
+    }
+
+    // MARK: - Debug: Fetch Existing Call Members
+
+    /// Fetches all existing `org.matrix.msc3401.call.member` state events from
+    /// the room for debugging interoperability issues.
+    ///
+    /// Best-effort: any failure (bad URL, transport error, non-200 status,
+    /// unexpected JSON shape) makes the method return without logging.
+    func fetchCallMemberEvents() async {
+        let base = homeserver.trimmingCharacters(in: .init(charactersIn: "/"))
+        let encodedRoomID = roomID.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? roomID
+
+        guard let url = URL(string: "\(base)/_matrix/client/v3/rooms/\(encodedRoomID)/state") else { return }
+
+        var request = URLRequest(url: url)
+        request.setValue("Bearer \(accessToken)", forHTTPHeaderField: "Authorization")
+
+        guard let (data, response) = try? await URLSession.shared.data(for: request),
+              let http = response as? HTTPURLResponse, http.statusCode == 200,
+              let events = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]] else {
+            return
+        }
+
+        for event in events {
+            guard let type = event["type"] as? String,
+                  type == Self.callMemberEventType else { continue }
+            let stateKey = event["state_key"] as? String ?? "(none)"
+            if let content = event["content"],
+               let contentData = try? JSONSerialization.data(withJSONObject: content, options: [.sortedKeys]),
+               let contentStr = String(data: contentData, encoding: .utf8) {
+                // .private — call routing data + device IDs, not for Console.
+                logger.debug("[RTC]Existing call member [key=\(stateKey, privacy: .private)]: \(contentStr, privacy: .private)")
+            }
+        }
+    }
+
+    /// Returns a `userId -> [deviceId]` map of *other* users currently in the
+    /// call, parsed from `org.matrix.msc3401.call.member` state events.
+    ///
+    /// Element-X writes per-device call-member events with state key
+    /// `_<userId>_<deviceId>_m.call`. We walk the full room state, filter for
+    /// non-empty call-member content (empty content means the participant
+    /// has left), and extract `(userId, deviceId)` from the state key.
+    /// Our own `userID` is excluded.
+    func fetchCallTargets() async -> [String: [String]] {
+        let base = homeserver.trimmingCharacters(in: .init(charactersIn: "/"))
+        let encodedRoomID = roomID.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ??
roomID + + guard let url = URL(string: "\(base)/_matrix/client/v3/rooms/\(encodedRoomID)/state") else { return [:] } + + var request = URLRequest(url: url) + request.setValue("Bearer \(accessToken)", forHTTPHeaderField: "Authorization") + + guard let (data, response) = try? await URLSession.shared.data(for: request), + let http = response as? HTTPURLResponse, http.statusCode == 200, + let events = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]] else { + return [:] + } + + var targets: [String: Set] = [:] + for event in events { + guard let type = event["type"] as? String, + type == Self.callMemberEventType, + let stateKey = event["state_key"] as? String, + let content = event["content"] as? [String: Any], + !content.isEmpty else { continue } + + // State key format: `___m.call` where userId is + // itself `@localpart:server.tld`. Strip the leading underscore + // and the trailing `_m.call` marker, then split on the *last* + // underscore to separate deviceId from userId. + guard stateKey.hasPrefix("_"), stateKey.hasSuffix("_m.call") else { continue } + let trimmed = String(stateKey.dropFirst().dropLast("_m.call".count)) + guard let lastUnderscore = trimmed.lastIndex(of: "_") else { continue } + let userId = String(trimmed[.. Data { + var bytes = [UInt8](repeating: 0, count: 16) + let status = SecRandomCopyBytes(kSecRandomDefault, bytes.count, &bytes) + precondition(status == errSecSuccess, "Failed to generate random key bytes") + return Data(bytes) + } + + // MARK: - Key Provider Setup + + /// Builds a `BaseKeyProvider` whose internal `LKRTCFrameCryptorKeyProvider` + /// is configured for **HKDF-SHA256** key derivation instead of the LiveKit + /// Swift SDK's default of **PBKDF2**. + /// + /// Why this exists: `BaseKeyProvider`'s public inits forward to the 6-arg + /// ObjC initializer which hard-codes PBKDF2 (libwebrtc's default). 
Element
+    /// Call / livekit-client JS imports raw key material as HKDF and derives
+    /// the AES-GCM key with HKDF-SHA256, salt `"LKFrameEncryptionKey"`,
+    /// info = 128 zero bytes. Starting from byte-identical IKM, PBKDF2 on
+    /// our side and HKDF on the peer produce **different AES keys**, so every
+    /// frame's GCM auth tag fails on the peer. The symptom is the same as
+    /// the "maximum ratchet attempts exceeded / key marked as invalid" loop
+    /// we were chasing — symmetric, codec-independent, survives timing and
+    /// identity fixes.
+    ///
+    /// The 8-arg ObjC init that accepts `keyDerivationAlgorithm:` is exposed
+    /// in `webrtc-xcframework` 144.7559.x and newer. We look it up via the
+    /// Objective-C runtime so we don't need a direct module dependency on
+    /// `LiveKitWebRTC` from RelayKit. If the runtime lookup fails (older
+    /// framework), we fall back to the default PBKDF2 provider so the call
+    /// still builds — but interop with Element Call will stay broken.
+    ///
+    /// - Parameters:
+    ///   - ratchetWindowSize: Forwarded to `KeyProviderOptions`.
+    ///   - keyRingSize: Forwarded to `KeyProviderOptions`.
+    /// - Returns: A `BaseKeyProvider` whose `rtcKeyProvider` ivar has been
+    ///   replaced with the HKDF-configured instance, or the untouched default
+    ///   provider when any runtime lookup fails.
+    static func makeHKDFKeyProvider(
+        ratchetWindowSize: Int32 = 10,
+        keyRingSize: Int32 = 256
+    ) -> BaseKeyProvider {
+        let options = KeyProviderOptions(
+            sharedKey: false,
+            ratchetWindowSize: ratchetWindowSize,
+            keyRingSize: keyRingSize
+        )
+        let provider = BaseKeyProvider(options: options)
+
+        guard let cls = NSClassFromString("LKRTCFrameCryptorKeyProvider") as? NSObject.Type else {
+            logger.error("[RTC]LKRTCFrameCryptorKeyProvider class not found at runtime; HKDF swap skipped — E2EE interop with Element Call will fail (PBKDF2 vs HKDF mismatch)")
+            return provider
+        }
+
+        let initSel = NSSelectorFromString(
+            "initWithRatchetSalt:ratchetWindowSize:sharedKeyMode:uncryptedMagicBytes:failureTolerance:keyRingSize:discardFrameWhenCryptorNotReady:keyDerivationAlgorithm:"
+        )
+        // Ask the class, not an instance: this avoids allocating an object
+        // that is never initialised and then sending it messages when the
+        // framework turns out to be too old.
+        guard cls.instancesRespond(to: initSel) else {
+            logger.error("[RTC]LKRTCFrameCryptorKeyProvider does not expose keyDerivationAlgorithm: init; webrtc-xcframework may be < 144.x — falling back to PBKDF2 (Element Call interop will fail)")
+            return provider
+        }
+
+        // Resolve the destination ivar before building anything, so a missing
+        // ivar does not leave a freshly created provider with nowhere to go.
+        guard let ivar = class_getInstanceVariable(BaseKeyProvider.self, "rtcKeyProvider") else {
+            logger.error("[RTC]rtcKeyProvider ivar not found on BaseKeyProvider; HKDF swap skipped")
+            return provider
+        }
+
+        // Swift blocks `NSObject.alloc()`, so go through the ObjC runtime.
+        let allocSel = NSSelectorFromString("alloc")
+        typealias AllocFunc = @convention(c) (AnyClass, Selector) -> AnyObject
+        let allocImp = unsafeBitCast(
+            (cls as AnyClass).method(for: allocSel),
+            to: AllocFunc.self
+        )
+        let allocated = allocImp(cls, allocSel)
+
+        typealias InitFunc = @convention(c) (
+            AnyObject, Selector, NSData, Int32, ObjCBool, NSData?, Int32, Int32, ObjCBool, UInt
+        ) -> AnyObject
+        let imp = unsafeBitCast(
+            (allocated as AnyObject).method(for: initSel),
+            to: InitFunc.self
+        )
+        // RTCKeyDerivationAlgorithmHKDF is the second enum case (== 1).
+        let hkdfKeyDerivation: UInt = 1
+        // NOTE(review): `alloc`/`init` are invoked through plain C function
+        // pointers, so Swift is not told about their ObjC ownership
+        // conventions — confirm the retain balance of `hkdfRtc` (and that the
+        // ivar write below keeps it alive) with Instruments.
+        let hkdfRtc = imp(
+            allocated,
+            initSel,
+            options.ratchetSalt as NSData,
+            options.ratchetWindowSize,
+            ObjCBool(options.sharedKey),
+            options.uncryptedMagicBytes as NSData,
+            options.failureTolerance,
+            options.keyRingSize,
+            ObjCBool(false),
+            hkdfKeyDerivation
+        )
+
+        object_setIvar(provider, ivar, hkdfRtc)
+        logger.info("[RTC]Installed HKDF-backed LKRTCFrameCryptorKeyProvider (Element Call interop path)")
+        return provider
+    }
+
+    /// Sets a raw key on a `BaseKeyProvider` for the given participant, bypassing
+    /// the String-based `setKey(key:participantId:index:)` method which would
+    /// UTF-8-encode the string (wrong for raw AES key bytes).
+    ///
+    /// `BaseKeyProvider` is decorated with `@objcMembers`, so its internal
+    /// `rtcKeyProvider` (an `LKRTCFrameCryptorKeyProvider`) is accessible via KVC.
+    /// The ObjC provider accepts `NSData` directly.
+    ///
+    /// Failures (provider not reachable, selector missing) are logged and the
+    /// call returns without installing a key.
+    ///
+    /// - Parameters:
+    ///   - keyData: Raw key bytes, installed as-is.
+    ///   - keyProvider: The provider whose underlying `rtcKeyProvider` receives the key.
+    ///   - participantId: Participant identity the key belongs to.
+    ///   - index: Key index; defaults to `0`.
+    static func setRawKey(
+        _ keyData: Data,
+        on keyProvider: BaseKeyProvider,
+        participantId: String,
+        index: Int32 = 0
+    ) {
+        guard let rtcProvider = keyProvider.value(forKey: "rtcKeyProvider") as AnyObject? else {
+            logger.error("[RTC]Could not access rtcKeyProvider via KVC")
+            return
+        }
+
+        // LKRTCFrameCryptorKeyProvider is an ObjC class with:
+        //   - (void)setKey:(NSData *)key withIndex:(int)index forParticipant:(NSString *)participantId
+        // NSObject.perform(_:with:with:) only supports 2 arguments, so we look
+        // up the method's IMP with `method(for:)` and call the 3-argument
+        // method through a typed C function pointer.
+        typealias SetKeyFunc = @convention(c) (AnyObject, Selector, NSData, Int32, NSString) -> Void
+        let selector = NSSelectorFromString("setKey:withIndex:forParticipant:")
+        guard (rtcProvider as? NSObject)?.responds(to: selector) == true else {
+            logger.error("[RTC]rtcKeyProvider does not respond to setKey:withIndex:forParticipant:")
+            return
+        }
+
+        let imp = unsafeBitCast(
+            (rtcProvider as AnyObject).method(for: selector),
+            to: SetKeyFunc.self
+        )
+        imp(rtcProvider, selector, keyData as NSData, index, participantId as NSString)
+        // SHA-256 fingerprint of the raw IKM so we can confirm the exact same
+        // 16 bytes end up on the wire. Matches the fingerprint logged in
+        // CallWidgetBridge.sendEncryptionKey. Diverging fingerprints mean
+        // our local frame cryptor and the peer are using different keys —
+        // the #1 root cause of "maximum ratchet attempts exceeded" on an
+        // otherwise-correct key-exchange handshake.
+        let fp = SHA256.hash(data: keyData).prefix(8).map { String(format: "%02x", $0) }.joined()
+        // NOTE(review): participantId is logged `.public` here but `.private`
+        // in the base64 convenience overload — confirm which one is intended.
+        logger.info("[RTC]Set raw encryption key for participant \(participantId, privacy: .public) at index \(index) bytes=\(keyData.count) sha256[0..8]=\(fp, privacy: .public)")
+    }
+
+    /// Convenience: sets a raw key using base64-encoded key data.
///
/// Accepts padded or unpadded input in either the standard or the URL-safe
/// base64 alphabet. Input that does not decode, or that decodes to zero
/// bytes, is logged and ignored.
///
/// - Parameters:
///   - base64Key: Base64 text of the raw key bytes.
///   - keyProvider: Provider that receives the decoded key.
///   - participantId: Identity the key is registered under.
///   - index: Key index forwarded to the raw-key overload; defaults to `0`.
static func setRawKey(
    base64Key: String,
    on keyProvider: BaseKeyProvider,
    participantId: String,
    index: Int32 = 0
) {
    // Foundation's decoder only understands the standard alphabet with
    // `=` padding. Map the URL-safe alphabet back and restore the padding
    // so unpadded keys decode instead of being rejected.
    var normalized = base64Key
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .replacingOccurrences(of: "-", with: "+")
        .replacingOccurrences(of: "_", with: "/")
    let remainder = normalized.count % 4
    if remainder != 0 {
        normalized += String(repeating: "=", count: 4 - remainder)
    }

    guard let keyData = Data(base64Encoded: normalized), !keyData.isEmpty else {
        logger.error("[RTC]Invalid base64 key for participant \(participantId, privacy: .private)")
        return
    }
    setRawKey(keyData, on: keyProvider, participantId: participantId, index: index)
}
} // closes the enclosing type, whose declaration is above this chunk

// MARK: - Errors

/// Call-encryption failures that carry a user-presentable description.
enum CallEncryptionError: LocalizedError {
    /// The call membership state event could not be sent.
    case callMemberEventFailed

    var errorDescription: String? {
        switch self {
        case .callMemberEventFailed:
            return "Failed to send call membership state event."
        }
    }
}
diff --git a/RelayKit/Call/CallViewModel.swift b/RelayKit/Call/CallViewModel.swift new file mode 100644 index 0000000..6bb2cb0 --- /dev/null +++ b/RelayKit/Call/CallViewModel.swift @@ -0,0 +1,748 @@
// Copyright 2026 Link Dupont
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation
import LiveKit
import RelayInterface
import OSLog
import SwiftUI

private let logger = Logger(subsystem: "RelayKit", category: "Call")

/// A concrete ``CallViewModelProtocol`` implementation backed by the LiveKit Swift SDK.
///
/// ``CallViewModel`` owns a `LiveKit.Room` instance and bridges its delegate callbacks
/// into ``@Observable`` state for SwiftUI consumption.
///
/// The inner ``Delegate`` class implements `RoomDelegate` and dispatches all callbacks
/// onto the main actor via `Task { @MainActor in … }` so that UI state mutations are
/// always performed on the correct actor without requiring LiveKit itself to be
/// `@MainActor`-aware.
///
/// A failed ``connect(url:token:sfuServiceURL:)`` tears the partially
/// established session down again (heartbeat, widget bridge, LiveKit
/// connection, published call membership) before rethrowing.
@Observable
@MainActor
public final class CallViewModel: CallViewModelProtocol {
    public private(set) var state: CallState = .idle
    public private(set) var participants: [CallParticipant] = []
    public private(set) var isLocalCameraEnabled: Bool = false
    public private(set) var isLocalMicrophoneEnabled: Bool = false
    public private(set) var localParticipantID: String?
    /// Incremented whenever video tracks change, triggering SwiftUI to
    /// re-evaluate `videoContent(for:)` and pick up new or removed tracks.
    public private(set) var videoTrackRevision: UInt = 0

    @ObservationIgnored
    private let room = LiveKit.Room()
    @ObservationIgnored
    private var delegate: Delegate?

    /// Cached video views keyed by participant ID, to avoid recreating
    /// `SwiftUIVideoView` on every SwiftUI re-render. Each entry stores
    /// the `ObjectIdentifier` of the `VideoTrack` so the cache is
    /// invalidated when the underlying track actually changes.
    ///
    /// `@ObservationIgnored` is critical: without it, the `@Observable`
    /// macro tracks writes to this cache, and because `makeVideoView` is
    /// called directly from SwiftUI view bodies, any cache mutation during
    /// body evaluation triggers an invalidation which re-runs the body
    /// which re-mutates the cache — leading to a constraint-pass crash:
    /// "more Update Constraints in Window passes than there are views".
    @ObservationIgnored
    private var videoViewCache: [String: (trackObjectID: ObjectIdentifier, view: AnyView)] = [:]

    // MARK: - E2EE State
    //
    // All of these are implementation details — no SwiftUI view reads
    // them. Marking them `@ObservationIgnored` keeps their writes out of
    // the observation registrar, which eliminates a class of stray
    // invalidations that otherwise pile up during call startup when
    // `connect()` writes the key, members, and bridge in rapid succession
    // on the main actor.

    /// The LiveKit key provider used for per-participant AES-GCM frame encryption.
    @ObservationIgnored
    private var keyProvider: BaseKeyProvider?
    /// The local participant's current encryption key (raw 16 bytes).
    @ObservationIgnored
    private var localEncryptionKey: Data?
    /// The current key index (0-255, wraps around on ratchet).
    @ObservationIgnored
    private var localKeyIndex: Int = 0
    /// Service for MatrixRTC call-member signaling and LiveKit key plumbing.
    @ObservationIgnored
    private var encryptionService: CallEncryptionService?
    /// The Matrix SDK room, used for the widget bridge.
    @ObservationIgnored
    private var matrixRoom: MatrixRustSDK.Room?
    /// Headless widget-driver bridge that handles Olm-encrypted key exchange
    /// via the Matrix Widget API. Nil until `connect(...)` completes setup.
    @ObservationIgnored
    private var widgetBridge: CallWidgetBridge?
    /// Cached user/device map of known call members, rebuilt from
    /// MatrixRTC member state events.
    @ObservationIgnored
    private var callMembers: [String: [String]] = [:]
    /// Periodic refresh of the `org.matrix.msc3401.call.member` state event so
    /// peers don't expire our membership while the call is in progress.
    /// Element Call's matrix-js-sdk `MatrixRTCSession` does the equivalent.
    @ObservationIgnored
    private var heartbeatTask: Task<Void, Never>?
    /// Interval at which the call-member event is re-sent. Our `expires`
    /// field is 4 hours; refreshing every 30 minutes keeps a generous
    /// safety margin against missed sends.
    private static let heartbeatInterval: Duration = .seconds(30 * 60)

    /// Creates a call view model without E2EE. Use ``init(encryptionContext:)``
    /// for encrypted calls that interoperate with Element Call.
    public init() {
        LiveKitLogBridgeInstaller.install()
        self.isE2eeEnabled = false
        let delegate = Delegate(viewModel: self)
        self.delegate = delegate
        room.add(delegate: delegate)
    }

    /// Encryption context passed from ``MatrixService`` to enable E2EE key exchange.
    public struct EncryptionContext: @unchecked Sendable {
        public let homeserver: String
        public let accessToken: String
        public let userID: String
        public let deviceID: String
        public let roomID: String
        /// Whether the Matrix room has encryption enabled (`m.room.encryption` state event).
        /// When `true`, LiveKit-level GCM frame encryption + key exchange is enabled.
        public let isRoomEncrypted: Bool
        /// The Matrix SDK room, used to obtain the timeline for listening to
        /// inbound encryption key state events. `nil` if unavailable.
        public let matrixRoom: MatrixRustSDK.Room?

        public init(homeserver: String, accessToken: String, userID: String, deviceID: String, roomID: String, isRoomEncrypted: Bool = false, matrixRoom: MatrixRustSDK.Room? = nil) {
            self.homeserver = homeserver
            self.accessToken = accessToken
            self.userID = userID
            self.deviceID = deviceID
            self.roomID = roomID
            self.isRoomEncrypted = isRoomEncrypted
            self.matrixRoom = matrixRoom
        }
    }

    /// Whether this call uses LiveKit-level E2EE (GCM frame encryption).
    /// Mirrors the Matrix room's encryption state.
    private let isE2eeEnabled: Bool

    /// Creates a call view model with optional E2EE, determined by the Matrix
    /// room's encryption state. Encrypted rooms use AES-128-GCM frame encryption
    /// with MatrixRTC key exchange; unencrypted rooms use no LiveKit-level E2EE.
    public init(encryptionContext: EncryptionContext) {
        LiveKitLogBridgeInstaller.install()
        self.isE2eeEnabled = encryptionContext.isRoomEncrypted

        let delegate = Delegate(viewModel: self)
        self.delegate = delegate
        room.add(delegate: delegate)

        self.encryptionService = CallEncryptionService(
            homeserver: encryptionContext.homeserver,
            accessToken: encryptionContext.accessToken,
            userID: encryptionContext.userID,
            deviceID: encryptionContext.deviceID,
            roomID: encryptionContext.roomID,
            sdkRoom: encryptionContext.matrixRoom
        )

        if encryptionContext.isRoomEncrypted {
            // Per-participant key provider: each participant has their own key.
            // Match Element Call's MatrixKeyProvider configuration so the JS
            // LiveKit E2EE worker doesn't exhaust its ratchet window trying to
            // decrypt our frames. Swift BaseKeyProvider defaults are
            // ratchetWindowSize: 0, keyRingSize: 16; Element Call uses 10/256.
            //
            // Additionally: swap in an HKDF-SHA256-backed
            // LKRTCFrameCryptorKeyProvider. The LiveKit Swift SDK's default
            // initializer path constructs the ObjC provider with PBKDF2
            // (libwebrtc's default), but Element Call / livekit-client JS
            // derives the AES-GCM key with HKDF from the same raw IKM —
            // so the two sides produce different AES keys from matching
            // fingerprints, and every frame's auth tag fails on the peer.
            // See CallEncryptionService.makeHKDFKeyProvider for details.
            self.keyProvider = CallEncryptionService.makeHKDFKeyProvider(
                ratchetWindowSize: 10,
                keyRingSize: 256
            )
        }
        self.matrixRoom = encryptionContext.matrixRoom
    }

    // MARK: - CallViewModelProtocol

    /// Connects to the LiveKit SFU, performs MatrixRTC signaling and E2EE key
    /// distribution, then publishes the local microphone and camera.
    ///
    /// - Parameters:
    ///   - url: LiveKit server URL.
    ///   - token: LiveKit access token.
    ///   - sfuServiceURL: SFU service URL forwarded into the call-member state event.
    /// - Throws: Rethrows any connect or publish error after setting
    ///   ``state`` to `.failed` and tearing the partial session down.
    public func connect(url: String, token: String, sfuServiceURL: String = "") async throws {
        state = .connecting
        // Set once we attempt to publish our call-member state event, so a
        // failed connect only retracts a membership it may have announced.
        var membershipAnnounced = false
        do {
            // Microphone publish is deferred until AFTER the local E2EE key
            // has been installed and distributed to peers. If we let
            // LiveKit auto-publish the mic at connect time, the first
            // audio frames hit the SFU before peers receive our key —
            // their frame cryptor then ratchets past its window and
            // poisons the key slot.
            let connectOpts = ConnectOptions(
                autoSubscribe: true,
                enableMicrophone: false
            )

            // Enable LiveKit-level GCM frame encryption only for encrypted Matrix
            // rooms. Element Call also uses LiveKit E2EE (SFrame) for encrypted
            // rooms and no encryption for unencrypted rooms.
            let encryptionOpts: EncryptionOptions? = keyProvider.map {
                EncryptionOptions(keyProvider: $0, encryptionType: .gcm)
            }
            if isE2eeEnabled {
                logger.info("[RTC]E2EE enabled (encrypted Matrix room)")
            } else {
                logger.info("[RTC]E2EE disabled (unencrypted Matrix room)")
            }
            let roomOpts = RoomOptions(
                defaultVideoPublishOptions: VideoPublishOptions(
                    preferredCodec: .vp8
                ),
                defaultAudioPublishOptions: AudioPublishOptions(
                    dtx: true,
                    red: false
                ),
                adaptiveStream: true,
                dynacast: true,
                encryptionOptions: encryptionOpts
            )
            try await room.connect(
                url: url,
                token: token,
                connectOptions: connectOpts,
                roomOptions: roomOpts
            )
            localParticipantID = room.localParticipant.identity?.stringValue
            logger.info("[RTC]Connected with LiveKit identity: \(self.localParticipantID ?? "unknown", privacy: .public)")

            // Spin up the headless widget bridge *only* for encrypted rooms.
            // For unencrypted rooms the bridge adds no value (no keys to
            // exchange) and materialising a virtual Element-Call widget on
            // a room Element-X is already observing causes Element-X to
            // stall before joining the LiveKit SFU.
            if self.isE2eeEnabled, let matrixRoom, let encryptionService {
                do {
                    let bridge = try CallWidgetBridge(
                        room: matrixRoom,
                        ownUserId: encryptionService.userID,
                        ownDeviceId: encryptionService.deviceID,
                        isRoomEncrypted: true,
                        keyProvider: self.keyProvider
                    )
                    bridge.start()
                    self.widgetBridge = bridge
                } catch {
                    logger.error("[RTC]Failed to create CallWidgetBridge: \(error.localizedDescription, privacy: .private)")
                }
            }

            // CRITICAL: Register the local E2EE key in the keyProvider
            // BEFORE publishing any media tracks. LiveKit begins encrypting
            // frames the instant `setCamera(enabled: true)` attaches the
            // track, so if the key isn't installed yet the first batch of
            // frames is encrypted with nothing the remote peer can decrypt —
            // and Element-X's video decoder stalls on that first undecodable
            // frame, resulting in perpetual black video.
            if self.isE2eeEnabled, let keyProvider = self.keyProvider, let encryptionService {
                let key = CallEncryptionService.generateKey()
                self.localEncryptionKey = key
                // Legacy `m.call.member` rtcBackendIdentity is always
                // `${sender}:${device_id}` (matrix-js-sdk CallMembership.ts
                // line 101). This is what remote peers route our frames under,
                // so our local sender cryptor MUST be keyed under the same
                // byte sequence — do not trust `localParticipantID` (the
                // identity LiveKit assigns from the SFU JWT), since a
                // mismatched JWT identity would silently break decrypt.
                let localIdentity = "\(encryptionService.userID):\(encryptionService.deviceID)"
                if let livekitIdentity = self.localParticipantID, livekitIdentity != localIdentity {
                    logger.warning("[RTC]LiveKit identity \(livekitIdentity, privacy: .public) != matrix identity \(localIdentity, privacy: .public) — frame encryption may misroute")
                }
                let keyIndex = self.localKeyIndex
                CallEncryptionService.setRawKey(
                    key,
                    on: keyProvider,
                    participantId: localIdentity,
                    index: Int32(keyIndex)
                )
                logger.info("[RTC]Local E2EE key set (index \(keyIndex)) under participantId=\(localIdentity, privacy: .public) before camera publish")
            }

            // Set up MatrixRTC signaling and distribute the key **before**
            // publishing media. LiveKit begins encrypting the instant
            // `setCamera(enabled: true)` attaches the track; if frames reach
            // peers before our key does, their LiveKit frame cryptor
            // ratchets in the dark, blows through its `ratchetWindowSize`
            // (10) worth of failures, and calls `markInvalid()` on index 0
            // — poisoning the slot so our late-arriving key is rejected
            // even though the raw IKM is correct. The original ordering ran
            // this in a background Task racing `setCamera`, which is
            // exactly that bug.
            //
            // Order: power levels → member state (so peers see us) →
            // deliver key via Olm-encrypted to-device → THEN publish media.
            // Failures here are logged but non-fatal — a late key is still
            // better than no key.
            if let encryptionService {
                let bridge = self.widgetBridge
                let localKey = self.localEncryptionKey
                let keyIndex = self.localKeyIndex

                // Debug: log existing call member events to compare formats.
                await encryptionService.fetchCallMemberEvents()

                // 1. Send call membership state event. Pass the widget
                //    bridge's membershipId UUID so the state-event
                //    `membershipID` matches the `member.id` field in our
                //    outbound encryption_keys payloads. Power levels must
                //    already permit this (set at room creation via
                //    `MatrixService.callPowerLevels`); we no longer try to
                //    mutate them at join time, matching Element Call.
                let membershipId = bridge?.membershipId
                // Flag the attempt up front: a send that throws may still
                // have reached the homeserver.
                membershipAnnounced = true
                do {
                    try await encryptionService.sendCallMemberEvent(
                        sfuServiceURL: sfuServiceURL,
                        membershipId: membershipId
                    )
                } catch {
                    logger.warning("[RTC]Call membership event failed: \(error.localizedDescription, privacy: .private)")
                }

                // 2. Start the membership heartbeat. matrix-js-sdk's
                //    `MatrixRTCSession` re-sends roughly every `expires/2`;
                //    we use a shorter interval to be safe against missed
                //    sends. Cancelled in `disconnect()` and on connect
                //    failure. Any heartbeat left over from an earlier
                //    connect is cancelled first so it cannot keep running
                //    unreferenced.
                self.heartbeatTask?.cancel()
                self.heartbeatTask = Self.startHeartbeat(
                    encryptionService: encryptionService,
                    sfuServiceURL: sfuServiceURL,
                    membershipId: membershipId
                )

                // 3. Distribute the already-generated local key via the
                //    widget bridge. The `messages` map for the
                //    `send_to_device` action requires an explicit
                //    `{ userId: [deviceId, ...] }` map of recipients, so we
                //    parse it from the `org.matrix.msc3401.call.member`
                //    state events already present on the room. The SDK
                //    then Olm-encrypts the payload per-device.
                if self.isE2eeEnabled, let bridge, let localKey {
                    let targets = await encryptionService.fetchCallTargets()
                    self.callMembers = targets
                    logger.info("[RTC]Distributing key to \(targets.count) remote user(s) BEFORE media publish")
                    do {
                        try await bridge.sendEncryptionKey(
                            localKey,
                            keyIndex: keyIndex,
                            toMembers: targets
                        )
                    } catch {
                        logger.warning("[RTC]Widget-bridge key distribution failed: \(error.localizedDescription, privacy: .private)")
                    }
                }
            }

            // Key is now installed locally and (best-effort) distributed to
            // any existing call participants. Safe to publish media.
            try await room.localParticipant.setMicrophone(enabled: true)
            try await room.localParticipant.setCamera(enabled: true)

            isLocalCameraEnabled = true
            isLocalMicrophoneEnabled = true
            state = .connected
            videoTrackRevision += 1
        } catch {
            logger.error("[RTC]Connect failed: \(error.localizedDescription, privacy: .private)")
            state = .failed(error.localizedDescription)
            // Without this, a failure after `room.connect` succeeded (e.g. a
            // publish error) would leave the heartbeat re-announcing our
            // membership, the widget bridge running and the SFU connection
            // open — peers would keep seeing a participant that never joined.
            await tearDownSession(retractMembership: membershipAnnounced)
            throw error
        }
    }

    /// Ends the call: flips ``state`` to `.disconnected` immediately, then
    /// releases session resources and retracts the call membership.
    public func disconnect() async {
        // Update UI state immediately — SwiftUI re-renders to the
        // disconnected state while the awaited cleanup runs.
        state = .disconnected
        await tearDownSession(retractMembership: true)
    }

    /// Releases everything a call session holds. Shared by ``disconnect()``
    /// and the failure path of ``connect(url:token:sfuServiceURL:)``; does
    /// not touch ``state`` so each caller can present its own outcome.
    ///
    /// - Parameter retractMembership: Whether to send the empty call-member
    ///   event so peers see us leave.
    private func tearDownSession(retractMembership: Bool) async {
        participants = []
        isLocalCameraEnabled = false
        isLocalMicrophoneEnabled = false
        localParticipantID = nil
        videoViewCache.removeAll()
        localEncryptionKey = nil
        localKeyIndex = 0
        callMembers = [:]

        // Stop the heartbeat first so it can't race the leave event and
        // accidentally re-publish a fresh membership while we're tearing down.
        heartbeatTask?.cancel()
        heartbeatTask = nil

        // Tear down the widget bridge synchronously so its tasks can't race
        // with subsequent connects.
        widgetBridge?.shutdown()
        widgetBridge = nil

        // Proper cleanup: send the empty `m.call.member` content so peers
        // see us leave immediately (otherwise they wait up to `expires`
        // ms — 4 hours — before treating us as gone). Best-effort, capped
        // by a short timeout so the UI never beach-balls if the homeserver
        // is slow to respond.
        if retractMembership {
            let service = encryptionService
            await Self.runWithTimeout(seconds: 2) {
                try? await service?.removeCallMemberEvent()
            }
        }

        await room.disconnect()
    }

    /// Re-sends the call-member state event on a fixed interval until cancelled.
    /// Detached from `self` so the loop body has no actor hop.
    nonisolated private static func startHeartbeat(
        encryptionService: CallEncryptionService,
        sfuServiceURL: String,
        membershipId: String?
    ) -> Task<Void, Never> {
        Task.detached(priority: .background) {
            // Local logger — the file-scope `logger` is inferred as
            // MainActor-isolated and isn't reachable from a detached task.
            let log = Logger(subsystem: "RelayKit", category: "Call")
            while !Task.isCancelled {
                do {
                    try await Task.sleep(for: heartbeatInterval)
                } catch {
                    return // cancelled
                }
                if Task.isCancelled { return }
                do {
                    try await encryptionService.sendCallMemberEvent(
                        sfuServiceURL: sfuServiceURL,
                        membershipId: membershipId
                    )
                    log.debug("[RTC]Heartbeat refreshed call.member state event")
                } catch {
                    log.warning("[RTC]Heartbeat refresh failed: \(error.localizedDescription, privacy: .private)")
                }
            }
        }
    }

    /// Runs `work` and returns when it completes or after `seconds`,
    /// whichever comes first. The work continues in the background after
    /// the timeout; the caller just stops waiting.
    ///
    /// Implemented as a first-signal-wins race on an `AsyncStream` rather
    /// than a task group: a group waits for all of its children before
    /// returning, and a child awaiting `Task.value` is not interrupted by
    /// cancellation, so a group-based race would still block on slow work.
    nonisolated private static func runWithTimeout(
        seconds: TimeInterval,
        _ work: @Sendable @escaping () async -> Void
    ) async {
        let (signals, signal) = AsyncStream<Void>.makeStream()

        // Deliberately unstructured: must be able to outlive this call.
        Task.detached(priority: .userInitiated) {
            await work()
            signal.yield()
        }
        let timer = Task.detached(priority: .userInitiated) {
            try? await Task.sleep(for: .seconds(seconds))
            signal.yield()
        }

        // Resume on whichever side signals first (or on caller cancellation).
        var iterator = signals.makeAsyncIterator()
        _ = await iterator.next()

        timer.cancel()
        signal.finish()
    }

    /// Flips the local camera and drops the cached local video view so the
    /// next render picks up the new track state.
    public func toggleCamera() async throws {
        let enabled = !isLocalCameraEnabled
        try await room.localParticipant.setCamera(enabled: enabled)
        isLocalCameraEnabled = enabled
        if let localID = localParticipantID {
            videoViewCache.removeValue(forKey: localID)
        }
        videoTrackRevision += 1
    }

    /// Flips the local microphone.
    public func toggleMicrophone() async throws {
        let enabled = !isLocalMicrophoneEnabled
        try await room.localParticipant.setMicrophone(enabled: enabled)
        isLocalMicrophoneEnabled = enabled
    }

    /// Returns width / height of the participant's first video track, or
    /// `nil` when there is no unmuted, subscribed track with known dimensions.
    public func videoAspectRatio(for participantID: String) -> CGFloat? {
        let isLocal = room.localParticipant.identity?.stringValue == participantID
        let participant: Participant? = isLocal
            ? room.localParticipant
            : room.remoteParticipants.values.first { $0.identity?.stringValue == participantID }

        guard let publication = participant?.videoTracks.first,
              !publication.isMuted,
              let track = publication.track as? VideoTrack else {
            return nil
        }
        if let remotePub = publication as? RemoteTrackPublication, !remotePub.isSubscribed {
            return nil
        }
        guard let dim = track.dimensions, dim.height > 0 else { return nil }
        return CGFloat(dim.width) / CGFloat(dim.height)
    }

    /// Returns a (cached) video view for the participant's first video
    /// track, or `nil` when there is no unmuted, subscribed track.
    public func makeVideoView(for participantID: String) -> AnyView? {
        let isLocal = room.localParticipant.identity?.stringValue == participantID
        let participant: Participant? = isLocal
            ? room.localParticipant
            : room.remoteParticipants.values.first { $0.identity?.stringValue == participantID }

        guard let publication = participant?.videoTracks.first,
              !publication.isMuted,
              let track = publication.track as? VideoTrack
        else {
            videoViewCache.removeValue(forKey: participantID)
            return nil
        }

        // For remote tracks, verify the track is actually subscribed.
        if let remotePub = publication as? RemoteTrackPublication, !remotePub.isSubscribed {
            videoViewCache.removeValue(forKey: participantID)
            return nil
        }

        // Return the cached view if the underlying VideoTrack is unchanged,
        // preventing SwiftUI from tearing down and recreating the Metal renderer.
        let trackID = ObjectIdentifier(track)
        if let cached = videoViewCache[participantID], cached.trackObjectID == trackID {
            return cached.view
        }

        let view = AnyView(
            SwiftUIVideoView(track,
                             layoutMode: .fill,
                             mirrorMode: isLocal ? .mirror : .off)
        )
        videoViewCache[participantID] = (trackObjectID: trackID, view: view)
        return view
    }

    // MARK: - E2EE Key Redistribution

    /// Splits a LiveKit identity of the form `@user:server:DEVICEID` into
    /// its Matrix user ID and device ID.
    ///
    /// The split is made at the *last* colon so that a server name carrying
    /// an explicit port (`@user:server:8448:DEVICEID`) stays part of the
    /// user ID. Returns `nil` when either part would be malformed.
    nonisolated private static func matrixTarget(
        fromIdentity identity: String
    ) -> (userId: String, deviceId: String)? {
        guard let separator = identity.lastIndex(of: ":") else { return nil }
        let userId = String(identity[..<separator])
        let deviceId = String(identity[identity.index(after: separator)...])
        // A Matrix user ID itself contains a colon (`@localpart:server`).
        guard userId.contains(":"), !deviceId.isEmpty else { return nil }
        return (userId, deviceId)
    }

    /// Re-sends the local encryption key to a newly joined participant so they
    /// can decrypt our media. Routes through the widget bridge so the SDK
    /// Olm-encrypts the to-device payload.
    fileprivate func redistributeKey(to participantIdentity: String) {
        guard let key = localEncryptionKey, let bridge = widgetBridge else { return }

        // Parse "user:device" from the LiveKit identity
        // (format: `@userId:server:deviceId`). Element Call uses identities
        // like `@user:server:DEVICEID`.
        guard let target = Self.matrixTarget(fromIdentity: participantIdentity) else {
            logger.warning("[RTC]Cannot parse participant identity for key redistribution: \(participantIdentity, privacy: .private)")
            return
        }
        let index = localKeyIndex

        Task {
            do {
                try await bridge.sendEncryptionKey(
                    key,
                    keyIndex: index,
                    toMembers: [target.userId: [target.deviceId]]
                )
                logger.info("[RTC]Redistributed key to \(participantIdentity, privacy: .private)")
            } catch {
                logger.warning("[RTC]Key redistribution failed for \(participantIdentity, privacy: .private): \(error.localizedDescription, privacy: .private)")
            }
        }
    }

    // MARK: - Participant Sync

    /// Re-syncs the ``participants`` array from the room's remote participants.
    /// - Parameter trackChanged: When `true`, also bumps ``videoTrackRevision``
    ///   to trigger video view updates. Pass `false` for cosmetic-only changes
    ///   (e.g. speaking indicators) to avoid disrupting the video renderer.
    fileprivate func syncParticipants(trackChanged: Bool = false) {
        if trackChanged { videoTrackRevision += 1 }

        let newParticipants = room.remoteParticipants.values.map { participant in
            CallParticipant(
                id: participant.identity?.stringValue ?? participant.sid?.stringValue ?? UUID().uuidString,
                displayName: participant.name,
                isCameraEnabled: participant.isCameraEnabled(),
                isMicrophoneEnabled: participant.isMicrophoneEnabled(),
                isSpeaking: participant.isSpeaking
            )
        }

        // Prune video view cache for participants who have left.
        if trackChanged {
            let activeIDs = Set(newParticipants.map(\.id))
            for key in videoViewCache.keys where key != localParticipantID && !activeIDs.contains(key) {
                videoViewCache.removeValue(forKey: key)
            }
        }

        // Only write to the observed `participants` property when the array
        // actually changed. The LiveKit `didUpdateSpeakingParticipants`
        // callback fires continuously during active audio, and every write
        // to an `@Observable` property invalidates downstream SwiftUI views
        // regardless of value equality — which can push NSHostingView into
        // an unbounded "Update Constraints in Window" loop and crash.
        if participants != newParticipants {
            participants = newParticipants
        }
    }

    // MARK: - Delegate Bridge

    /// Bridges `RoomDelegate` callbacks — which arrive on an unspecified thread — onto
    /// the main actor so that `CallViewModel`'s `@Observable` state is always mutated
    /// safely. The class is `@unchecked Sendable` because `viewModel` is a weak reference
    /// that is only read inside `Task { @MainActor in … }` blocks.
    ///
    /// Also conforms to ``TrackDelegate`` so it can observe per-track
    /// dimension changes (e.g. a remote rotating their camera, simulcast
    /// layer changes). LiveKit's `RoomDelegate` does not surface those.
    private final class Delegate: NSObject, RoomDelegate, TrackDelegate, @unchecked Sendable {
        weak var viewModel: CallViewModel?

        init(viewModel: CallViewModel) {
            self.viewModel = viewModel
            super.init()
        }

        /// Bumps `videoTrackRevision` whenever a track's dimensions change,
        /// so SwiftUI tiles re-read `videoAspectRatio(for:)`.
        func track(_ track: VideoTrack, didUpdateDimensions dimensions: Dimensions?) {
            Task { @MainActor [weak viewModel] in
                viewModel?.videoTrackRevision += 1
            }
        }

        /// Attaches `self` as a `TrackDelegate` on a publication's underlying
        /// video track if present. Multicast — safe to call repeatedly.
        func observeDimensions(of publication: TrackPublication?) {
            guard let videoTrack = publication?.track as? VideoTrack else { return }
            videoTrack.add(delegate: self)
        }

        func room(_ room: LiveKit.Room, didUpdateConnectionState connectionState: LiveKit.ConnectionState, from oldValue: LiveKit.ConnectionState) {
            Task { @MainActor [weak viewModel] in
                guard let viewModel else { return }
                switch connectionState {
                case .connected:
                    if viewModel.state != .connected {
                        viewModel.state = .connected
                    }
                case .disconnected:
                    if viewModel.state == .connected {
                        viewModel.state = .disconnected
                    }
                case .reconnecting:
                    logger.info("[RTC]Reconnecting…")
                default:
                    break
                }
            }
        }

        func room(_ room: LiveKit.Room, participantDidConnect participant: RemoteParticipant) {
            Task { @MainActor [weak viewModel] in
                guard let viewModel else { return }
                let identityStr = participant.identity?.stringValue ?? "(none)"
                let sidStr = participant.sid?.stringValue ?? "(none)"
                logger.info("[RTC]Remote participant connected: identity=\(identityStr, privacy: .public) sid=\(sidStr, privacy: .public) name=\(participant.name ?? "(none)", privacy: .public)")
                viewModel.syncParticipants(trackChanged: true)
                if viewModel.isE2eeEnabled, let identity = participant.identity?.stringValue {
                    viewModel.redistributeKey(to: identity)
                }
            }
        }

        func room(_ room: LiveKit.Room, participant: RemoteParticipant, didSubscribeTrack publication: RemoteTrackPublication) {
            observeDimensions(of: publication)
            Task { @MainActor [weak viewModel] in
                let identityStr = participant.identity?.stringValue ?? "(none)"
                let kind = publication.kind.rawValue
                logger.info("[RTC]Subscribed to \(kind, privacy: .public) track from identity=\(identityStr, privacy: .public) trackSid=\(publication.sid, privacy: .public)")
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        func room(_ room: LiveKit.Room, participantDidDisconnect participant: RemoteParticipant) {
            Task { @MainActor [weak viewModel] in
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        func room(_ room: LiveKit.Room, didUpdateSpeakingParticipants participants: [Participant]) {
            Task { @MainActor [weak viewModel] in
                // Speaking state is cosmetic — don't bump videoTrackRevision
                // to avoid disrupting the video renderer.
                viewModel?.syncParticipants(trackChanged: false)
            }
        }

        func room(_ room: LiveKit.Room, localParticipant: LocalParticipant, didPublishTrack publication: LocalTrackPublication) {
            observeDimensions(of: publication)
            Task { @MainActor [weak viewModel] in
                viewModel?.videoTrackRevision += 1
            }
        }

        func room(_ room: LiveKit.Room, participant: RemoteParticipant, didPublishTrack publication: RemoteTrackPublication) {
            Task { @MainActor [weak viewModel] in
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        // First-frame indicator: dimensions become valid here, so bump
        // videoTrackRevision so aspect-ratio observers re-read.
        func room(_ room: LiveKit.Room, participant: RemoteParticipant, trackPublication: RemoteTrackPublication, didUpdateStreamState streamState: StreamState) {
            Task { @MainActor [weak viewModel] in
                viewModel?.videoTrackRevision += 1
            }
        }

        // A peer toggled their camera/mic. We need to refresh the participant
        // snapshot (so `isCameraEnabled` / `isMicrophoneEnabled` flip) AND
        // bump videoTrackRevision so the tile body re-evaluates and
        // `makeVideoView` returns nil for the muted track — which surfaces
        // the placeholder immediately instead of waiting for the next
        // unrelated sync.
        func room(_ room: LiveKit.Room, participant: Participant, trackPublication: TrackPublication, didUpdateIsMuted isMuted: Bool) {
            Task { @MainActor [weak viewModel] in
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        // Track-removed events behave the same way for our UI: refresh
        // participant state and bump the revision so the placeholder shows.
        func room(_ room: LiveKit.Room, participant: RemoteParticipant, didUnpublishTrack publication: RemoteTrackPublication) {
            Task { @MainActor [weak viewModel] in
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        func room(_ room: LiveKit.Room, participant: RemoteParticipant, didUnsubscribeTrack publication: RemoteTrackPublication) {
            Task { @MainActor [weak viewModel] in
                viewModel?.syncParticipants(trackChanged: true)
            }
        }

        func room(_ room: LiveKit.Room, participant: LocalParticipant, didUnpublishTrack publication: LocalTrackPublication) {
            Task { @MainActor [weak viewModel] in
                viewModel?.videoTrackRevision += 1
            }
        }
    }
}
diff --git a/RelayKit/Call/CallWidgetBridge.swift b/RelayKit/Call/CallWidgetBridge.swift new file mode 100644 index 0000000..96f0d80 --- /dev/null +++ b/RelayKit/Call/CallWidgetBridge.swift @@ -0,0 +1,660 @@
// Copyright 2026 Link Dupont
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// CallWidgetBridge.swift
// RelayKit
//
// SPDX-License-Identifier: Apache-2.0

import CryptoKit
import Foundation
import LiveKit
import MatrixRustSDK
import os
import OSLog

private let logger = Logger(subsystem: "RelayKit", category: "CallWidgetBridge")

/// Headless widget-driver bridge for MatrixRTC E2EE.
///
/// Relay embeds LiveKit natively for media but needs the Matrix Widget Driver
/// to handle the MatrixRTC signaling and, crucially, Olm-encrypted to-device
/// delivery of `io.element.call.encryption_keys`. Element Call's web app
/// normally runs inside a WebView that speaks the Widget API (postMessage JSON)
/// to `WidgetDriverHandle`; we collapse the WebView out and speak the same
/// JSON protocol directly from Swift.
///
/// The SDK side (`WidgetDriver`) handles Olm session setup, m.room.encrypted
/// envelope encryption/decryption, and device discovery transparently. We just
/// emit `send_to_device` widget-API requests with `encrypted: true` and
/// receive decrypted payloads back on the recv channel.
///
/// ## Lifecycle
/// 1. `start()` kicks off two tasks: the driver's `run(...)` loop and our
///    JSON recv loop on the handle.
/// 2. The recv loop handles SDK-initiated requests (capabilities, notify,
///    incoming events) and dispatches responses to pending outbound requests.
/// 3. `awaitReady()` blocks until the capabilities handshake has completed.
/// 4. `sendEncryptionKey(...)` and `sendCallMemberState(...)` issue
///    fromWidget requests and await their responses.
/// 5. `shutdown()` cancels both tasks and fails any outstanding continuations.
///    Outstanding requests are also failed when the recv loop ends on its
///    own (the handle's `recv()` returned `nil`).
public final class CallWidgetBridge: @unchecked Sendable {

    // MARK: - Configuration

    /// Element Call widget capability strings. These match the capabilities
    /// declared by the Element Call web app and approved server-side by
    /// `getElementCallRequiredPermissions` (which `CapabilitiesProvider`
    /// returns on the SDK side).
    private static let elementCallCapabilities: [String] = [
        "io.element.requires_client",
        "org.matrix.msc3819.send.to_device:io.element.call.encryption_keys",
        "org.matrix.msc3819.receive.to_device:io.element.call.encryption_keys",
        "org.matrix.msc2762.receive.state_event:org.matrix.msc3401.call.member",
        "org.matrix.msc2762.receive.state_event:m.room.member",
        "org.matrix.msc2762.receive.state_event:m.room.encryption",
        "org.matrix.msc4157.send.delayed_event",
        "org.matrix.msc4157.update_delayed_event"
    ]

    /// Supported matrix-widget-api versions we advertise to the SDK when it
    /// requests `supported_api_versions`. These match what Element Call's
    /// widget declares.
    private static let supportedApiVersions: [String] = [
        "0.0.1",
        "0.0.2"
    ]

    /// Key indices accepted from peers. Mirrors the 0–255 range documented
    /// on `sendEncryptionKey`; anything outside it is dropped before the
    /// `Int32` conversion, which would otherwise trap on overflow.
    private static let validKeyIndices = 0...255

    // MARK: - Properties

    private let widgetId: String
    private let ownUserId: String
    private let ownDeviceId: String
    private let roomId: String
    /// Per-call MatrixRTC membership UUID. Must match the `membershipID`
    /// field in the `org.matrix.msc3401.call.member` state event and the
    /// `member.id` field in outbound `io.element.call.encryption_keys`
    /// to-device payloads so peers can correlate our keys with our
    /// membership event.
    public let membershipId: String
    private weak var keyProvider: BaseKeyProvider?
    private let room: MatrixRustSDK.Room
    private let capabilitiesProvider: ElementCallCapabilitiesProvider

    private var driver: WidgetDriver?
    private var handle: WidgetDriverHandle?
    private var recvTask: Task<Void, Never>?
    private var driverTask: Task<Void, Never>?

    /// State that may be touched from the driver recv loop, the shutdown
    /// path, and outbound-request callers concurrently. Kept behind an
    /// unfair-lock so access is synchronous and async-context-safe.
    ///
    /// Pending requests resume with `Void` — callers fire and forget. If a
    /// future caller needs the response body, wire a separate sink.
    private struct State {
        var pendingRequests: [String: CheckedContinuation<Void, Error>] = [:]
        var readyContinuations: [CheckedContinuation<Void, Never>] = []
        var isReady: Bool = false
    }
    private let state = OSAllocatedUnfairLock(initialState: State())

    // MARK: - Init / Start / Shutdown

    /// Creates a bridge for the given Matrix room.
    ///
    /// - Parameters:
    ///   - room: The SDK room hosting the call.
    ///   - ownUserId: Local user's Matrix ID (e.g. `@alice:server`).
    ///   - ownDeviceId: Local device ID.
    ///   - isRoomEncrypted: Whether the room is encrypted — controls the
    ///     `EncryptionSystem` on the widget settings.
    ///   - keyProvider: The LiveKit key provider that receives inbound keys.
    /// - Throws: Whatever `newVirtualElementCallWidget` or
    ///   `makeWidgetDriver` throw while building the driver.
    public init(
        room: MatrixRustSDK.Room,
        ownUserId: String,
        ownDeviceId: String,
        isRoomEncrypted: Bool,
        keyProvider: BaseKeyProvider?
    ) throws {
        self.room = room
        self.ownUserId = ownUserId
        self.ownDeviceId = ownDeviceId
        self.roomId = room.id()
        self.keyProvider = keyProvider
        self.widgetId = UUID().uuidString
        self.membershipId = UUID().uuidString.lowercased()
        self.capabilitiesProvider = ElementCallCapabilitiesProvider(
            ownUserId: ownUserId,
            ownDeviceId: ownDeviceId
        )

        let props = VirtualElementCallWidgetProperties(
            elementCallUrl: "https://call.element.io",
            widgetId: self.widgetId,
            parentUrl: nil,
            fontScale: nil,
            font: nil,
            encryption: isRoomEncrypted ? .perParticipantKeys : .unencrypted,
            posthogUserId: nil,
            posthogApiHost: nil,
            posthogApiKey: nil,
            rageshakeSubmitUrl: nil,
            sentryDsn: nil,
            sentryEnvironment: nil
        )

        let config = VirtualElementCallWidgetConfig(
            intent: .joinExisting,
            skipLobby: true,
            header: nil,
            hideHeader: true,
            preload: nil,
            appPrompt: false,
            confineToRoom: true,
            hideScreensharing: nil,
            controlledAudioDevices: true,
            sendNotificationType: nil
        )

        let settings = try newVirtualElementCallWidget(props: props, config: config)
        let driverAndHandle = try makeWidgetDriver(settings: settings)
        self.driver = driverAndHandle.driver
        self.handle = driverAndHandle.handle
    }

    /// Starts the driver and the recv loop. Idempotent.
    ///
    /// Element Call's virtual widget settings set `init_on_content_load: true`
    /// inside the Rust SDK, meaning the driver's state machine **waits for a
    /// `content_loaded` fromWidget request before it will do anything**
    /// (including capability negotiation). We fire that proactively so the
    /// driver progresses and eventually sends us the `capabilities` request.
    public func start() {
        guard let driver, let handle else { return }
        guard driverTask == nil, recvTask == nil else { return }

        let room = self.room
        let capabilitiesProvider = self.capabilitiesProvider
        driverTask = Task { [weak self] in
            await driver.run(room: room, capabilitiesProvider: capabilitiesProvider)
            logger.info("[RTC]WidgetDriver.run returned; driver exited")
            // Release anyone parked in `awaitReady()`; they will fail at
            // send time instead of hanging.
            self?.resolveReady()
        }

        recvTask = Task { [weak self] in
            await self?.recvLoop(handle: handle)
        }

        // Kick the state machine off the "Unset" state. Fire-and-forget —
        // the response just echoes back through recvLoop.
        Task { [weak self] in
            do {
                try await self?.sendRequest(action: "content_loaded", data: [:])
                logger.info("[RTC]Widget content_loaded acknowledged by driver")
            } catch {
                logger.warning("[RTC]content_loaded failed: \(error.localizedDescription, privacy: .private)")
            }
        }

        logger.info("[RTC]CallWidgetBridge started (widgetId=\(self.widgetId, privacy: .public))")
    }

    /// Cancels both tasks and fails any outstanding pending requests.
    public func shutdown() {
        recvTask?.cancel()
        driverTask?.cancel()
        recvTask = nil
        driverTask = nil

        // Fail any pending outbound continuations so callers don't hang.
        failPendingRequests(with: .shutdown)

        resolveReady()
        logger.info("[RTC]CallWidgetBridge shut down")
    }

    /// Suspends until the capabilities handshake has completed and the
    /// widget is permitted to send state and to-device events.
    ///
    /// Also returns once the driver has exited or `shutdown()` has run,
    /// because both paths call `resolveReady()`.
    public func awaitReady() async {
        // Fast path: already ready.
        let alreadyReady = state.withLock { $0.isReady }
        if alreadyReady { return }

        await withCheckedContinuation { (cont: CheckedContinuation<Void, Never>) in
            // Re-check under the lock to avoid races with resolveReady().
            let shouldResume = state.withLock { s -> Bool in
                if s.isReady { return true }
                s.readyContinuations.append(cont)
                return false
            }
            if shouldResume { cont.resume() }
        }
    }

    /// Marks the bridge ready (once) and resumes every parked
    /// `awaitReady()` caller outside the lock.
    private func resolveReady() {
        let toResume = state.withLock { s -> [CheckedContinuation<Void, Never>] in
            if s.isReady { return [] }
            s.isReady = true
            let pending = s.readyContinuations
            s.readyContinuations.removeAll()
            return pending
        }
        for c in toResume { c.resume() }
    }

    /// Removes every pending outbound request under the lock and resumes
    /// each one by throwing `error`. Safe to call repeatedly; later calls
    /// find nothing to resume.
    private func failPendingRequests(with error: CallWidgetBridgeError) {
        let pending = state.withLock { s -> [CheckedContinuation<Void, Error>] in
            let values = Array(s.pendingRequests.values)
            s.pendingRequests.removeAll()
            return values
        }
        for continuation in pending {
            continuation.resume(throwing: error)
        }
    }

    // MARK: - Public API

    /// Sends an encrypted `io.element.call.encryption_keys` to-device message
    /// to the specified user/device map via a fromWidget `send_to_device`
    /// request. The SDK handles Olm encryption transparently.
    ///
    /// - Parameters:
    ///   - key: Raw 16-byte AES-128-GCM key.
    ///   - keyIndex: Key index (0–255).
    ///   - toMembers: Map of `userId -> [deviceId]`. Use `"*"` as device id
    ///     to target all devices of that user.
    /// - Throws: `CallWidgetBridgeError` if the request cannot be sent, the
    ///   driver answers with an error, or the bridge stops first.
    public func sendEncryptionKey(
        _ key: Data,
        keyIndex: Int,
        toMembers: [String: [String]]
    ) async throws {
        await awaitReady()

        let base64Key = key.base64EncodedString()
        let sentTs = Int(Date().timeIntervalSince1970 * 1000)

        // Wire format per matrix-js-sdk
        // `EncryptionKeysToDeviceEventContent`:
        //   { keys: {index, key},                  // SINGLE object
        //     member: {id, claimed_device_id},     // id = membership UUID
        //     room_id,
        //     session: {application, call_id, scope},
        //     sent_ts? }
        // Element Call's parser discards payloads where `keys` is an
        // array or where `member`/`room_id`/`session` are missing — which
        // is why earlier calls completed key exchange yet peers never
        // decoded our frames.
        let content: [String: Any] = [
            "keys": [
                "index": keyIndex,
                "key": base64Key
            ] as [String: Any],
            "member": [
                "id": self.membershipId,
                "claimed_device_id": self.ownDeviceId
            ] as [String: Any],
            "room_id": self.roomId,
            "session": [
                "application": "m.call",
                "call_id": "",
                "scope": "m.room"
            ] as [String: Any],
            "sent_ts": sentTs
        ]

        var messages: [String: [String: Any]] = [:]
        for (userId, deviceIds) in toMembers {
            var deviceMessages: [String: Any] = [:]
            for deviceId in deviceIds {
                deviceMessages[deviceId] = content
            }
            messages[userId] = deviceMessages
        }

        let data: [String: Any] = [
            "type": CallEncryptionService.encryptionKeysEventType,
            "encrypted": true,
            "messages": messages
        ]

        // SHA-256 fingerprint of the raw IKM going on the wire. This is
        // compared against the fingerprint logged by `setRawKey` at the local
        // cryptor registration site. Matching prefixes confirm the same 16
        // bytes are both (a) driving our outgoing AES-128-GCM and (b) being
        // base64'd into this to-device payload. Diverging prefixes localise
        // the bug to the key-capture path in `CallViewModel.connect`.
        let fp = SHA256.hash(data: key).prefix(8).map { String(format: "%02x", $0) }.joined()

        try await sendRequest(action: "send_to_device", data: data)
        logger.info("[RTC]Sent encryption key (index \(keyIndex)) to \(toMembers.count) user(s) member.id=\(self.membershipId, privacy: .public) sha256[0..8]=\(fp, privacy: .public)")
    }

    /// Sends a MatrixRTC call member state event
    /// (`org.matrix.msc3401.call.member`) via a fromWidget `send_event`
    /// request.
    ///
    /// - Parameters:
    ///   - content: JSON-serialisable event content.
    ///   - stateKey: State key for the event.
    /// - Throws: `CallWidgetBridgeError` if the request cannot be sent, the
    ///   driver answers with an error, or the bridge stops first.
    public func sendCallMemberState(
        content: [String: Any],
        stateKey: String
    ) async throws {
        await awaitReady()

        let data: [String: Any] = [
            "type": CallEncryptionService.callMemberEventType,
            "state_key": stateKey,
            "content": content,
            "room_id": roomId
        ]

        try await sendRequest(action: "send_event", data: data)
        logger.info("[RTC]Sent call member state event (state_key=\(stateKey, privacy: .public))")
    }

    // MARK: - Request / Response plumbing

    /// Issues a fromWidget request and awaits acknowledgement. The response
    /// body is not surfaced — if a future call-site needs it, add a separate
    /// delivery channel keyed by `requestId`.
    ///
    /// The continuation is registered before the message is sent so a fast
    /// response cannot arrive ahead of its registration. It is resumed by
    /// exactly one of: the recv loop (response), the send task (send
    /// failure), `shutdown()`, or the recv loop ending.
    private func sendRequest(action: String, data: [String: Any]) async throws {
        guard let handle else {
            throw CallWidgetBridgeError.notStarted
        }

        let requestId = UUID().uuidString
        let msg: [String: Any] = [
            "api": "fromWidget",
            "widgetId": widgetId,
            "requestId": requestId,
            "action": action,
            "data": data
        ]
        let json = try Self.encode(msg)

        try await withCheckedThrowingContinuation { (cont: CheckedContinuation<Void, Error>) in
            state.withLock { $0.pendingRequests[requestId] = cont }

            Task {
                let ok = await handle.send(msg: json)
                if !ok {
                    // Only resume if nobody else claimed the continuation.
                    let waiting = state.withLock { s -> CheckedContinuation<Void, Error>? in
                        s.pendingRequests.removeValue(forKey: requestId)
                    }
                    waiting?.resume(throwing: CallWidgetBridgeError.sendFailed)
                }
            }
        }
    }

    // MARK: - Recv loop

    /// Reads JSON messages from the driver handle until the task is
    /// cancelled or `recv()` returns `nil`, routing responses to pending
    /// requests and everything else to `handleIncoming`.
    private func recvLoop(handle: WidgetDriverHandle) async {
        // Once this loop stops nobody can deliver responses any more, so
        // fail whatever is still waiting instead of leaving callers
        // suspended forever. After `shutdown()` this finds nothing to do.
        defer { failPendingRequests(with: .sendFailed) }

        while !Task.isCancelled {
            guard let raw = await handle.recv() else {
                logger.info("[RTC]WidgetDriverHandle.recv returned nil; loop exiting")
                break
            }

            // SECURITY: never log the raw widget JSON. Outbound and inbound
            // `send_to_device` payloads of type `io.element.call.encryption_keys`
            // carry raw AES keys in the `keys.key` field — those would land
            // unredacted in the system log. Action / type only; full bodies
            // are .private so they're stripped from non-debug Console output.
            logger.debug("[RTC]widget recv (\(raw.utf8.count) bytes)")

            guard let data = raw.data(using: .utf8),
                  let msg = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
                logger.warning("[RTC]Non-JSON message from widget driver: \(raw, privacy: .private)")
                continue
            }

            // Responses to our outbound fromWidget requests.
            if let api = msg["api"] as? String,
               api == "fromWidget",
               msg["response"] != nil,
               let requestId = msg["requestId"] as? String {
                let cont = state.withLock { s -> CheckedContinuation<Void, Error>? in
                    s.pendingRequests.removeValue(forKey: requestId)
                }
                let response = (msg["response"] as? [String: Any]) ?? [:]
                if let err = response["error"] as? [String: Any] {
                    let message = (err["message"] as? String) ?? "unknown"
                    cont?.resume(throwing: CallWidgetBridgeError.widgetError(message))
                } else {
                    cont?.resume(returning: ())
                }
                continue
            }

            // Incoming SDK-initiated requests (toWidget).
            guard let action = msg["action"] as? String else {
                logger.warning("[RTC]Widget message missing action: \(raw, privacy: .private)")
                continue
            }
            let requestId = (msg["requestId"] as? String) ?? ""
            let reqData = (msg["data"] as? [String: Any]) ?? [:]

            await handleIncoming(action: action, requestId: requestId, data: reqData, fullMessage: msg, handle: handle)
        }
    }

    /// Handles one SDK-initiated request and always replies to it, with an
    /// empty body for actions that need no payload.
    private func handleIncoming(
        action: String,
        requestId: String,
        data: [String: Any],
        fullMessage: [String: Any],
        handle: WidgetDriverHandle
    ) async {
        var responseBody: [String: Any] = [:]

        switch action {
        case "capabilities":
            // SDK is asking which capabilities we want. Replying here
            // concludes the first half of negotiation; the driver will then
            // call our `acquireCapabilities` provider to approve.
            responseBody = ["capabilities": Self.elementCallCapabilities]

        case "notify_capabilities":
            // SDK telling us what was approved. After this we're ready.
            responseBody = [:]
            resolveReady()

        case "supported_api_versions":
            responseBody = ["supported_versions": Self.supportedApiVersions]

        case "send_to_device":
            handleIncomingToDevice(data: data)
            responseBody = [:]

        case "send_event", "update_state":
            // Incoming Matrix events observed by the widget driver.
            // MatrixRTC member state is handled by Element Call peers
            // directly; we just need to ack these. Log and move on.
            if let type = data["type"] as? String {
                logger.info("[RTC]widget incoming \(action, privacy: .public) type=\(type, privacy: .public)")
            }
            responseBody = [:]

        case "content_loaded":
            responseBody = [:]

        default:
            logger.info("[RTC]widget unhandled action=\(action, privacy: .public); acking with {}")
            responseBody = [:]
        }

        // Belt-and-braces: once the driver is sending any post-negotiation
        // event to us (send_event / send_to_device), it has approved our
        // capabilities even if we missed the explicit notify_capabilities
        // message. Flip readiness so outbound sends aren't stuck.
        if action == "send_to_device" || action == "send_event" || action == "update_state" {
            resolveReady()
        }

        await reply(to: fullMessage, requestId: requestId, response: responseBody, handle: handle)
    }

    /// Echoes `original` back to the driver with a `response` field added.
    private func reply(
        to original: [String: Any],
        requestId: String,
        response: [String: Any],
        handle: WidgetDriverHandle
    ) async {
        var envelope = original
        envelope["response"] = response
        // requestId is already in the echoed message; ensure it's set.
        if !requestId.isEmpty { envelope["requestId"] = requestId }

        guard let json = try? Self.encode(envelope) else {
            logger.error("[RTC]Failed to encode widget reply")
            return
        }
        let ok = await handle.send(msg: json)
        if !ok {
            logger.warning("[RTC]handle.send returned false replying to action=\(original["action"] as? String ?? "?", privacy: .public)")
        }
    }

    // MARK: - Incoming key plumbing

    /// Extracts media keys from an inbound `io.element.call.encryption_keys`
    /// to-device payload and registers them with the LiveKit key provider.
    /// Payloads of any other type, or without a `sender`, are ignored.
    private func handleIncomingToDevice(data: [String: Any]) {
        guard let type = data["type"] as? String,
              type == CallEncryptionService.encryptionKeysEventType,
              let sender = data["sender"] as? String else {
            return
        }
        let content = (data["content"] as? [String: Any]) ?? [:]
        guard let keyProvider else {
            logger.warning("[RTC]No keyProvider; dropping inbound key from \(sender, privacy: .private)")
            return
        }

        // Wire format has evolved. Newer Element Call sends:
        //   content: { keys: { index, key }, member: { id, claimed_device_id }, room_id, ... }
        // Older callers (including ourselves pre-fix) send:
        //   content: { keys: [ { index, key }, ... ], device_id, call_id, ... }
        // Support both.
        var keyEntries: [[String: Any]] = []
        if let arr = content["keys"] as? [[String: Any]] {
            keyEntries = arr
        } else if let single = content["keys"] as? [String: Any] {
            keyEntries = [single]
        } else {
            logger.warning("[RTC]encryption_keys to-device missing keys from \(sender, privacy: .private)")
            return
        }

        let member = content["member"] as? [String: Any]
        let memberId = (member?["id"] as? String) ?? ""
        let claimedDeviceId = (member?["claimed_device_id"] as? String) ?? ""
        let topDeviceId = (content["device_id"] as? String) ?? ""
        let deviceId = !claimedDeviceId.isEmpty ? claimedDeviceId : topDeviceId

        // LiveKit participant identity lookup order. Element Call connects to
        // the SFU with identity `@user:server:deviceId` (confirmed in the
        // MatrixRTC JWT grant), so that's what we need to key on for the
        // LKRTCFrameCryptorKeyProvider to route the key to the right
        // participant's decoder.
        //
        // `member.id` is the MSC4143 per-membership UUID — an *event*-level
        // identifier, not a LiveKit participant identity. It only enters the
        // fallback chain so older peers that somehow omit the device id still
        // get routed.
        let participantIdentity: String
        if !deviceId.isEmpty {
            participantIdentity = "\(sender):\(deviceId)"
        } else if !memberId.isEmpty {
            participantIdentity = memberId
        } else {
            participantIdentity = sender
        }

        for entry in keyEntries {
            guard let base64Key = entry["key"] as? String,
                  let index = entry["index"] as? Int,
                  let keyData = Data(base64Encoded: base64Key) else {
                continue
            }
            // The index comes from a remote peer. `Int32(index)` traps on
            // overflow, so an out-of-range value must be rejected here
            // rather than allowed to crash the process.
            guard Self.validKeyIndices.contains(index) else {
                logger.warning("[RTC]Dropping inbound key with out-of-range index \(index) from \(sender, privacy: .private)")
                continue
            }
            CallEncryptionService.setRawKey(
                keyData,
                on: keyProvider,
                participantId: participantIdentity,
                index: Int32(index)
            )
            // Log with `.public` so we can correlate the key routing
            // identity (what we register the frame-decryption key under)
            // with the actual LiveKit participant identity (logged on
            // connect) — if these do not match byte-for-byte, LiveKit will
            // silently fail to decrypt this peer's frames.
            logger.info("[RTC]Applied inbound key -> routed to LiveKit participantId=\(participantIdentity, privacy: .public) sender=\(sender, privacy: .public) device=\(deviceId, privacy: .public) member=\(memberId, privacy: .public) index=\(index)")
        }
    }

    // MARK: - Helpers

    /// Serialises a widget-API message to a JSON string with sorted keys.
    ///
    /// - Throws: Whatever `JSONSerialization.data(withJSONObject:options:)`
    ///   throws for the given value.
    private static func encode(_ value: [String: Any]) throws -> String {
        // `.sortedKeys` guarantees `action` is serialised before `data` in
        // top-level messages. The Rust SDK uses
        // `#[serde(tag = "action", content = "data")]` on its FromWidget enum;
        // when `data` appears first, serde falls back to its Content-buffering
        // path, which fails for `Raw` newtype fields with
        // "invalid type: newtype struct, expected any valid JSON value".
        // Sorting keys sidesteps the bug entirely.
        let data = try JSONSerialization.data(
            withJSONObject: value,
            options: [.sortedKeys]
        )
        // Non-failable decode: no silent fallback to an empty "{}" message.
        return String(decoding: data, as: UTF8.self)
    }
}

// MARK: - Capabilities Provider

/// Implements `WidgetCapabilitiesProvider` by returning the Element Call
/// required permissions verbatim. The SDK intersects these with whatever
/// the widget requests over JSON.
private final class ElementCallCapabilitiesProvider: WidgetCapabilitiesProvider, @unchecked Sendable {
    /// Matrix user ID handed to `getElementCallRequiredPermissions`.
    private let userId: String
    /// Device ID handed to `getElementCallRequiredPermissions`.
    private let deviceId: String

    init(ownUserId: String, ownDeviceId: String) {
        userId = ownUserId
        deviceId = ownDeviceId
    }

    /// Ignores the requested `capabilities` and answers with the Element
    /// Call permission set for the stored user and device.
    func acquireCapabilities(capabilities: WidgetCapabilities) -> WidgetCapabilities {
        getElementCallRequiredPermissions(ownUserId: userId, ownDeviceId: deviceId)
    }
}

// MARK: - Errors

/// Failures surfaced by `CallWidgetBridge` request plumbing.
enum CallWidgetBridgeError: LocalizedError {
    /// A request was issued while the bridge had no driver handle.
    case notStarted
    /// The driver handle refused the outgoing message.
    case sendFailed
    /// The bridge was shut down while the request was outstanding.
    case shutdown
    /// The driver answered with an error; the payload is its message.
    case widgetError(String)

    var errorDescription: String? {
        switch self {
        case .notStarted: "Widget bridge is not started."
        case .sendFailed: "Failed to send widget message; driver may have exited."
        case .shutdown: "Widget bridge was shut down before the request completed."
        case .widgetError(let message): "Widget protocol error: \(message)"
        }
    }
}
diff --git a/RelayKit/Call/LiveKitCredentialService.swift b/RelayKit/Call/LiveKitCredentialService.swift
new file mode 100644
index 0000000..c9aaee9
--- /dev/null
+++ b/RelayKit/Call/LiveKitCredentialService.swift
@@ -0,0 +1,309 @@
// Copyright 2026 Link Dupont
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Foundation
import os

private let logger = Logger(subsystem: "RelayKit", category: "LiveKitCredentialService")

/// Fetches LiveKit credentials (WebSocket URL + JWT) for a Matrix room by
/// implementing the MatrixRTC credential exchange flow (MSC4143).
///
/// **Step 1 – Discover the SFU URL**
/// Tries `GET /_matrix/client/unstable/org.matrix.msc4143/rtc/transports`.
/// If that attempt fails for any reason (not only a 404), falls back to
/// reading `org.matrix.msc4143.rtc_foci` from
/// `GET {server}/.well-known/matrix/client`.
///
/// **Step 2 – Request an OpenID token**
/// `POST /_matrix/client/v3/user/{userId}/openid/request_token` using the
/// session's Matrix access token as Bearer auth.
///
/// **Step 3 – Exchange for a LiveKit JWT**
/// `POST {sfuURL}/get_token` (MSC4143 v2). Falls back to the legacy
/// `POST {sfuURL}/sfu/get` endpoint if the v2 attempt fails for any reason.
///
/// Both exchange endpoints return `{ url, jwt }` where `url` is the LiveKit
/// WebSocket address and `jwt` is the LiveKit room access token.
struct LiveKitCredentialService {

    let homeserver: String
    let accessToken: String
    let userID: String
    let deviceID: String
    /// The Matrix server name (e.g. `fedora.im`) extracted from the user ID.
    /// Used for `.well-known` lookups, which must query the server name domain,
    /// not the delegated homeserver URL (e.g. `fedora.ems.host`).
    let serverName: String

    // MARK: - Public Entry Point

    /// Returns `(livekitWebSocketURL, livekitJWT, sfuServiceURL)` for the given Matrix room.
    /// The `sfuServiceURL` is the SFU service URL from discovery, used in call member events.
    ///
    /// - Throws: `LiveKitCredentialError`, or a transport/decoding error from
    ///   the final step that has no further fallback.
    func credentials(for roomID: String) async throws -> (url: String, token: String, sfuServiceURL: String) {
        logger.info("[RTC]Fetching LiveKit credentials for room \(roomID, privacy: .private)")
        let sfuURL = try await discoverSFUURL()
        logger.info("[RTC]SFU URL discovered: \(sfuURL)")
        let openIDToken = try await requestOpenIDToken()
        logger.debug("[RTC]OpenID token obtained")
        let (url, jwt) = try await fetchLiveKitToken(sfuURL: sfuURL, roomID: roomID, openIDToken: openIDToken)
        return (url, jwt, sfuURL)
    }

    // MARK: - Helpers

    /// Percent-encodes `value` so it can be used as ONE URL path segment.
    ///
    /// `.urlPathAllowed` on its own leaves `/` untouched, which would split
    /// a value containing `/` into several segments; Matrix user ID
    /// localparts may contain `/`, so it has to be escaped here.
    static func pathSegmentEncoded(_ value: String) -> String {
        var allowed = CharacterSet.urlPathAllowed
        allowed.remove(charactersIn: "/")
        return value.addingPercentEncoding(withAllowedCharacters: allowed) ?? value
    }

    // MARK: - Step 1: Discover SFU URL

    /// Tries the transports endpoint, then `.well-known`. Individual
    /// failures are deliberately swallowed (`try?`) so the next source is
    /// attempted; only "nothing worked" is reported.
    private func discoverSFUURL() async throws -> String {
        // Prefer the MSC4143 transports endpoint
        if let url = try? await fetchRTCTransportsURL() {
            return url
        }
        // Fall back to .well-known
        if let url = try? await fetchWellKnownSFUURL() {
            return url
        }
        throw LiveKitCredentialError.sfuURLNotFound
    }

    /// Reads the first `livekit` transport from the homeserver's
    /// authenticated transports endpoint.
    private func fetchRTCTransportsURL() async throws -> String {
        let base = homeserver.trimmingCharacters(in: .init(charactersIn: "/"))
        guard let url = URL(string: "\(base)/_matrix/client/unstable/org.matrix.msc4143/rtc/transports") else {
            throw LiveKitCredentialError.invalidURL
        }
        var request = URLRequest(url: url)
        request.setValue("Bearer \(accessToken)", forHTTPHeaderField: "Authorization")

        let (data, response) = try await URLSession.shared.data(for: request)
        guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
            throw LiveKitCredentialError.serverError
        }

        let decoded = try JSONDecoder().decode(RTCTransportsResponse.self, from: data)
        guard let livekit = decoded.transports.first(where: { $0.type == "livekit" }) else {
            throw LiveKitCredentialError.sfuURLNotFound
        }
        return livekit.livekitServiceUrl
    }

    /// Reads the first `livekit` focus from the server name's
    /// `.well-known/matrix/client` document (unauthenticated).
    private func fetchWellKnownSFUURL() async throws -> String {
        guard let url = URL(string: "https://\(serverName)/.well-known/matrix/client") else {
            throw LiveKitCredentialError.invalidURL
        }

        let (data, response) = try await URLSession.shared.data(from: url)
        guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
            throw LiveKitCredentialError.serverError
        }

        let decoded = try JSONDecoder().decode(WellKnownResponse.self, from: data)
        guard let foci = decoded.rtcFoci,
              let first = foci.first(where: { $0.type == "livekit" }) else {
            throw LiveKitCredentialError.sfuURLNotFound
        }
        return first.livekitServiceUrl
    }

    // MARK: - Step 2: Request OpenID Token

    /// Requests an OpenID token for `userID` from the homeserver.
    private func requestOpenIDToken() async throws -> OpenIDTokenPayload {
        let base = homeserver.trimmingCharacters(in: .init(charactersIn: "/"))
        // The user ID is a single path segment: `/` inside it must be escaped.
        let encoded = Self.pathSegmentEncoded(userID)
        guard let url = URL(string: "\(base)/_matrix/client/v3/user/\(encoded)/openid/request_token") else {
            throw LiveKitCredentialError.invalidURL
        }
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("Bearer \(accessToken)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        request.httpBody = Data("{}".utf8)

        let (data, response) = try await URLSession.shared.data(for: request)
        guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
            throw LiveKitCredentialError.openIDTokenFailed
        }
        return try JSONDecoder().decode(OpenIDTokenPayload.self, from: data)
    }

    // MARK: - Step 3: Exchange for LiveKit JWT

    /// Tries the v2 exchange and, if it fails for any reason, the legacy
    /// one. Only the legacy attempt's error reaches the caller.
    private func fetchLiveKitToken(
        sfuURL: String,
        roomID: String,
        openIDToken: OpenIDTokenPayload
    ) async throws -> (url: String, token: String) {
        // Try the v2 endpoint first, fall back to legacy
        if let result = try? await fetchLiveKitTokenV2(sfuURL: sfuURL, roomID: roomID, openIDToken: openIDToken) {
            return result
        }
        return try await fetchLiveKitTokenLegacy(sfuURL: sfuURL, roomID: roomID, openIDToken: openIDToken)
    }

    /// `POST {sfuURL}/get_token` with the room, OpenID token and a member
    /// descriptor whose id is `userID:deviceID`.
    private func fetchLiveKitTokenV2(
        sfuURL: String,
        roomID: String,
        openIDToken: OpenIDTokenPayload
    ) async throws -> (url: String, token: String) {
        let base = sfuURL.trimmingCharacters(in: .init(charactersIn: "/"))
        guard let url = URL(string: "\(base)/get_token") else {
            throw LiveKitCredentialError.invalidURL
        }
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        let body = GetTokenRequest(
            roomId: roomID,
            openidToken: openIDToken,
            member: .init(id: "\(userID):\(deviceID)", claimedUserId: userID, claimedDeviceId: deviceID)
        )
        request.httpBody = try JSONEncoder().encode(body)

        let (data, response) = try await URLSession.shared.data(for: request)
        guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
            throw LiveKitCredentialError.tokenExchangeFailed
        }
        let decoded = try JSONDecoder().decode(LiveKitTokenResponse.self, from: data)
        logger.info("[RTC]LiveKit credentials obtained via /get_token")
        return (decoded.url, decoded.jwt)
    }

    /// `POST {sfuURL}/sfu/get` with the room, OpenID token and device ID.
    private func fetchLiveKitTokenLegacy(
        sfuURL: String,
        roomID: String,
        openIDToken: OpenIDTokenPayload
    ) async throws -> (url: String, token: String) {
        let base = sfuURL.trimmingCharacters(in: .init(charactersIn: "/"))
        guard let url = URL(string: "\(base)/sfu/get") else {
            throw LiveKitCredentialError.invalidURL
        }
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        let body = SFUGetRequest(room: roomID, openidToken: openIDToken, deviceId: deviceID)
        request.httpBody = try JSONEncoder().encode(body)

        let (data, response) = try await URLSession.shared.data(for: request)
        guard let http = response as? HTTPURLResponse, http.statusCode == 200 else {
            throw LiveKitCredentialError.tokenExchangeFailed
        }
        let decoded = try JSONDecoder().decode(LiveKitTokenResponse.self, from: data)
        logger.info("[RTC]LiveKit credentials obtained via legacy /sfu/get")
        return (decoded.url, decoded.jwt)
    }
}

// MARK: - Errors

/// Failures raised while discovering the SFU or exchanging credentials.
enum LiveKitCredentialError: LocalizedError {
    case sfuURLNotFound
    case invalidURL
    case serverError
    case openIDTokenFailed
    case tokenExchangeFailed

    var errorDescription: String? {
        switch self {
        case .sfuURLNotFound:
            return "This homeserver has no LiveKit call server configured. " +
                "Check that your server supports MatrixRTC (MSC4143)."
        case .invalidURL:
            return "Could not construct a valid URL for the call server."
        case .serverError:
            return "The homeserver returned an error while fetching call credentials."
        case .openIDTokenFailed:
            return "Failed to obtain an OpenID token from the homeserver."
        case .tokenExchangeFailed:
            return "The call server rejected the credential exchange."
        }
    }
}

// MARK: - Codable Types

/// Response body of the MSC4143 transports endpoint.
private struct RTCTransportsResponse: Decodable {
    let transports: [Transport]
    struct Transport: Decodable {
        let type: String
        let livekitServiceUrl: String
        enum CodingKeys: String, CodingKey {
            case type
            case livekitServiceUrl = "livekit_service_url"
        }
    }
}

/// The part of `.well-known/matrix/client` this service reads.
private struct WellKnownResponse: Decodable {
    let rtcFoci: [RtcFocus]?
    struct RtcFocus: Decodable {
        let type: String
        let livekitServiceUrl: String
        enum CodingKeys: String, CodingKey {
            case type
            case livekitServiceUrl = "livekit_service_url"
        }
    }
    enum CodingKeys: String, CodingKey {
        case rtcFoci = "org.matrix.msc4143.rtc_foci"
    }
}

// Internal type — not exposed outside RelayKit.
struct OpenIDTokenPayload: Codable {
    let accessToken: String
    let tokenType: String
    let matrixServerName: String
    let expiresIn: Int
    enum CodingKeys: String, CodingKey {
        case accessToken = "access_token"
        case tokenType = "token_type"
        case matrixServerName = "matrix_server_name"
        case expiresIn = "expires_in"
    }
}

/// Request body for `POST {sfuURL}/get_token`.
private struct GetTokenRequest: Encodable {
    let roomId: String
    let openidToken: OpenIDTokenPayload
    let member: Member
    struct Member: Encodable {
        let id: String
        let claimedUserId: String
        let claimedDeviceId: String
        enum CodingKeys: String, CodingKey {
            case id
            case claimedUserId = "claimed_user_id"
            case claimedDeviceId = "claimed_device_id"
        }
    }
    enum CodingKeys: String, CodingKey {
        case roomId = "room_id"
        case openidToken = "openid_token"
        case member
    }
}

/// Request body for the legacy `POST {sfuURL}/sfu/get`.
private struct SFUGetRequest: Encodable {
    let room: String
    let openidToken: OpenIDTokenPayload
    let deviceId: String
    enum CodingKeys: String, CodingKey {
        case room
        case openidToken = "openid_token"
        case deviceId = "device_id"
    }
}

/// `{ url, jwt }` as returned by both exchange endpoints.
private struct LiveKitTokenResponse: Decodable {
    let url: String
    let jwt: String
}
+diff --git 
a/RelayKit/Call/LiveKitLogBridge.swift b/RelayKit/Call/LiveKitLogBridge.swift new file mode 100644 index 0000000..4de7926 --- /dev/null +++ b/RelayKit/Call/LiveKitLogBridge.swift @@ -0,0 +1,77 @@ +// Copyright 2026 Link Dupont +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import LiveKit +import os + +/// A LiveKit `Logger` implementation that forwards all LiveKit SDK log output +/// through `os.Logger` with a `[RTC]` prefix on every message so calling-related +/// logs can be filtered out of the Console with a single token. +/// +/// Install once, as early as possible (before any `LiveKit.Room` is created). 
+struct LiveKitLogBridge: LiveKit.Logger {
+    /// Shared sink for every forwarded LiveKit message.
+    private static let osLogger = os.Logger(subsystem: "RelayKit", category: "LiveKitSDK")
+
+    /// Formats one LiveKit log record as `[RTC] <Type>.<function> <message>`,
+    /// followed by ` [key: value, …]` when metadata is present, and writes it to
+    /// `osLogger` at the matching level. `source`, `file`, and `line` are not part
+    /// of the output.
+    // swiftlint:disable:next function_parameter_count
+    func log(
+        _ message: @autoclosure () -> CustomStringConvertible,
+        _ level: LiveKit.LogLevel,
+        source: @autoclosure () -> String?,
+        file _: StaticString,
+        type: Any.Type,
+        function: StaticString,
+        line _: UInt,
+        metaData: ScopedMetadataContainer
+    ) {
+        let origin = "\(String(describing: type)).\(function)"
+        let suffix = metaData.isEmpty
+            ? ""
+            : " [" + metaData.map { "\($0): \($1)" }.joined(separator: ", ") + "]"
+        let entry = "[RTC] \(origin) \(message().description)\(suffix)"
+
+        // SECURITY: what the SDK chooses to log is outside our control and can
+        // include connection JWTs, signaling URLs, peer identities, etc. The whole
+        // entry is therefore interpolated as .private so the Console redacts it on
+        // release; developers can still read it by enabling unredacted logging in Xcode.
+        switch level {
+        case .debug:
+            Self.osLogger.debug("\(entry, privacy: .private)")
+        case .info:
+            Self.osLogger.info("\(entry, privacy: .private)")
+        case .warning:
+            Self.osLogger.warning("\(entry, privacy: .private)")
+        case .error:
+            Self.osLogger.error("\(entry, privacy: .private)")
+        }
+    }
+}
+
+/// Installs ``LiveKitLogBridge`` exactly once, regardless of how many times
+/// ``install()`` is called. Safe to invoke from any thread; the actual swap
+/// runs on first access of the static initializer.
+enum LiveKitLogBridgeInstaller {
+    /// Evaluated at most once: Swift initializes a static stored property lazily on
+    /// first access, so the logger swap inside the closure runs a single time.
+    private static let installed: Void = {
+        LiveKitSDK.setLogger(LiveKitLogBridge())
+    }()
+
+    static func install() {
+        // Reading `installed` is what forces its one-time initializer to run.
+        _ = installed
+    }
+}
diff --git a/RelayKit/Services/MatrixService.swift b/RelayKit/Services/MatrixService.swift
index 9fe2afb..e63f64d 100644
--- a/RelayKit/Services/MatrixService.swift
+++ b/RelayKit/Services/MatrixService.swift
@@ -619,6 +619,24 @@ public final class MatrixService: MatrixServiceProtocol {
         try await sdkRoom.leave()
     }

+    /// Power level overrides that set the MatrixRTC call membership and call encryption key
+    /// state events to level 0 so any room member can send them (matching Element Call's setup).
+    private static let callPowerLevels = PowerLevels(
+        usersDefault: nil,
+        eventsDefault: nil,
+        stateDefault: nil,
+        ban: nil,
+        kick: nil,
+        redact: nil,
+        invite: nil,
+        notifications: nil,
+        users: [:],
+        events: [
+            "org.matrix.msc3401.call.member": 0,
+            "io.element.call.encryption_keys": 0
+        ]
+    )
+
     public func createRoom(name: String, topic: String?, isPublic: Bool) async throws -> String {
         guard let client else { throw RelayError.notLoggedIn }
         let params = CreateRoomParameters(
@@ -627,7 +645,8 @@ public final class MatrixService: MatrixServiceProtocol {
             isEncrypted: !isPublic,
             isDirect: false,
             visibility: isPublic ? .public : .private,
-            preset: isPublic ? .publicChat : .privateChat
+            preset: isPublic ? .publicChat : .privateChat,
+            powerLevelContentOverride: Self.callPowerLevels
         )
         return try await client.createRoom(parameters: params)
     }
@@ -641,6 +660,7 @@ public final class MatrixService: MatrixServiceProtocol {
             isDirect: false,
             visibility: options.isPublic ? .public : .private,
             preset: options.isPublic ?
.publicChat : .privateChat,
+            powerLevelContentOverride: Self.callPowerLevels,
             canonicalAlias: options.address,
             isSpace: options.isSpace
         )
@@ -662,7 +682,8 @@ public final class MatrixService: MatrixServiceProtocol {
             isDirect: true,
             visibility: .private,
             preset: .trustedPrivateChat,
-            invite: [userId]
+            invite: [userId],
+            powerLevelContentOverride: Self.callPowerLevels
         )
         return try await client.createRoom(parameters: params)
     }
@@ -1440,6 +1461,76 @@ public final class MatrixService: MatrixServiceProtocol {
         return viewModel
     }

+    /// Creates the view model for a call in the given room.
+    ///
+    /// When the client session can be read, the view model is given an encryption
+    /// context; its `isRoomEncrypted` flag is `true` only if the room's info could be
+    /// loaded and reports an encryption state other than `.notEncrypted`.
+    ///
+    /// - Parameter roomId: The ID of the Matrix room the call belongs to.
+    /// - Returns: `nil` when there is no client. Otherwise a call view model, created
+    ///   without an encryption context if reading the session throws.
+    public func makeCallViewModel(roomId: String) async -> (any CallViewModelProtocol)? {
+        guard let client else { return nil }
+        do {
+            let session = try client.session()
+            let sdkRoom = room(id: roomId)
+            // Check if the Matrix room has encryption enabled to decide whether
+            // to use LiveKit-level E2EE for the call.
+            let isEncrypted: Bool
+            if let sdkRoom, let info = try? await sdkRoom.roomInfo() {
+                isEncrypted = info.encryptionState != .notEncrypted
+            } else {
+                // NOTE(review): a missing room or a failed room-info lookup is treated
+                // as "not encrypted" — confirm this fallback is intended.
+                isEncrypted = false
+            }
+            let context = CallViewModel.EncryptionContext(
+                homeserver: client.homeserver,
+                accessToken: session.accessToken,
+                userID: client.userID,
+                deviceID: client.deviceID,
+                roomID: roomId,
+                isRoomEncrypted: isEncrypted,
+                matrixRoom: sdkRoom
+            )
+            return CallViewModel(encryptionContext: context)
+        } catch {
+            logger.warning("Could not create encryption context, falling back to unencrypted call: \(error.localizedDescription)")
+            return CallViewModel()
+        }
+    }
+
+    /// Fetches LiveKit connection credentials for a call in the given room.
+    ///
+    /// - Parameter roomId: The ID of the Matrix room the call belongs to.
+    /// - Returns: The LiveKit URL, the token, and the SFU service URL reported by
+    ///   `LiveKitCredentialService`.
+    /// - Throws: `LiveKitCredentialError.serverError` when there is no client; otherwise
+    ///   any error thrown while reading the session or fetching the credentials.
+    public func callCredentials(for roomId: String) async throws -> (livekitURL: String, token: String, sfuServiceURL: String) {
+        guard let client else {
+            throw LiveKitCredentialError.serverError
+        }
+        let session = try client.session()
+        // The server name is everything after the FIRST ":" of the user ID
+        // (e.g. "@user:fedora.im" → "fedora.im"). Slicing instead of splitting keeps
+        // ports and IPv6 literals such as "[2001:db8::1]:8448" intact.
+        // .well-known must be queried on the server name domain, not the delegated homeserver.
+        let userID = client.userID
+        let serverName = userID.firstIndex(of: ":")
+            .map { String(userID[userID.index(after: $0)...]) } ?? ""
+        let service = LiveKitCredentialService(
+            homeserver: client.homeserver,
+            accessToken: session.accessToken,
+            userID: userID,
+            deviceID: client.deviceID,
+            serverName: serverName
+        )
+        let result = try await service.credentials(for: roomId)
+        return (livekitURL: result.url, token: result.token, sfuServiceURL: result.sfuServiceURL)
+    }
+
     public func declinePendingVerificationRequest() async {
         pendingVerificationRequest = nil
         try? await verificationController?.cancelVerification()
diff --git a/RelayKit/Services/RoomListManager.swift b/RelayKit/Services/RoomListManager.swift
index 5bf2069..9b3180b 100644
--- a/RelayKit/Services/RoomListManager.swift
+++ b/RelayKit/Services/RoomListManager.swift
@@ -802,8 +802,15 @@ private final class RoomEntry: Identifiable {
                 avatarUrl: avatarUrl,
                 prevAvatarUrl: prevAvatarUrl
             ))
-        case .state(_, let content):
-            return AttributedString(TimelineMessageMapper.stateEventDescription(content))
+        case .state(let stateKey, let content):
+            let (body, _) = TimelineMessageMapper.describeStateEvent(
+                content,
+                stateKey: stateKey,
+                senderDisplayName: nil,
+                senderId: ""
+            )
+            guard let body else { return nil }
+            return AttributedString(body)
         default: return nil
         }
         // swiftlint:enable identifier_name
diff --git a/RelayKit/Services/TimelineMessageMapper.swift b/RelayKit/Services/TimelineMessageMapper.swift
index 79b83c1..6558b89 100644
--- a/RelayKit/Services/TimelineMessageMapper.swift
+++ b/RelayKit/Services/TimelineMessageMapper.swift
@@ -187,9 +187,20 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len
                 prevAvatarUrl: prevAvatarUrl
             )
             msgKind = .profileChange
-        case .state(_, let content):
-            msgBody = Self.stateEventDescription(content)
-            msgKind = .stateEvent
+        case .state(let stateKey, let content):
+            let (body, kind) = Self.describeStateEvent(
+                content,
+                stateKey: stateKey,
+                senderDisplayName: {
+                    if case
.ready(let name, _, _) = event.senderProfile { return name } + return nil + }(), + senderId: event.sender + ) + // Skip noisy internal events (encryption key exchange). + guard let body else { continue } + msgBody = body + msgKind = kind default: continue } @@ -319,6 +330,11 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len )) } + // Deduplicate consecutive call events from the same sender. + // When a user ends a call, the removal state event (empty content) + // appears as a second "started a call" — filter those out. + result = Self.deduplicateCallEvents(result) + return MappingResult(messages: result, unresolvedReplyEventIds: pendingReplyFetchIds) } @@ -448,9 +464,23 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len prevAvatarUrl: prevAvatarUrl ) msgKind = .profileChange - case .state(_, let content): - msgBody = Self.stateEventDescription(content) - msgKind = .stateEvent + case .state(let stateKey, let content): + // Use describeStateEvent so call membership events render as + // "X started a call" with .callEvent kind, and so the noisy + // io.element.call.encryption_keys events are filtered out — + // matching the bulk-mapping and rebuild paths. 
+ let (body, kind) = Self.describeStateEvent( + content, + stateKey: stateKey, + senderDisplayName: { + if case .ready(let name, _, _) = event.senderProfile { return name } + return nil + }(), + senderId: event.sender + ) + guard let body else { return nil } + msgBody = body + msgKind = kind default: return nil } @@ -773,9 +803,19 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len prevAvatarUrl: prevAvatarUrl ) msgKind = .profileChange - case .state(_, let content): - msgBody = Self.stateEventDescription(content) - msgKind = .stateEvent + case .state(let stateKey, let content): + let (body, kind) = Self.describeStateEvent( + content, + stateKey: stateKey, + senderDisplayName: { + if case .ready(let name, _, _) = event.senderProfile { return name } + return nil + }(), + senderId: event.sender + ) + guard let body else { return nil } + msgBody = body + msgKind = kind default: return nil } @@ -852,6 +892,28 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len } } + // MARK: - Call Event Deduplication + + /// Removes duplicate consecutive call events from the same sender. + /// + /// When a user ends a call, the MatrixRTC leave event (`{}` content) appears + /// in the timeline as a second "started a call" message from the same sender. + /// This filters out those duplicates, keeping only the first occurrence in each + /// consecutive run. 
+    private static func deduplicateCallEvents(_ messages: [TimelineMessage]) -> [TimelineMessage] {
+        var result: [TimelineMessage] = []
+        for message in messages {
+            if message.kind == .callEvent,
+               let last = result.last,
+               last.kind == .callEvent,
+               last.senderID == message.senderID {
+                continue
+            }
+            result.append(message)
+        }
+        return result
+    }
+
     // MARK: - System Event Descriptions

     // swiftlint:disable cyclomatic_complexity
@@ -927,6 +989,49 @@ struct TimelineMessageMapper: Sendable { // swiftlint:disable:this type_body_len
         return "\(name) updated their profile"
     }

+    /// Routes a state event to the appropriate description and message kind.
+    ///
+    /// Call membership events (`org.matrix.msc3401.call.member`) become
+    /// "<sender> started a call" with kind `.callEvent`. If neither a display name nor
+    /// a sender ID is available (both absent or empty), the neutral "Call started" is
+    /// used so the text never begins with a stray space. Call encryption-key events
+    /// (`io.element.call.encryption_keys`) return a `nil` body so they can be hidden.
+    /// Every other state event is described by `stateEventDescription(_:)`.
+    ///
+    /// - Parameters:
+    ///   - state: The state event content to describe.
+    ///   - stateKey: The event's state key (currently unused).
+    ///   - senderDisplayName: The sender's display name, if known.
+    ///   - senderId: The sender's user ID, used when there is no display name.
+    /// - Returns: The text to show (`nil` to hide the event) and the message kind.
+    nonisolated static func describeStateEvent(
+        _ state: OtherState,
+        stateKey: String,
+        senderDisplayName: String?,
+        senderId: String
+    ) -> (body: String?, kind: TimelineMessage.Kind) {
+        guard case .custom(let type) = state else {
+            return (stateEventDescription(state), .stateEvent)
+        }
+        switch type {
+        case "org.matrix.msc3401.call.member":
+            // Every membership event is described as a call start: a leave is sent as
+            // empty content, which the SDK may or may not surface.
+            // Use the first non-empty of display name and user ID; with neither, fall
+            // back to a neutral phrase instead of " started a call".
+            let candidates = [senderDisplayName, senderId].compactMap { $0 }
+            guard let name = candidates.first(where: { !$0.isEmpty }) else {
+                return ("Call started", .callEvent)
+            }
+            return ("\(name) started a call", .callEvent)
+        case "io.element.call.encryption_keys":
+            // Internal key exchange — don't show in timeline.
+            return (nil, .stateEvent)
+        default:
+            return (stateEventDescription(state), .stateEvent)
+        }
+    }
+
     // swiftlint:disable cyclomatic_complexity
     /// Returns a human-readable description for a room state change event.
     nonisolated static func stateEventDescription(_ state: OtherState) -> String {