diff --git a/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj b/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj index aabc3b33a..4d8cf96b5 100644 --- a/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj +++ b/firebaseai/FirebaseAIExample.xcodeproj/project.pbxproj @@ -271,6 +271,7 @@ DEVELOPMENT_TEAM = ""; ENABLE_PREVIEWS = YES; GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSMicrophoneUsageDescription = "The app needs access to your microphone to enable live voice conversations with Gemini."; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; @@ -304,6 +305,7 @@ DEVELOPMENT_TEAM = ""; ENABLE_PREVIEWS = YES; GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSMicrophoneUsageDescription = "The app needs access to your microphone to enable live voice conversations with Gemini."; INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; INFOPLIST_KEY_UILaunchScreen_Generation = YES; diff --git a/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/Contents.json b/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/Contents.json new file mode 100644 index 000000000..a511771dd --- /dev/null +++ b/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/Contents.json @@ -0,0 +1,21 @@ +{ + "images" : [ + { + "filename" : "gemini-logo.png", + "idiom" : "universal", + "scale" : "1x" + }, + { + "idiom" : "universal", + "scale" : "2x" + }, + { + "idiom" : "universal", + "scale" : "3x" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png b/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png new file mode 100644 index 000000000..11ef95cdd Binary files /dev/null and b/firebaseai/FirebaseAIExample/Assets.xcassets/gemini-logo.imageset/gemini-logo.png differ diff --git a/firebaseai/FirebaseAIExample/ContentView.swift b/firebaseai/FirebaseAIExample/ContentView.swift index 610d61ef8..830630f38 100644 --- a/firebaseai/FirebaseAIExample/ContentView.swift +++ b/firebaseai/FirebaseAIExample/ContentView.swift @@ -110,6 +110,8 @@ struct ContentView: View { FunctionCallingScreen(backendType: selectedBackend, sample: sample) case "GroundingScreen": GroundingScreen(backendType: selectedBackend, sample: sample) + case "LiveScreen": + LiveScreen(backendType: selectedBackend, sample: sample) default: EmptyView() } diff --git a/firebaseai/FirebaseAIExample/Features/Live/Screens/LiveScreen.swift b/firebaseai/FirebaseAIExample/Features/Live/Screens/LiveScreen.swift new file mode 100644 index 000000000..941a38a7c --- /dev/null +++ b/firebaseai/FirebaseAIExample/Features/Live/Screens/LiveScreen.swift @@ -0,0 +1,74 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#if canImport(FirebaseAILogic) + import FirebaseAILogic +#else + import FirebaseAI +#endif +import SwiftUI +import TipKit + +struct LiveScreen: View { + let backendType: BackendOption + @StateObject var viewModel: LiveViewModel + + init(backendType: BackendOption, sample: Sample? = nil) { + self.backendType = backendType + _viewModel = + StateObject(wrappedValue: LiveViewModel(backendType: backendType, + sample: sample)) + } + + var body: some View { + VStack(spacing: 20) { + ModelAvatar(isConnected: viewModel.state == .connected) + TranscriptView(typewriter: viewModel.transcriptTypewriter) + + Spacer() + if let error = viewModel.error { + ErrorDetailsView(error: error) + } + if let tip = viewModel.tip, !viewModel.hasTranscripts { + TipView(tip) + } + ConnectButton( + state: viewModel.state, + onConnect: viewModel.connect, + onDisconnect: viewModel.disconnect + ) + + #if targetEnvironment(simulator) + AudioOutputToggle(isEnabled: $viewModel.isAudioOutputEnabled, onChange: { + Task { + await viewModel.onAudioPlaybackChanged() + } + }) + #endif + } + .padding() + .navigationTitle(viewModel.title) + .navigationBarTitleDisplayMode(.inline) + .background(viewModel.backgroundColor ?? .clear) + .onDisappear { + Task { + await viewModel.disconnect() + } + } + } +} + +#Preview { + LiveScreen(backendType: .googleAI) +} diff --git a/firebaseai/FirebaseAIExample/Features/Live/ViewModels/LiveViewModel.swift b/firebaseai/FirebaseAIExample/Features/Live/ViewModels/LiveViewModel.swift new file mode 100644 index 000000000..8910a27c8 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Features/Live/ViewModels/LiveViewModel.swift @@ -0,0 +1,297 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import FirebaseAILogic +import Foundation +import OSLog +import AVFoundation +import SwiftUI +import AVKit +import Combine + +enum LiveViewModelState { + case idle + case connecting + case connected +} + +@MainActor +class LiveViewModel: ObservableObject { + private var logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "generative-ai") + + @Published + var error: Error? + + @Published + var state: LiveViewModelState = .idle + + @Published + var transcriptTypewriter: TypeWriterViewModel = TypeWriterViewModel() + + @Published + var backgroundColor: Color? = nil + + @Published + var hasTranscripts: Bool = false + + @Published + var title: String + + @Published + var tip: InlineTip? + + @Published + var isAudioOutputEnabled: Bool = { + #if targetEnvironment(simulator) + return false + #else + return true + #endif + }() + + private var model: LiveGenerativeModel? + private var liveSession: LiveSession? + + private var audioController: AudioController? + private var microphoneTask = Task {} + + init(backendType: BackendOption, sample: Sample? = nil) { + let firebaseService = backendType == .googleAI + ? 
FirebaseAI.firebaseAI(backend: .googleAI()) + : FirebaseAI.firebaseAI(backend: .vertexAI()) + + model = firebaseService.liveModel( + modelName: (backendType == .googleAI) ? "gemini-2.5-flash-native-audio-preview-09-2025" : + "gemini-live-2.5-flash-preview-native-audio-09-2025", + generationConfig: sample?.liveGenerationConfig, + tools: sample?.tools, + systemInstruction: sample?.systemInstruction + ) + title = sample?.title ?? "" + tip = sample?.tip + } + + /// Start a connection to the model. + /// + /// If a connection is already active, you'll need to call ``LiveViewModel/disconnect()`` first. + func connect() async { + guard let model, state == .idle else { + return + } + + if !isAudioOutputEnabled { + logger.warning("Playback audio is disabled.") + } + + guard await requestRecordPermission() else { + logger.warning("The user denied us permission to record the microphone.") + isAudioOutputEnabled = false + return + } + + state = .connecting + transcriptTypewriter.restart() + hasTranscripts = false + + do { + liveSession = try await model.connect() + audioController = try await AudioController() + + try await startRecording() + + state = .connected + try await startProcessingResponses() + } catch { + logger.error("\(String(describing: error))") + self.error = error + await disconnect() + } + } + + func onAudioPlaybackChanged() async { + if isAudioOutputEnabled { + guard await requestRecordPermission() else { + logger.warning("The user denied us permission to record the microphone.") + isAudioOutputEnabled = false + return + } + } + } + + /// Disconnects the model. + /// + /// Will stop any pending playback, and the recording of the mic. + func disconnect() async { + await audioController?.stop() + await liveSession?.close() + microphoneTask.cancel() + state = .idle + liveSession = nil + transcriptTypewriter.clearPending() + + withAnimation { + backgroundColor = nil + } + } + + /// Starts recording data from the user's microphone, and sends it to the model. + private func startRecording() async throws { + guard let audioController, let liveSession else { return } + + let stream = try await audioController.listenToMic() + microphoneTask = Task { + do { + for await audioBuffer in stream { + await liveSession.sendAudioRealtime(try audioBuffer.int16Data()) + } + } catch { + logger.error("\(String(describing: error))") + self.error = error + await disconnect() + } + } + } + + /// Starts queuing responses from the model for parsing. + private func startProcessingResponses() async throws { + guard let liveSession else { return } + + for try await response in liveSession.responses { + try await processServerMessage(response) + } + } + + /// Requests permission to record the user's microphone, returning the result. + /// + /// This is a requirement on iOS devices, on top of needing the proper recording + /// intents. + private func requestRecordPermission() async -> Bool { + await withCheckedContinuation { cont in + if #available(iOS 17.0, *) { + Task { + let ok = await AVAudioApplication.requestRecordPermission() + cont.resume(with: .success(ok)) + } + } else { + AVAudioSession.sharedInstance().requestRecordPermission { ok in + cont.resume(with: .success(ok)) + } + } + } + } + + private func processServerMessage(_ message: LiveServerMessage) async throws { + switch message.payload { + case let .content(content): + try await processServerContent(content) + case let .toolCall(toolCall): + try await processFunctionCalls(functionCalls: toolCall.functionCalls ?? 
[]) + case .toolCallCancellation: + // we don't have any long running functions to cancel + return + case let .goingAwayNotice(goingAwayNotice): + let time = goingAwayNotice.timeLeft?.description ?? "soon" + logger.warning("Going away in: \(time)") + } + } + + private func processServerContent(_ content: LiveServerContent) async throws { + if let message = content.modelTurn { + try await processAudioMessages(message) + } + + if content.isTurnComplete { + // add a space, so the next time a transcript comes in, it's not squished with the previous one + transcriptTypewriter.appendText(" ") + } + + if content.wasInterrupted { + logger.warning("Model was interrupted") + await audioController?.interrupt() + transcriptTypewriter.clearPending() + // adds an em dash to indicate that the model was cutoff + transcriptTypewriter.appendText("— ") + } else if let transcript = content.outputAudioTranscription?.text { + appendAudioTranscript(transcript) + } + } + + private func processAudioMessages(_ content: ModelContent) async throws { + for part in content.parts { + if let part = part as? InlineDataPart { + if part.mimeType.starts(with: "audio/pcm") { + if isAudioOutputEnabled { + try await audioController?.playAudio(audio: part.data) + } + } else { + logger.warning("Received non audio inline data part: \(part.mimeType)") + } + } + } + } + + private func processFunctionCalls(functionCalls: [FunctionCallPart]) async throws { + let responses = try functionCalls.map { functionCall in + switch functionCall.name { + case "changeBackgroundColor": + return try changeBackgroundColor(args: functionCall.args, id: functionCall.functionId) + case "clearBackgroundColor": + return clearBackgroundColor(id: functionCall.functionId) + default: + logger.debug("Function call: \(String(describing: functionCall))") + throw ApplicationError("Unknown function named \"\(functionCall.name)\".") + } + } + + await liveSession?.sendFunctionResponses(responses) + } + + private func appendAudioTranscript(_ transcript: String) { + hasTranscripts = true + transcriptTypewriter.appendText(transcript) + } + + private func changeBackgroundColor(args: JSONObject, id: String?) throws -> FunctionResponsePart { + guard case let .string(color) = args["color"] else { + logger.debug("Function arguments: \(String(describing: args))") + throw ApplicationError("Missing `color` parameter.") + } + + withAnimation { + backgroundColor = Color(hex: color) + } + + if backgroundColor == nil { + logger.warning("The model sent us an invalid hex color: \(color)") + } + + return FunctionResponsePart( + name: "changeBackgroundColor", + response: JSONObject(), + functionId: id + ) + } + + private func clearBackgroundColor(id: String?) -> FunctionResponsePart { + withAnimation { + backgroundColor = nil + } + + return FunctionResponsePart( + name: "clearBackgroundColor", + response: JSONObject(), + functionId: id + ) + } +} diff --git a/firebaseai/FirebaseAIExample/Features/Live/ViewModels/TypeWriterViewModel.swift b/firebaseai/FirebaseAIExample/Features/Live/ViewModels/TypeWriterViewModel.swift new file mode 100644 index 000000000..ab607fd4b --- /dev/null +++ b/firebaseai/FirebaseAIExample/Features/Live/ViewModels/TypeWriterViewModel.swift @@ -0,0 +1,96 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import SwiftUI
+import Foundation
+import Combine
+
+@MainActor
+class TypeWriterViewModel: ObservableObject {
+  @Published
+  var text: String = ""
+
+  /// How long to wait (in milliseconds) between showing the next character.
+  var delay: Int = 65
+
+  private var pendingText = [Character]()
+  private var processTextTask: Task<Void, Never>?
+
+  init() {
+    processTask()
+  }
+
+  deinit {
+    processTextTask?.cancel()
+  }
+
+  /// Queues text to show.
+  ///
+  /// Since the text is queued, it won't be displayed until the previously
+  /// queued text has been shown.
+  func appendText(_ text: String) {
+    pendingText.append(contentsOf: text)
+  }
+
+  /// Clears any text from the queue that is pending being added to the text.
+  func clearPending() {
+    pendingText.removeAll()
+  }
+
+  /// Resets the view model to a fresh state.
+  ///
+  /// Effectively, this removes all the currently tracked text,
+  /// and any pending text.
+  func restart() {
+    clearPending()
+    text = ""
+  }
+
+  /// Long-running task for processing characters.
+  private func processTask() {
+    processTextTask = Task {
+      var delay = delay
+      while !Task.isCancelled {
+        try? await Task.sleep(for: .milliseconds(delay))
+
+        delay = processNextCharacter()
+      }
+    }
+  }
+
+  /// Determines the delay for the next character, adding pending text as needed.
+  ///
+  /// We don't have a delay when outputting whitespace or the end of a sentence.
+  ///
+  /// - Returns: The delay in milliseconds before working on the next character in the queue.
+  private func processNextCharacter() -> Int {
+    guard !pendingText.isEmpty else {
+      return delay // Default delay if no text is pending
+    }
+
+    let char = pendingText.removeFirst()
+    text.append(char)
+
+    return (char.isWhitespace || char.isEndOfSentence) ? 0 : delay
+  }
+}
+
+extension Character {
+  /// Marker for punctuation that dictates the end of a sentence.
+  ///
+  /// Namely, this checks for `.`, `!` and `?`.
+  var isEndOfSentence: Bool {
+    self == "." || self == "!" || self == "?"
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Features/Live/Views/AudioOutputToggle.swift b/firebaseai/FirebaseAIExample/Features/Live/Views/AudioOutputToggle.swift
new file mode 100644
index 000000000..02c8d40f3
--- /dev/null
+++ b/firebaseai/FirebaseAIExample/Features/Live/Views/AudioOutputToggle.swift
@@ -0,0 +1,39 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +import SwiftUI + +struct AudioOutputToggle: View { + @Binding var isEnabled: Bool + var onChange: () -> Void = {} + + var body: some View { + VStack(alignment: .leading, spacing: 5) { + Toggle("Audio Output", isOn: $isEnabled).onChange(of: isEnabled) { _, _ in + onChange() + } + + Text(""" + Audio output works best on physical devices. Enable this to test playback in the \ + simulator. Headphones recommended. + """) + .font(.caption) + .foregroundStyle(.secondary) + } + } +} + +#Preview { + AudioOutputToggle(isEnabled: .constant(false)) +} diff --git a/firebaseai/FirebaseAIExample/Features/Live/Views/ConnectButton.swift b/firebaseai/FirebaseAIExample/Features/Live/Views/ConnectButton.swift new file mode 100644 index 000000000..e4ed9ef05 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Features/Live/Views/ConnectButton.swift @@ -0,0 +1,125 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftUI + +struct ConnectButton: View { + var state: LiveViewModelState + var onConnect: () async -> Void + var onDisconnect: () async -> Void + + @State private var gradientAngle: Angle = .zero + + private var isConnected: Bool { state == .connected } + + private var title: String { + switch state { + case .connected: "Stop" + case .connecting: "Connecting..." 
+ case .idle: "Start" + } + } + + private var image: String { + switch state { + case .connected: "stop" + case .connecting: "wifi" + case .idle: "play" + } + } + + var body: some View { + Button(action: onClick) { + Label(title, systemImage: image) + .frame(maxWidth: .infinity) + .padding() + } + .buttonStyle(.connect(state: state, gradientAngle: gradientAngle)) + .onAppear { + withAnimation(.linear(duration: 5).repeatForever(autoreverses: false)) { + self.gradientAngle = .degrees(360) + } + } + } + + private func onClick() { + Task { + if isConnected { + await onDisconnect() + } else { + await onConnect() + } + } + } +} + +struct ConnectButtonStyle: ButtonStyle { + var state: LiveViewModelState + var gradientAngle: Angle + + private var color: Color { + switch state { + case .connected: Color(.systemRed) + case .connecting: Color.secondary + case .idle: Color.accentColor + } + } + + private var gradientColors: [Color] { + switch state { + case .connected: [Color(.systemRed)] + case .connecting: [.secondary, .white] + case .idle: [ + Color(.systemRed), + Color(.systemBlue), + Color(.systemGreen), + Color(.systemYellow), + Color(.systemRed), + ] + } + } + + func makeBody(configuration: Configuration) -> some View { + configuration.label + .disabled(state == .connecting) + .overlay( + RoundedRectangle(cornerRadius: 35) + .stroke( + AngularGradient( + gradient: Gradient(colors: gradientColors), + center: .center, + startAngle: gradientAngle, + endAngle: gradientAngle + .degrees(360) + ), + lineWidth: 3 + ) + ) + .foregroundStyle(color) + } +} + +extension ButtonStyle where Self == ConnectButtonStyle { + static func connect(state: LiveViewModelState, gradientAngle: Angle) -> ConnectButtonStyle { + ConnectButtonStyle(state: state, gradientAngle: gradientAngle) + } +} + +#Preview { + VStack(spacing: 30) { + ConnectButton(state: .idle, onConnect: {}, onDisconnect: {}) + ConnectButton(state: .connecting, onConnect: {}, onDisconnect: {}) + ConnectButton(state: .connected, onConnect: {}, onDisconnect: {}) + } + .padding(.horizontal) +} diff --git a/firebaseai/FirebaseAIExample/Features/Live/Views/ModelAvatar.swift b/firebaseai/FirebaseAIExample/Features/Live/Views/ModelAvatar.swift new file mode 100644 index 000000000..1c36733a4 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Features/Live/Views/ModelAvatar.swift @@ -0,0 +1,71 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+import SwiftUI
+
+struct ModelAvatar: View {
+  var isConnected = false
+
+  @State private var gradientAngle: Angle = .zero
+
+  var colors: [Color] {
+    if isConnected {
+      [.red, .blue, .green, .yellow, .red]
+    } else {
+      [Color(red: 0.5, green: 0.5, blue: 0.5, opacity: 0.3)]
+    }
+  }
+
+  var body: some View {
+    Image("gemini-logo")
+      .resizable()
+      .aspectRatio(contentMode: .fit)
+      .padding()
+      .colorMultiply(.black)
+      .maskedOverlay {
+        AngularGradient(
+          gradient: Gradient(colors: colors),
+          center: .leading,
+          startAngle: gradientAngle,
+          endAngle: gradientAngle + .degrees(360)
+        )
+      }
+      .onAppear {
+        withAnimation(.linear(duration: 10).repeatForever(autoreverses: false)) {
+          self.gradientAngle = .degrees(360)
+        }
+      }
+  }
+}
+
+extension View {
+  /// Creates an overlay which takes advantage of a mask to respect the size of the view.
+  ///
+  /// Especially useful when you want to create an overlay of a view with a non-standard
+  /// size.
+  @ViewBuilder
+  func maskedOverlay(mask: () -> some View) -> some View {
+    overlay {
+      mask()
+        .mask { self }
+    }
+  }
+}
+
+#Preview {
+  VStack {
+    ModelAvatar(isConnected: true)
+    ModelAvatar(isConnected: false)
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Features/Live/Views/TranscriptView.swift b/firebaseai/FirebaseAIExample/Features/Live/Views/TranscriptView.swift
new file mode 100644
index 000000000..134d44df5
--- /dev/null
+++ b/firebaseai/FirebaseAIExample/Features/Live/Views/TranscriptView.swift
@@ -0,0 +1,45 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import SwiftUI
+
+struct TranscriptView: View {
+  @ObservedObject var typewriter: TypeWriterViewModel
+
+  var body: some View {
+    ScrollViewReader { proxy in
+      ScrollView {
+        Text(typewriter.text)
+          .font(.title3)
+          .frame(maxWidth: .infinity, alignment: .leading)
+          .transition(.opacity)
+          .padding(.horizontal)
+          .id("transcript")
+      }
+      .onChange(of: typewriter.text, initial: false) { _, _ in
+        proxy.scrollTo("transcript", anchor: .bottom)
+      }
+    }
+  }
+}
+
+#Preview {
+  let vm = TypeWriterViewModel()
+  TranscriptView(typewriter: vm)
+    .onAppear {
+      vm.appendText(
+        "The sky is blue primarily because of a phenomenon called Rayleigh scattering, where tiny molecules of gas (mainly nitrogen and oxygen) in Earth's atmosphere scatter sunlight in all directions."
+ ) + } +} diff --git a/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift b/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift index 1d59440ea..99194a765 100644 --- a/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift +++ b/firebaseai/FirebaseAIExample/FirebaseAIExampleApp.swift @@ -14,6 +14,7 @@ import FirebaseCore import SwiftUI +import TipKit class AppDelegate: NSObject, UIApplicationDelegate { func application(_ application: UIApplication, @@ -41,6 +42,14 @@ class AppDelegate: NSObject, UIApplicationDelegate { struct FirebaseAIExampleApp: App { @UIApplicationDelegateAdaptor var appDelegate: AppDelegate + init() { + do { + try Tips.configure() + } catch { + print("Error initializing tips: \(error)") + } + } + var body: some Scene { WindowGroup { ContentView() diff --git a/firebaseai/FirebaseAIExample/Shared/ApplicationError.swift b/firebaseai/FirebaseAIExample/Shared/ApplicationError.swift new file mode 100644 index 000000000..344a97472 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Shared/ApplicationError.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Generic error for issues that occur within the application. +public struct ApplicationError: Error, Sendable, CustomNSError { + let localizedDescription: String + + init(_ localizedDescription: String) { + self.localizedDescription = localizedDescription + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: localizedDescription, + ] + } +} diff --git a/firebaseai/FirebaseAIExample/Shared/Audio/AudioBufferHelpers.swift b/firebaseai/FirebaseAIExample/Shared/Audio/AudioBufferHelpers.swift new file mode 100644 index 000000000..504d09620 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Shared/Audio/AudioBufferHelpers.swift @@ -0,0 +1,91 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import AVFoundation + +extension AVAudioPCMBuffer { + /// Creates a new `AVAudioPCMBuffer` from a `Data` struct. + /// + /// Only works with interleaved data. + static func fromInterleavedData(data: Data, format: AVAudioFormat) throws -> AVAudioPCMBuffer? 
+  {
+    guard format.isInterleaved else {
+      throw ApplicationError("Only interleaved data is supported")
+    }
+
+    let frameCapacity = AVAudioFrameCount(data
+      .count / Int(format.streamDescription.pointee.mBytesPerFrame))
+    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCapacity) else {
+      return nil
+    }
+
+    buffer.frameLength = frameCapacity
+    data.withUnsafeBytes { bytes in
+      guard let baseAddress = bytes.baseAddress else { return }
+      let dst = buffer.mutableAudioBufferList.pointee.mBuffers
+      dst.mData?.copyMemory(from: baseAddress, byteCount: Int(dst.mDataByteSize))
+    }
+
+    return buffer
+  }
+
+  /// Gets the underlying `Data` in this buffer.
+  ///
+  /// Will throw an error if this buffer doesn't hold int16 data.
+  func int16Data() throws -> Data {
+    guard let bufferPtr = audioBufferList.pointee.mBuffers.mData else {
+      throw ApplicationError("Missing audio buffer list")
+    }
+
+    let audioBufferLength = Int(audioBufferList.pointee.mBuffers.mDataByteSize)
+    return Data(bytes: bufferPtr, count: audioBufferLength)
+  }
+}
+
+extension AVAudioConverter {
+  /// Uses the converter to convert the provided `buffer`.
+  ///
+  /// Will handle determining the proper frame capacity, ensuring formats align, and propagating any
+  /// errors that occur.
+  ///
+  /// - Returns: A new buffer, with the converted data.
+  func convertBuffer(_ buffer: AVAudioPCMBuffer) throws -> AVAudioPCMBuffer {
+    if buffer.format == outputFormat { return buffer }
+    guard buffer.format == inputFormat else {
+      throw ApplicationError("The buffer's format was different than the converter's input format")
+    }
+
+    let frameCapacity = AVAudioFrameCount(
+      ceil(Double(buffer.frameLength) * outputFormat.sampleRate / inputFormat.sampleRate)
+    )
+
+    guard let output = AVAudioPCMBuffer(
+      pcmFormat: outputFormat,
+      frameCapacity: frameCapacity
+    ) else {
+      throw ApplicationError("Failed to create output buffer")
+    }
+
+    var error: NSError?
+    convert(to: output, error: &error) { _, status in
+      status.pointee = .haveData
+      return buffer
+    }
+
+    if let error {
+      throw ApplicationError("Failed to convert buffer: \(error.localizedDescription)")
+    }
+
+    return output
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Shared/Audio/AudioController.swift b/firebaseai/FirebaseAIExample/Shared/Audio/AudioController.swift
new file mode 100644
index 000000000..c966d5472
--- /dev/null
+++ b/firebaseai/FirebaseAIExample/Shared/Audio/AudioController.swift
@@ -0,0 +1,244 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import AVFoundation
+import OSLog
+
+/// Controls audio playback and recording.
+actor AudioController {
+  private var logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "generative-ai")
+
+  /// Data processed from the microphone.
+  private let microphoneData: AsyncStream<AVAudioPCMBuffer>
+  private let microphoneDataQueue: AsyncStream<AVAudioPCMBuffer>.Continuation
+  private var audioPlayer: AudioPlayer?
+  private var audioEngine: AVAudioEngine?
+  private var microphone: Microphone?
+  private var listenTask: Task<Void, Error>?
+  private var routeTask: Task<Void, Never>?
+
+  /// Port types that are considered "headphones" for our use case.
+  ///
+  /// More specifically, AirPods are considered Bluetooth ports instead of headphones, so
+  /// this array is necessary.
+  private let headphonePortTypes: [AVAudioSession.Port] = [
+    .headphones,
+    .bluetoothA2DP,
+    .bluetoothLE,
+    .bluetoothHFP,
+  ]
+
+  private let modelInputFormat: AVAudioFormat
+  private let modelOutputFormat: AVAudioFormat
+
+  private var stopped = false
+
+  public init() async throws {
+    let session = AVAudioSession.sharedInstance()
+    try session.setCategory(
+      .playAndRecord,
+      mode: .voiceChat,
+      options: [.defaultToSpeaker, .allowBluetoothHFP, .duckOthers,
+                .interruptSpokenAudioAndMixWithOthers, .allowBluetoothA2DP]
+    )
+    try session.setPreferredIOBufferDuration(0.01)
+    try session.setActive(true)
+
+    guard let modelInputFormat = AVAudioFormat(
+      commonFormat: .pcmFormatInt16,
+      sampleRate: 16000,
+      channels: 1,
+      interleaved: false
+    ) else {
+      throw ApplicationError("Failed to create model input format")
+    }
+
+    guard let modelOutputFormat = AVAudioFormat(
+      commonFormat: .pcmFormatInt16,
+      sampleRate: 24000,
+      channels: 1,
+      interleaved: true
+    ) else {
+      throw ApplicationError("Failed to create model output format")
+    }
+
+    self.modelInputFormat = modelInputFormat
+    self.modelOutputFormat = modelOutputFormat
+
+    let (processedData, dataQueue) = AsyncStream<AVAudioPCMBuffer>.makeStream()
+    microphoneData = processedData
+    microphoneDataQueue = dataQueue
+
+    listenForRouteChange()
+  }
+
+  /// Kicks off audio processing, and returns a stream of recorded microphone audio data.
+  public func listenToMic() async throws -> AsyncStream<AVAudioPCMBuffer> {
+    try await spawnAudioProcessingThread()
+    return microphoneData
+  }
+
+  /// Permanently stop all audio processing.
+  ///
+  /// To start again, create a new instance of ``AudioController``.
+  public func stop() async {
+    stopped = true
+    await stopListeningAndPlayback()
+    microphoneDataQueue.finish()
+    routeTask?.cancel()
+  }
+
+  /// Queues audio for playback.
+  public func playAudio(audio: Data) async throws {
+    try await audioPlayer?.play(audio)
+  }
+
+  /// Interrupts and clears the currently pending audio playback queue.
+  public func interrupt() async {
+    await audioPlayer?.interrupt()
+  }
+
+  private func stopListeningAndPlayback() async {
+    listenTask?.cancel()
+    // audio engine needs to be stopped before disconnecting nodes
+    audioEngine?.pause()
+    audioEngine?.stop()
+    if let audioEngine {
+      do {
+        // the VP IO leaves behind artifacts, so we need to disable it to properly clean up
+        if audioEngine.inputNode.isVoiceProcessingEnabled {
+          try audioEngine.inputNode.setVoiceProcessingEnabled(false)
+        }
+      } catch {
+        logger.error("Failed to disable voice processing: \(error.localizedDescription)")
+      }
+    }
+    await microphone?.stop()
+    await audioPlayer?.stop()
+  }
+
+  /// Start audio processing functionality.
+  ///
+  /// Will stop any currently running audio processing.
+  ///
+  /// This function is also called whenever the input or output device changes,
+  /// so it needs to be able to set up the audio processing without disrupting
+  /// the consumer of the microphone data.
+  private func spawnAudioProcessingThread() async throws {
+    if stopped { return }
+
+    await stopListeningAndPlayback()
+
+    // we need to start a new audio engine if the output device changed, so we might as well do it regardless
+    let audioEngine = AVAudioEngine()
+    self.audioEngine = audioEngine
+
+    try await setupAudioPlayback(audioEngine)
+    try setupVoiceProcessing(audioEngine)
+
+    do {
+      try audioEngine.start()
+    } catch {
+      throw ApplicationError("Failed to start audio engine: \(error.localizedDescription)")
+    }
+
+    try await setupMicrophone(audioEngine)
+  }
+
+  private func setupMicrophone(_ engine: AVAudioEngine) async throws {
+    let microphone = await Microphone(engine: engine)
+    self.microphone = microphone
+
+    await microphone.start()
+
+    let micFormat = engine.inputNode.outputFormat(forBus: 0)
+    guard let converter = AVAudioConverter(from: micFormat, to: modelInputFormat) else {
+      throw ApplicationError("Failed to create audio converter")
+    }
+
+    listenTask = Task {
+      for await audio in await microphone.audio {
+        try microphoneDataQueue.yield(await converter.convertBuffer(audio))
+      }
+    }
+  }
+
+  private func setupAudioPlayback(_ engine: AVAudioEngine) async throws {
+    let playbackFormat = engine.outputNode.outputFormat(forBus: 0)
+    audioPlayer = try await AudioPlayer(
+      engine: engine,
+      inputFormat: modelOutputFormat,
+      outputFormat: playbackFormat
+    )
+  }
+
+  /// Sets up the voice processing I/O, if it needs to be set up.
+  private func setupVoiceProcessing(_ engine: AVAudioEngine) throws {
+    do {
+      let headphonesConnected = headphonesConnected()
+      let vpEnabled = engine.inputNode.isVoiceProcessingEnabled
+
+      if !vpEnabled, !headphonesConnected {
+        try engine.inputNode.setVoiceProcessingEnabled(true)
+      } else if headphonesConnected, vpEnabled {
+        // Bluetooth headphones have integrated AEC, so if we don't disable VP IO we get muted output
+        try engine.inputNode.setVoiceProcessingEnabled(false)
+      }
+    } catch {
+      throw ApplicationError("Failed to enable voice processing: \(error.localizedDescription)")
+    }
+  }
+
+  /// When the output device changes, ensure the audio playback and recording classes are properly restarted.
+  private func listenForRouteChange() {
+    routeTask?.cancel()
+    routeTask = Task { [weak self] in
+      for await notification in NotificationCenter.default.notifications(
+        named: AVAudioSession.routeChangeNotification
+      ) {
+        await self?.handleRouteChange(notification: notification)
+      }
+    }
+  }
+
+  private func handleRouteChange(notification: Notification) {
+    guard let userInfo = notification.userInfo,
+          let reasonValue = userInfo[AVAudioSessionRouteChangeReasonKey] as? UInt,
+          let reason = AVAudioSession.RouteChangeReason(rawValue: reasonValue) else {
+      return
+    }
+
+    switch reason {
+    case .newDeviceAvailable, .oldDeviceUnavailable:
+      Task { @MainActor in
+        do {
+          try await spawnAudioProcessingThread()
+        } catch {
+          await logger
+            .error("Failed to spawn audio processing thread: \(String(describing: error))")
+        }
+      }
+    default: ()
+    }
+  }
+
+  /// Checks if the current audio route is a headphone.
+  ///
+  /// This includes AirPods.
+  private func headphonesConnected() -> Bool {
+    return AVAudioSession.sharedInstance().currentRoute.outputs.contains {
+      headphonePortTypes.contains($0.portType)
+    }
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Shared/Audio/AudioPlayer.swift b/firebaseai/FirebaseAIExample/Shared/Audio/AudioPlayer.swift
new file mode 100644
index 000000000..ed224cf48
--- /dev/null
+++ b/firebaseai/FirebaseAIExample/Shared/Audio/AudioPlayer.swift
@@ -0,0 +1,88 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import AVFoundation
+import Foundation
+import OSLog
+
+/// Plays back audio through the primary output device.
+class AudioPlayer {
+  private var logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "generative-ai")
+
+  private let engine: AVAudioEngine
+  private let inputFormat: AVAudioFormat
+  private let outputFormat: AVAudioFormat
+  private let playbackNode: AVAudioPlayerNode
+  private var formatConverter: AVAudioConverter
+
+  init(engine: AVAudioEngine, inputFormat: AVAudioFormat, outputFormat: AVAudioFormat) throws {
+    self.engine = engine
+
+    guard let formatConverter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
+      throw ApplicationError("Failed to create the audio converter")
+    }
+
+    let playbackNode = AVAudioPlayerNode()
+
+    engine.attach(playbackNode)
+    engine.connect(playbackNode, to: engine.mainMixerNode, format: outputFormat)
+
+    self.inputFormat = inputFormat
+    self.outputFormat = outputFormat
+    self.formatConverter = formatConverter
+    self.playbackNode = playbackNode
+  }
+
+  deinit {
+    stop()
+  }
+
+  /// Queue audio to be played through the output device.
+  ///
+  /// Note that in a real app, you'd ideally schedule the data before converting it, and then mark data as consumed after it's been played
+  /// back. That way, if the audio route changes during playback, you can requeue the buffer on the new output device.
+  ///
+  /// For the sake of simplicity, that is not implemented here; a route change will prevent the currently queued conversation from
+  /// being played through the output device.
+  public func play(_ audio: Data) throws {
+    guard engine.isRunning else {
+      logger.warning("Audio engine needs to be running to play audio.")
+      return
+    }
+
+    guard let inputBuffer = try AVAudioPCMBuffer.fromInterleavedData(
+      data: audio,
+      format: inputFormat
+    ) else {
+      throw ApplicationError("Failed to create input buffer for playback")
+    }
+
+    let buffer = try formatConverter.convertBuffer(inputBuffer)
+
+    playbackNode.scheduleBuffer(buffer, at: nil)
+    playbackNode.play()
+  }
+
+  /// Stops the currently playing audio.
+  public func interrupt() {
+    playbackNode.stop()
+  }
+
+  /// Permanently stop all audio playback.
+  public func stop() {
+    interrupt()
+    engine.disconnectNodeInput(playbackNode)
+    engine.disconnectNodeOutput(playbackNode)
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Shared/Audio/Microphone.swift b/firebaseai/FirebaseAIExample/Shared/Audio/Microphone.swift
new file mode 100644
index 000000000..7d182bad6
--- /dev/null
+++ b/firebaseai/FirebaseAIExample/Shared/Audio/Microphone.swift
@@ -0,0 +1,62 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+import AVFoundation
+
+/// Microphone bindings using Apple's AVAudioEngine API.
+class Microphone {
+  /// Data recorded from the microphone.
+  public let audio: AsyncStream<AVAudioPCMBuffer>
+  private let audioQueue: AsyncStream<AVAudioPCMBuffer>.Continuation
+
+  private let inputNode: AVAudioInputNode
+  private let audioEngine: AVAudioEngine
+
+  private var isRunning = false
+
+  init(engine: AVAudioEngine) {
+    let (audio, audioQueue) = AsyncStream<AVAudioPCMBuffer>.makeStream()
+
+    self.audio = audio
+    self.audioQueue = audioQueue
+    inputNode = engine.inputNode
+    audioEngine = engine
+  }
+
+  deinit {
+    stop()
+  }
+
+  public func start() {
+    guard !isRunning else { return }
+    isRunning = true
+
+    // 50ms buffer size for balancing latency and CPU overhead
+    let targetBufferSize = UInt32(inputNode.outputFormat(forBus: 0).sampleRate / 20)
+    inputNode
+      .installTap(onBus: 0, bufferSize: targetBufferSize, format: nil) { [weak self] buffer, _ in
+        guard let self else { return }
+        audioQueue.yield(buffer)
+      }
+  }
+
+  public func stop() {
+    audioQueue.finish()
+    if isRunning {
+      isRunning = false
+      inputNode.removeTap(onBus: 0)
+    }
+  }
+}
diff --git a/firebaseai/FirebaseAIExample/Shared/Models/Sample.swift b/firebaseai/FirebaseAIExample/Shared/Models/Sample.swift
index 0e59334c9..954a05872 100644
--- a/firebaseai/FirebaseAIExample/Shared/Models/Sample.swift
+++ b/firebaseai/FirebaseAIExample/Shared/Models/Sample.swift
@@ -31,7 +31,9 @@ public struct Sample: Identifiable {
   public let systemInstruction: ModelContent?
   public let tools: [Tool]?
   public let generationConfig: GenerationConfig?
+  public let liveGenerationConfig: LiveGenerationConfig?
   public let fileDataParts: [FileDataPart]?
+  public let tip: InlineTip?
 
   public init(title: String,
               description: String,
@@ -43,7 +45,9 @@ public struct Sample: Identifiable {
               systemInstruction: ModelContent? = nil,
               tools: [Tool]? = nil,
               generationConfig: GenerationConfig? = nil,
-              fileDataParts: [FileDataPart]? = nil) {
+              liveGenerationConfig: LiveGenerationConfig? = nil,
+              fileDataParts: [FileDataPart]? = nil,
+              tip: InlineTip?
= nil) { self.title = title self.description = description self.useCases = useCases @@ -54,7 +58,9 @@ public struct Sample: Identifiable { self.systemInstruction = systemInstruction self.tools = tools self.generationConfig = generationConfig + self.liveGenerationConfig = liveGenerationConfig self.fileDataParts = fileDataParts + self.tip = tip } } @@ -261,6 +267,48 @@ extension Sample { initialPrompt: "What's the weather in Chicago this weekend?", tools: [.googleSearch()] ), + // Live API + Sample( + title: "Live native audio", + description: "Use the Live API to talk with the model via native audio.", + useCases: [.audio], + navRoute: "LiveScreen", + liveGenerationConfig: LiveGenerationConfig( + responseModalities: [.audio], + speech: SpeechConfig(voiceName: "Zephyr", languageCode: "en-US"), + outputAudioTranscription: AudioTranscriptionConfig() + ) + ), + Sample( + title: "Live function calling", + description: "Use function calling with the Live API to ask the model to change the background color.", + useCases: [.functionCalling, .audio], + navRoute: "LiveScreen", + tools: [ + .functionDeclarations([ + FunctionDeclaration( + name: "changeBackgroundColor", + description: "Changes the background color to the specified hex color.", + parameters: [ + "color": .string( + description: "Hex code of the color to change to. (eg, #F54927)" + ), + ], + ), + FunctionDeclaration( + name: "clearBackgroundColor", + description: "Removes the background color.", + parameters: [:] + ), + ]), + ], + liveGenerationConfig: LiveGenerationConfig( + responseModalities: [.audio], + speech: SpeechConfig(voiceName: "Zephyr", languageCode: "en-US"), + outputAudioTranscription: AudioTranscriptionConfig() + ), + tip: InlineTip(text: "Try asking the model to change the background color"), + ), ] public static var sample = samples[0] diff --git a/firebaseai/FirebaseAIExample/Shared/Util/Color+Hex.swift b/firebaseai/FirebaseAIExample/Shared/Util/Color+Hex.swift new file mode 100644 index 000000000..74e0d1513 --- /dev/null +++ b/firebaseai/FirebaseAIExample/Shared/Util/Color+Hex.swift @@ -0,0 +1,44 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftUI + +extension Color { + /// Creates a new `Color` instance from a hex string. + /// + /// Supports both RGB and RGBA hex strings. 
+ init?(hex: String) { + let hex = hex.replacingOccurrences(of: "#", with: "").uppercased() + + var rgb: UInt64 = 0 + guard Scanner(string: hex).scanHexInt64(&rgb) else { return nil } + + var r: CGFloat = 0, g: CGFloat = 0, b: CGFloat = 0, a: CGFloat = 1 + + if hex.count == 6 { + r = CGFloat((rgb & 0xFF0000) >> 16) / 255.0 + g = CGFloat((rgb & 0x00FF00) >> 8) / 255.0 + b = CGFloat(rgb & 0x0000FF) / 255.0 + } else if hex.count == 8 { + r = CGFloat((rgb & 0xFF00_0000) >> 24) / 255.0 + g = CGFloat((rgb & 0x00FF_0000) >> 16) / 255.0 + b = CGFloat((rgb & 0x0000_FF00) >> 8) / 255.0 + a = CGFloat(rgb & 0x0000_00FF) / 255.0 + } else { + return nil + } + + self.init(red: r, green: g, blue: b, opacity: a) + } +} diff --git a/firebaseai/FirebaseAIExample/Shared/Views/InlineTip.swift b/firebaseai/FirebaseAIExample/Shared/Views/InlineTip.swift new file mode 100644 index 000000000..ff72d9b5e --- /dev/null +++ b/firebaseai/FirebaseAIExample/Shared/Views/InlineTip.swift @@ -0,0 +1,57 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import TipKit + +public struct InlineTip: Tip { + private let _text: String + private let _title: String + private let _icon: Image + + public init(text: String, title: String = "Tip", icon: Image = Image(systemName: "info.circle")) { + _text = text + _title = title + _icon = icon + } + + public var title: Text { + Text(_title) + } + + public var message: Text? { + Text(_text) + } + + public var image: Image? { + _icon + } +} + +#Preview { + TipView(InlineTip(text: "Try asking the model to change the background color")) + TipView( + InlineTip( + text: "You shouldn't do that.", + title: "Warning", + icon: Image(systemName: "exclamationmark.circle") + ) + ) + TipView( + InlineTip( + text: "Oops, try again!", + title: "Error", + icon: Image(systemName: "x.circle") + ) + ) +}