From 192a42092e49722e4b2a1ea4e56994dd548fe397 Mon Sep 17 00:00:00 2001 From: Lee-daeho Date: Mon, 16 Mar 2026 14:53:38 -0600 Subject: [PATCH 01/68] feat(android): port CameraAccessAndroid code updates for upstream PR --- .../cameraaccess/MainActivity.kt | 15 + .../cameraaccess/gemini/AudioManager.kt | 172 +++++++++-- .../cameraaccess/gemini/GeminiLiveService.kt | 67 +++- .../gemini/GeminiSessionViewModel.kt | 286 +++++++++++++++--- .../cameraaccess/net/NetworkTypeMonitor.kt | 78 +++++ .../cameraaccess/openclaw/OpenClawBridge.kt | 150 +++++++-- .../cameraaccess/openclaw/ToolCallRouter.kt | 90 +++++- .../cameraaccess/settings/SettingsManager.kt | 143 +++++++-- .../cameraaccess/stream/StreamViewModel.kt | 134 ++++++-- .../cameraaccess/stream/StreamingService.kt | 215 ++++++------- .../cameraaccess/ui/ControlsRow.kt | 27 +- .../cameraaccess/ui/SettingsScreen.kt | 64 ++-- .../cameraaccess/ui/StreamScreen.kt | 57 +++- 13 files changed, 1185 insertions(+), 313 deletions(-) create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/net/NetworkTypeMonitor.kt diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt index 7d3f20b7..9175c2ae 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt @@ -87,6 +87,21 @@ class MainActivity : ComponentActivity() { } } + override fun onPause() { + super.onPause() + android.util.Log.d("MainActivity", "BGTEST onPause") + } + + override fun onStop() { + super.onStop() + android.util.Log.d("MainActivity", "BGTEST onStop") + } + + override fun onResume() { + super.onResume() + 
android.util.Log.d("MainActivity", "BGTEST onResume") + } + fun checkPermissions(onPermissionsGranted: () -> Unit) { registerForActivityResult(RequestMultiplePermissions()) { permissionsResult -> val granted = permissionsResult.entries.all { it.value } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt index fb6268ee..5cc36b7e 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt @@ -1,15 +1,18 @@ package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini import android.annotation.SuppressLint +import android.content.Context import android.media.AudioAttributes +import android.media.AudioDeviceInfo import android.media.AudioFormat import android.media.AudioRecord import android.media.AudioTrack import android.media.MediaRecorder +import android.os.Build import android.util.Log import java.io.ByteArrayOutputStream -class AudioManager { +class AudioManager(private val appContext: Context) { companion object { private const val TAG = "AudioManager" private const val MIN_SEND_BYTES = 3200 // 100ms at 16kHz mono Int16 = 1600 frames * 2 bytes @@ -20,15 +23,78 @@ class AudioManager { private var audioRecord: AudioRecord? = null private var audioTrack: AudioTrack? = null private var captureThread: Thread? = null + @Volatile private var isCapturing = false + + @Volatile + private var micEnabled = true + private val accumulatedData = ByteArrayOutputStream() private val accumulateLock = Any() + private var commDeviceSet = false + private var scoStarted = false + private var preferredBtDevice: AudioDeviceInfo? 
= null + + /** + * "Mic mute" without tearing down the whole Gemini session. + * + * - enabled=false: we still keep AudioRecord running (so routing stays stable), + * but we DO NOT forward audio chunks to Gemini. + * - when toggling, we clear any buffered audio to avoid "catch-up" sending. + */ + fun setMicEnabled(enabled: Boolean) { + micEnabled = enabled + synchronized(accumulateLock) { + accumulatedData.reset() + } + Log.d(TAG, "Mic enabled = $micEnabled") + } + + fun isMicEnabled(): Boolean = micEnabled + @SuppressLint("MissingPermission") fun startCapture() { if (isCapturing) return + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + + // ✅ BT 마이크가 있으면 그걸 우선 사용, 없으면 폰 마이크로 폴백 + preferredBtDevice = findBluetoothInputDeviceOrNull() + + if (preferredBtDevice != null) { + // 통화 모드로 전환 (SCO 입력 안정화에 도움) + sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION + + // Android 12+ : communication device 선택 시도 (실패해도 폴백 가능) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + try { + commDeviceSet = sysAm.setCommunicationDevice(preferredBtDevice!!) 
+ Log.d(TAG, "setCommunicationDevice(BT) = $commDeviceSet, dev=${preferredBtDevice?.productName}") + } catch (t: Throwable) { + commDeviceSet = false + Log.w(TAG, "setCommunicationDevice failed: ${t.message}") + } + } + + // 구형/일부 기기 fallback: SCO 시작 (BT 없으면 시작하지 않음) + try { + sysAm.startBluetoothSco() + sysAm.isBluetoothScoOn = true + scoStarted = true + Log.d(TAG, "Bluetooth SCO started") + } catch (t: Throwable) { + scoStarted = false + Log.w(TAG, "startBluetoothSco failed: ${t.message}") + } + } else { + // ✅ BT가 없으면 강제 라우팅/모드 변경 안 함 (그냥 폰 마이크) + commDeviceSet = false + scoStarted = false + Log.d(TAG, "No BT mic -> fallback to phone mic") + } + val bufferSize = AudioRecord.getMinBufferSize( GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, @@ -43,6 +109,18 @@ class AudioManager { bufferSize ) + preferredBtDevice?.let { dev -> + try { + val ok = audioRecord?.setPreferredDevice(dev) == true + Log.d(TAG, "AudioRecord.setPreferredDevice(BT) ok=$ok dev=${dev.productName}") + } catch (t: Throwable) { + Log.w(TAG, "setPreferredDevice failed: ${t.message}") + } + } + + val routed = audioRecord?.routedDevice + Log.d(TAG, "AudioRecord routedDevice: type=${routed?.type} name=${routed?.productName}") + audioTrack = AudioTrack.Builder() .setAudioAttributes( AudioAttributes.Builder() @@ -75,31 +153,59 @@ class AudioManager { accumulatedData.reset() } - captureThread = Thread({ - val buffer = ByteArray(bufferSize) - var tapCount = 0 - while (isCapturing) { - val read = audioRecord?.read(buffer, 0, buffer.size) ?: break - if (read > 0) { - tapCount++ - synchronized(accumulateLock) { - accumulatedData.write(buffer, 0, read) - if (accumulatedData.size() >= MIN_SEND_BYTES) { - val chunk = accumulatedData.toByteArray() - accumulatedData.reset() - if (tapCount <= 3) { - Log.d(TAG, "Sending chunk: ${chunk.size} bytes (~${chunk.size / 32}ms)") + captureThread = Thread( + { + val buffer = ByteArray(bufferSize) + var tapCount = 0 + while (isCapturing) { + val read = 
audioRecord?.read(buffer, 0, buffer.size) ?: break + if (read > 0) { + if (!micEnabled) { + // Mic muted: discard data and clear any partial buffer. + synchronized(accumulateLock) { + accumulatedData.reset() + } + continue + } + + tapCount++ + synchronized(accumulateLock) { + accumulatedData.write(buffer, 0, read) + if (accumulatedData.size() >= MIN_SEND_BYTES) { + val chunk = accumulatedData.toByteArray() + accumulatedData.reset() + if (tapCount <= 3) { + Log.d(TAG, "Sending chunk: ${chunk.size} bytes (~${chunk.size / 32}ms)") + } + onAudioCaptured?.invoke(chunk) } - onAudioCaptured?.invoke(chunk) } } } - } - }, "audio-capture").also { it.start() } + }, + "audio-capture" + ).also { it.start() } Log.d(TAG, "Audio capture started (16kHz mono PCM16)") } + private fun findBluetoothInputDeviceOrNull(): AudioDeviceInfo? { + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + + // 입력 디바이스 목록에서 BT 계열 우선 탐색 + val inputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_INPUTS) + + // 1순위: SCO (통화용) + inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO }?.let { return it } + + // 2순위: BLE Headset (기기/OS에 따라 여기로 잡히는 경우가 있음) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLE_HEADSET }?.let { return it } + } + + return null + } + fun playAudio(data: ByteArray) { if (!isCapturing || data.isEmpty()) return audioTrack?.write(data, 0, data.size) @@ -120,10 +226,12 @@ class AudioManager { // Flush remaining accumulated audio synchronized(accumulateLock) { - if (accumulatedData.size() > 0) { + if (micEnabled && accumulatedData.size() > 0) { val chunk = accumulatedData.toByteArray() accumulatedData.reset() onAudioCaptured?.invoke(chunk) + } else { + accumulatedData.reset() } } @@ -135,6 +243,30 @@ class AudioManager { audioTrack?.release() audioTrack = null + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + + 
if (scoStarted) { + try { + sysAm.stopBluetoothSco() + sysAm.isBluetoothScoOn = false + } catch (_: Throwable) { + } + scoStarted = false + } + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && commDeviceSet) { + try { + sysAm.clearCommunicationDevice() + } catch (_: Throwable) { + } + commDeviceSet = false + } + + preferredBtDevice = null + + // 필요하면 모드 원복 (기기 따라 유지해도 되지만 안전하게 NORMAL 추천) + sysAm.mode = android.media.AudioManager.MODE_NORMAL + Log.d(TAG, "Audio capture stopped") } -} +} \ No newline at end of file diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt index d046d306..46ebc769 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt @@ -42,6 +42,10 @@ class GeminiLiveService { private val _isModelSpeaking = MutableStateFlow(false) val isModelSpeaking: StateFlow = _isModelSpeaking.asStateFlow() + // Debug: last disconnect/failure detail + private val _lastDisconnectInfo = MutableStateFlow(null) + val lastDisconnectInfo: StateFlow = _lastDisconnectInfo.asStateFlow() + var onAudioReceived: ((ByteArray) -> Unit)? = null var onTurnComplete: (() -> Unit)? = null var onInterrupted: (() -> Unit)? = null @@ -60,15 +64,19 @@ class GeminiLiveService { private var connectCallback: ((Boolean) -> Unit)? = null private var timeoutTimer: Timer? = null + // NOTE: user said pingInterval already increased; keep your current value here. + // If you want, change 10 -> 30/60. 
private val client = OkHttpClient.Builder() .readTimeout(0, TimeUnit.MILLISECONDS) .pingInterval(10, TimeUnit.SECONDS) + .retryOnConnectionFailure(true) .build() fun connect(callback: (Boolean) -> Unit) { val url = GeminiConfig.websocketURL() if (url == null) { _connectionState.value = GeminiConnectionState.Error("No API key configured") + _lastDisconnectInfo.value = "No API key configured" callback(false) return } @@ -93,24 +101,40 @@ class GeminiLiveService { } override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) { - val msg = t.message ?: "Unknown error" - Log.e(TAG, "WebSocket failure: $msg") - _connectionState.value = GeminiConnectionState.Error(msg) + val detail = buildString { + append("WS failure: ") + append(t::class.java.name) + append(": ") + append(t.message ?: "no-message") + if (response != null) { + append(" | HTTP ") + append(response.code) + append(" ") + append(response.message) + } + } + + Log.e(TAG, detail, t) + _lastDisconnectInfo.value = detail + _connectionState.value = GeminiConnectionState.Error(detail) _isModelSpeaking.value = false resolveConnect(false) - onDisconnected?.invoke(msg) + onDisconnected?.invoke(detail) } override fun onClosing(webSocket: WebSocket, code: Int, reason: String) { - Log.d(TAG, "WebSocket closing: $code $reason") + val detail = "WS closing: code=$code reason=$reason" + Log.w(TAG, detail) + _lastDisconnectInfo.value = detail _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false resolveConnect(false) - onDisconnected?.invoke("Connection closed (code $code: $reason)") + onDisconnected?.invoke(detail) } override fun onClosed(webSocket: WebSocket, code: Int, reason: String) { Log.d(TAG, "WebSocket closed: $code $reason") + _lastDisconnectInfo.value = "WS closed: code=$code reason=$reason" _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false } @@ -122,9 +146,13 @@ class GeminiLiveService { override fun run() { if 
(_connectionState.value == GeminiConnectionState.Connecting || _connectionState.value == GeminiConnectionState.SettingUp) { - Log.e(TAG, "Connection timed out") - _connectionState.value = GeminiConnectionState.Error("Connection timed out") + val detail = "WS connect/setup timed out (15s)" + Log.e(TAG, detail) + _lastDisconnectInfo.value = detail + _connectionState.value = GeminiConnectionState.Error(detail) + webSocket?.cancel() resolveConnect(false) + onDisconnected?.invoke(detail) } } }, 15000) @@ -136,8 +164,8 @@ class GeminiLiveService { timeoutTimer = null webSocket?.close(1000, null) webSocket = null - onToolCall = null - onToolCallCancellation = null +// onToolCall = null +// onToolCallCancellation = null _connectionState.value = GeminiConnectionState.Disconnected _isModelSpeaking.value = false resolveConnect(false) @@ -155,6 +183,7 @@ class GeminiLiveService { }) }) } + Log.d("GeminiWS", "SEND_AUDIO_CHUNK") webSocket?.send(json.toString()) } } @@ -173,12 +202,14 @@ class GeminiLiveService { }) }) } + Log.d("GeminiWS", "SEND_VIDEO_FRAME") webSocket?.send(json.toString()) } } fun sendToolResponse(response: JSONObject) { sendExecutor.execute { + Log.d("GeminiWS", "SEND_TOOL: " + response.toString().take(300)) webSocket?.send(response.toString()) } } @@ -249,6 +280,7 @@ class GeminiLiveService { }) } // Send directly (not via sendExecutor) to ensure it's the first message + Log.d("GeminiWS", "SEND_SETUP") webSocket?.send(setup.toString()) } @@ -266,10 +298,21 @@ class GeminiLiveService { // GoAway if (json.has("goAway")) { val goAway = json.getJSONObject("goAway") - val seconds = goAway.optJSONObject("timeLeft")?.optInt("seconds", 0) ?: 0 + val detail = goAway.optString("detail", "server requested disconnect") + Log.w(TAG, "Gemini goAway: $detail") + + val ws = webSocket + webSocket = null + + try { + ws?.close(1000, detail) + } catch (e: Exception) { + Log.w(TAG, "Error closing websocket on goAway", e) + } + _connectionState.value = 
GeminiConnectionState.Disconnected _isModelSpeaking.value = false - onDisconnected?.invoke("Server closing (time left: ${seconds}s)") + onDisconnected?.invoke(detail) return } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 31567442..808bd1fe 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -1,15 +1,18 @@ +// app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini +import android.app.Application import android.graphics.Bitmap -import android.util.Log -import androidx.lifecycle.ViewModel +import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.viewModelScope +import com.meta.wearable.dat.externalsampleapps.cameraaccess.net.NetworkType +import com.meta.wearable.dat.externalsampleapps.cameraaccess.net.NetworkTypeMonitor import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawBridge -import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawEventClient -import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawConnectionState +import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawEventClient import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallRouter import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallStatus +import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMode import kotlinx.coroutines.Job import kotlinx.coroutines.delay @@ -23,47 +26,134 @@ data class GeminiUiState( val isGeminiActive: Boolean = false, val connectionState: GeminiConnectionState = GeminiConnectionState.Disconnected, val isModelSpeaking: Boolean = false, + val isMicEnabled: Boolean = true, val errorMessage: String? = null, val userTranscript: String = "", val aiTranscript: String = "", val toolCallStatus: ToolCallStatus = ToolCallStatus.Idle, val openClawConnectionState: OpenClawConnectionState = OpenClawConnectionState.NotConfigured, + val networkType: NetworkType = NetworkType.NONE, ) -class GeminiSessionViewModel : ViewModel() { - companion object { - private const val TAG = "GeminiSessionVM" - } +class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { private val _uiState = MutableStateFlow(GeminiUiState()) val uiState: StateFlow = _uiState.asStateFlow() private val geminiService = GeminiLiveService() private val openClawBridge = OpenClawBridge() - private var toolCallRouter: ToolCallRouter? = null - private val audioManager = AudioManager() private val eventClient = OpenClawEventClient() + private var toolCallRouter: ToolCallRouter? = null + private val audioManager = AudioManager(getApplication().applicationContext) private var lastVideoFrameTime: Long = 0 + + @Volatile private var latestFrameForToolCall: Bitmap? = null + @Volatile private var lastUserOriginalInstruction: String? = null + private var stateObservationJob: Job? = null + private var userStopped = false + private var reconnectJob: Job? = null + private var reconnectAttempts = 0 + private val maxReconnectAttempts = 6 + var streamingMode: StreamingMode = StreamingMode.GLASSES + private val netMonitor = NetworkTypeMonitor(app) + private var netMonitorJob: Job? 
= null + + private val videoIntervalWifiMs = 1000L + private val videoIntervalCellularMs = 4000L + private val videoIntervalOtherMs = 2000L + + // execute 시작 시 mic 상태를 저장해뒀다가 끝나면 복원 + private var micStateBeforeExecution: Boolean? = null + private var micAutoMutedForExecution = false + + private fun isToolExecuting(status: ToolCallStatus): Boolean { + return status is ToolCallStatus.Executing + } + + private fun syncMicWithToolExecution(status: ToolCallStatus) { + val executing = isToolExecuting(status) + + if (executing) { + if (!micAutoMutedForExecution) { + micStateBeforeExecution = _uiState.value.isMicEnabled + + if (_uiState.value.isMicEnabled) { + _uiState.value = _uiState.value.copy(isMicEnabled = false) + audioManager.setMicEnabled(false) + } + + micAutoMutedForExecution = true + } + return + } + + if (micAutoMutedForExecution) { + val restoreMic = micStateBeforeExecution ?: true + _uiState.value = _uiState.value.copy(isMicEnabled = restoreMic) + audioManager.setMicEnabled(restoreMic) + + micStateBeforeExecution = null + micAutoMutedForExecution = false + } + } + + fun toggleMic() { + if (!_uiState.value.isGeminiActive) return + if (isToolExecuting(_uiState.value.toolCallStatus)) return + + val newEnabled = !_uiState.value.isMicEnabled + _uiState.value = _uiState.value.copy(isMicEnabled = newEnabled) + audioManager.setMicEnabled(newEnabled) + } + + fun setMicEnabled(enabled: Boolean) { + if (!_uiState.value.isGeminiActive) return + if (isToolExecuting(_uiState.value.toolCallStatus)) return + + _uiState.value = _uiState.value.copy(isMicEnabled = enabled) + audioManager.setMicEnabled(enabled) + } + fun startSession() { if (_uiState.value.isGeminiActive) return if (!GeminiConfig.isConfigured) { _uiState.value = _uiState.value.copy( - errorMessage = "Gemini API key not configured. Open Settings and add your key from https://aistudio.google.com/apikey" + errorMessage = "Gemini API key not configured. Open Settings and add your key." 
) return } - _uiState.value = _uiState.value.copy(isGeminiActive = true) + userStopped = false + reconnectAttempts = 0 + reconnectJob?.cancel() + reconnectJob = null + micStateBeforeExecution = null + micAutoMutedForExecution = false + + // Start with mic enabled by default + _uiState.value = _uiState.value.copy(isGeminiActive = true, isMicEnabled = true) + audioManager.setMicEnabled(true) + + netMonitor.start() + netMonitorJob?.cancel() + netMonitorJob = viewModelScope.launch { + netMonitor.networkType.collect { t -> + _uiState.value = _uiState.value.copy(networkType = t) + } + } - // Wire audio callbacks audioManager.onAudioCaptured = lambda@{ data -> - // Phone mode: mute mic while model speaks to prevent echo + // execute 중에는 mic 입력을 Gemini로 보내지 않음 + if (isToolExecuting(_uiState.value.toolCallStatus)) return@lambda + + // streamingMode == PHONE 일때 모델이 말하는동안에는 입력을 막음(기존 로직) if (streamingMode == StreamingMode.PHONE && geminiService.isModelSpeaking.value) return@lambda + geminiService.sendAudio(data) } @@ -77,11 +167,18 @@ class GeminiSessionViewModel : ViewModel() { geminiService.onTurnComplete = { _uiState.value = _uiState.value.copy(userTranscript = "") + // turn이 끝나면 원본 발화도 "이전 턴"으로 굳음. 
(원하면 여기서 별도 저장/rotate 가능) } - geminiService.onInputTranscription = { text -> + // execute 중에는 입력 전사도 누적하지 않음 + geminiService.onInputTranscription = input@{ text -> + if (isToolExecuting(_uiState.value.toolCallStatus)) return@input + + val newTranscript = _uiState.value.userTranscript + text + lastUserOriginalInstruction = newTranscript + _uiState.value = _uiState.value.copy( - userTranscript = _uiState.value.userTranscript + text, + userTranscript = newTranscript, aiTranscript = "" ) } @@ -93,21 +190,24 @@ class GeminiSessionViewModel : ViewModel() { } geminiService.onDisconnected = { reason -> - if (_uiState.value.isGeminiActive) { - stopSession() + if (_uiState.value.isGeminiActive && !userStopped) { _uiState.value = _uiState.value.copy( - errorMessage = "Connection lost: ${reason ?: "Unknown error"}" + errorMessage = "Disconnected: ${reason ?: "Unknown"}\nReconnecting..." ) + scheduleReconnect(reason) } } - // Check OpenClaw and start session viewModelScope.launch { openClawBridge.checkConnection() openClawBridge.resetSession() - // Wire tool call handling - toolCallRouter = ToolCallRouter(openClawBridge, viewModelScope) + toolCallRouter = ToolCallRouter( + bridge = openClawBridge, + scope = viewModelScope, + latestFrameProvider = { latestFrameForToolCall }, + originalInstructionProvider = { lastUserOriginalInstruction } + ) geminiService.onToolCall = { toolCall -> for (call in toolCall.functionCalls) { @@ -121,25 +221,27 @@ class GeminiSessionViewModel : ViewModel() { toolCallRouter?.cancelToolCalls(cancellation.ids) } - // Observe service state stateObservationJob = viewModelScope.launch { while (isActive) { delay(100) + + val latestToolStatus = openClawBridge.lastToolCallStatus.value + syncMicWithToolExecution(latestToolStatus) + _uiState.value = _uiState.value.copy( connectionState = geminiService.connectionState.value, isModelSpeaking = geminiService.isModelSpeaking.value, - toolCallStatus = openClawBridge.lastToolCallStatus.value, + toolCallStatus = 
latestToolStatus, openClawConnectionState = openClawBridge.connectionState.value, ) } } - // Connect to Gemini geminiService.connect { setupOk -> if (!setupOk) { val msg = when (val state = geminiService.connectionState.value) { is GeminiConnectionState.Error -> state.message - else -> "Failed to connect to Gemini" + else -> geminiService.lastDisconnectInfo.value ?: "Failed to connect to Gemini" } _uiState.value = _uiState.value.copy(errorMessage = msg) geminiService.disconnect() @@ -151,9 +253,11 @@ class GeminiSessionViewModel : ViewModel() { return@connect } - // Start mic capture try { audioManager.startCapture() + audioManager.setMicEnabled(_uiState.value.isMicEnabled) + _uiState.value = _uiState.value.copy(errorMessage = null) + syncProactiveNotifications() } catch (e: Exception) { _uiState.value = _uiState.value.copy( errorMessage = "Mic capture failed: ${e.message}" @@ -165,42 +269,152 @@ class GeminiSessionViewModel : ViewModel() { connectionState = GeminiConnectionState.Disconnected ) } + } + } + } + + private fun scheduleReconnect(reason: String?) { + if (reconnectJob?.isActive == true) return + if (userStopped) return + + reconnectJob = viewModelScope.launch { + toolCallRouter?.cancelAll() + openClawBridge.cancelInFlight("gemini disconnected: ${reason ?: "unknown"}") + + audioManager.stopCapture() + geminiService.disconnect() + + reconnectAttempts = 0 + + while (isActive && !userStopped && reconnectAttempts < maxReconnectAttempts) { + val backoffSec = listOf(1L, 2L, 4L, 8L, 16L, 30L).getOrElse(reconnectAttempts) { 30L } + reconnectAttempts++ + + _uiState.value = _uiState.value.copy( + errorMessage = "Reconnecting... 
(attempt $reconnectAttempts/$maxReconnectAttempts, wait ${backoffSec}s)\nLast: ${reason ?: "Unknown"}" + ) - // Connect to OpenClaw event stream for proactive notifications - if (SettingsManager.proactiveNotificationsEnabled) { - eventClient.onNotification = { text -> - val state = _uiState.value - if (state.isGeminiActive && state.connectionState == GeminiConnectionState.Ready) { - geminiService.sendTextMessage(text) - } + delay(backoffSec * 1000) + + var cbOk = false + geminiService.connect { ok -> cbOk = ok } + + val startWait = System.currentTimeMillis() + var ready = false + var errored = false + + while (isActive && !userStopped && System.currentTimeMillis() - startWait < 20_000) { + when (geminiService.connectionState.value) { + is GeminiConnectionState.Ready -> { ready = true; break } + is GeminiConnectionState.Error -> { errored = true; break } + else -> delay(100) + } + } + + if ((cbOk || ready) && geminiService.connectionState.value == GeminiConnectionState.Ready) { + try { + audioManager.startCapture() + audioManager.setMicEnabled(_uiState.value.isMicEnabled) + _uiState.value = _uiState.value.copy(errorMessage = null) + reconnectAttempts = 0 + return@launch + } catch (e: Exception) { + _uiState.value = _uiState.value.copy( + errorMessage = "Reconnected but mic capture failed: ${e.message}" + ) + } + } else { + val last = (geminiService.connectionState.value as? 
GeminiConnectionState.Error)?.message + ?: geminiService.lastDisconnectInfo.value + ?: "unknown" + _uiState.value = _uiState.value.copy( + errorMessage = "Reconnect failed (attempt $reconnectAttempts): $last" + ) + + if (errored) { + geminiService.disconnect() + audioManager.stopCapture() } - eventClient.connect() } } + + _uiState.value = _uiState.value.copy( + errorMessage = "Reconnect failed after $maxReconnectAttempts attempts.\nLast: ${reason ?: "Unknown"}" + ) } } fun stopSession() { + userStopped = true + reconnectJob?.cancel() + reconnectJob = null + eventClient.disconnect() toolCallRouter?.cancelAll() toolCallRouter = null + + openClawBridge.cancelInFlight("user stopSession") + audioManager.stopCapture() geminiService.disconnect() + stateObservationJob?.cancel() stateObservationJob = null + + netMonitorJob?.cancel() + netMonitorJob = null + netMonitor.stop() + _uiState.value = GeminiUiState() + lastUserOriginalInstruction = null + latestFrameForToolCall = null + micStateBeforeExecution = null + micAutoMutedForExecution = false + } + + private fun syncProactiveNotifications() { + if (!SettingsManager.proactiveNotificationsEnabled) { + eventClient.disconnect() + return + } + + eventClient.onNotification = { text -> + val state = _uiState.value + if (state.isGeminiActive && state.connectionState == GeminiConnectionState.Ready) { + geminiService.sendTextMessage(text) + } + } + eventClient.connect() } fun sendVideoFrameIfThrottled(bitmap: Bitmap) { if (!SettingsManager.videoStreamingEnabled) return if (!_uiState.value.isGeminiActive) return if (_uiState.value.connectionState != GeminiConnectionState.Ready) return + + val intervalMs = when (_uiState.value.networkType) { + NetworkType.WIFI -> videoIntervalWifiMs + NetworkType.CELLULAR -> videoIntervalCellularMs + NetworkType.OTHER -> videoIntervalOtherMs + NetworkType.NONE -> return + } + val now = System.currentTimeMillis() - if (now - lastVideoFrameTime < GeminiConfig.VIDEO_FRAME_INTERVAL_MS) return + if (now - 
lastVideoFrameTime < intervalMs) return lastVideoFrameTime = now + + // ✅ tool-call 시점에 업로드할 "원본 bitmap"을 그대로 보관 + latestFrameForToolCall = bitmap + + // Gemini 입력은 기존 로직대로 (GeminiLiveService 내부에서 resize/base64 처리) geminiService.sendVideoFrame(bitmap) } + fun clearCachedVideoFrame() { + latestFrameForToolCall = null + lastVideoFrameTime = 0 + } + fun clearError() { _uiState.value = _uiState.value.copy(errorMessage = null) } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/net/NetworkTypeMonitor.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/net/NetworkTypeMonitor.kt new file mode 100644 index 00000000..097b6885 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/net/NetworkTypeMonitor.kt @@ -0,0 +1,78 @@ +// app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/net/NetworkTypeMonitor.kt +package com.meta.wearable.dat.externalsampleapps.cameraaccess.net + +import android.content.Context +import android.net.ConnectivityManager +import android.net.Network +import android.net.NetworkCapabilities +import android.util.Log +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.flow.asStateFlow + +enum class NetworkType { + WIFI, + CELLULAR, + OTHER, + NONE +} + +class NetworkTypeMonitor(context: Context) { + companion object { + private const val TAG = "NetworkTypeMonitor" + } + + private val cm = context.applicationContext.getSystemService(Context.CONNECTIVITY_SERVICE) as ConnectivityManager + + private val _networkType = MutableStateFlow(NetworkType.NONE) + val networkType: StateFlow = _networkType.asStateFlow() + + private val callback = object : ConnectivityManager.NetworkCallback() { + override fun onAvailable(network: Network) { + update() + } + + override fun onLost(network: Network) { + update() + 
} + + override fun onCapabilitiesChanged(network: Network, networkCapabilities: NetworkCapabilities) { + update() + } + } + + fun start() { + try { + cm.registerDefaultNetworkCallback(callback) + } catch (e: Exception) { + Log.e(TAG, "registerDefaultNetworkCallback failed: ${e.message}") + } + update() + } + + fun stop() { + try { + cm.unregisterNetworkCallback(callback) + } catch (_: Exception) { + } + } + + private fun update() { + val active = cm.activeNetwork + if (active == null) { + _networkType.value = NetworkType.NONE + return + } + val caps = cm.getNetworkCapabilities(active) + if (caps == null) { + _networkType.value = NetworkType.OTHER + return + } + + _networkType.value = when { + caps.hasTransport(NetworkCapabilities.TRANSPORT_WIFI) -> NetworkType.WIFI + caps.hasTransport(NetworkCapabilities.TRANSPORT_CELLULAR) -> NetworkType.CELLULAR + else -> NetworkType.OTHER + } + } +} \ No newline at end of file diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index 4310ca8c..d057ba18 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -1,17 +1,22 @@ +// app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt package com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw import android.util.Log import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiConfig import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicReference import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.flow.MutableStateFlow import 
kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.withContext +import okhttp3.Call import okhttp3.MediaType.Companion.toMediaType +import okhttp3.MultipartBody import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.RequestBody.Companion.toRequestBody +import okhttp3.MediaType.Companion.toMediaTypeOrNull import org.json.JSONArray import org.json.JSONObject @@ -19,12 +24,18 @@ class OpenClawBridge { companion object { private const val TAG = "OpenClawBridge" private const val MAX_HISTORY_TURNS = 10 + + // OpenClaw media endpoints (split read/write) + private const val MEDIA_READ_PORT = 18080 + private const val MEDIA_UPLOAD_PORT = 18081 + private const val MEDIA_UPLOAD_PATH = "/upload" // <-- 필요하면 여기만 수정 } private val _lastToolCallStatus = MutableStateFlow(ToolCallStatus.Idle) val lastToolCallStatus: StateFlow = _lastToolCallStatus.asStateFlow() - private val _connectionState = MutableStateFlow(OpenClawConnectionState.NotConfigured) + private val _connectionState = + MutableStateFlow(OpenClawConnectionState.NotConfigured) val connectionState: StateFlow = _connectionState.asStateFlow() fun setToolCallStatus(status: ToolCallStatus) { @@ -32,8 +43,10 @@ class OpenClawBridge { } private val client = OkHttpClient.Builder() - .readTimeout(120, TimeUnit.SECONDS) - .connectTimeout(10, TimeUnit.SECONDS) + .connectTimeout(15, TimeUnit.SECONDS) + .readTimeout(300, TimeUnit.SECONDS) + .callTimeout(330, TimeUnit.SECONDS) + .writeTimeout(30, TimeUnit.SECONDS) .build() private val pingClient = OkHttpClient.Builder() @@ -41,6 +54,16 @@ class OpenClawBridge { .connectTimeout(5, TimeUnit.SECONDS) .build() + private val inFlightCallRef = AtomicReference(null) + + fun cancelInFlight(reason: String = "cancelled") { + val call = inFlightCallRef.getAndSet(null) + if (call != null && !call.isCanceled()) { + Log.w(TAG, "Cancelling in-flight OpenClaw call: $reason") + call.cancel() + } + } + private var sessionKey: String = 
"agent:main:glass" private val conversationHistory = mutableListOf() @@ -50,7 +73,7 @@ class OpenClawBridge { return@withContext } _connectionState.value = OpenClawConnectionState.Checking - + Log.d("OpenClawBridge", "hookToken(prefix)=${GeminiConfig.openClawHookToken.take(6)}...${GeminiConfig.openClawHookToken.takeLast(4)}") val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" try { val request = Request.Builder() @@ -72,7 +95,7 @@ class OpenClawBridge { } } catch (e: Exception) { _connectionState.value = OpenClawConnectionState.Unreachable(e.message ?: "Unknown error") - Log.d(TAG, "Gateway unreachable: ${e.message}") + Log.d(TAG, "Gateway unreachable: ${e::class.java.name}: ${e.message}") } } @@ -81,6 +104,72 @@ class OpenClawBridge { Log.d(TAG, "Session reset (key retained: $sessionKey)") } + /** + * Upload JPEG bytes to OpenClaw media upload API (write-only port 18081). + * Returns a read-only URL on port 18080. + */ + suspend fun uploadToolCallImage(jpegBytes: ByteArray): String? 
= withContext(Dispatchers.IO) { + if (!GeminiConfig.isOpenClawConfigured) return@withContext null + + val host = GeminiConfig.openClawHost.trimEnd('/') + val uploadUrl = "${host}:${MEDIA_UPLOAD_PORT}${MEDIA_UPLOAD_PATH}" + + val filename = "tool_${System.currentTimeMillis()}.jpg" + + val body = MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart( + name = "file", // <-- 서버 스펙이 "image"면 여기만 바꾸면 됨 + filename = filename, + body = jpegBytes.toRequestBody("image/jpeg".toMediaTypeOrNull()) + ) + .build() + + val request = Request.Builder() + .url(uploadUrl) + .post(body) + .addHeader("Authorization", "Bearer ${GeminiConfig.openClawHookToken}") + .build() + + try { + Log.d("OpenClawBridge", "Uploading to $uploadUrl bytes=${jpegBytes.size}") + val response = client.newCall(request).execute() + val respBody = response.body?.string() ?: "" + val code = response.code + response.close() + Log.w("OpenClawBridge", "Upload HTTP $code body=${respBody.take(300)}") + if (code !in 200..299) { + Log.w(TAG, "Media upload failed: HTTP $code - ${respBody.take(200)}") + return@withContext null + } + + // tolerant parse: JSON {url/readUrl/filename/file/path} or plain string + val inferred: String? 
= try { + val j = JSONObject(respBody) + j.optString("url", null) + ?: j.optString("readUrl", null) + ?: j.optString("filename", null) + ?: j.optString("file", null) + ?: j.optString("path", null) + } catch (_: Exception) { + respBody.trim().ifEmpty { null } + } + + if (inferred.isNullOrEmpty()) return@withContext null + + if (inferred.startsWith("http://") || inferred.startsWith("https://")) { + return@withContext inferred + } + + val cleaned = inferred.trimStart('/') + val readUrl = "${host}:${MEDIA_READ_PORT}/${cleaned}" + return@withContext readUrl + } catch (e: Exception) { + Log.w(TAG, "Media upload exception: ${e::class.java.simpleName}: ${e.message}") + return@withContext null + } + } + suspend fun delegateTask( task: String, toolName: String = "execute" @@ -89,43 +178,40 @@ class OpenClawBridge { val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" - // Append user message conversationHistory.add(JSONObject().apply { put("role", "user") put("content", task) }) - // Trim history if (conversationHistory.size > MAX_HISTORY_TURNS * 2) { val trimmed = conversationHistory.takeLast(MAX_HISTORY_TURNS * 2) conversationHistory.clear() conversationHistory.addAll(trimmed) } - Log.d(TAG, "Sending ${conversationHistory.size} messages in conversation") + val messagesArray = JSONArray() + for (msg in conversationHistory) messagesArray.put(msg) - try { - val messagesArray = JSONArray() - for (msg in conversationHistory) { - messagesArray.put(msg) - } + val body = JSONObject().apply { + put("model", "openclaw") + put("messages", messagesArray) + put("stream", false) + } - val body = JSONObject().apply { - put("model", "openclaw") - put("messages", messagesArray) - put("stream", false) - } + val request = Request.Builder() + .url(url) + .post(body.toString().toRequestBody("application/json".toMediaType())) + .addHeader("Authorization", "Bearer ${GeminiConfig.openClawGatewayToken}") + .addHeader("Content-Type", "application/json") + 
.addHeader("x-openclaw-session-key", sessionKey) + .addHeader("x-openclaw-message-channel", "glass") + .build() - val request = Request.Builder() - .url(url) - .post(body.toString().toRequestBody("application/json".toMediaType())) - .addHeader("Authorization", "Bearer ${GeminiConfig.openClawGatewayToken}") - .addHeader("Content-Type", "application/json") - .addHeader("x-openclaw-session-key", sessionKey) - .addHeader("x-openclaw-message-channel", "glass") - .build() + val call = client.newCall(request) + inFlightCallRef.set(call) - val response = client.newCall(request).execute() + try { + val response = call.execute() val responseBody = response.body?.string() ?: "" val statusCode = response.code response.close() @@ -137,8 +223,8 @@ class OpenClawBridge { } val json = JSONObject(responseBody) - val choices = json.optJSONArray("choices") - val content = choices?.optJSONObject(0) + val content = json.optJSONArray("choices") + ?.optJSONObject(0) ?.optJSONObject("message") ?.optString("content", "") @@ -147,7 +233,6 @@ class OpenClawBridge { put("role", "assistant") put("content", content) }) - Log.d(TAG, "Agent result: ${content.take(200)}") _lastToolCallStatus.value = ToolCallStatus.Completed(toolName) return@withContext ToolResult.Success(content) } @@ -156,13 +241,14 @@ class OpenClawBridge { put("role", "assistant") put("content", responseBody) }) - Log.d(TAG, "Agent raw: ${responseBody.take(200)}") _lastToolCallStatus.value = ToolCallStatus.Completed(toolName) return@withContext ToolResult.Success(responseBody) } catch (e: Exception) { - Log.e(TAG, "Agent error: ${e.message}") + Log.e(TAG, "Agent error: ${e::class.java.name}: ${e.message}") _lastToolCallStatus.value = ToolCallStatus.Failed(toolName, e.message ?: "Unknown") return@withContext ToolResult.Failure("Agent error: ${e.message}") + } finally { + inFlightCallRef.compareAndSet(call, null) } } diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 7763656f..c151e9dd 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -1,18 +1,27 @@ +// app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt package com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw +import android.graphics.Bitmap import android.util.Log +import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager +import java.io.ByteArrayOutputStream import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel +import kotlinx.coroutines.isActive import kotlinx.coroutines.launch import org.json.JSONArray import org.json.JSONObject class ToolCallRouter( private val bridge: OpenClawBridge, - private val scope: CoroutineScope + private val scope: CoroutineScope, + private val latestFrameProvider: () -> Bitmap?, + private val originalInstructionProvider: () -> String? 
) { companion object { private const val TAG = "ToolCallRouter" + private const val JPEG_QUALITY_FOR_UPLOAD = 92 } private val inFlightJobs = mutableMapOf() @@ -27,17 +36,56 @@ class ToolCallRouter( Log.d(TAG, "Received: $callName (id: $callId) args: ${call.args}") val job = scope.launch { - val taskDesc = call.args["task"]?.toString() ?: call.args.toString() - val result = bridge.delegateTask(task = taskDesc, toolName = callName) + // Gemini가 tool-call args로 준 "정리된" task (이미 rewriting 된 텍스트) + val rewrittenTask = call.args["task"]?.toString() ?: call.args.toString() - if (!coroutineContext[Job]!!.isCancelled) { - Log.d(TAG, "Result for $callName (id: $callId): $result") - val response = buildToolResponse(callId, callName, result) - sendResponse(response) + // 원본 발화(전사) — 우리가 따로 저장해둔 걸 가져옴 + val original = originalInstructionProvider() + ?.trim() + ?.takeIf { it.isNotEmpty() } + + // tool-call 시점에만 스냅샷 업로드 (원본 해상도 유지, JPEG로만 인코딩) + val bitmap = latestFrameProvider() + Log.d("ToolCallRouter", "toolcall bitmapNull=${latestFrameProvider()==null}") + val imageUrl: String? 
= if (SettingsManager.videoStreamingEnabled && bitmap != null) { + try { + val baos = ByteArrayOutputStream() + bitmap.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY_FOR_UPLOAD, baos) + bridge.uploadToolCallImage(baos.toByteArray()) + } catch (e: Exception) { + Log.w(TAG, "Image upload failed for tool-call $callId: ${e.message}") + null + } } else { - Log.d(TAG, "Task $callId was cancelled, skipping response") + null + } + + // OpenClaw로 넘기는 최종 "명령 텍스트" 포맷 + val taskPayload = buildString { + if (original != null) { + append("[original_instruction]\n") + append(original) + append("\n\n") + } + append("[gemini_rewritten_instruction]\n") + append(rewrittenTask) + + if (!imageUrl.isNullOrEmpty()) { + append("\n\n[tool_call_image_url]\n") + append(imageUrl) + } + } + + val result = bridge.delegateTask(task = taskPayload, toolName = callName) + + // 취소된 경우 응답 보내지 않음 + if (!isActive) { + Log.d(TAG, "Task $callId cancelled; skipping response") + return@launch } + val response = buildToolResponse(callId, callName, result) + sendResponse(response) inFlightJobs.remove(callId) } @@ -52,6 +100,7 @@ class ToolCallRouter( inFlightJobs.remove(id) } } + bridge.cancelInFlight("tool cancellation ids=$ids") bridge.setToolCallStatus(ToolCallStatus.Cancelled(ids.firstOrNull() ?: "unknown")) } @@ -61,6 +110,7 @@ class ToolCallRouter( job.cancel() } inFlightJobs.clear() + bridge.cancelInFlight("cancelAll") } private fun buildToolResponse( @@ -69,13 +119,21 @@ class ToolCallRouter( result: ToolResult ): JSONObject { return JSONObject().apply { - put("toolResponse", JSONObject().apply { - put("functionResponses", JSONArray().put(JSONObject().apply { - put("id", callId) - put("name", name) - put("response", result.toJSON()) - })) - }) + put( + "toolResponse", + JSONObject().apply { + put( + "functionResponses", + JSONArray().put( + JSONObject().apply { + put("id", callId) + put("name", name) + put("response", result.toJSON()) + } + ) + ) + } + ) } } -} +} \ No newline at end of file diff 
--git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index dd8d2d26..7c5916c3 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -56,29 +56,122 @@ object SettingsManager { prefs.edit().clear().apply() } - const val DEFAULT_SYSTEM_PROMPT = """You are an AI assistant for someone wearing Meta Ray-Ban smart glasses. You can see through their camera and have a voice conversation. Keep responses concise and natural. - -CRITICAL: You have NO memory, NO storage, and NO ability to take actions on your own. You cannot remember things, keep lists, set reminders, search the web, send messages, or do anything persistent. You are ONLY a voice interface. - -You have exactly ONE tool: execute. This connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. - -ALWAYS use execute when the user asks you to: -- Send a message to someone (any platform: WhatsApp, Telegram, iMessage, Slack, etc.) -- Search or look up anything (web, local info, facts, news) -- Add, create, or modify anything (shopping lists, reminders, notes, todos, events) -- Research, analyze, or draft anything -- Control or interact with apps, devices, or services -- Remember or store any information for later - -Be detailed in your task description. Include all relevant context: names, content, platforms, quantities, etc. The assistant works better with complete information. - -NEVER pretend to do these things yourself. 
- -IMPORTANT: Before calling execute, ALWAYS speak a brief acknowledgment first. For example: -- "Sure, let me add that to your shopping list." then call execute. -- "Got it, searching for that now." then call execute. -- "On it, sending that message." then call execute. -Never call execute silently -- the user needs verbal confirmation that you heard them and are working on it. The tool may take several seconds to complete, so the acknowledgment lets them know something is happening. - -For messages, confirm recipient and content before delegating unless clearly urgent.""" + const val DEFAULT_SYSTEM_PROMPT = """You are an AI assistant for someone wearing Meta Ray-Ban smart glasses. +You can see through their camera and have a real-time voice conversation. +Keep responses concise, natural, and conversational. + +You do NOT have persistent memory or storage. +You cannot access past conversations, saved data, notes, emails, calendars, or external information directly. + +You are ONLY a voice interface. + +You have exactly ONE tool: execute. + +The execute tool connects you to a powerful personal assistant that can: +- Send messages (WhatsApp, Telegram, iMessage, Slack, etc.) +- Search the web or look up information +- Access memory, past conversations, emails, notes, and calendar events +- Create, modify, or delete reminders, lists, todos, events +- Research, analyze, summarize, or draft content +- Control apps, services, and smart home devices +- Store or retrieve persistent information + +You CANNOT do any of these things yourself. +You MUST use execute for all of them. + +-------------------------------- +CRITICAL TOOL USAGE RULES +-------------------------------- + +You MUST call execute whenever the user: + +1. Asks to send a message on any platform. +2. Asks to search or look up anything (facts, news, locations, prices, etc.). +3. Refers to ANY past information. +4. Asks about previous conversations or earlier decisions. +5. Mentions something they did before. 
+6. Asks to check email, calendar, reminders, notes, or tasks. +7. Asks to remember something for later. +8. Asks to create, update, delete, or manage anything. +9. Asks to analyze, research, or draft content. +10. Asks to interact with apps, services, or devices. + +If the user refers to ANY time in the past (e.g., "last week", "earlier", "before", "did I", "what did we say", "check if I", etc.), you MUST use execute. +Never answer these from conversation context. + +Never attempt to simulate memory. + +-------------------------------- +IMPORTANT: VERBAL ACKNOWLEDGMENT +-------------------------------- + +Before calling execute, ALWAYS say a brief acknowledgment out loud. + +Examples: +- "Sure, let me check that." +- "Got it, searching now." +- "On it, sending that message." +- "Okay, I’ll look that up." +- "Let me check your previous notes." + +Never call execute silently. + +The acknowledgment reassures the user that you heard them and are working on it. + +-------------------------------- +TASK DESCRIPTION QUALITY +-------------------------------- + +When calling execute: + +- Be detailed and precise. +- Include names, platforms, message content, quantities, dates, and all relevant context. +- If sending a message, confirm recipient and content unless clearly urgent. +- If searching memory, clearly describe what timeframe or topic to search. + +The assistant works best with complete instructions. + +-------------------------------- +RESPONSE STYLE +-------------------------------- + +When not using execute: + +- Keep responses short. +- Be natural and conversational. +- Do not over-explain. +- Do not mention internal reasoning. + +Never pretend to take actions yourself. +Only execute can perform real-world tasks.""" +// const val DEFAULT_SYSTEM_PROMPT = """You are an AI assistant for someone wearing Meta Ray-Ban smart glasses. You can see through their camera and have a voice conversation. Keep responses concise and natural. 
+//CRITICAL: Any question about past conversations, previous actions, earlier messages, saved notes, emails, calendar events, or anything the user did before MUST trigger execute. +//You cannot answer these from context. +// +//CRITICAL: You do not have persistent memory or storage. +//You cannot access past conversations or stored data directly. +// +//To retrieve any past information, you MUST use the execute tool. +// +//You have exactly ONE tool: execute. This connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. +// +//ALWAYS use execute when the user asks you to: +//- Send a message to someone (any platform: WhatsApp, Telegram, iMessage, Slack, etc.) +//- Search or look up anything (web, local info, facts, news) +//- Add, create, or modify anything (shopping lists, reminders, notes, todos, events) +//- Research, analyze, or draft anything +//- Control or interact with apps, devices, or services +//- Remember or store any information for later +// +//Be detailed in your task description. Include all relevant context: names, content, platforms, quantities, etc. The assistant works better with complete information. +// +//NEVER pretend to do these things yourself. +// +//IMPORTANT: Before calling execute, ALWAYS speak a brief acknowledgment first. For example: +//- "Sure, let me add that to your shopping list." then call execute. +//- "Got it, searching for that now." then call execute. +//- "On it, sending that message." then call execute. +//Never call execute silently -- the user needs verbal confirmation that you heard them and are working on it. The tool may take several seconds to complete, so the acknowledgment lets them know something is happening. 
+// +//For messages, confirm recipient and content before delegating unless clearly urgent.""" } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt index ae86a2c6..d4a70271 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt @@ -35,8 +35,10 @@ import com.meta.wearable.dat.core.Wearables import com.meta.wearable.dat.core.selectors.DeviceSelector import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiSessionViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.phone.PhoneCameraManager +import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.webrtc.WebRTCSessionViewModel +import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingService import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.File @@ -73,38 +75,115 @@ class StreamViewModel( var webrtcViewModel: WebRTCSessionViewModel? = null private var phoneCameraManager: PhoneCameraManager? = null + fun setStreamingMode(mode: StreamingMode) { + _uiState.update { it.copy(streamingMode = mode) } + } + + fun setVideoStreamingEnabled(enabled: Boolean, lifecycleOwner: LifecycleOwner? 
= null) { + SettingsManager.videoStreamingEnabled = enabled + + if (enabled) { + when (_uiState.value.streamingMode) { + StreamingMode.PHONE -> lifecycleOwner?.let { startPhoneCamera(it) } + StreamingMode.GLASSES -> startStream() + } + } else { + stopActiveVideoSource(preserveMode = true) + clearVideoCache() + } + } + + fun clearVideoCache() { + geminiViewModel?.clearCachedVideoFrame() + _uiState.update { it.copy(videoFrame = null) } + } + + private fun stopActiveVideoSource(preserveMode: Boolean) { + Log.d(TAG, "BGTEST stopActiveVideoSource called preserveMode=$preserveMode") + + StreamingService.stop(getApplication()) + + videoJob?.cancel() + videoJob = null + stateJob?.cancel() + stateJob = null + streamSession?.close() + streamSession = null + phoneCameraManager?.stop() + phoneCameraManager = null + + val mode = if (preserveMode) _uiState.value.streamingMode else StreamingMode.GLASSES + _uiState.update { + it.copy( + streamSessionState = StreamSessionState.STOPPED, + videoFrame = null, + capturedPhoto = null, + isShareDialogVisible = false, + isCapturing = false, + streamingMode = mode, + ) + } + } + fun startStream() { + if (!SettingsManager.videoStreamingEnabled) { + setStreamingMode(StreamingMode.GLASSES) + stopActiveVideoSource(preserveMode = true) + clearVideoCache() + return + } + videoJob?.cancel() stateJob?.cancel() - // Start foreground service to keep streaming alive in background / screen locked StreamingService.start(getApplication()) val streamSession = - Wearables.startStreamSession( - getApplication(), - deviceSelector, - StreamConfiguration(videoQuality = VideoQuality.MEDIUM, 24), - ) - .also { streamSession = it } + Wearables.startStreamSession( + getApplication(), + deviceSelector, + StreamConfiguration(videoQuality = VideoQuality.MEDIUM, 24), + ).also { streamSession = it } + + _uiState.update { it.copy(streamingMode = StreamingMode.GLASSES) } - videoJob = viewModelScope.launch { streamSession.videoStream.collect { handleVideoFrame(it) } } + 
+ videoJob = + viewModelScope.launch { + streamSession.videoStream.collect { frame -> +// Log.d( +// TAG, +// "BGTEST frame received w=${frame.width} h=${frame.height} t=${System.currentTimeMillis()}" +// ) + handleVideoFrame(frame) + } + } + stateJob = - viewModelScope.launch { - streamSession.state.collect { currentState -> - val prevState = _uiState.value.streamSessionState - _uiState.update { it.copy(streamSessionState = currentState) } - - // navigate back when state transitioned to STOPPED - if (currentState != prevState && currentState == StreamSessionState.STOPPED) { - stopStream() - wearablesViewModel.navigateToDeviceSelection() - } + viewModelScope.launch { + Log.d(TAG, "BGTEST stateJob launched") + streamSession.state.collect { currentState -> + Log.d(TAG, "BGTEST stream state = $currentState") + val prevState = _uiState.value.streamSessionState + _uiState.update { it.copy(streamSessionState = currentState) } + + if (currentState != prevState && currentState == StreamSessionState.STOPPED) { + Log.d(TAG, "BGTEST state became STOPPED -> stopStream()") + stopStream() + wearablesViewModel.navigateToDeviceSelection() } } + } } fun startPhoneCamera(lifecycleOwner: LifecycleOwner) { + if (!SettingsManager.videoStreamingEnabled) { + setStreamingMode(StreamingMode.PHONE) + stopActiveVideoSource(preserveMode = true) + clearVideoCache() + return + } + val manager = PhoneCameraManager(getApplication()) phoneCameraManager = manager @@ -127,18 +206,8 @@ class StreamViewModel( } fun stopStream() { - // Stop foreground service - StreamingService.stop(getApplication()) - - videoJob?.cancel() - videoJob = null - stateJob?.cancel() - stateJob = null - streamSession?.close() - streamSession = null - phoneCameraManager?.stop() - phoneCameraManager = null - _uiState.update { INITIAL_STATE } + Log.d(TAG, "BGTEST stopStream called") + stopActiveVideoSource(preserveMode = false) } fun capturePhoto() { @@ -214,6 +283,11 @@ class StreamViewModel( } private fun 
handleVideoFrame(videoFrame: VideoFrame) { +// Log.d( +// TAG, +// "BGTEST handleVideoFrame entered w=${videoFrame.width} h=${videoFrame.height} t=${System.currentTimeMillis()}" +// ) + // VideoFrame contains raw I420 video data in a ByteBuffer val buffer = videoFrame.buffer val dataSize = buffer.remaining() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt index 5a143f3b..8038fe88 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt @@ -26,124 +26,125 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.R */ class StreamingService : Service() { - companion object { - private const val TAG = "StreamingService" - private const val CHANNEL_ID = "streaming_channel" - private const val CHANNEL_NAME = "Camera Streaming" - private const val NOTIFICATION_ID = 1001 - private const val WAKELOCK_TAG = "VisionClaw::StreamingWakeLock" - - fun start(context: Context) { - val intent = - Intent(context, StreamingService::class.java).apply { `package` = context.packageName } - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { - context.startForegroundService(intent) - } else { - context.startService(intent) - } + companion object { + private const val TAG = "StreamingService" + private const val CHANNEL_ID = "streaming_channel" + private const val CHANNEL_NAME = "Camera Streaming" + private const val NOTIFICATION_ID = 1001 + private const val WAKELOCK_TAG = "VisionClaw::StreamingWakeLock" + + fun start(context: Context) { + val intent = + Intent(context, StreamingService::class.java).apply { `package` = context.packageName } + if 
(Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + context.startForegroundService(intent) + } else { + context.startService(intent) + } + } + + fun stop(context: Context) { + val intent = + Intent(context, StreamingService::class.java).apply { `package` = context.packageName } + context.stopService(intent) + } } - fun stop(context: Context) { - val intent = - Intent(context, StreamingService::class.java).apply { `package` = context.packageName } - context.stopService(intent) + private var wakeLock: PowerManager.WakeLock? = null + + override fun onBind(intent: Intent?): IBinder? = null + + override fun onCreate() { + super.onCreate() + Log.d(TAG, "Service created") + createNotificationChannel() } - } - private var wakeLock: PowerManager.WakeLock? = null + override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { + Log.d(TAG, "Service started") - override fun onBind(intent: Intent?): IBinder? = null + val notification = createNotification() - override fun onCreate() { - super.onCreate() - Log.d(TAG, "Service created") - createNotificationChannel() - } + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) { + startForeground( + NOTIFICATION_ID, + notification, + ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE or + ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE, + ) + } else { + startForeground(NOTIFICATION_ID, notification) + } - override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { - Log.d(TAG, "Service started") + acquireWakeLock() - val notification = createNotification() + return START_STICKY + } - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) { - startForeground( - NOTIFICATION_ID, - notification, - ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE, - ) - } else { - startForeground(NOTIFICATION_ID, notification) + override fun onDestroy() { + Log.d(TAG, "Service destroyed") + releaseWakeLock() + super.onDestroy() } - acquireWakeLock() - - return START_STICKY - } - - override fun onDestroy() { - 
Log.d(TAG, "Service destroyed") - releaseWakeLock() - super.onDestroy() - } - - private fun createNotificationChannel() { - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { - val channel = - NotificationChannel( - CHANNEL_ID, - CHANNEL_NAME, - NotificationManager.IMPORTANCE_LOW, - ) - .apply { - description = "Notifications for active camera streaming" - setShowBadge(false) - } - - val notificationManager = getSystemService(NotificationManager::class.java) - notificationManager.createNotificationChannel(channel) + private fun createNotificationChannel() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + val channel = + NotificationChannel( + CHANNEL_ID, + CHANNEL_NAME, + NotificationManager.IMPORTANCE_LOW, + ) + .apply { + description = "Notifications for active camera streaming" + setShowBadge(false) + } + + val notificationManager = getSystemService(NotificationManager::class.java) + notificationManager.createNotificationChannel(channel) + } } - } - - private fun createNotification(): Notification { - val pendingIntent = - PendingIntent.getActivity( - this, - 0, - Intent(this, MainActivity::class.java).apply { - flags = Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP - }, - PendingIntent.FLAG_IMMUTABLE or PendingIntent.FLAG_UPDATE_CURRENT, - ) - - return NotificationCompat.Builder(this, CHANNEL_ID) - .setContentTitle("Camera Streaming") - .setContentText("Streaming from your glasses...") - .setSmallIcon(R.drawable.ic_launcher_foreground) - .setOngoing(true) - .setContentIntent(pendingIntent) - .setPriority(NotificationCompat.PRIORITY_LOW) - .setCategory(NotificationCompat.CATEGORY_SERVICE) - .build() - } - - private fun acquireWakeLock() { - if (wakeLock == null) { - val powerManager = getSystemService(Context.POWER_SERVICE) as PowerManager - wakeLock = - powerManager.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, WAKELOCK_TAG).apply { - acquire(10 * 60 * 1000L) // 10 minutes max - } - Log.d(TAG, "WakeLock acquired") + + private fun 
createNotification(): Notification { + val pendingIntent = + PendingIntent.getActivity( + this, + 0, + Intent(this, MainActivity::class.java).apply { + flags = Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP + }, + PendingIntent.FLAG_IMMUTABLE or PendingIntent.FLAG_UPDATE_CURRENT, + ) + + return NotificationCompat.Builder(this, CHANNEL_ID) + .setContentTitle("Camera Streaming") + .setContentText("Streaming from your glasses...") + .setSmallIcon(R.drawable.ic_launcher_foreground) + .setOngoing(true) + .setContentIntent(pendingIntent) + .setPriority(NotificationCompat.PRIORITY_LOW) + .setCategory(NotificationCompat.CATEGORY_SERVICE) + .build() } - } - - private fun releaseWakeLock() { - wakeLock?.let { - if (it.isHeld) { - it.release() - Log.d(TAG, "WakeLock released") - } + + private fun acquireWakeLock() { + if (wakeLock == null) { + val powerManager = getSystemService(Context.POWER_SERVICE) as PowerManager + wakeLock = + powerManager.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, WAKELOCK_TAG).apply { + acquire(10 * 60 * 1000L) // 10 minutes max + } + Log.d(TAG, "WakeLock acquired") + } + } + + private fun releaseWakeLock() { + wakeLock?.let { + if (it.isHeld) { + it.release() + Log.d(TAG, "WakeLock released") + } + } + wakeLock = null } - wakeLock = null - } -} +} \ No newline at end of file diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt index f8c0689f..9e3456de 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt @@ -10,6 +10,8 @@ import androidx.compose.foundation.layout.navigationBarsPadding import androidx.compose.foundation.shape.CircleShape 
import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.AutoAwesome +import androidx.compose.material.icons.filled.Mic +import androidx.compose.material.icons.filled.MicOff import androidx.compose.material.icons.filled.Videocam import androidx.compose.material3.Button import androidx.compose.material3.ButtonDefaults @@ -26,6 +28,8 @@ fun ControlsRow( onCapturePhoto: () -> Unit, onToggleAI: () -> Unit, isAIActive: Boolean, + onToggleMic: () -> Unit, + isMicEnabled: Boolean, onToggleLive: () -> Unit, isLiveActive: Boolean, modifier: Modifier = Modifier, @@ -66,6 +70,27 @@ fun ControlsRow( ) } + // Mic toggle button (only meaningful when AI is active) + Button( + onClick = onToggleMic, + enabled = isAIActive, + modifier = Modifier.aspectRatio(1f), + colors = ButtonDefaults.buttonColors( + containerColor = if (!isAIActive) AppColor.DeepBlue + else if (isMicEnabled) AppColor.DeepBlue + else AppColor.Red, + disabledContainerColor = AppColor.DeepBlue, + ), + shape = CircleShape, + contentPadding = PaddingValues(0.dp), + ) { + Icon( + imageVector = if (isMicEnabled) Icons.Default.Mic else Icons.Default.MicOff, + contentDescription = if (isMicEnabled) "Mute Mic" else "Unmute Mic", + tint = Color.White, + ) + } + // Live toggle button Button( onClick = onToggleLive, @@ -83,4 +108,4 @@ fun ControlsRow( ) } } -} +} \ No newline at end of file diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index dd913363..fb5aeeb4 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -2,33 +2,35 @@ package 
com.meta.wearable.dat.externalsampleapps.cameraaccess.ui import androidx.compose.foundation.layout.Arrangement import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Row import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.fillMaxWidth import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.navigationBarsPadding import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.width import androidx.compose.foundation.rememberScrollState import androidx.compose.foundation.text.KeyboardOptions import androidx.compose.foundation.verticalScroll import androidx.compose.material.icons.Icons import androidx.compose.material.icons.automirrored.filled.ArrowBack -import androidx.compose.foundation.layout.Row import androidx.compose.material3.AlertDialog import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.Icon import androidx.compose.material3.IconButton import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedTextField -import androidx.compose.material3.Switch import androidx.compose.material3.Text import androidx.compose.material3.TextButton +import androidx.compose.material3.Switch import androidx.compose.material3.TopAppBar import androidx.compose.runtime.Composable import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember import androidx.compose.runtime.setValue +import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.Color import androidx.compose.ui.text.font.FontFamily @@ -98,6 +100,34 @@ fun SettingsScreen( .navigationBarsPadding(), verticalArrangement = Arrangement.spacedBy(16.dp), ) { + // Video section + + SectionHeader("Video") + Row( + modifier = Modifier.fillMaxWidth(), + 
horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Video streaming") + Spacer(modifier = Modifier.height(4.dp)) + Text( + text = if (videoStreamingEnabled) { + "Streams camera video, sends frames to Gemini, and attaches images to OpenClaw tool calls." + } else { + "Disables glasses/phone video, Gemini video frames, and OpenClaw image upload." + }, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Spacer(modifier = Modifier.width(12.dp)) + Switch( + checked = videoStreamingEnabled, + onCheckedChange = { videoStreamingEnabled = it }, + ) + } + // Gemini section SectionHeader("Gemini API") MonoTextField( @@ -155,42 +185,22 @@ fun SettingsScreen( keyboardType = KeyboardType.Uri, ) - // Video - SectionHeader("Video") - Row( - modifier = Modifier.fillMaxWidth(), - horizontalArrangement = Arrangement.SpaceBetween, - verticalAlignment = androidx.compose.ui.Alignment.CenterVertically, - ) { - Column { - Text("Video Streaming", style = MaterialTheme.typography.bodyLarge) - Text( - "Disable to save battery. 
Audio remains active.", - style = MaterialTheme.typography.bodySmall, - color = MaterialTheme.colorScheme.onSurfaceVariant, - ) - } - Switch( - checked = videoStreamingEnabled, - onCheckedChange = { videoStreamingEnabled = it }, - ) - } - - // Notifications SectionHeader("Notifications") Row( modifier = Modifier.fillMaxWidth(), horizontalArrangement = Arrangement.SpaceBetween, - verticalAlignment = androidx.compose.ui.Alignment.CenterVertically, + verticalAlignment = Alignment.CenterVertically, ) { - Column { + Column(modifier = Modifier.weight(1f)) { Text("Proactive Notifications", style = MaterialTheme.typography.bodyLarge) + Spacer(modifier = Modifier.height(4.dp)) Text( - "Receive updates from OpenClaw spoken through glasses.", + "Receive OpenClaw updates spoken through Gemini while a session is active.", style = MaterialTheme.typography.bodySmall, color = MaterialTheme.colorScheme.onSurfaceVariant, ) } + Spacer(modifier = Modifier.width(12.dp)) Switch( checked = proactiveNotificationsEnabled, onCheckedChange = { proactiveNotificationsEnabled = it }, diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index de605ca6..62508807 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -14,16 +14,23 @@ import androidx.activity.compose.LocalActivity import androidx.compose.foundation.Image import androidx.compose.foundation.layout.Box import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Row import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxSize import 
androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.widthIn import androidx.compose.foundation.layout.statusBarsPadding import androidx.compose.material3.CircularProgressIndicator +import androidx.compose.material3.FilterChip +import androidx.compose.material3.Text import androidx.compose.runtime.Composable import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.asImageBitmap @@ -38,6 +45,7 @@ import com.meta.wearable.dat.camera.types.StreamSessionState import com.meta.wearable.dat.externalsampleapps.cameraaccess.R import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiSessionViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamViewModel +import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMode import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.webrtc.WebRTCSessionViewModel @@ -74,14 +82,21 @@ fun StreamScreen( streamViewModel.webrtcViewModel = webrtcViewModel } + var videoStreamingEnabled by remember { mutableStateOf(SettingsManager.videoStreamingEnabled) } + // Start stream or phone camera - LaunchedEffect(isPhoneMode) { - if (isPhoneMode) { - geminiViewModel.streamingMode = StreamingMode.PHONE - streamViewModel.startPhoneCamera(lifecycleOwner) + LaunchedEffect(isPhoneMode, videoStreamingEnabled) { + geminiViewModel.streamingMode = if (isPhoneMode) StreamingMode.PHONE else StreamingMode.GLASSES + 
streamViewModel.setStreamingMode(if (isPhoneMode) StreamingMode.PHONE else StreamingMode.GLASSES) + + if (videoStreamingEnabled) { + if (isPhoneMode) { + streamViewModel.startPhoneCamera(lifecycleOwner) + } else { + streamViewModel.startStream() + } } else { - geminiViewModel.streamingMode = StreamingMode.GLASSES - streamViewModel.startStream() + streamViewModel.setVideoStreamingEnabled(false, lifecycleOwner) } } @@ -122,6 +137,13 @@ fun StreamScreen( ) } + if (streamUiState.videoFrame == null && !videoStreamingEnabled) { + Text( + text = "Audio-only mode\nAll video streaming is off.", + modifier = Modifier.align(Alignment.Center), + ) + } + if (streamUiState.streamSessionState == StreamSessionState.STARTING) { CircularProgressIndicator( modifier = Modifier.align(Alignment.Center), @@ -132,6 +154,25 @@ fun StreamScreen( Box(modifier = Modifier.fillMaxSize().padding(horizontal = 16.dp)) { // Top overlays (below status bar) Column(modifier = Modifier.align(Alignment.TopStart).statusBarsPadding().padding(top = 8.dp)) { + Row(verticalAlignment = Alignment.CenterVertically) { + FilterChip( + selected = !videoStreamingEnabled, + onClick = { + val newEnabled = !videoStreamingEnabled + videoStreamingEnabled = newEnabled + streamViewModel.setVideoStreamingEnabled(newEnabled, lifecycleOwner) + }, + label = { + Text( + if (videoStreamingEnabled) "Switch to audio-only" else "Enable video" + ) + }, + modifier = Modifier.widthIn(min = 160.dp), + ) + } + + Spacer(modifier = Modifier.height(8.dp)) + // Gemini overlay if (geminiUiState.isGeminiActive) { GeminiOverlay(uiState = geminiUiState) @@ -161,6 +202,8 @@ fun StreamScreen( } }, isAIActive = geminiUiState.isGeminiActive, + onToggleMic = { geminiViewModel.toggleMic() }, + isMicEnabled = geminiUiState.isMicEnabled, onToggleLive = { if (webrtcUiState.isActive) { webrtcViewModel.stopSession() @@ -187,4 +230,4 @@ fun StreamScreen( ) } } -} +} \ No newline at end of file From 827d114d353f599c660518ccc8e04eee4749e647 Mon Sep 17 
00:00:00 2001 From: Lee-daeho Date: Mon, 16 Mar 2026 15:22:10 -0600 Subject: [PATCH 02/68] fix(android): stabilize streaming service types and OpenClaw protocol --- samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml | 3 ++- .../cameraaccess/openclaw/OpenClawEventClient.kt | 4 ++-- samples/CameraAccessAndroid/gradle.properties | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml b/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml index 5ed342a1..4e02e322 100644 --- a/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml +++ b/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml @@ -12,6 +12,7 @@ + + android:foregroundServiceType="connectedDevice|microphone" /> diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 4b38e683..007df43c 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -127,8 +127,8 @@ class OpenClawEventClient { put("id", UUID.randomUUID().toString()) put("method", "connect") put("params", JSONObject().apply { - put("minProtocol", 1) - put("maxProtocol", 1) + put("minProtocol", 3) + put("maxProtocol", 3) put("client", JSONObject().apply { put("id", "gateway-client") put("displayName", "VisionClaw Glass") diff --git a/samples/CameraAccessAndroid/gradle.properties b/samples/CameraAccessAndroid/gradle.properties index 2bb943a4..132244e5 100644 --- a/samples/CameraAccessAndroid/gradle.properties +++ b/samples/CameraAccessAndroid/gradle.properties @@ -7,7 +7,6 @@ # Specifies the JVM 
arguments used for the daemon process. # The setting is particularly useful for tweaking memory settings. org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 -org.gradle.java.home=/Applications/Android Studio.app/Contents/jbr/Contents/Home # When configured, Gradle will run in incubating parallel mode. # This option should only be used with decoupled projects. For more details, visit # https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects From c91fe66c2d7fe77b2f595747f302c7536429df66 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Tue, 24 Mar 2026 13:52:46 -0600 Subject: [PATCH 03/68] Add chat transcript UI with Camera/Chat tab switcher (iOS + Android) Accumulate voice transcripts as a scrollable chat dialog with user messages as blue bubbles (right) and AI responses (left). Tool calls shown as centered status pills. Tab switcher at top lets users toggle between camera feed and chat view during active Gemini sessions. Auto-switches to chat tab in audio-only mode. 
--- .../CameraAccess/Chat/ChatMessage.swift | 29 ++++ .../Chat/ChatTranscriptView.swift | 113 ++++++++++++++ .../Gemini/GeminiSessionViewModel.swift | 70 ++++++++- .../CameraAccess/Views/StreamView.swift | 134 ++++++++++------ .../cameraaccess/chat/ChatMessage.kt | 23 +++ .../gemini/GeminiSessionViewModel.kt | 97 +++++++++++- .../cameraaccess/ui/ChatTranscriptView.kt | 145 ++++++++++++++++++ .../cameraaccess/ui/StreamScreen.kt | 81 +++++++--- 8 files changed, 618 insertions(+), 74 deletions(-) create mode 100644 samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift create mode 100644 samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift new file mode 100644 index 00000000..f41dd9fa --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift @@ -0,0 +1,29 @@ +import Foundation + +struct ChatMessage: Identifiable, Equatable { + let id: String + let role: ChatMessageRole + var text: String + let timestamp: Date + var status: ChatMessageStatus + + init(role: ChatMessageRole, text: String, status: ChatMessageStatus = .complete) { + self.id = UUID().uuidString + self.role = role + self.text = text + self.timestamp = Date() + self.status = status + } +} + +enum ChatMessageRole: Equatable { + case user + case assistant + case toolCall(String) // tool name +} + +enum ChatMessageStatus: Equatable { + case streaming + case complete + case error(String) +} diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift b/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift new file mode 100644 index 00000000..1968d838 
--- /dev/null +++ b/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift @@ -0,0 +1,113 @@ +import SwiftUI + +struct ChatTranscriptView: View { + @ObservedObject var geminiVM: GeminiSessionViewModel + + var body: some View { + ScrollViewReader { proxy in + ScrollView { + LazyVStack(spacing: 4) { + ForEach(geminiVM.messages) { message in + MessageBubbleView(message: message) + .id(message.id) + } + } + .padding(.vertical, 12) + } + .onChange(of: geminiVM.messages.count) { _, _ in + withAnimation(.easeOut(duration: 0.2)) { + proxy.scrollTo(geminiVM.messages.last?.id, anchor: .bottom) + } + } + .onChange(of: geminiVM.messages.last?.text) { _, _ in + proxy.scrollTo(geminiVM.messages.last?.id, anchor: .bottom) + } + } + } +} + +struct MessageBubbleView: View { + let message: ChatMessage + + var body: some View { + switch message.role { + case .user: + userBubble + case .assistant: + assistantBubble + case .toolCall(let name): + toolCallPill(name: name) + } + } + + private var userBubble: some View { + HStack { + Spacer(minLength: 60) + Text(message.text) + .font(.system(size: 15)) + .foregroundColor(.white) + .padding(.horizontal, 14) + .padding(.vertical, 10) + .background(Color.blue) + .cornerRadius(18) + } + .padding(.horizontal, 16) + .padding(.vertical, 2) + } + + private var assistantBubble: some View { + HStack { + VStack(alignment: .leading, spacing: 0) { + Text(message.text) + .font(.system(size: 15)) + .foregroundColor(.white.opacity(0.9)) + if message.status == .streaming { + TypingCursor() + .padding(.top, 2) + } + } + Spacer(minLength: 60) + } + .padding(.horizontal, 16) + .padding(.vertical, 2) + } + + private func toolCallPill(name: String) -> some View { + HStack(spacing: 6) { + if message.status == .streaming { + ProgressView() + .scaleEffect(0.6) + .tint(.white) + } else { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.green) + .font(.system(size: 12)) + } + Text(name) + .font(.system(size: 12, weight: .medium)) + 
.foregroundColor(.white.opacity(0.8)) + } + .padding(.horizontal, 12) + .padding(.vertical, 6) + .background(Color.white.opacity(0.15)) + .cornerRadius(12) + .frame(maxWidth: .infinity) + .padding(.vertical, 4) + } +} + +struct TypingCursor: View { + @State private var visible = true + + var body: some View { + RoundedRectangle(cornerRadius: 1) + .fill(Color.white.opacity(0.6)) + .frame(width: 2, height: 14) + .opacity(visible ? 1 : 0) + .onAppear { + withAnimation(.easeInOut(duration: 0.5).repeatForever(autoreverses: true)) { + visible = false + } + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index e7d9d902..6f991e73 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -9,6 +9,7 @@ class GeminiSessionViewModel: ObservableObject { @Published var errorMessage: String? @Published var userTranscript: String = "" @Published var aiTranscript: String = "" + @Published var messages: [ChatMessage] = [] @Published var toolCallStatus: ToolCallStatus = .idle @Published var openClawConnectionState: OpenClawConnectionState = .notConfigured private let geminiService = GeminiLiveService() @@ -19,6 +20,12 @@ class GeminiSessionViewModel: ObservableObject { private var lastVideoFrameTime: Date = .distantPast private var stateObservation: Task? + // Chat message tracking + private var activeUserBubbleId: String? + private var activeAIBubbleId: String? 
+ private var lastUserText: String = "" + private var lastAIText: String = "" + var streamingMode: StreamingMode = .glasses func startSession() async { @@ -54,7 +61,7 @@ class GeminiSessionViewModel: ObservableObject { geminiService.onTurnComplete = { [weak self] in guard let self else { return } Task { @MainActor in - // Clear user transcript when AI finishes responding + self.finalizeCurrentBubbles() self.userTranscript = "" } } @@ -64,6 +71,7 @@ class GeminiSessionViewModel: ObservableObject { Task { @MainActor in self.userTranscript += text self.aiTranscript = "" + self.updateUserBubble(self.userTranscript) } } @@ -71,6 +79,7 @@ class GeminiSessionViewModel: ObservableObject { guard let self else { return } Task { @MainActor in self.aiTranscript += text + self.updateAIBubble(self.aiTranscript) } } @@ -95,8 +104,18 @@ class GeminiSessionViewModel: ObservableObject { guard let self else { return } Task { @MainActor in for call in toolCall.functionCalls { + self.finalizeCurrentBubbles() + let msg = ChatMessage(role: .toolCall(call.name), text: "Executing...", status: .streaming) + self.messages.append(msg) + let toolMsgId = msg.id + self.toolCallRouter?.handleToolCall(call) { [weak self] response in - self?.geminiService.sendToolResponse(response) + guard let self else { return } + if let idx = self.messages.firstIndex(where: { $0.id == toolMsgId }) { + self.messages[idx].text = "Done" + self.messages[idx].status = .complete + } + self.geminiService.sendToolResponse(response) } } } @@ -201,4 +220,51 @@ class GeminiSessionViewModel: ObservableObject { geminiService.sendVideoFrame(image: image) } + // MARK: - Chat message helpers + + private func updateUserBubble(_ text: String) { + guard !text.isEmpty else { return } + if let id = activeUserBubbleId, let idx = messages.firstIndex(where: { $0.id == id }) { + messages[idx].text = text + } else { + // Finalize previous AI bubble before starting new user turn + if let aiId = activeAIBubbleId, let idx = 
messages.firstIndex(where: { $0.id == aiId }) { + messages[idx].status = .complete + activeAIBubbleId = nil + } + let msg = ChatMessage(role: .user, text: text, status: .streaming) + messages.append(msg) + activeUserBubbleId = msg.id + } + lastUserText = text + } + + private func updateAIBubble(_ text: String) { + guard !text.isEmpty else { return } + // Finalize user bubble when AI starts responding + if let userId = activeUserBubbleId, let idx = messages.firstIndex(where: { $0.id == userId }) { + messages[idx].status = .complete + } + if let id = activeAIBubbleId, let idx = messages.firstIndex(where: { $0.id == id }) { + messages[idx].text = text + } else { + let msg = ChatMessage(role: .assistant, text: text, status: .streaming) + messages.append(msg) + activeAIBubbleId = msg.id + } + lastAIText = text + } + + private func finalizeCurrentBubbles() { + if let id = activeUserBubbleId, let idx = messages.firstIndex(where: { $0.id == id }) { + messages[idx].status = .complete + } + if let id = activeAIBubbleId, let idx = messages.firstIndex(where: { $0.id == id }) { + messages[idx].status = .complete + } + activeUserBubbleId = nil + activeAIBubbleId = nil + lastUserText = "" + lastAIText = "" + } } diff --git a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index 3fc83f72..3b67cf76 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -22,6 +22,12 @@ struct StreamView: View { @ObservedObject var wearablesVM: WearablesViewModel @ObservedObject var geminiVM: GeminiSessionViewModel @ObservedObject var webrtcVM: WebRTCSessionViewModel + @State private var selectedTab: StreamTab = .camera + + enum StreamTab: String, CaseIterable { + case camera = "Camera" + case chat = "Chat" + } var body: some View { ZStack { @@ -29,64 +35,38 @@ struct StreamView: View { Color.black .edgesIgnoringSafeArea(.all) - // Video backdrop: PiP 
when WebRTC connected, otherwise single local feed - if webrtcVM.isActive && webrtcVM.connectionState == .connected { - PiPVideoView( - localFrame: viewModel.currentVideoFrame, - remoteVideoTrack: webrtcVM.remoteVideoTrack, - hasRemoteVideo: webrtcVM.hasRemoteVideo - ) - } else if let videoFrame = viewModel.currentVideoFrame, viewModel.hasReceivedFirstFrame { - GeometryReader { geometry in - Image(uiImage: videoFrame) - .resizable() - .aspectRatio(contentMode: .fill) - .frame(width: geometry.size.width, height: geometry.size.height) - .clipped() - } - .edgesIgnoringSafeArea(.all) + if selectedTab == .camera { + // --- Camera tab --- + cameraContent } else { - ProgressView() - .scaleEffect(1.5) - .foregroundColor(.white) + // --- Chat tab --- + ChatTranscriptView(geminiVM: geminiVM) + .padding(.top, 60) + .padding(.bottom, 80) } - // Gemini status overlay (top) + speaking indicator + // Tab picker at top if geminiVM.isGeminiActive { VStack { - GeminiStatusBar(geminiVM: geminiVM) - Spacer() - - VStack(spacing: 8) { - if !geminiVM.userTranscript.isEmpty || !geminiVM.aiTranscript.isEmpty { - TranscriptView( - userText: geminiVM.userTranscript, - aiText: geminiVM.aiTranscript - ) - } - - ToolCallStatusView(status: geminiVM.toolCallStatus) - - if geminiVM.isModelSpeaking { - HStack(spacing: 8) { - Image(systemName: "speaker.wave.2.fill") - .foregroundColor(.white) - .font(.system(size: 14)) - SpeakingIndicator() + HStack { + GeminiStatusBar(geminiVM: geminiVM) + Spacer() + Picker("", selection: $selectedTab) { + ForEach(StreamTab.allCases, id: \.self) { tab in + Text(tab.rawValue).tag(tab) } - .padding(.horizontal, 16) - .padding(.vertical, 8) - .background(Color.black.opacity(0.5)) - .cornerRadius(20) } + .pickerStyle(.segmented) + .frame(width: 140) } - .padding(.bottom, 80) + Spacer() } - .padding(.all, 24) + .padding(.horizontal, 24) + .padding(.top, 24) } // WebRTC status overlay (top) - if webrtcVM.isActive { + if webrtcVM.isActive && selectedTab == .camera { 
VStack { WebRTCStatusBar(webrtcVM: webrtcVM) Spacer() @@ -101,6 +81,12 @@ struct StreamView: View { } .padding(.all, 24) } + // Auto-switch to chat tab when Gemini starts if no video + .onChange(of: geminiVM.isGeminiActive) { _, active in + if active && !SettingsManager.shared.videoStreamingEnabled { + selectedTab = .chat + } + } .onDisappear { Task { if viewModel.streamingStatus != .stopped { @@ -146,6 +132,62 @@ struct StreamView: View { } } + @ViewBuilder + private var cameraContent: some View { + // Video backdrop: PiP when WebRTC connected, otherwise single local feed + if webrtcVM.isActive && webrtcVM.connectionState == .connected { + PiPVideoView( + localFrame: viewModel.currentVideoFrame, + remoteVideoTrack: webrtcVM.remoteVideoTrack, + hasRemoteVideo: webrtcVM.hasRemoteVideo + ) + } else if let videoFrame = viewModel.currentVideoFrame, viewModel.hasReceivedFirstFrame { + GeometryReader { geometry in + Image(uiImage: videoFrame) + .resizable() + .aspectRatio(contentMode: .fill) + .frame(width: geometry.size.width, height: geometry.size.height) + .clipped() + } + .edgesIgnoringSafeArea(.all) + } else { + ProgressView() + .scaleEffect(1.5) + .foregroundColor(.white) + } + + // Gemini speaking/transcript overlay on camera + if geminiVM.isGeminiActive { + VStack { + Spacer() + VStack(spacing: 8) { + if !geminiVM.userTranscript.isEmpty || !geminiVM.aiTranscript.isEmpty { + TranscriptView( + userText: geminiVM.userTranscript, + aiText: geminiVM.aiTranscript + ) + } + ToolCallStatusView(status: geminiVM.toolCallStatus) + if geminiVM.isModelSpeaking { + HStack(spacing: 8) { + Image(systemName: "speaker.wave.2.fill") + .foregroundColor(.white) + .font(.system(size: 14)) + SpeakingIndicator() + } + .padding(.horizontal, 16) + .padding(.vertical, 8) + .background(Color.black.opacity(0.5)) + .cornerRadius(20) + } + } + .padding(.bottom, 80) + } + .padding(.horizontal, 24) + } + } +} + // Extracted controls for clarity struct ControlsView: View { @ObservedObject var 
viewModel: StreamSessionViewModel diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt new file mode 100644 index 00000000..79c18b25 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt @@ -0,0 +1,23 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.chat + +import java.util.UUID + +data class ChatMessage( + val id: String = UUID.randomUUID().toString(), + val role: ChatMessageRole, + var text: String, + val timestamp: Long = System.currentTimeMillis(), + var status: ChatMessageStatus = ChatMessageStatus.Complete, +) + +sealed class ChatMessageRole { + data object User : ChatMessageRole() + data object Assistant : ChatMessageRole() + data class ToolCall(val name: String) : ChatMessageRole() +} + +sealed class ChatMessageStatus { + data object Streaming : ChatMessageStatus() + data object Complete : ChatMessageStatus() + data class Error(val message: String) : ChatMessageStatus() +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 808bd1fe..5f7b2c01 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -5,6 +5,9 @@ import android.app.Application import android.graphics.Bitmap import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.viewModelScope +import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessage +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageRole +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageStatus import com.meta.wearable.dat.externalsampleapps.cameraaccess.net.NetworkType import com.meta.wearable.dat.externalsampleapps.cameraaccess.net.NetworkTypeMonitor import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawBridge @@ -30,6 +33,7 @@ data class GeminiUiState( val errorMessage: String? = null, val userTranscript: String = "", val aiTranscript: String = "", + val messages: List = emptyList(), val toolCallStatus: ToolCallStatus = ToolCallStatus.Idle, val openClawConnectionState: OpenClawConnectionState = OpenClawConnectionState.NotConfigured, val networkType: NetworkType = NetworkType.NONE, @@ -66,6 +70,12 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { private val videoIntervalCellularMs = 4000L private val videoIntervalOtherMs = 2000L + // Chat message tracking + private var activeUserBubbleId: String? = null + private var activeAIBubbleId: String? = null + private var lastUserText: String = "" + private var lastAIText: String = "" + // execute 시작 시 mic 상태를 저장해뒀다가 끝나면 복원 private var micStateBeforeExecution: Boolean? = null private var micAutoMutedForExecution = false @@ -166,11 +176,10 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } geminiService.onTurnComplete = { + finalizeCurrentBubbles() _uiState.value = _uiState.value.copy(userTranscript = "") - // turn이 끝나면 원본 발화도 "이전 턴"으로 굳음. 
(원하면 여기서 별도 저장/rotate 가능) } - // execute 중에는 입력 전사도 누적하지 않음 geminiService.onInputTranscription = input@{ text -> if (isToolExecuting(_uiState.value.toolCallStatus)) return@input @@ -181,12 +190,13 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { userTranscript = newTranscript, aiTranscript = "" ) + updateUserBubble(newTranscript) } geminiService.onOutputTranscription = { text -> - _uiState.value = _uiState.value.copy( - aiTranscript = _uiState.value.aiTranscript + text - ) + val newAI = _uiState.value.aiTranscript + text + _uiState.value = _uiState.value.copy(aiTranscript = newAI) + updateAIBubble(newAI) } geminiService.onDisconnected = { reason -> @@ -211,7 +221,21 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { geminiService.onToolCall = { toolCall -> for (call in toolCall.functionCalls) { + finalizeCurrentBubbles() + val toolMsg = ChatMessage( + role = ChatMessageRole.ToolCall(call.name), + text = "Executing...", + status = ChatMessageStatus.Streaming, + ) + val msgs = _uiState.value.messages.toMutableList() + msgs.add(toolMsg) + _uiState.value = _uiState.value.copy(messages = msgs) + toolCallRouter?.handleToolCall(call) { response -> + val updated = _uiState.value.messages.map { + if (it.id == toolMsg.id) it.copy(text = "Done", status = ChatMessageStatus.Complete) else it + } + _uiState.value = _uiState.value.copy(messages = updated) geminiService.sendToolResponse(response) } } @@ -419,6 +443,69 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { _uiState.value = _uiState.value.copy(errorMessage = null) } + // Chat message helpers + + private fun updateUserBubble(text: String) { + if (text.isEmpty()) return + val msgs = _uiState.value.messages.toMutableList() + val existingIdx = activeUserBubbleId?.let { id -> msgs.indexOfFirst { it.id == id } }?.takeIf { it >= 0 } + + if (existingIdx != null) { + msgs[existingIdx] = msgs[existingIdx].copy(text = text) + } else { + // Finalize 
previous AI bubble + activeAIBubbleId?.let { aiId -> + val aiIdx = msgs.indexOfFirst { it.id == aiId } + if (aiIdx >= 0) msgs[aiIdx] = msgs[aiIdx].copy(status = ChatMessageStatus.Complete) + activeAIBubbleId = null + } + val msg = ChatMessage(role = ChatMessageRole.User, text = text, status = ChatMessageStatus.Streaming) + msgs.add(msg) + activeUserBubbleId = msg.id + } + lastUserText = text + _uiState.value = _uiState.value.copy(messages = msgs) + } + + private fun updateAIBubble(text: String) { + if (text.isEmpty()) return + val msgs = _uiState.value.messages.toMutableList() + + // Finalize user bubble + activeUserBubbleId?.let { userId -> + val idx = msgs.indexOfFirst { it.id == userId } + if (idx >= 0) msgs[idx] = msgs[idx].copy(status = ChatMessageStatus.Complete) + } + + val existingIdx = activeAIBubbleId?.let { id -> msgs.indexOfFirst { it.id == id } }?.takeIf { it >= 0 } + if (existingIdx != null) { + msgs[existingIdx] = msgs[existingIdx].copy(text = text) + } else { + val msg = ChatMessage(role = ChatMessageRole.Assistant, text = text, status = ChatMessageStatus.Streaming) + msgs.add(msg) + activeAIBubbleId = msg.id + } + lastAIText = text + _uiState.value = _uiState.value.copy(messages = msgs) + } + + private fun finalizeCurrentBubbles() { + val msgs = _uiState.value.messages.toMutableList() + activeUserBubbleId?.let { id -> + val idx = msgs.indexOfFirst { it.id == id } + if (idx >= 0) msgs[idx] = msgs[idx].copy(status = ChatMessageStatus.Complete) + } + activeAIBubbleId?.let { id -> + val idx = msgs.indexOfFirst { it.id == id } + if (idx >= 0) msgs[idx] = msgs[idx].copy(status = ChatMessageStatus.Complete) + } + activeUserBubbleId = null + activeAIBubbleId = null + lastUserText = "" + lastAIText = "" + _uiState.value = _uiState.value.copy(messages = msgs) + } + override fun onCleared() { super.onCleared() stopSession() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt new file mode 100644 index 00000000..9b582c8a --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt @@ -0,0 +1,145 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.ui + +import androidx.compose.foundation.background +import androidx.compose.foundation.layout.Arrangement +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.Spacer +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size +import androidx.compose.foundation.layout.width +import androidx.compose.foundation.lazy.LazyColumn +import androidx.compose.foundation.lazy.items +import androidx.compose.foundation.lazy.rememberLazyListState +import androidx.compose.foundation.shape.RoundedCornerShape +import androidx.compose.material3.CircularProgressIndicator +import androidx.compose.material3.Icon +import androidx.compose.material3.Text +import androidx.compose.runtime.Composable +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.res.painterResource +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.unit.dp +import androidx.compose.ui.unit.sp +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessage +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageRole +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageStatus + +@Composable +fun ChatTranscriptView( + 
messages: List, + modifier: Modifier = Modifier, +) { + val listState = rememberLazyListState() + + LaunchedEffect(messages.size, messages.lastOrNull()?.text) { + if (messages.isNotEmpty()) { + listState.animateScrollToItem(messages.size - 1) + } + } + + if (messages.isEmpty()) { + Box(modifier = modifier.fillMaxSize(), contentAlignment = Alignment.Center) { + Text( + text = "Start talking to see the conversation here", + color = Color.White.copy(alpha = 0.5f), + fontSize = 14.sp, + ) + } + } else { + LazyColumn( + state = listState, + modifier = modifier.fillMaxSize().padding(horizontal = 16.dp), + verticalArrangement = Arrangement.spacedBy(4.dp), + ) { + items(messages, key = { it.id }) { message -> + MessageBubble(message = message) + } + } + } +} + +@Composable +fun MessageBubble(message: ChatMessage, modifier: Modifier = Modifier) { + when (message.role) { + is ChatMessageRole.User -> UserBubble(message, modifier) + is ChatMessageRole.Assistant -> AssistantBubble(message, modifier) + is ChatMessageRole.ToolCall -> ToolCallBubble(message.role.name, message, modifier) + } +} + +@Composable +private fun UserBubble(message: ChatMessage, modifier: Modifier = Modifier) { + Row( + modifier = modifier.fillMaxWidth().padding(vertical = 2.dp), + horizontalArrangement = Arrangement.End, + ) { + Text( + text = message.text, + color = Color.White, + fontSize = 15.sp, + modifier = Modifier + .background(Color(0xFF2979FF), RoundedCornerShape(18.dp)) + .padding(horizontal = 14.dp, vertical = 10.dp), + ) + } +} + +@Composable +private fun AssistantBubble(message: ChatMessage, modifier: Modifier = Modifier) { + Row( + modifier = modifier.fillMaxWidth().padding(vertical = 2.dp), + horizontalArrangement = Arrangement.Start, + ) { + Text( + text = message.text, + color = Color.White.copy(alpha = 0.9f), + fontSize = 15.sp, + modifier = Modifier.padding(horizontal = 0.dp), + ) + } +} + +@Composable +private fun ToolCallBubble(name: String, message: ChatMessage, modifier: Modifier = 
Modifier) { + Row( + modifier = modifier + .fillMaxWidth() + .padding(vertical = 4.dp), + horizontalArrangement = Arrangement.Center, + ) { + Row( + modifier = Modifier + .background(Color.White.copy(alpha = 0.15f), RoundedCornerShape(12.dp)) + .padding(horizontal = 12.dp, vertical = 6.dp), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.spacedBy(6.dp), + ) { + if (message.status is ChatMessageStatus.Streaming) { + CircularProgressIndicator( + modifier = Modifier.size(12.dp), + color = Color.White, + strokeWidth = 1.5.dp, + ) + } else { + Text( + text = "[OK]", + color = Color(0xFF4CAF50), + fontSize = 11.sp, + fontFamily = FontFamily.Monospace, + ) + } + Text( + text = name, + color = Color.White.copy(alpha = 0.8f), + fontSize = 12.sp, + ) + } + } +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 62508807..67d4682d 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -19,15 +19,20 @@ import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.width import androidx.compose.foundation.layout.widthIn import androidx.compose.foundation.layout.statusBarsPadding import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.FilterChip +import androidx.compose.material3.SegmentedButton +import androidx.compose.material3.SegmentedButtonDefaults +import androidx.compose.material3.SingleChoiceSegmentedButtonRow 
import androidx.compose.material3.Text import androidx.compose.runtime.Composable import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableIntStateOf import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember import androidx.compose.runtime.setValue @@ -50,6 +55,7 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMod import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.webrtc.WebRTCSessionViewModel +@OptIn(androidx.compose.material3.ExperimentalMaterial3Api::class) @Composable fun StreamScreen( wearablesViewModel: WearablesViewModel, @@ -83,6 +89,15 @@ fun StreamScreen( } var videoStreamingEnabled by remember { mutableStateOf(SettingsManager.videoStreamingEnabled) } + val tabOptions = listOf("Camera", "Chat") + var selectedTab by remember { mutableIntStateOf(0) } + + // Auto-switch to chat tab when Gemini starts in audio-only mode + LaunchedEffect(geminiUiState.isGeminiActive) { + if (geminiUiState.isGeminiActive && !SettingsManager.videoStreamingEnabled) { + selectedTab = 1 + } + } // Start stream or phone camera LaunchedEffect(isPhoneMode, videoStreamingEnabled) { @@ -127,26 +142,34 @@ fun StreamScreen( } Box(modifier = modifier.fillMaxSize()) { - // Video feed - streamUiState.videoFrame?.let { videoFrame -> - Image( - bitmap = videoFrame.asImageBitmap(), - contentDescription = stringResource(R.string.live_stream), - modifier = Modifier.fillMaxSize(), - contentScale = ContentScale.Crop, - ) - } + if (selectedTab == 0) { + // --- Camera tab --- + streamUiState.videoFrame?.let { videoFrame -> + Image( + bitmap = videoFrame.asImageBitmap(), + contentDescription = stringResource(R.string.live_stream), + modifier = Modifier.fillMaxSize(), + contentScale = ContentScale.Crop, + ) + } - if 
(streamUiState.videoFrame == null && !videoStreamingEnabled) { - Text( - text = "Audio-only mode\nAll video streaming is off.", - modifier = Modifier.align(Alignment.Center), - ) - } + if (streamUiState.videoFrame == null && !videoStreamingEnabled) { + Text( + text = "Audio-only mode\nAll video streaming is off.", + modifier = Modifier.align(Alignment.Center), + ) + } - if (streamUiState.streamSessionState == StreamSessionState.STARTING) { - CircularProgressIndicator( - modifier = Modifier.align(Alignment.Center), + if (streamUiState.streamSessionState == StreamSessionState.STARTING) { + CircularProgressIndicator( + modifier = Modifier.align(Alignment.Center), + ) + } + } else { + // --- Chat tab --- + ChatTranscriptView( + messages = geminiUiState.messages, + modifier = Modifier.padding(top = 100.dp, bottom = 80.dp), ) } @@ -169,17 +192,33 @@ fun StreamScreen( }, modifier = Modifier.widthIn(min = 160.dp), ) + + // Tab switcher (only when Gemini is active) + if (geminiUiState.isGeminiActive) { + Spacer(modifier = Modifier.width(8.dp)) + SingleChoiceSegmentedButtonRow { + tabOptions.forEachIndexed { index, label -> + SegmentedButton( + shape = SegmentedButtonDefaults.itemShape(index = index, count = tabOptions.size), + onClick = { selectedTab = index }, + selected = selectedTab == index, + ) { + Text(label) + } + } + } + } } Spacer(modifier = Modifier.height(8.dp)) - // Gemini overlay - if (geminiUiState.isGeminiActive) { + // Gemini overlay (camera tab only) + if (geminiUiState.isGeminiActive && selectedTab == 0) { GeminiOverlay(uiState = geminiUiState) } // WebRTC overlay - if (webrtcUiState.isActive) { + if (webrtcUiState.isActive && selectedTab == 0) { Spacer(modifier = Modifier.height(4.dp)) WebRTCOverlay(uiState = webrtcUiState) } From 54d0e1219cf6365332e7a884aab1e7ea39c45d59 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Tue, 24 Mar 2026 14:10:01 -0600 Subject: [PATCH 04/68] Fix chat transcript text colors for light theme on Android --- 
.../cameraaccess/ui/ChatTranscriptView.kt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt index 9b582c8a..5e4a6b3f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt @@ -48,7 +48,7 @@ fun ChatTranscriptView( Box(modifier = modifier.fillMaxSize(), contentAlignment = Alignment.Center) { Text( text = "Start talking to see the conversation here", - color = Color.White.copy(alpha = 0.5f), + color = Color.Black.copy(alpha = 0.4f), fontSize = 14.sp, ) } @@ -99,7 +99,7 @@ private fun AssistantBubble(message: ChatMessage, modifier: Modifier = Modifier) ) { Text( text = message.text, - color = Color.White.copy(alpha = 0.9f), + color = Color.Black.copy(alpha = 0.85f), fontSize = 15.sp, modifier = Modifier.padding(horizontal = 0.dp), ) @@ -124,7 +124,7 @@ private fun ToolCallBubble(name: String, message: ChatMessage, modifier: Modifie if (message.status is ChatMessageStatus.Streaming) { CircularProgressIndicator( modifier = Modifier.size(12.dp), - color = Color.White, + color = Color.Black.copy(alpha = 0.6f), strokeWidth = 1.5.dp, ) } else { @@ -137,7 +137,7 @@ private fun ToolCallBubble(name: String, message: ChatMessage, modifier: Modifie } Text( text = name, - color = Color.White.copy(alpha = 0.8f), + color = Color.Black.copy(alpha = 0.6f), fontSize = 12.sp, ) } From fd0a5d98ea2215a539b2ccf92ff964d146d5c0b1 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Tue, 24 Mar 2026 14:11:43 -0600 Subject: [PATCH 05/68] Enable text selection in chat transcript on Android --- 
.../cameraaccess/ui/ChatTranscriptView.kt | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt index 5e4a6b3f..94356b3f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt @@ -15,6 +15,7 @@ import androidx.compose.foundation.lazy.LazyColumn import androidx.compose.foundation.lazy.items import androidx.compose.foundation.lazy.rememberLazyListState import androidx.compose.foundation.shape.RoundedCornerShape +import androidx.compose.foundation.text.selection.SelectionContainer import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.Icon import androidx.compose.material3.Text @@ -53,13 +54,15 @@ fun ChatTranscriptView( ) } } else { - LazyColumn( - state = listState, - modifier = modifier.fillMaxSize().padding(horizontal = 16.dp), - verticalArrangement = Arrangement.spacedBy(4.dp), - ) { - items(messages, key = { it.id }) { message -> - MessageBubble(message = message) + SelectionContainer { + LazyColumn( + state = listState, + modifier = modifier.fillMaxSize().padding(horizontal = 16.dp), + verticalArrangement = Arrangement.spacedBy(4.dp), + ) { + items(messages, key = { it.id }) { message -> + MessageBubble(message = message) + } } } } From bdd8af2e6d561e5a2c618b3c7efdb170bd674e2b Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 13:47:07 -0600 Subject: [PATCH 06/68] Add session log analysis scripts for UIST paper data collection - extract_glass_sessions.py: extracts all glass session data from OpenClaw store - 
analyze_glass_sessions.py: computes basic stats, tool latency, category breakdown - classify_with_llm.py: refined keyword classification with system msg filtering - ANALYSIS_REPORT.md: P1 (Xiaoan) usage report with all metrics --- .../CameraAccess/scripts/ANALYSIS_REPORT.md | 127 +++++ .../scripts/analyze_glass_sessions.py | 470 ++++++++++++++++++ .../CameraAccess/scripts/classify_with_llm.py | 241 +++++++++ .../scripts/extract_glass_sessions.py | 172 +++++++ 4 files changed, 1010 insertions(+) create mode 100644 samples/CameraAccess/scripts/ANALYSIS_REPORT.md create mode 100644 samples/CameraAccess/scripts/analyze_glass_sessions.py create mode 100644 samples/CameraAccess/scripts/classify_with_llm.py create mode 100644 samples/CameraAccess/scripts/extract_glass_sessions.py diff --git a/samples/CameraAccess/scripts/ANALYSIS_REPORT.md b/samples/CameraAccess/scripts/ANALYSIS_REPORT.md new file mode 100644 index 00000000..4ef9e834 --- /dev/null +++ b/samples/CameraAccess/scripts/ANALYSIS_REPORT.md @@ -0,0 +1,127 @@ +# VisionClaw Usage Analysis Report - P1 (Xiaoan) + +Generated: 2026-03-26 +Data source: `~/.openclaw/agents/main/sessions/sessions.json` (glass sessions) + +## 1. Basic Statistics + +| Metric | Value | +|--------|-------| +| Date range | 2026-02-06 to 2026-03-24 | +| Active days | 13 (14 raw, 1 excluded as system-only) | +| Total sessions | 79 | +| Total interactions | 133 (155 raw, 22 system/setup excluded) | +| Avg interactions/active day | 10.2 | +| Total tool calls (OpenClaw) | 500 | +| Avg tool calls/interaction | 3.2 | +| Avg session duration | 431s (7.2 min), median 32s | + +### Per-day breakdown + +| Date | Interactions | +|------|-------------| +| 2026-02-06 | 32 | +| 2026-02-10 | 13 | +| 2026-02-11 | 27 | +| 2026-02-12 | 1 | +| 2026-02-14 | 7 | +| 2026-02-15 | 6 | +| 2026-02-18 | 10 | +| 2026-03-03 | 5 | +| 2026-03-07 | 6 | +| 2026-03-09 | 1 | +| 2026-03-10 | 4 | +| 2026-03-12 | 3 | +| 2026-03-15 | 18 | + +## 2. 
Category Breakdown (Primary) + +| Category | Count | % | +|----------|-------|---| +| Shop | 87 | 65.4% | +| Retrieve | 20 | 15.0% | +| Communicate | 13 | 9.8% | +| Save | 13 | 9.8% | +| Recall | 0 | 0.0% | +| Control | 0 | 0.0% | + +### Multi-label breakdown (interactions can belong to multiple categories) + +| Category | Count | % | +|----------|-------|---| +| Shop | 87 | 65.4% | +| Retrieve | 73 | 54.9% | +| Save | 22 | 16.5% | +| Communicate | 13 | 9.8% | +| Recall | 0 | 0.0% | +| Control | 0 | 0.0% | + +**Note:** Classification is keyword-based. Most shopping interactions also involve "retrieve" (searching Amazon). For the paper, Ryo may want to use LLM-based classification for more nuance - the input file `llm-classify-prompt.json` is ready for that. + +## 3. Camera-based Usage + +- Camera/visually-grounded interactions: **3 / 133 (2.3%)** +- Examples: + - "Add the visible red Gatorade drink to the user's shopping cart" + - "Add the item currently displayed on the Amazon tab to the user's Amazon cart" + - Chinese: searching for yogurt "in front of me" on Amazon + +**Important caveat:** These OpenClaw logs only capture the text commands delegated from Gemini. ALL glass sessions stream camera frames (~1fps) to Gemini continuously. The "camera-based" count here only reflects interactions where the user's voice command explicitly referenced what they were seeing. Actual visual grounding is much higher since Gemini has continuous visual context. + +## 4. 
Tool Call Latency + +### Browser vs Non-browser + +| Metric | Browser (n=439) | Non-browser (n=60) | +|--------|-----------------|-------------------| +| Mean | 515 ms | 348 ms | +| Median | 144 ms | 37 ms | +| P25 | 53 ms | 19 ms | +| P75 | 237 ms | 721 ms | +| P95 | 2,564 ms | 1,811 ms | +| Max | 11,817 ms | 2,288 ms | + +### Per-tool breakdown + +| Tool | Count | Median (ms) | Mean (ms) | +|------|-------|-------------|-----------| +| browser | 439 | 144 | 515 | +| read | 17 | 19 | 29 | +| write | 11 | 25 | 25 | +| exec | 10 | 75 | 293 | +| memory_search | 8 | 1,202 | 1,252 | +| edit | 6 | 27 | 403 | +| web_search | 6 | 743 | 771 | +| nodes | 2 | 66 | 57 | + +**Note:** These are OpenClaw-side tool execution latencies (from tool call initiation to result return). End-to-end latency from user speech to spoken response also includes: Gemini STT, Gemini thinking, iOS->Gemini round-trip, and Gemini TTS - not captured here. + +## 5. Tool Usage Breakdown + +| Tool | Count | % | +|------|-------|---| +| browser | 440 | 88.0% | +| read | 17 | 3.4% | +| write | 11 | 2.2% | +| exec | 10 | 2.0% | +| memory_search | 8 | 1.6% | +| edit | 6 | 1.2% | +| web_search | 6 | 1.2% | +| nodes | 2 | 0.4% | + +## Scripts + +- `extract_glass_sessions.py` - Extract raw + structured JSONL from OpenClaw session store +- `analyze_glass_sessions.py` - Compute all stats (basic, latency, categories, camera) +- `classify_with_llm.py` - Refined keyword classification with system message filtering + +## Output files (in /tmp/visionclaw-data/) + +- `glass-sessions-raw.jsonl` - All raw session data +- `glass-sessions-structured.jsonl` - Clean structured messages +- `glass-sessions-classifications.jsonl` - Per-interaction classifications +- `glass-sessions-llm-classifications.jsonl` - Refined classifications +- `glass-sessions-latencies.jsonl` - Per-tool-call latency data +- `p1-xiaoan-summary.json` - Paper-ready summary JSON +- `p1-xiaoan-classifications-summary.json` - Classification summary JSON 
+- `llm-classify-prompt.json` - Input for LLM-based classification diff --git a/samples/CameraAccess/scripts/analyze_glass_sessions.py b/samples/CameraAccess/scripts/analyze_glass_sessions.py new file mode 100644 index 00000000..37049e82 --- /dev/null +++ b/samples/CameraAccess/scripts/analyze_glass_sessions.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python3 +""" +analyze_glass_sessions.py — Analyze VisionClaw glass session logs for UIST paper. + +Reads from structured JSONL (output of extract_glass_sessions.py). +Produces statistics needed for the paper: + 1. Basic stats: active days, sessions, interactions, avg/day + 2. Category breakdown: communicate, retrieve, save, recall, shop, control + 3. Camera-based usage extraction + 4. Tool call latency: browser vs non-browser + 5. Fine-grained stats + +Usage: python3 analyze_glass_sessions.py [input-dir] [output-dir] +""" + +import json +import sys +import os +import re +from pathlib import Path +from datetime import datetime, timedelta +from collections import Counter, defaultdict + +INPUT_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("/tmp/visionclaw-data") +OUTPUT_DIR = Path(sys.argv[2]) if len(sys.argv) > 2 else INPUT_DIR + +STRUCTURED_FILE = INPUT_DIR / "glass-sessions-structured.jsonl" + +# ============================================================ +# Load data +# ============================================================ +records = [] +with open(STRUCTURED_FILE) as f: + for line in f: + line = line.strip() + if line: + records.append(json.loads(line)) + +print(f"Loaded {len(records)} records from {STRUCTURED_FILE}") + +# ============================================================ +# 1. BASIC STATS +# ============================================================ +print("\n" + "=" * 60) +print("1. 
BASIC STATISTICS") +print("=" * 60) + +user_msgs = [r for r in records if r["role"] == "user"] +assistant_msgs = [r for r in records if r["role"] == "assistant"] +tool_results = [r for r in records if r["role"] == "toolResult"] + +# Active days +active_days = sorted(set(r["timestamp"][:10] for r in user_msgs if r["timestamp"])) +date_range_start = active_days[0] if active_days else "?" +date_range_end = active_days[-1] if active_days else "?" + +# Sessions = unique session_key values +session_keys = sorted(set(r["session_key"] for r in records)) + +# Interactions = user messages (each user turn = 1 interaction) +interactions = len(user_msgs) + +# Avg uses per active day +avg_per_day = interactions / len(active_days) if active_days else 0 + +# Per-day breakdown +day_counts = Counter(r["timestamp"][:10] for r in user_msgs if r["timestamp"]) + +print(f"Date range: {date_range_start} to {date_range_end}") +print(f"# Active days: {len(active_days)}") +print(f"# Sessions: {len(session_keys)}") +print(f"# Interactions: {interactions} (user messages)") +print(f"# Assistant msgs: {len(assistant_msgs)}") +print(f"# Tool calls: {len(tool_results)}") +print(f"Avg interactions/day:{avg_per_day:.1f}") +print(f"\nPer-day breakdown:") +for day in active_days: + print(f" {day}: {day_counts[day]} interactions") + +# ============================================================ +# 2. CATEGORY CLASSIFICATION +# ============================================================ +print("\n" + "=" * 60) +print("2. 
CATEGORY CLASSIFICATION") +print("=" * 60) + +# Keyword-based classification for initial pass +# Categories: communicate, retrieve, save, recall, shop, control +CATEGORY_RULES = { + "communicate": [ + r"\b(send|email|message|text|slack|whatsapp|telegram|call|reply|forward|dm)\b", + r"\b(tell|notify|contact|reach out|write to)\b", + ], + "retrieve": [ + r"\b(search|find|look up|google|what is|who is|where is|how to|check)\b", + r"\b(weather|news|price|stock|recipe|directions|info|information)\b", + r"\b(browse|open|go to|navigate|visit)\b", + ], + "save": [ + r"\b(save|add to|note|bookmark|remember this|write down|log|record)\b", + r"\b(shopping list|todo|reminder|calendar|schedule)\b", + r"\b(add .* to cart|add .* to list)\b", + ], + "recall": [ + r"\b(what did|remind me|recall|memory|remember when|last time)\b", + r"\b(history|previous|earlier|before)\b", + ], + "shop": [ + r"\b(buy|purchase|order|amazon|cart|checkout|shop|price|compare)\b", + r"\b(add .* to .*cart)\b", + r"\b(ebay|walmart|target|store)\b", + ], + "control": [ + r"\b(turn on|turn off|set|adjust|dim|bright|volume|play|pause|stop|skip)\b", + r"\b(light|thermostat|smart home|device|bluetooth|wifi)\b", + r"\b(timer|alarm|mute|unmute)\b", + ], +} + +def classify_interaction(text): + """Classify a user message into categories. 
Can be multi-label.""" + text_lower = text.lower() + categories = [] + for cat, patterns in CATEGORY_RULES.items(): + for pattern in patterns: + if re.search(pattern, text_lower): + categories.append(cat) + break + return categories if categories else ["retrieve"] # default to retrieve + +# Classify each user message +classifications = [] +for msg in user_msgs: + text = msg["text"] + cats = classify_interaction(text) + classifications.append({ + "timestamp": msg["timestamp"], + "text": text, + "categories": cats, + "primary": cats[0], + "session_key": msg["session_key"], + }) + +# Category counts (primary category) +primary_counts = Counter(c["primary"] for c in classifications) +total = len(classifications) + +# Also count multi-label +multi_counts = Counter() +for c in classifications: + for cat in c["categories"]: + multi_counts[cat] += 1 + +print(f"\nPrimary category breakdown (N={total}):") +for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]: + count = primary_counts.get(cat, 0) + pct = count / total * 100 if total else 0 + print(f" {cat:15s}: {count:4d} ({pct:5.1f}%)") + +print(f"\nMulti-label category breakdown (interactions can be in multiple):") +for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]: + count = multi_counts.get(cat, 0) + pct = count / total * 100 if total else 0 + print(f" {cat:15s}: {count:4d} ({pct:5.1f}%)") + +# Save detailed classifications for LLM refinement +classifications_path = OUTPUT_DIR / "glass-sessions-classifications.jsonl" +with open(classifications_path, "w") as f: + for c in classifications: + f.write(json.dumps(c) + "\n") +print(f"\nDetailed classifications saved to: {classifications_path}") +print("NOTE: These are keyword-based. Use the LLM prompt below for more accurate classification.") + +# ============================================================ +# 3. CAMERA-BASED USAGE +# ============================================================ +print("\n" + "=" * 60) +print("3. 
CAMERA-BASED USAGE") +print("=" * 60) + +# NOTE: Camera-based usage is hard to detect from OpenClaw logs alone. +# OpenClaw only sees the text commands delegated from Gemini. +# The camera frames flow Gemini Live (not through OpenClaw). +# We flag interactions where the user's request implies visual/camera context, +# but the real camera usage data would need to come from the iOS app logs +# or Gemini session transcripts. +# +# For the paper: ALL glass sessions involve camera (glasses are always streaming +# ~1fps to Gemini). The question is which interactions were *visually-grounded* +# (user asked about what they see) vs *voice-only* (user just spoke a command). + +CAMERA_KEYWORDS = [ + r"\b(what am i looking at|what do you see|what is this|describe|read this|scan)\b", + r"\b(looking at|in front of|see this|show me|camera|photo|picture|image|visual)\b", + r"\b(label|sign|text on|package|barcode|qr code|screen|display)\b", + r"\b(identify|recognize|detect|object|scene)\b", + r"\b(read|translate|what does .* say)\b", + r"tool_call_image_url", +] + +camera_interactions = [] +for msg in user_msgs: + text_lower = msg["text"].lower() + is_camera = msg.get("has_image", False) or msg.get("has_image_ref", False) + if not is_camera: + for pattern in CAMERA_KEYWORDS: + if re.search(pattern, text_lower): + is_camera = True + break + if is_camera: + camera_interactions.append(msg) + +# Also check: any interaction that includes "image" in the tool input may indicate camera +# Check assistant responses that reference images +for r in records: + if r["role"] == "assistant" and r.get("tool_calls"): + for tc in r["tool_calls"]: + inp = tc.get("input_preview", "").lower() + if "image" in inp or "photo" in inp or "camera" in inp: + # Find the preceding user message in same session + pass # would need more complex tracking + +print(f"Camera/visually-grounded interactions (keyword-detected): {len(camera_interactions)} / {len(user_msgs)} 
({len(camera_interactions)/len(user_msgs)*100:.1f}%)") +print(f"NOTE: ALL glass sessions stream camera to Gemini. This counts only interactions") +print(f" where the user's text request explicitly references visual context.") +print(f" Actual camera usage is likely much higher (Gemini sees frames continuously).") +print(f"\nCamera-based interaction samples:") +for ci in camera_interactions[:10]: + print(f" [{ci['timestamp'][:19]}] {ci['text'][:100]}") + +# ============================================================ +# 4. TOOL CALL LATENCY +# ============================================================ +print("\n" + "=" * 60) +print("4. TOOL CALL LATENCY") +print("=" * 60) + +# Use positional matching: assistant with toolCall -> next toolResult in same session +# OpenClaw toolResult messages don't carry tool call IDs, so we match sequentially. + +def parse_ts(ts_str): + """Parse ISO timestamp to datetime.""" + if not ts_str: + return None + try: + ts_str = ts_str.replace("Z", "+00:00") + return datetime.fromisoformat(ts_str) + except: + return None + +# Group records by session_key and compute latencies +latencies = [] +by_session = defaultdict(list) +for r in records: + by_session[r["session_key"]].append(r) + +for session_key, session_records in by_session.items(): + # Walk through records sequentially, matching toolCall -> toolResult pairs + i = 0 + while i < len(session_records): + r = session_records[i] + if r["role"] == "assistant" and r.get("tool_calls"): + # This assistant message has tool calls + for tc in r["tool_calls"]: + tool_name = tc["name"] + start_ts = r["timestamp"] + # Find the next toolResult in sequence + for j in range(i + 1, len(session_records)): + r2 = session_records[j] + if r2["role"] == "toolResult": + end_ts = r2["timestamp"] + start_dt = parse_ts(start_ts) + end_dt = parse_ts(end_ts) + if start_dt and end_dt: + latency_ms = (end_dt - start_dt).total_seconds() * 1000 + if latency_ms >= 0: # sanity check + latencies.append({ + "tool": 
tool_name, + "latency_ms": latency_ms, + "start": start_ts, + "end": end_ts, + "session_key": session_key, + "is_browser": tool_name == "browser", + }) + # Move past this toolResult for the next tool call + i = j + break + elif r2["role"] == "user": + # New user message before result - skip + break + i += 1 + +# Compute stats +browser_latencies = [l["latency_ms"] for l in latencies if l["is_browser"]] +non_browser_latencies = [l["latency_ms"] for l in latencies if not l["is_browser"]] + +def latency_stats(values, label): + if not values: + print(f" {label}: no data") + return + values_sorted = sorted(values) + n = len(values_sorted) + mean = sum(values_sorted) / n + median = values_sorted[n // 2] + p25 = values_sorted[int(n * 0.25)] + p75 = values_sorted[int(n * 0.75)] + p95 = values_sorted[int(n * 0.95)] + mn = values_sorted[0] + mx = values_sorted[-1] + print(f" {label} (n={n}):") + print(f" Mean: {mean:>8.0f} ms ({mean/1000:.1f}s)") + print(f" Median: {median:>8.0f} ms ({median/1000:.1f}s)") + print(f" P25: {p25:>8.0f} ms") + print(f" P75: {p75:>8.0f} ms") + print(f" P95: {p95:>8.0f} ms") + print(f" Min: {mn:>8.0f} ms") + print(f" Max: {mx:>8.0f} ms") + +print(f"Total tool calls with latency data: {len(latencies)}") +latency_stats(browser_latencies, "Browser tool calls") +latency_stats(non_browser_latencies, "Non-browser tool calls") + +# Per-tool breakdown +tool_lat = defaultdict(list) +for l in latencies: + tool_lat[l["tool"]].append(l["latency_ms"]) + +print(f"\nPer-tool latency:") +for tool, vals in sorted(tool_lat.items(), key=lambda x: -len(x[1])): + latency_stats(vals, tool) + +# ============================================================ +# 5. FINE-GRAINED STATS +# ============================================================ +print("\n" + "=" * 60) +print("5. 
FINE-GRAINED STATISTICS") +print("=" * 60) + +# Tool usage breakdown +all_tool_calls = [] +for r in records: + if r["role"] == "assistant" and r.get("tool_calls"): + for tc in r["tool_calls"]: + all_tool_calls.append(tc) + +tool_counts = Counter(tc["name"] for tc in all_tool_calls) +print(f"\nTool usage breakdown (N={len(all_tool_calls)}):") +for tool, count in tool_counts.most_common(): + pct = count / len(all_tool_calls) * 100 + print(f" {tool:20s}: {count:4d} ({pct:5.1f}%)") + +# Avg tool calls per interaction +sessions_with_tools = defaultdict(int) +for r in records: + if r["role"] == "assistant" and r.get("tool_calls"): + sessions_with_tools[r["session_key"]] += len(r["tool_calls"]) + +tool_calls_per_session = list(sessions_with_tools.values()) +if tool_calls_per_session: + avg_tools = sum(tool_calls_per_session) / len(tool_calls_per_session) + print(f"\nAvg tool calls per session: {avg_tools:.1f}") + +# Avg tool calls per user interaction +tools_per_interaction = len(all_tool_calls) / len(user_msgs) if user_msgs else 0 +print(f"Avg tool calls per interaction: {tools_per_interaction:.1f}") + +# Session duration stats +session_durations = [] +for session_key, session_records in by_session.items(): + timestamps = [parse_ts(r["timestamp"]) for r in session_records if r["timestamp"]] + timestamps = [t for t in timestamps if t] + if len(timestamps) >= 2: + duration = (max(timestamps) - min(timestamps)).total_seconds() + session_durations.append(duration) + +if session_durations: + avg_dur = sum(session_durations) / len(session_durations) + med_dur = sorted(session_durations)[len(session_durations) // 2] + print(f"\nSession duration:") + print(f" Avg: {avg_dur:.0f}s ({avg_dur/60:.1f}min)") + print(f" Median: {med_dur:.0f}s ({med_dur/60:.1f}min)") + print(f" Min: {min(session_durations):.0f}s") + print(f" Max: {max(session_durations):.0f}s ({max(session_durations)/60:.1f}min)") + +# Token usage +total_input_tokens = 0 +total_output_tokens = 0 +for r in records: + if 
r.get("usage"): + total_input_tokens += r["usage"].get("input_tokens", 0) + total_output_tokens += r["usage"].get("output_tokens", 0) + +print(f"\nToken usage:") +print(f" Total input tokens: {total_input_tokens:>10,}") +print(f" Total output tokens: {total_output_tokens:>10,}") +print(f" Total tokens: {total_input_tokens + total_output_tokens:>10,}") + +# ============================================================ +# 6. LLM CLASSIFICATION PROMPT +# ============================================================ +print("\n" + "=" * 60) +print("6. LLM CLASSIFICATION PROMPT (for more accurate categorization)") +print("=" * 60) + +# Generate a prompt that can be fed to Gemini/Claude for accurate classification +llm_input = [] +for i, msg in enumerate(user_msgs): + llm_input.append({"id": i, "text": msg["text"], "timestamp": msg["timestamp"]}) + +llm_prompt_path = OUTPUT_DIR / "llm-classify-prompt.json" +with open(llm_prompt_path, "w") as f: + json.dump({ + "instructions": ( + "Classify each user interaction into one or more categories: " + "communicate, retrieve, save, recall, shop, control. " + "Definitions:\n" + "- communicate: sending messages, emails, contacting people\n" + "- retrieve: searching for information, looking things up, browsing\n" + "- save: adding items to lists, saving notes, bookmarking, setting reminders\n" + "- recall: asking about past events, memory, history\n" + "- shop: purchasing, adding to cart, comparing prices\n" + "- control: controlling smart devices, settings, timers, media playback\n" + "Return a JSON array where each element has: id, categories (array of strings), primary (single string)." + ), + "interactions": llm_input + }, f, indent=2) + +print(f"LLM classification input saved to: {llm_prompt_path}") +print("Feed this to Gemini/Claude for accurate per-interaction categorization.") + +# ============================================================ +# 7. 
SUMMARY TABLE (paper-ready) +# ============================================================ +print("\n" + "=" * 60) +print("7. PAPER-READY SUMMARY (P1 - Xiaoan)") +print("=" * 60) + +summary = { + "participant": "P1 (Xiaoan)", + "date_range": f"{date_range_start} to {date_range_end}", + "active_days": len(active_days), + "total_sessions": len(session_keys), + "total_interactions": interactions, + "avg_interactions_per_active_day": round(avg_per_day, 1), + "category_breakdown_primary": {cat: primary_counts.get(cat, 0) for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]}, + "category_breakdown_pct": {cat: round(primary_counts.get(cat, 0) / total * 100, 1) if total else 0 for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]}, + "camera_based_interactions": len(camera_interactions), + "camera_based_pct": round(len(camera_interactions) / len(user_msgs) * 100, 1) if user_msgs else 0, + "tool_calls_total": len(all_tool_calls), + "tool_breakdown": dict(tool_counts), + "latency_browser_median_ms": round(sorted(browser_latencies)[len(browser_latencies)//2]) if browser_latencies else None, + "latency_browser_mean_ms": round(sum(browser_latencies)/len(browser_latencies)) if browser_latencies else None, + "latency_non_browser_median_ms": round(sorted(non_browser_latencies)[len(non_browser_latencies)//2]) if non_browser_latencies else None, + "latency_non_browser_mean_ms": round(sum(non_browser_latencies)/len(non_browser_latencies)) if non_browser_latencies else None, + "avg_session_duration_sec": round(avg_dur) if session_durations else None, +} + +summary_path = OUTPUT_DIR / "p1-xiaoan-summary.json" +with open(summary_path, "w") as f: + json.dump(summary, f, indent=2) + +print(json.dumps(summary, indent=2)) +print(f"\nSummary saved to: {summary_path}") + +# Save all latency data +latency_path = OUTPUT_DIR / "glass-sessions-latencies.jsonl" +with open(latency_path, "w") as f: + for l in latencies: + f.write(json.dumps(l) + "\n") 
def classify(text, idx):
    """Classify one user message with keyword rules.

    Args:
        text: Raw message text. It may carry a "[Chat messages since ...]"
            context prefix, which is stripped before matching.
        idx: Position of the message in the input. Accepted for caller
            compatibility; not used by the rules.

    Returns:
        A tuple ``(categories, is_camera, is_system)``:
        - ``categories``: list of matched category names, in the fixed order
          shop, communicate, save, recall, control, retrieve. It is
          ``["system"]`` for setup/debug chatter and falls back to
          ``["retrieve"]`` (or ``["shop"]`` for known product names) when no
          keyword matches.
        - ``is_camera``: True when the text refers to what the user sees.
        - ``is_system``: True when the message is setup/debug chatter.
    """
    t = text.lower().strip()

    # Strip chat context prefixes and keep only the current message.
    if "[chat messages since" in t:
        if "[current message" in t:
            t = t.split("[current message")[1]
        if "user:" in t:
            t = t.split("user:", 1)[-1].strip()

    # System/setup messages (not real user interactions): multi-word phrases
    # are specific enough to match as substrings.
    system_phrases = (
        "a new session was started",
        "are you able to use the browser",
        "but why is it on a new browser",
        "so how to relay that",
        "you are in 18789",
        "ok installed",
        "ok do it",
        "it only can act very fast then disconnected",
        "who are u",
        "what did you just do",
        "previous request was cut off",
        "message_id:",
    )
    if any(phrase in t for phrase in system_phrases):
        return ["system"], False, True

    # One-word acknowledgements must match as whole words. A substring test
    # would also fire on "cooler" or "abandoned" and drop real requests.
    words = {w.strip(".,!?;:'\"()[]") for w in t.split()}
    if words & {"done", "cool"}:
        return ["system"], False, True

    # Very short non-meaningful messages.
    if t.strip() in ("hi", "c", "ok"):
        return ["system"], False, True

    # Camera/visually-grounded requests.
    camera_phrases = (
        "in front of", "looking at", "what am i", "what do you see",
        "currently displayed", "the visible", "my eye", "yogurt",
    )
    is_camera = any(phrase in t for phrase in camera_phrases)

    # Chinese text referring to what is in front of / seen by the user.
    if "眼前" in text or "看" in text:
        is_camera = True

    # NOTE: category keywords are substring matches, so e.g. "light" also
    # matches "flight". Evaluation order fixes which category is "primary".
    keyword_table = (
        ("shop", ("amazon", "cart", "add to cart", "shopping cart", "purchase", "buy")),
        ("communicate", ("send", "email", "message to", "notify", "tell", "slack", "text to")),
        ("save", ("shopping list", "to-do", "todo", "log this", "tracker", "note",
                  "bookmark", "flagged", "flag", "walkthrough issue", "project tracker")),
        ("recall", ("what did", "remember", "recall", "history", "last time", "previous")),
        ("control", ("turn on", "turn off", "set ", "adjust", "play ", "pause", "volume",
                     "timer", "alarm", "light", "thermostat")),
        ("retrieve", ("search", "find", "look up", "open", "navigate", "browse",
                      "go to", "directions", "arxiv", "paper", "research", "pdf",
                      "check", "view", "click", "select")),
    )
    cats = [name for name, keywords in keyword_table if any(k in t for k in keywords)]

    # Default when nothing matched: known product names imply shopping,
    # anything else is treated as an information request.
    if not cats:
        product_names = ("diet coke", "monster", "gatorade", "wowflash", "ray-ban", "unreal")
        if any(name in t for name in product_names):
            cats.append("shop")
        else:
            cats.append("retrieve")

    return cats, is_camera, False
# Category / camera / per-day report for the classified interactions.
# Every ratio is guarded so an empty input prints zeros instead of raising
# ZeroDivisionError, matching the `if total else 0` guard used for `pct`.
CATEGORY_ORDER = ["communicate", "retrieve", "save", "recall", "shop", "control"]

print(f"CATEGORY BREAKDOWN (N={total})")
print(f"{'='*60}")
for cat in CATEGORY_ORDER:
    count = primary_counts.get(cat, 0)
    pct = count / total * 100 if total else 0
    print(f" {cat:15s}: {count:4d} ({pct:5.1f}%)")

# Multi-label: an interaction counts once for every category it matched.
multi_counts = Counter()
for r in real_interactions:
    for cat in r["categories"]:
        if cat != "system":
            multi_counts[cat] += 1

print(f"\nMulti-label breakdown:")
for cat in CATEGORY_ORDER:
    count = multi_counts.get(cat, 0)
    pct = count / total * 100 if total else 0
    print(f" {cat:15s}: {count:4d} ({pct:5.1f}%)")

# Camera-based
camera_interactions = [r for r in real_interactions if r["is_camera"]]
camera_pct = len(camera_interactions) / total * 100 if total else 0
print(f"\nCamera/visually-grounded: {len(camera_interactions)} / {total} ({camera_pct:.1f}%)")
for ci in camera_interactions:
    print(f" [{ci['timestamp'][:10]}] {ci['text'][:120]}")

# Per-day stats (excluding system); the day is the first 10 characters of
# the timestamp.
day_counts = Counter(r["timestamp"][:10] for r in real_interactions)
active_days = sorted(day_counts.keys())

print(f"\n{'='*60}")
print(f"PER-DAY BREAKDOWN (excluding system messages)")
print(f"{'='*60}")
for day in active_days:
    print(f" {day}: {day_counts[day]} interactions")

avg_per_active_day = total / len(active_days) if active_days else 0
print(f"\nActive days: {len(active_days)}")
print(f"Avg interactions/active day: {avg_per_active_day:.1f}")

# Per-day category breakdown (primary category only)
print(f"\n{'='*60}")
print(f"PER-DAY CATEGORY BREAKDOWN")
print(f"{'='*60}")
from collections import defaultdict as _dd
day_cats = _dd(Counter)
for r in real_interactions:
    day = r["timestamp"][:10]
    day_cats[day][r["primary"]] += 1
for day in active_days:
    cats = day_cats[day]
    parts = [f"{cat}={cats.get(cat,0)}" for cat in CATEGORY_ORDER if cats.get(cat, 0) > 0]
    print(f" {day}: {', '.join(parts)}")

# Save results
out_path = OUTPUT_DIR / "glass-sessions-llm-classifications.jsonl"
+with open(out_path, "w") as f: + for r in results: + f.write(json.dumps(r) + "\n") + +# Save paper-ready summary +summary = { + "participant": "P1 (Xiaoan)", + "total_raw_messages": len(records), + "system_excluded": len(system_msgs), + "total_interactions": total, + "active_days": len(active_days), + "avg_interactions_per_day": round(total / len(active_days), 1), + "category_primary": {cat: primary_counts.get(cat, 0) for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]}, + "category_primary_pct": {cat: round(primary_counts.get(cat, 0) / total * 100, 1) if total else 0 for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]}, + "category_multi_label": {cat: multi_counts.get(cat, 0) for cat in ["communicate", "retrieve", "save", "recall", "shop", "control"]}, + "camera_based": len(camera_interactions), + "camera_based_pct": round(len(camera_interactions) / total * 100, 1), +} + +summary_path = OUTPUT_DIR / "p1-xiaoan-classifications-summary.json" +with open(summary_path, "w") as f: + json.dump(summary, f, indent=2) + +print(f"\nClassifications saved to: {out_path}") +print(f"Summary saved to: {summary_path}") diff --git a/samples/CameraAccess/scripts/extract_glass_sessions.py b/samples/CameraAccess/scripts/extract_glass_sessions.py new file mode 100644 index 00000000..7732d3c0 --- /dev/null +++ b/samples/CameraAccess/scripts/extract_glass_sessions.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +extract_glass_sessions.py — Extract all VisionClaw glass session logs from OpenClaw. + +Produces: + 1. glass-sessions-raw.jsonl — All raw session data merged + 2. glass-sessions-structured.jsonl — Clean structured messages (timestamp, role, text, tools, etc.) 
import json
import os
import sys
from pathlib import Path
from datetime import datetime

# OpenClaw session index and the directory holding per-session transcripts.
SESSIONS_DIR = Path.home() / ".openclaw" / "agents" / "main" / "sessions"
SESSIONS_JSON = SESSIONS_DIR / "sessions.json"
# Output directory: first CLI argument, or a default under /tmp.
OUTPUT_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("/tmp/visionclaw-data")

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

if not SESSIONS_JSON.exists():
    print(f"Error: {SESSIONS_JSON} not found", file=sys.stderr)
    sys.exit(1)

# All files are opened as UTF-8 explicitly: transcripts contain non-ASCII
# text, and the locale default encoding is not UTF-8 on every platform.
with open(SESSIONS_JSON, encoding="utf-8") as f:
    store = json.load(f)

# Find all glass session keys
glass_sessions = {k: v for k, v in store.items() if "glass" in k.lower()}
print(f"Found {len(glass_sessions)} glass sessions")

# Resolve session files: try sessionFile first, then sessionId.jsonl
session_files = []
for key, entry in sorted(glass_sessions.items()):
    sf = entry.get("sessionFile")
    sid = entry.get("sessionId", "")

    if sf and Path(sf).exists():
        session_files.append((key, Path(sf)))
    elif sid:
        candidate = SESSIONS_DIR / f"{sid}.jsonl"
        if candidate.exists():
            session_files.append((key, candidate))

print(f"Found {len(session_files)} session files with data")

# --- Extract raw ---
raw_path = OUTPUT_DIR / "glass-sessions-raw.jsonl"
structured_path = OUTPUT_DIR / "glass-sessions-structured.jsonl"

raw_lines = 0
structured_records = []

with open(raw_path, "w", encoding="utf-8") as raw_out:
    for session_key, fpath in session_files:
        with open(fpath, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Every non-empty line goes to the raw dump, parseable or not.
                raw_out.write(line + "\n")
                raw_lines += 1

                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue

                if obj.get("type") != "message":
                    continue

                msg = obj.get("message", {})
                role = msg.get("role", "")
                timestamp = obj.get("timestamp", "")
                content = msg.get("content", [])

                # Defensive: accept a plain-string content as one text part
                # instead of failing on `.get` below.
                if isinstance(content, str):
                    content = [{"type": "text", "text": content}]

                # Extract text
                texts = []
                tool_calls = []
                tool_results = []
                has_thinking = False
                has_image = False

                for c in content:
                    if not isinstance(c, dict):
                        continue
                    ct = c.get("type", "")
                    if ct == "text":
                        texts.append(c.get("text", ""))
                    elif ct == "toolCall":
                        tool_calls.append({
                            "id": c.get("id", ""),
                            "name": c.get("name", ""),
                            "input_preview": json.dumps(c.get("input", {}))[:300]
                        })
                    elif ct == "toolResult":
                        # Preview keeps the last text part, cut to 500 chars.
                        result_text = ""
                        for rc in c.get("content", []):
                            if rc.get("type") == "text":
                                result_text = rc.get("text", "")[:500]
                        tool_results.append({
                            "id": c.get("id", ""),
                            "name": c.get("name", ""),
                            "result_preview": result_text
                        })
                    elif ct == "thinking":
                        has_thinking = True
                    elif ct == "image":
                        has_image = True

                # Check for image URLs in text (any mention of "image" counts)
                full_text = "\n".join(texts)
                has_image_url = "tool_call_image_url" in full_text or "image" in full_text.lower()

                usage = msg.get("usage", {})

                record = {
                    "session_key": session_key,
                    "timestamp": timestamp,
                    "role": role,
                    "text": full_text,
                    "tool_calls": tool_calls if tool_calls else None,
                    "tool_results": tool_results if tool_results else None,
                    "has_thinking": has_thinking,
                    "has_image": has_image,
                    "has_image_ref": has_image_url,
                    "usage": {
                        "input_tokens": usage.get("inputTokens", 0),
                        "output_tokens": usage.get("outputTokens", 0),
                    } if usage else None
                }
                structured_records.append(record)

# Sort by timestamp
structured_records.sort(key=lambda r: r["timestamp"])

with open(structured_path, "w", encoding="utf-8") as f:
    for r in structured_records:
        f.write(json.dumps(r) + "\n")

# --- Stats summary ---
user_msgs = [r for r in structured_records if r["role"] == "user"]
assistant_msgs = [r for r in structured_records if r["role"] == "assistant"]
tool_result_msgs = [r for r in structured_records if r["role"] == "toolResult"]

# Unique calendar days: first 10 characters of each non-empty timestamp.
dates = set()
for r in structured_records:
    if r["timestamp"]:
        dates.add(r["timestamp"][:10])

first_ts = structured_records[0]["timestamp"] if structured_records else "?"
last_ts = structured_records[-1]["timestamp"] if structured_records else "?"
+ +print(f"\n=== Extraction Complete ===") +print(f"Raw: {raw_path} ({raw_lines} lines)") +print(f"Structured: {structured_path} ({len(structured_records)} records)") +print(f"Date range: {first_ts} -> {last_ts}") +print(f"Unique dates: {len(dates)}") +print(f"Messages: user={len(user_msgs)} assistant={len(assistant_msgs)} toolResult={len(tool_result_msgs)}") +print(f"Sessions: {len(session_files)}") + +# Per-day breakdown +from collections import Counter +day_counts = Counter() +for r in user_msgs: + if r["timestamp"]: + day_counts[r["timestamp"][:10]] += 1 + +print(f"\nPer-day user message breakdown:") +for day, count in sorted(day_counts.items()): + print(f" {day}: {count} user messages") From 46b3a8b5e3040097e8b795df94a7d874668ec038 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 17:44:51 -0600 Subject: [PATCH 07/68] Add RemoteLogger for persistent session logging (port from IntentOS) - RemoteLogger.swift: fire-and-forget event logger matching IntentOS pattern Logs: voice:user, voice:ai, voice:tool_call, voice:tool_result, session:start, session:end - GeminiSessionViewModel: wired logging at session start/end, turn complete, and tool call initiation/result - server/index.js: added /api/logs POST/GET endpoint, stores JSONL per day in server/logs/ directory - .gitignore: exclude server/logs/ (sensitive session data) --- .gitignore | 3 + .../CameraAccess.xcodeproj/project.pbxproj | 24 +++++- .../Gemini/GeminiSessionViewModel.swift | 13 +++ .../CameraAccess/Gemini/RemoteLogger.swift | 61 ++++++++++++++ samples/CameraAccess/server/index.js | 79 +++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 samples/CameraAccess/CameraAccess/Gemini/RemoteLogger.swift diff --git a/.gitignore b/.gitignore index 530f7155..6fcdbf2a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ samples/CameraAccessAndroid/local.properties samples/CameraAccessAndroid/.gradle/ samples/CameraAccessAndroid/build/ 
samples/CameraAccessAndroid/app/build/ + +# Server logs (sensitive session data) +samples/CameraAccess/server/logs/ diff --git a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj index 1e7dbda4..cbce8864 100644 --- a/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj +++ b/samples/CameraAccess/CameraAccess.xcodeproj/project.pbxproj @@ -12,7 +12,6 @@ 8FD96B7F2E6F0A9800F56AB1 /* CameraAccessApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FD96B792E6F0A9800F56AB1 /* CameraAccessApp.swift */; }; 8FD96B812E6F0A9800F56AB1 /* HomeScreenView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FD96B722E6F0A9800F56AB1 /* HomeScreenView.swift */; }; 8FD96B872E6F0A9800F56AB1 /* StreamSessionViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FD96B6F2E6F0A9800F56AB1 /* StreamSessionViewModel.swift */; }; - 9DD6CC002F4A000000ED7098 /* VideoDecoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CBFF2F4A000000ED7098 /* VideoDecoder.swift */; }; 8FD96B882E6F0A9800F56AB1 /* StreamSessionView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FD96B752E6F0A9800F56AB1 /* StreamSessionView.swift */; }; 8FD96B8A2E6F0A9800F56AB1 /* PhotoPreviewView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FD96B742E6F0A9800F56AB1 /* PhotoPreviewView.swift */; }; 8FD96B8D2E6F0A9800F56AB1 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8FD96B772E6F0A9800F56AB1 /* Assets.xcassets */; }; @@ -27,6 +26,8 @@ 8FFD60542E849D0D0035E446 /* RegistrationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FFD60532E849D0D0035E446 /* RegistrationView.swift */; }; 8FFD60602E84A2F70035E446 /* MainAppView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FFD605F2E84A2F70035E446 /* MainAppView.swift */; }; 8FFD60612E84A2F70035E446 /* DebugMenuView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8FFD605E2E84A2F70035E446 /* DebugMenuView.swift */; }; + 
9D8CD52F2F746BF600E5149E /* ChatTranscriptView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D8CD52D2F746BF600E5149E /* ChatTranscriptView.swift */; }; + 9D8CD5302F746BF600E5149E /* ChatMessage.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D8CD52C2F746BF600E5149E /* ChatMessage.swift */; }; 9DD6CAAF2F3C426600ED7098 /* Secrets.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CAAD2F3C426600ED7098 /* Secrets.swift */; }; 9DD6CAFE2F3C62DA00ED7098 /* WebRTC in Frameworks */ = {isa = PBXBuildFile; productRef = 9DD6CAFD2F3C62DA00ED7098 /* WebRTC */; }; 9DD6CB052F3C637D00ED7098 /* WebRTCSessionViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CB032F3C637D00ED7098 /* WebRTCSessionViewModel.swift */; }; @@ -36,6 +37,7 @@ 9DD6CB092F3C637D00ED7098 /* WebRTCClient.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CB012F3C637D00ED7098 /* WebRTCClient.swift */; }; 9DD6CB0C2F3C648800ED7098 /* WebRTC in Frameworks */ = {isa = PBXBuildFile; productRef = 9DD6CB0B2F3C648800ED7098 /* WebRTC */; }; 9DD6CB0E2F3C64F400ED7098 /* WebRTCOverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CB0D2F3C64F400ED7098 /* WebRTCOverlayView.swift */; }; + 9DD6CC002F4A000000ED7098 /* VideoDecoder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD6CBFF2F4A000000ED7098 /* VideoDecoder.swift */; }; 9DD894B22F4047630090B9B9 /* SettingsManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD894AF2F4047630090B9B9 /* SettingsManager.swift */; }; 9DD894B32F4047630090B9B9 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD894B02F4047630090B9B9 /* SettingsView.swift */; }; 9DD895962F405E0E0090B9B9 /* RTCVideoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9DD895952F405E0E0090B9B9 /* RTCVideoView.swift */; }; @@ -45,6 +47,7 @@ A1B2C3D42F0A000200000003 /* AudioManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000003 /* AudioManager.swift */; }; A1B2C3D42F0A000200000004 /* 
GeminiSessionViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */; }; A1B2C3D42F0A000200000005 /* GeminiOverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000005 /* GeminiOverlayView.swift */; }; + A1B2C3D42F0A000200000006 /* RemoteLogger.swift in Sources */ = {isa = PBXBuildFile; fileRef = A1B2C3D42F0A000100000006 /* RemoteLogger.swift */; }; E66D30242E7DA71900470B48 /* MockDeviceKitButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = E66D30232E7DA71900470B48 /* MockDeviceKitButton.swift */; }; E6A188482EB918740097D0E1 /* StreamView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6A188472EB918740097D0E1 /* StreamView.swift */; }; E6DA451D2E79A63100E3F688 /* MockDeviceCardView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6DA45182E79A63100E3F688 /* MockDeviceCardView.swift */; }; @@ -81,7 +84,6 @@ 8F2D237F2E856711002D0588 /* DebugMenuViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DebugMenuViewModel.swift; sourceTree = ""; }; 8F8F00772E8ACB4500A4BDAF /* WearablesViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WearablesViewModel.swift; sourceTree = ""; }; 8FD96B6F2E6F0A9800F56AB1 /* StreamSessionViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamSessionViewModel.swift; sourceTree = ""; }; - 9DD6CBFF2F4A000000ED7098 /* VideoDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDecoder.swift; sourceTree = ""; }; 8FD96B722E6F0A9800F56AB1 /* HomeScreenView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HomeScreenView.swift; sourceTree = ""; }; 8FD96B742E6F0A9800F56AB1 /* PhotoPreviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PhotoPreviewView.swift; sourceTree = ""; }; 8FD96B752E6F0A9800F56AB1 /* 
StreamSessionView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamSessionView.swift; sourceTree = ""; }; @@ -98,6 +100,8 @@ 8FFD60532E849D0D0035E446 /* RegistrationView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RegistrationView.swift; sourceTree = ""; }; 8FFD605E2E84A2F70035E446 /* DebugMenuView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DebugMenuView.swift; sourceTree = ""; }; 8FFD605F2E84A2F70035E446 /* MainAppView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MainAppView.swift; sourceTree = ""; }; + 9D8CD52C2F746BF600E5149E /* ChatMessage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatMessage.swift; sourceTree = ""; }; + 9D8CD52D2F746BF600E5149E /* ChatTranscriptView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatTranscriptView.swift; sourceTree = ""; }; 9DD6CAAD2F3C426600ED7098 /* Secrets.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Secrets.swift; sourceTree = ""; }; 9DD6CAAE2F3C426600ED7098 /* Secrets.swift.example */ = {isa = PBXFileReference; lastKnownFileType = text; path = Secrets.swift.example; sourceTree = ""; }; 9DD6CAFF2F3C637D00ED7098 /* CustomVideoCapturer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CustomVideoCapturer.swift; sourceTree = ""; }; @@ -106,6 +110,7 @@ 9DD6CB022F3C637D00ED7098 /* WebRTCConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebRTCConfig.swift; sourceTree = ""; }; 9DD6CB032F3C637D00ED7098 /* WebRTCSessionViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebRTCSessionViewModel.swift; sourceTree = ""; }; 9DD6CB0D2F3C64F400ED7098 /* WebRTCOverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebRTCOverlayView.swift; 
sourceTree = ""; }; + 9DD6CBFF2F4A000000ED7098 /* VideoDecoder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoDecoder.swift; sourceTree = ""; }; 9DD894AF2F4047630090B9B9 /* SettingsManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsManager.swift; sourceTree = ""; }; 9DD894B02F4047630090B9B9 /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = ""; }; 9DD895942F405E0E0090B9B9 /* PiPVideoView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PiPVideoView.swift; sourceTree = ""; }; @@ -115,6 +120,7 @@ A1B2C3D42F0A000100000003 /* AudioManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioManager.swift; sourceTree = ""; }; A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeminiSessionViewModel.swift; sourceTree = ""; }; A1B2C3D42F0A000100000005 /* GeminiOverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeminiOverlayView.swift; sourceTree = ""; }; + A1B2C3D42F0A000100000006 /* RemoteLogger.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RemoteLogger.swift; sourceTree = ""; }; E66D30232E7DA71900470B48 /* MockDeviceKitButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MockDeviceKitButton.swift; sourceTree = ""; }; E699CC952E8150670052C240 /* CameraAccessTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = CameraAccessTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; E6A188472EB918740097D0E1 /* StreamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreamView.swift; sourceTree = ""; }; @@ -203,6 +209,7 @@ 8FD96B7D2E6F0A9800F56AB1 /* CameraAccess */ = { isa = 
PBXGroup; children = ( + 9D8CD52E2F746BF600E5149E /* Chat */, 9DD894B12F4047630090B9B9 /* Settings */, 9DD6CB042F3C637D00ED7098 /* WebRTC */, 9DD6CAAD2F3C426600ED7098 /* Secrets.swift */, @@ -242,6 +249,15 @@ path = MockDeviceKit; sourceTree = ""; }; + 9D8CD52E2F746BF600E5149E /* Chat */ = { + isa = PBXGroup; + children = ( + 9D8CD52C2F746BF600E5149E /* ChatMessage.swift */, + 9D8CD52D2F746BF600E5149E /* ChatTranscriptView.swift */, + ); + path = Chat; + sourceTree = ""; + }; 9DD6CB042F3C637D00ED7098 /* WebRTC */ = { isa = PBXGroup; children = ( @@ -273,6 +289,7 @@ A1B2C3D42F0A000100000001 /* GeminiConfig.swift */, A1B2C3D42F0A000100000002 /* GeminiLiveService.swift */, A1B2C3D42F0A000100000004 /* GeminiSessionViewModel.swift */, + A1B2C3D42F0A000100000006 /* RemoteLogger.swift */, ); path = Gemini; sourceTree = ""; @@ -434,9 +451,12 @@ A1B2C3D42F0A000200000002 /* GeminiLiveService.swift in Sources */, A1B2C3D42F0A000200000003 /* AudioManager.swift in Sources */, A1B2C3D42F0A000200000004 /* GeminiSessionViewModel.swift in Sources */, + 9D8CD52F2F746BF600E5149E /* ChatTranscriptView.swift in Sources */, + 9D8CD5302F746BF600E5149E /* ChatMessage.swift in Sources */, 9DD894B22F4047630090B9B9 /* SettingsManager.swift in Sources */, 9DD894B32F4047630090B9B9 /* SettingsView.swift in Sources */, A1B2C3D42F0A000200000005 /* GeminiOverlayView.swift in Sources */, + A1B2C3D42F0A000200000006 /* RemoteLogger.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 6f991e73..01a38cb5 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -37,6 +37,7 @@ class GeminiSessionViewModel: ObservableObject { } isGeminiActive = true + RemoteLogger.shared.log("session:start") // Wire audio callbacks 
audioManager.onAudioCaptured = { [weak self] data in @@ -61,6 +62,13 @@ class GeminiSessionViewModel: ObservableObject { geminiService.onTurnComplete = { [weak self] in guard let self else { return } Task { @MainActor in + // Log finalized transcripts before clearing + if !self.lastUserText.isEmpty { + RemoteLogger.shared.log("voice:user", data: ["text": self.lastUserText]) + } + if !self.lastAIText.isEmpty { + RemoteLogger.shared.log("voice:ai", data: ["text": self.lastAIText]) + } self.finalizeCurrentBubbles() self.userTranscript = "" } @@ -109,12 +117,16 @@ class GeminiSessionViewModel: ObservableObject { self.messages.append(msg) let toolMsgId = msg.id + let taskDesc = (call.args["task"] as? String) ?? "" + RemoteLogger.shared.log("voice:tool_call", data: ["tool": call.name, "task": taskDesc]) + self.toolCallRouter?.handleToolCall(call) { [weak self] response in guard let self else { return } if let idx = self.messages.firstIndex(where: { $0.id == toolMsgId }) { self.messages[idx].text = "Done" self.messages[idx].status = .complete } + RemoteLogger.shared.log("voice:tool_result", data: ["tool": call.name, "result": String(response.prefix(500))]) self.geminiService.sendToolResponse(response) } } @@ -196,6 +208,7 @@ class GeminiSessionViewModel: ObservableObject { } func stopSession() { + RemoteLogger.shared.log("session:end") eventClient.disconnect() toolCallRouter?.cancelAll() toolCallRouter = nil diff --git a/samples/CameraAccess/CameraAccess/Gemini/RemoteLogger.swift b/samples/CameraAccess/CameraAccess/Gemini/RemoteLogger.swift new file mode 100644 index 00000000..e64bdc1e --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gemini/RemoteLogger.swift @@ -0,0 +1,61 @@ +import Foundation + +/// Sends conversation events to the logging server for persistent logging. +/// All methods are fire-and-forget -- logging never blocks the UI or conversation flow. 
+final class RemoteLogger { + static let shared = RemoteLogger() + + private let session: URLSession + private var sequenceNumber = 0 + + private init() { + let config = URLSessionConfiguration.default + config.timeoutIntervalForRequest = 5 + self.session = URLSession(configuration: config) + } + + /// The base URL for the logging server (same host as OpenClaw, port 8080). + private var baseURL: String? { + guard GeminiConfig.isOpenClawConfigured else { return nil } + let host = GeminiConfig.openClawHost + return "\(host):8080" + } + + /// Log a conversation event. Types: + /// - "voice:user" -- user speech transcript from Gemini + /// - "voice:ai" -- Gemini voice response transcript + /// - "voice:tool_call" -- Gemini triggered execute tool + /// - "voice:tool_result" -- tool result sent back to Gemini + /// - "session:start" -- voice session started + /// - "session:end" -- voice session ended + func log(_ type: String, data: [String: String] = [:]) { + guard let baseURL else { return } + guard let url = URL(string: "\(baseURL)/api/logs") else { return } + + sequenceNumber += 1 + let eventData: [String: Any] = [ + "event": type, + "seq": sequenceNumber + ].merging(data) { _, new in new } + + let payload: [String: Any] = [ + "type": "event", + "session": "ios-client", + "data": eventData + ] + + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.setValue(GeminiConfig.openClawGatewayToken, forHTTPHeaderField: "x-api-token") + + do { + request.httpBody = try JSONSerialization.data(withJSONObject: payload) + } catch { return } + + // Fire and forget + Task.detached(priority: .utility) { [session] in + _ = try? 
await session.data(for: request) + } + } +} diff --git a/samples/CameraAccess/server/index.js b/samples/CameraAccess/server/index.js index dbb9149d..5cc87a7a 100644 --- a/samples/CameraAccess/server/index.js +++ b/samples/CameraAccess/server/index.js @@ -34,8 +34,87 @@ function getTurnCredentials() { }; } +// --- Session Logger --- +// Stores logs as JSONL files on disk, matching IntentOS's logging pattern. +const LOGS_DIR = path.join(__dirname, "logs"); +if (!fs.existsSync(LOGS_DIR)) fs.mkdirSync(LOGS_DIR, { recursive: true }); + +function getLogFilePath() { + const date = new Date().toISOString().slice(0, 10); // YYYY-MM-DD + return path.join(LOGS_DIR, `visionclaw-${date}.jsonl`); +} + +function appendLog(entry) { + const line = JSON.stringify(entry) + "\n"; + fs.appendFile(getLogFilePath(), line, (err) => { + if (err) console.error("[Logger] Write error:", err.message); + }); +} + // HTTP server for serving the web viewer const httpServer = http.createServer((req, res) => { + // --- Logging API --- + if (req.url === "/api/logs" && req.method === "POST") { + let body = ""; + req.on("data", (chunk) => (body += chunk)); + req.on("end", () => { + try { + const payload = JSON.parse(body); + const entry = { + ts: new Date().toISOString(), + type: payload.type || "event", + session: payload.session || "unknown", + data: payload.data || payload, + }; + appendLog(entry); + res.writeHead(200, { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }); + res.end(JSON.stringify({ ok: true })); + } catch (e) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid JSON" })); + } + }); + return; + } + + if (req.url?.startsWith("/api/logs") && req.method === "GET") { + // Return recent logs from today's file + const logFile = getLogFilePath(); + if (!fs.existsSync(logFile)) { + res.writeHead(200, { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }); + 
res.end(JSON.stringify({ logs: [], count: 0 })); + return; + } + const lines = fs.readFileSync(logFile, "utf-8").trim().split("\n").filter(Boolean); + const count = parseInt(new URL(req.url, "http://localhost").searchParams.get("count") || "50"); + const logs = lines.slice(-count).reverse().map((l) => { + try { return JSON.parse(l); } catch { return null; } + }).filter(Boolean); + res.writeHead(200, { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }); + res.end(JSON.stringify({ logs, count: logs.length })); + return; + } + + // CORS preflight + if (req.method === "OPTIONS") { + res.writeHead(204, { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET, POST, OPTIONS", + "Access-Control-Allow-Headers": "Content-Type, x-api-token", + }); + res.end(); + return; + } + // TURN credentials API endpoint if (req.url === "/api/turn") { const creds = getTurnCredentials(); From fa7027ea28414d5d4151a40c68a7b6087999db14 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 17:53:41 -0600 Subject: [PATCH 08/68] Store voice logs in ~/.openclaw/visionclaw-logs/ for agent extraction - server/index.js: write logs to ~/.openclaw/visionclaw-logs/ instead of server/logs/ so OpenClaw can discover and read them via file tools - extract_voice_logs.py: script to extract and analyze voice interaction logs (complements extract_glass_sessions.py for OpenClaw tool-call logs) --- .gitignore | 2 +- .../scripts/extract_voice_logs.py | 121 ++++++++++++++++++ samples/CameraAccess/server/index.js | 8 +- 3 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 samples/CameraAccess/scripts/extract_voice_logs.py diff --git a/.gitignore b/.gitignore index 6fcdbf2a..70d4bca7 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,5 @@ samples/CameraAccessAndroid/.gradle/ samples/CameraAccessAndroid/build/ samples/CameraAccessAndroid/app/build/ -# Server logs (sensitive session data) +# Server logs (now stored in 
~/.openclaw/visionclaw-logs/ for agent access) samples/CameraAccess/server/logs/ diff --git a/samples/CameraAccess/scripts/extract_voice_logs.py b/samples/CameraAccess/scripts/extract_voice_logs.py new file mode 100644 index 00000000..4d05cbd6 --- /dev/null +++ b/samples/CameraAccess/scripts/extract_voice_logs.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +extract_voice_logs.py — Extract VisionClaw voice interaction logs. + +These are the Gemini-side logs (voice transcripts, session lifecycle) +captured by RemoteLogger on iOS/Android, stored by the Node.js server. + +Data source: ~/.openclaw/visionclaw-logs/visionclaw-YYYY-MM-DD.jsonl + +This script complements extract_glass_sessions.py which extracts OpenClaw +tool-call logs. Together they give the full picture: + - Voice logs: ALL interactions (voice:user, voice:ai, session lifecycle) + - OpenClaw logs: Only tool-use interactions (browser, web_search, etc.) + +Usage: python3 extract_voice_logs.py [output-dir] +""" + +import json +import os +import sys +from pathlib import Path +from collections import Counter +from datetime import datetime + +VOICE_LOGS_DIR = Path.home() / ".openclaw" / "visionclaw-logs" +OUTPUT_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("/tmp/visionclaw-data") +OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + +if not VOICE_LOGS_DIR.exists(): + print(f"No voice logs found at {VOICE_LOGS_DIR}") + print("Make sure the VisionClaw signaling server (node index.js) is running") + print("and that RemoteLogger is enabled in the iOS/Android app.") + sys.exit(1) + +# Find all log files +log_files = sorted(VOICE_LOGS_DIR.glob("visionclaw-*.jsonl")) +print(f"Found {len(log_files)} daily log files in {VOICE_LOGS_DIR}") + +# Parse all entries +all_entries = [] +for f in log_files: + date = f.stem.replace("visionclaw-", "") + with open(f) as fh: + for line in fh: + line = line.strip() + if not line: + continue + try: + entry = json.loads(line) + entry["_date"] = date + all_entries.append(entry) + 
except json.JSONDecodeError: + continue + +print(f"Total log entries: {len(all_entries)}") + +# Classify by event type +event_counts = Counter() +for e in all_entries: + data = e.get("data", {}) + event = data.get("event", e.get("type", "unknown")) + event_counts[event] += 1 + +print(f"\nEvent type breakdown:") +for event, count in event_counts.most_common(): + print(f" {event:20s}: {count}") + +# Extract voice interactions +voice_user = [e for e in all_entries if e.get("data", {}).get("event") == "voice:user"] +voice_ai = [e for e in all_entries if e.get("data", {}).get("event") == "voice:ai"] +tool_calls = [e for e in all_entries if e.get("data", {}).get("event") == "voice:tool_call"] +tool_results = [e for e in all_entries if e.get("data", {}).get("event") == "voice:tool_result"] +session_starts = [e for e in all_entries if e.get("data", {}).get("event") == "session:start"] +session_ends = [e for e in all_entries if e.get("data", {}).get("event") == "session:end"] + +print(f"\nVoice interactions:") +print(f" User utterances: {len(voice_user)}") +print(f" AI responses: {len(voice_ai)}") +print(f" Tool calls: {len(tool_calls)}") +print(f" Tool results: {len(tool_results)}") +print(f" Session starts: {len(session_starts)}") +print(f" Session ends: {len(session_ends)}") + +# Platform breakdown +ios_entries = [e for e in all_entries if e.get("session") == "ios-client"] +android_entries = [e for e in all_entries if e.get("session") == "android-client"] +print(f"\nPlatform breakdown:") +print(f" iOS: {len(ios_entries)}") +print(f" Android: {len(android_entries)}") + +# Per-day breakdown +day_counts = Counter(e["_date"] for e in voice_user) +active_days = sorted(day_counts.keys()) +print(f"\nActive days: {len(active_days)}") +for day in active_days: + print(f" {day}: {day_counts[day]} user utterances") + +# Save structured output +output_path = OUTPUT_DIR / "voice-logs-all.jsonl" +with open(output_path, "w") as f: + for e in all_entries: + f.write(json.dumps(e) + "\n") 
+ +# Save voice interactions only +voice_path = OUTPUT_DIR / "voice-interactions.jsonl" +with open(voice_path, "w") as f: + for e in voice_user + voice_ai: + f.write(json.dumps(e) + "\n") + +print(f"\nOutput:") +print(f" All logs: {output_path}") +print(f" Voice interactions: {voice_path}") + +# Print sample user utterances +if voice_user: + print(f"\nSample user utterances:") + for e in voice_user[:10]: + text = e.get("data", {}).get("text", "")[:100] + ts = e.get("ts", "")[:19] + platform = e.get("session", "?") + print(f" [{ts}] ({platform}) {text}") diff --git a/samples/CameraAccess/server/index.js b/samples/CameraAccess/server/index.js index 5cc87a7a..b2d98753 100644 --- a/samples/CameraAccess/server/index.js +++ b/samples/CameraAccess/server/index.js @@ -35,9 +35,13 @@ function getTurnCredentials() { } // --- Session Logger --- -// Stores logs as JSONL files on disk, matching IntentOS's logging pattern. -const LOGS_DIR = path.join(__dirname, "logs"); +// Stores logs as JSONL files in ~/.openclaw/visionclaw-logs/ so OpenClaw +// can discover and analyze them via its file tools (read, exec, etc.). +// This makes logs "agent-extractable" — just ask OpenClaw to read them. 
+const os = require("os"); +const LOGS_DIR = path.join(os.homedir(), ".openclaw", "visionclaw-logs"); if (!fs.existsSync(LOGS_DIR)) fs.mkdirSync(LOGS_DIR, { recursive: true }); +console.log(`[Logger] Writing logs to ${LOGS_DIR}`); function getLogFilePath() { const date = new Date().toISOString().slice(0, 10); // YYYY-MM-DD From 5a25ea448f7564d0c9c8c9795dc0c63109c5a61e Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 17:57:25 -0600 Subject: [PATCH 09/68] Add capture_photo tool and gallery for Gemini-triggered photo capture (iOS) - New capture_photo Gemini tool declaration alongside execute tool - ToolCallRouter intercepts capture_photo locally (not sent to OpenClaw) - PhotoCaptureStore persists captured frames as JPEG with JSON manifest - GalleryView (3-column grid) and GalleryDetailView (full screen + share + delete) - Gallery button in StreamView top bar when Gemini is active - Capture toast notification on successful photo save --- .../CameraAccess/Gallery/CapturedPhoto.swift | 19 ++++ .../Gallery/GalleryDetailView.swift | 85 +++++++++++++++++ .../CameraAccess/Gallery/GalleryView.swift | 71 ++++++++++++++ .../Gallery/PhotoCaptureStore.swift | 93 +++++++++++++++++++ .../Gemini/GeminiSessionViewModel.swift | 20 ++++ .../OpenClaw/ToolCallModels.swift | 17 +++- .../OpenClaw/ToolCallRouter.swift | 15 +++ .../CameraAccess/Views/StreamView.swift | 40 +++++++- 8 files changed, 358 insertions(+), 2 deletions(-) create mode 100644 samples/CameraAccess/CameraAccess/Gallery/CapturedPhoto.swift create mode 100644 samples/CameraAccess/CameraAccess/Gallery/GalleryDetailView.swift create mode 100644 samples/CameraAccess/CameraAccess/Gallery/GalleryView.swift create mode 100644 samples/CameraAccess/CameraAccess/Gallery/PhotoCaptureStore.swift diff --git a/samples/CameraAccess/CameraAccess/Gallery/CapturedPhoto.swift b/samples/CameraAccess/CameraAccess/Gallery/CapturedPhoto.swift new file mode 100644 index 00000000..148002fe --- /dev/null +++ 
b/samples/CameraAccess/CameraAccess/Gallery/CapturedPhoto.swift @@ -0,0 +1,19 @@ +import Foundation + +struct CapturedPhoto: Identifiable, Codable { + let id: String + let filename: String + let timestamp: Date + var description: String? + + var fileURL: URL { + Self.capturesDirectory.appendingPathComponent(filename) + } + + static var capturesDirectory: URL { + let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + let dir = docs.appendingPathComponent("Captures", isDirectory: true) + try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + return dir + } +} diff --git a/samples/CameraAccess/CameraAccess/Gallery/GalleryDetailView.swift b/samples/CameraAccess/CameraAccess/Gallery/GalleryDetailView.swift new file mode 100644 index 00000000..02b88973 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gallery/GalleryDetailView.swift @@ -0,0 +1,85 @@ +import SwiftUI + +struct GalleryDetailView: View { + let photo: CapturedPhoto + @ObservedObject private var store = PhotoCaptureStore.shared + @Environment(\.dismiss) private var dismiss + @State private var showShareSheet = false + @State private var showDeleteConfirmation = false + + private var formattedDate: String { + let formatter = DateFormatter() + formatter.dateStyle = .medium + formatter.timeStyle = .short + return formatter.string(from: photo.timestamp) + } + + var body: some View { + VStack(spacing: 0) { + // Image + if let image = store.imageForPhoto(photo) { + Image(uiImage: image) + .resizable() + .aspectRatio(contentMode: .fit) + .frame(maxWidth: .infinity) + + // Metadata + VStack(alignment: .leading, spacing: 8) { + Text(formattedDate) + .font(.subheadline) + .foregroundColor(.secondary) + + if let description = photo.description, !description.isEmpty { + Text(description) + .font(.body) + .foregroundColor(.primary) + } + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding() + + Spacer() + + // Actions + HStack(spacing: 40) { + 
Button(action: { showShareSheet = true }) { + VStack(spacing: 4) { + Image(systemName: "square.and.arrow.up") + .font(.title2) + Text("Share") + .font(.caption) + } + } + + Button(role: .destructive, action: { showDeleteConfirmation = true }) { + VStack(spacing: 4) { + Image(systemName: "trash") + .font(.title2) + Text("Delete") + .font(.caption) + } + } + } + .padding(.bottom, 30) + + .sheet(isPresented: $showShareSheet) { + ShareSheet(photo: image) + } + } else { + Text("Photo not found") + .foregroundColor(.secondary) + .padding() + Spacer() + } + } + .navigationTitle("Photo") + .navigationBarTitleDisplayMode(.inline) + .confirmationDialog("Delete this photo?", isPresented: $showDeleteConfirmation) { + Button("Delete", role: .destructive) { + store.deletePhoto(photo) + dismiss() + } + Button("Cancel", role: .cancel) {} + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Gallery/GalleryView.swift b/samples/CameraAccess/CameraAccess/Gallery/GalleryView.swift new file mode 100644 index 00000000..b8b676ff --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gallery/GalleryView.swift @@ -0,0 +1,71 @@ +import SwiftUI + +struct GalleryView: View { + @ObservedObject private var store = PhotoCaptureStore.shared + @State private var selectedPhoto: CapturedPhoto? 
+ + private let columns = [ + GridItem(.flexible(), spacing: 2), + GridItem(.flexible(), spacing: 2), + GridItem(.flexible(), spacing: 2), + ] + + var body: some View { + Group { + if store.photos.isEmpty { + VStack(spacing: 12) { + Image(systemName: "photo.on.rectangle.angled") + .font(.system(size: 48)) + .foregroundColor(.secondary) + Text("No captured photos yet") + .font(.headline) + .foregroundColor(.secondary) + Text("Ask the AI to take a photo while using the glasses") + .font(.subheadline) + .foregroundColor(.secondary.opacity(0.7)) + .multilineTextAlignment(.center) + } + .padding() + } else { + ScrollView { + LazyVGrid(columns: columns, spacing: 2) { + ForEach(store.photos) { photo in + GalleryThumbnail(photo: photo) + .onTapGesture { + selectedPhoto = photo + } + } + } + } + } + } + .navigationTitle("Gallery") + .navigationBarTitleDisplayMode(.inline) + .sheet(item: $selectedPhoto) { photo in + NavigationStack { + GalleryDetailView(photo: photo) + } + } + } +} + +private struct GalleryThumbnail: View { + let photo: CapturedPhoto + @ObservedObject private var store = PhotoCaptureStore.shared + + var body: some View { + GeometryReader { geo in + if let image = store.imageForPhoto(photo) { + Image(uiImage: image) + .resizable() + .aspectRatio(contentMode: .fill) + .frame(width: geo.size.width, height: geo.size.width) + .clipped() + } else { + Color.gray.opacity(0.3) + .frame(width: geo.size.width, height: geo.size.width) + } + } + .aspectRatio(1, contentMode: .fit) + } +} diff --git a/samples/CameraAccess/CameraAccess/Gallery/PhotoCaptureStore.swift b/samples/CameraAccess/CameraAccess/Gallery/PhotoCaptureStore.swift new file mode 100644 index 00000000..0ca39df8 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Gallery/PhotoCaptureStore.swift @@ -0,0 +1,93 @@ +import Foundation +import UIKit + +@MainActor +class PhotoCaptureStore: ObservableObject { + static let shared = PhotoCaptureStore() + + @Published var photos: [CapturedPhoto] = [] + + private var 
manifestURL: URL { + CapturedPhoto.capturesDirectory.appendingPathComponent("manifest.json") + } + + private init() { + loadManifest() + } + + // MARK: - Public + + @discardableResult + func saveFrame(_ image: UIImage, description: String?) -> CapturedPhoto? { + let formatter = DateFormatter() + formatter.dateFormat = "yyyy-MM-dd_HH-mm-ss" + let filename = "capture_\(formatter.string(from: Date())).jpg" + + guard let data = image.jpegData(compressionQuality: 0.9) else { + NSLog("[PhotoCapture] Failed to encode JPEG") + return nil + } + + let fileURL = CapturedPhoto.capturesDirectory.appendingPathComponent(filename) + do { + try data.write(to: fileURL) + } catch { + NSLog("[PhotoCapture] Failed to write file: %@", error.localizedDescription) + return nil + } + + let photo = CapturedPhoto( + id: UUID().uuidString, + filename: filename, + timestamp: Date(), + description: description + ) + + photos.insert(photo, at: 0) + saveManifest() + + NSLog("[PhotoCapture] Saved: %@ (%d bytes)", filename, data.count) + return photo + } + + func deletePhoto(_ photo: CapturedPhoto) { + try? FileManager.default.removeItem(at: photo.fileURL) + photos.removeAll { $0.id == photo.id } + saveManifest() + NSLog("[PhotoCapture] Deleted: %@", photo.filename) + } + + func imageForPhoto(_ photo: CapturedPhoto) -> UIImage? 
{ + UIImage(contentsOfFile: photo.fileURL.path) + } + + // MARK: - Manifest + + private func loadManifest() { + guard FileManager.default.fileExists(atPath: manifestURL.path) else { return } + do { + let data = try Data(contentsOf: manifestURL) + let decoder = JSONDecoder() + decoder.dateDecodingStrategy = .iso8601 + var loaded = try decoder.decode([CapturedPhoto].self, from: data) + // Filter out photos whose files no longer exist + loaded = loaded.filter { FileManager.default.fileExists(atPath: $0.fileURL.path) } + photos = loaded + NSLog("[PhotoCapture] Loaded %d photos from manifest", photos.count) + } catch { + NSLog("[PhotoCapture] Failed to load manifest: %@", error.localizedDescription) + } + } + + private func saveManifest() { + do { + let encoder = JSONEncoder() + encoder.dateEncodingStrategy = .iso8601 + encoder.outputFormatting = .prettyPrinted + let data = try encoder.encode(photos) + try data.write(to: manifestURL) + } catch { + NSLog("[PhotoCapture] Failed to save manifest: %@", error.localizedDescription) + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 01a38cb5..47d1da94 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -18,6 +18,9 @@ class GeminiSessionViewModel: ObservableObject { private let audioManager = AudioManager() private let eventClient = OpenClawEventClient() private var lastVideoFrameTime: Date = .distantPast + private var latestVideoFrame: UIImage? + private let photoCaptureStore = PhotoCaptureStore.shared + @Published var lastCapturedPhoto: CapturedPhoto? private var stateObservation: Task? 
// Chat message tracking @@ -108,6 +111,21 @@ class GeminiSessionViewModel: ObservableObject { // Wire tool call handling toolCallRouter = ToolCallRouter(bridge: openClawBridge) + // Local capture_photo handler + toolCallRouter?.onCapturePhoto = { [weak self] description, completion in + guard let self else { completion(.failure("Session ended")); return } + guard let frame = self.latestVideoFrame else { + completion(.failure("No camera frame available to capture")) + return + } + if let photo = self.photoCaptureStore.saveFrame(frame, description: description) { + self.lastCapturedPhoto = photo + completion(.success("Photo captured and saved: \(photo.filename)")) + } else { + completion(.failure("Failed to save photo")) + } + } + geminiService.onToolCall = { [weak self] toolCall in guard let self else { return } Task { @MainActor in @@ -225,6 +243,8 @@ class GeminiSessionViewModel: ObservableObject { } func sendVideoFrameIfThrottled(image: UIImage) { + // Always keep latest frame for capture_photo + latestVideoFrame = image guard SettingsManager.shared.videoStreamingEnabled else { return } guard isGeminiActive, connectionState == .ready else { return } let now = Date() diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index c7222a28..38980d1b 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -85,9 +85,24 @@ enum ToolCallStatus: Equatable { enum ToolDeclarations { static func allDeclarations() -> [[String: Any]] { - return [execute] + return [execute, capturePhoto] } + static let capturePhoto: [String: Any] = [ + "name": "capture_photo", + "description": "Capture and save the current camera frame as a photo. 
Use when the user asks to take a photo, capture what they see, save a picture, or snap a photo.", + "parameters": [ + "type": "object", + "properties": [ + "description": [ + "type": "string", + "description": "Brief description of what is in the photo" + ] + ], + "required": [] as [String] + ] as [String: Any] + ] + static let execute: [String: Any] = [ "name": "execute", "description": "Your only way to take action. You have no memory, storage, or ability to do anything on your own -- use this tool for everything: sending messages, searching the web, adding to lists, setting reminders, creating notes, research, drafts, scheduling, smart home control, app interactions, or any request that goes beyond answering a question. When in doubt, use this tool.", diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index d81d20fb..116fe979 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -5,6 +5,9 @@ class ToolCallRouter { private let bridge: OpenClawBridge private var inFlightTasks: [String: Task] = [:] + /// Callback for local capture_photo handling. Called with (description, completion). + var onCapturePhoto: ((_ description: String?, _ completion: @escaping (ToolResult) -> Void) -> Void)? + init(bridge: OpenClawBridge) { self.bridge = bridge } @@ -21,6 +24,18 @@ class ToolCallRouter { NSLog("[ToolCall] Received: %@ (id: %@) args: %@", callName, callId, String(describing: call.args)) + // Local tool: capture_photo — handle on-device, don't send to OpenClaw + if callName == "capture_photo" { + let description = call.args["description"] as? 
String + onCapturePhoto?(description) { [weak self] result in + guard let self else { return } + NSLog("[ToolCall] capture_photo result: %@", String(describing: result)) + let response = self.buildToolResponse(callId: callId, name: callName, result: result) + sendResponse(response) + } + return + } + let task = Task { @MainActor in let taskDesc = call.args["task"] as? String ?? String(describing: call.args) let result = await bridge.delegateTask(task: taskDesc, toolName: callName) diff --git a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index 3b67cf76..b8c42c1f 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -23,6 +23,8 @@ struct StreamView: View { @ObservedObject var geminiVM: GeminiSessionViewModel @ObservedObject var webrtcVM: WebRTCSessionViewModel @State private var selectedTab: StreamTab = .camera + @State private var showGallery = false + @State private var showCaptureToast = false enum StreamTab: String, CaseIterable { case camera = "Camera" @@ -51,6 +53,14 @@ struct StreamView: View { HStack { GeminiStatusBar(geminiVM: geminiVM) Spacer() + Button(action: { showGallery = true }) { + Image(systemName: "photo.on.rectangle") + .font(.system(size: 16, weight: .medium)) + .foregroundColor(.white) + .padding(8) + .background(Color.black.opacity(0.5)) + .clipShape(Circle()) + } Picker("", selection: $selectedTab) { ForEach(StreamTab.allCases, id: \.self) { tab in Text(tab.rawValue).tag(tab) @@ -100,6 +110,34 @@ struct StreamView: View { } } } + // Gallery sheet + .sheet(isPresented: $showGallery) { + NavigationStack { + GalleryView() + } + } + // Capture toast + .overlay(alignment: .top) { + if showCaptureToast { + Text("Photo captured") + .font(.subheadline.weight(.medium)) + .foregroundColor(.white) + .padding(.horizontal, 16) + .padding(.vertical, 8) + .background(Color.black.opacity(0.7)) + 
.cornerRadius(20) + .padding(.top, 80) + .transition(.move(edge: .top).combined(with: .opacity)) + } + } + .onChange(of: geminiVM.lastCapturedPhoto?.id) { _, newId in + guard newId != nil else { return } + withAnimation { showCaptureToast = true } + Task { + try? await Task.sleep(nanoseconds: 2_000_000_000) + withAnimation { showCaptureToast = false } + } + } // Show captured photos from DAT SDK in a preview sheet .sheet(isPresented: $viewModel.showPhotoPreview) { if let photo = viewModel.capturedPhoto { @@ -130,7 +168,6 @@ struct StreamView: View { Text(webrtcVM.errorMessage ?? "") } } -} @ViewBuilder private var cameraContent: some View { @@ -250,3 +287,4 @@ struct ControlsView: View { } } } + From 20a782b5af4e5d82f836ac39fc45cc981aca3e6f Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 18:05:56 -0600 Subject: [PATCH 10/68] Make gallery button always visible on both Camera and Chat tabs --- .../CameraAccess/Views/StreamView.swift | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index b8c42c1f..0028c892 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -47,20 +47,22 @@ struct StreamView: View { .padding(.bottom, 80) } - // Tab picker at top - if geminiVM.isGeminiActive { - VStack { - HStack { + // Top bar + VStack { + HStack { + if geminiVM.isGeminiActive { GeminiStatusBar(geminiVM: geminiVM) - Spacer() - Button(action: { showGallery = true }) { - Image(systemName: "photo.on.rectangle") - .font(.system(size: 16, weight: .medium)) - .foregroundColor(.white) - .padding(8) - .background(Color.black.opacity(0.5)) - .clipShape(Circle()) - } + } + Spacer() + Button(action: { showGallery = true }) { + Image(systemName: "photo.on.rectangle") + .font(.system(size: 16, weight: .medium)) + .foregroundColor(.white) 
+ .padding(8) + .background(Color.black.opacity(0.5)) + .clipShape(Circle()) + } + if geminiVM.isGeminiActive { Picker("", selection: $selectedTab) { ForEach(StreamTab.allCases, id: \.self) { tab in Text(tab.rawValue).tag(tab) @@ -69,11 +71,11 @@ struct StreamView: View { .pickerStyle(.segmented) .frame(width: 140) } - Spacer() } - .padding(.horizontal, 24) - .padding(.top, 24) + Spacer() } + .padding(.horizontal, 24) + .padding(.top, 24) // WebRTC status overlay (top) if webrtcVM.isActive && selectedTab == .camera { From 936545889fe77c3d3d29f7b26c46f1ff003a64a7 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 18:20:00 -0600 Subject: [PATCH 11/68] Add opt-in image passing from Gemini execute tool to OpenClaw - execute tool gains include_image boolean param (default false) - Gemini sets include_image=true only when task needs visual context - ToolCallRouter passes latest camera frame when flag is set - OpenClawBridge sends image as base64 in OpenAI vision format - Conversation history changed to [String: Any] for multimodal content - No image sent on text-only tasks (no latency impact) --- .../Gemini/GeminiSessionViewModel.swift | 3 ++- .../OpenClaw/OpenClawBridge.swift | 27 +++++++++++++++---- .../OpenClaw/ToolCallModels.swift | 4 +++ .../OpenClaw/ToolCallRouter.swift | 8 +++++- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 47d1da94..3d0c39b3 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -243,8 +243,9 @@ class GeminiSessionViewModel: ObservableObject { } func sendVideoFrameIfThrottled(image: UIImage) { - // Always keep latest frame for capture_photo + // Always keep latest frame for capture_photo and include_image latestVideoFrame = image + 
toolCallRouter?.latestFrame = image guard SettingsManager.shared.videoStreamingEnabled else { return } guard isGeminiActive, connectionState == .ready else { return } let now = Date() diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift index 1f48ac6f..c2e05af2 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift @@ -1,4 +1,5 @@ import Foundation +import UIKit enum OpenClawConnectionState: Equatable { case notConfigured @@ -15,7 +16,7 @@ class OpenClawBridge: ObservableObject { private let session: URLSession private let pingSession: URLSession private var sessionKey: String - private var conversationHistory: [[String: String]] = [] + private var conversationHistory: [[String: Any]] = [] private let maxHistoryTurns = 10 private static let stableSessionKey = "agent:main:glass" @@ -69,7 +70,8 @@ class OpenClawBridge: ObservableObject { func delegateTask( task: String, - toolName: String = "execute" + toolName: String = "execute", + image: UIImage? 
= nil ) async -> ToolResult { lastToolCallStatus = .executing(toolName) @@ -78,8 +80,23 @@ class OpenClawBridge: ObservableObject { return .failure("Invalid gateway URL") } - // Append the new user message to conversation history - conversationHistory.append(["role": "user", "content": task]) + // Build user message — text-only or multimodal (OpenAI vision format) + let userMessage: [String: Any] + if let image = image, let jpegData = image.jpegData(compressionQuality: 0.8) { + let base64 = jpegData.base64EncodedString() + userMessage = [ + "role": "user", + "content": [ + ["type": "text", "text": task], + ["type": "image_url", "image_url": ["url": "data:image/jpeg;base64,\(base64)"]] + ] as [[String: Any]] + ] + NSLog("[OpenClaw] Attaching image (%d KB) to task", jpegData.count / 1024) + } else { + userMessage = ["role": "user", "content": task] + } + + conversationHistory.append(userMessage) // Trim history to keep only the most recent turns (user+assistant pairs) if conversationHistory.count > maxHistoryTurns * 2 { @@ -99,7 +116,7 @@ class OpenClawBridge: ObservableObject { "stream": false ] - NSLog("[OpenClaw] Sending %d messages in conversation", conversationHistory.count) + NSLog("[OpenClaw] Sending %d messages in conversation%@", conversationHistory.count, image != nil ? " (with image)" : "") do { request.httpBody = try JSONSerialization.data(withJSONObject: body) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index 38980d1b..6515bf37 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -112,6 +112,10 @@ enum ToolDeclarations { "task": [ "type": "string", "description": "Clear, detailed description of what to do. Include all relevant context: names, content, platforms, quantities, etc." 
+ ], + "include_image": [ + "type": "boolean", + "description": "Set to true ONLY when the task requires the agent to see the current camera image (e.g. editing a photo, identifying a product by appearance, reading text from a sign). Do NOT set for tasks that can be described in text alone." ] ], "required": ["task"] diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 116fe979..14100d49 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -1,4 +1,5 @@ import Foundation +import UIKit @MainActor class ToolCallRouter { @@ -8,6 +9,9 @@ class ToolCallRouter { /// Callback for local capture_photo handling. Called with (description, completion). var onCapturePhoto: ((_ description: String?, _ completion: @escaping (ToolResult) -> Void) -> Void)? + /// Latest camera frame for include_image on execute tool calls. + var latestFrame: UIImage? + init(bridge: OpenClawBridge) { self.bridge = bridge } @@ -38,7 +42,9 @@ class ToolCallRouter { let task = Task { @MainActor in let taskDesc = call.args["task"] as? String ?? String(describing: call.args) - let result = await bridge.delegateTask(task: taskDesc, toolName: callName) + let includeImage = call.args["include_image"] as? Bool ?? false + let image: UIImage? = includeImage ? 
latestFrame : nil + let result = await bridge.delegateTask(task: taskDesc, toolName: callName, image: image) guard !Task.isCancelled else { NSLog("[ToolCall] Task %@ was cancelled, skipping response", callId) From f8d7cf35fb2eab19b5a5312ea84eaa5c9f88cf88 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 18:39:59 -0600 Subject: [PATCH 12/68] Add RemoteLogger for persistent session logging on Android - RemoteLogger.kt: fire-and-forget event logger matching iOS implementation Logs: voice:user, voice:ai, voice:tool_call, voice:tool_result, session:start, session:end - GeminiSessionViewModel: wired logging at session start/end, turn complete, and tool call initiation/result --- .../gemini/GeminiSessionViewModel.kt | 38 +++++++++ .../cameraaccess/gemini/RemoteLogger.kt | 77 +++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/RemoteLogger.kt diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 5f7b2c01..68a28aed 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -15,6 +15,9 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawCo import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawEventClient import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallRouter import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallStatus +import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPhoto +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.PhotoCaptureStore import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMode import kotlinx.coroutines.Job @@ -44,6 +47,9 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { private val _uiState = MutableStateFlow(GeminiUiState()) val uiState: StateFlow = _uiState.asStateFlow() + private val _captureEvent = MutableStateFlow(null) + val captureEvent: StateFlow = _captureEvent.asStateFlow() + private val geminiService = GeminiLiveService() private val openClawBridge = OpenClawBridge() private val eventClient = OpenClawEventClient() @@ -148,6 +154,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { // Start with mic enabled by default _uiState.value = _uiState.value.copy(isGeminiActive = true, isMicEnabled = true) audioManager.setMicEnabled(true) + RemoteLogger.log("session:start") netMonitor.start() netMonitorJob?.cancel() @@ -176,6 +183,13 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } geminiService.onTurnComplete = { + // Log finalized transcripts before clearing + if (lastUserText.isNotEmpty()) { + RemoteLogger.log("voice:user", mapOf("text" to lastUserText)) + } + if (lastAIText.isNotEmpty()) { + RemoteLogger.log("voice:ai", mapOf("text" to lastAIText)) + } finalizeCurrentBubbles() _uiState.value = _uiState.value.copy(userTranscript = "") } @@ -219,8 +233,30 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { originalInstructionProvider = { lastUserOriginalInstruction } ) + // Local capture_photo handler + toolCallRouter?.onCapturePhoto = { description, completion -> + val frame = latestFrameForToolCall + if (frame != null) { + 
val photo = PhotoCaptureStore.saveFrame(getApplication(), frame, description) + if (photo != null) { + _captureEvent.value = photo + completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) + } else { + completion(ToolResult.Failure("Failed to save photo")) + } + } else { + completion(ToolResult.Failure("No camera frame available to capture")) + } + } + + // Load gallery + PhotoCaptureStore.loadPhotos(getApplication()) + geminiService.onToolCall = { toolCall -> for (call in toolCall.functionCalls) { + val taskDesc = (call.args["task"] as? String) ?: "" + RemoteLogger.log("voice:tool_call", mapOf("tool" to call.name, "task" to taskDesc)) + finalizeCurrentBubbles() val toolMsg = ChatMessage( role = ChatMessageRole.ToolCall(call.name), @@ -232,6 +268,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { _uiState.value = _uiState.value.copy(messages = msgs) toolCallRouter?.handleToolCall(call) { response -> + RemoteLogger.log("voice:tool_result", mapOf("tool" to call.name, "result" to response.toString().take(500))) val updated = _uiState.value.messages.map { if (it.id == toolMsg.id) it.copy(text = "Done", status = ChatMessageStatus.Complete) else it } @@ -369,6 +406,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } fun stopSession() { + RemoteLogger.log("session:end") userStopped = true reconnectJob?.cancel() reconnectJob = null diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/RemoteLogger.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/RemoteLogger.kt new file mode 100644 index 00000000..75542a3e --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/RemoteLogger.kt @@ -0,0 +1,77 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini + +import android.util.Log +import 
kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import okhttp3.MediaType.Companion.toMediaType +import okhttp3.OkHttpClient +import okhttp3.Request +import okhttp3.RequestBody.Companion.toRequestBody +import org.json.JSONObject +import java.util.concurrent.TimeUnit + +/// Sends conversation events to the logging server for persistent logging. +/// All methods are fire-and-forget -- logging never blocks the UI or conversation flow. +object RemoteLogger { + private const val TAG = "RemoteLogger" + private val JSON_MEDIA = "application/json".toMediaType() + + private val client = OkHttpClient.Builder() + .connectTimeout(5, TimeUnit.SECONDS) + .readTimeout(5, TimeUnit.SECONDS) + .writeTimeout(5, TimeUnit.SECONDS) + .build() + + private var sequenceNumber = 0 + + private val baseURL: String? + get() { + return if (GeminiConfig.isOpenClawConfigured) { + "${GeminiConfig.openClawHost}:8080" + } else { + null + } + } + + /// Log a conversation event. 
Types: + /// - "voice:user" -- user speech transcript from Gemini + /// - "voice:ai" -- Gemini voice response transcript + /// - "voice:tool_call" -- Gemini triggered execute tool + /// - "voice:tool_result" -- tool result sent back to Gemini + /// - "session:start" -- voice session started + /// - "session:end" -- voice session ended + fun log(type: String, data: Map = emptyMap()) { + val url = baseURL ?: return + val loggingUrl = "$url/api/logs" + + sequenceNumber++ + val eventData = JSONObject().apply { + put("event", type) + put("seq", sequenceNumber) + data.forEach { (k, v) -> put(k, v) } + } + + val payload = JSONObject().apply { + put("type", "event") + put("session", "android-client") + put("data", eventData) + } + + // Fire and forget + GlobalScope.launch(Dispatchers.IO) { + try { + val request = Request.Builder() + .url(loggingUrl) + .post(payload.toString().toRequestBody(JSON_MEDIA)) + .addHeader("Content-Type", "application/json") + .addHeader("x-api-token", GeminiConfig.openClawGatewayToken) + .build() + + client.newCall(request).execute().use { /* close */ } + } catch (e: Exception) { + Log.d(TAG, "Failed to log event: ${e.message}") + } + } + } +} From 3f907d34090cfc94039b944cd8ddf6980f69d14f Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 18:41:03 -0600 Subject: [PATCH 13/68] Add capture_photo tool and gallery for Gemini-triggered photo capture (Android) - capture_photo tool declaration + local interception in ToolCallRouter - PhotoCaptureStore: persistent JPEG gallery with JSON manifest - GalleryScreen (3-column grid) + GalleryDetailScreen (share + delete) - Gallery button always visible in StreamScreen top bar - Capture toast on successful photo save - FileProvider paths updated for captures directory sharing --- .../cameraaccess/gallery/CapturedPhoto.kt | 8 ++ .../cameraaccess/gallery/PhotoCaptureStore.kt | 120 +++++++++++++++++ .../cameraaccess/openclaw/ToolCallModels.kt | 22 ++- 
.../cameraaccess/openclaw/ToolCallRouter.kt | 17 +++ .../cameraaccess/ui/GalleryDetailScreen.kt | 127 ++++++++++++++++++ .../cameraaccess/ui/GalleryScreen.kt | 126 +++++++++++++++++ .../cameraaccess/ui/StreamScreen.kt | 48 +++++++ .../app/src/main/res/xml/file_paths.xml | 1 + 8 files changed, 468 insertions(+), 1 deletion(-) create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/CapturedPhoto.kt create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/PhotoCaptureStore.kt create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryDetailScreen.kt create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryScreen.kt diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/CapturedPhoto.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/CapturedPhoto.kt new file mode 100644 index 00000000..4fb0c0a1 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/CapturedPhoto.kt @@ -0,0 +1,8 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery + +data class CapturedPhoto( + val id: String, + val filename: String, + val timestamp: Long, + val description: String? 
+) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/PhotoCaptureStore.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/PhotoCaptureStore.kt new file mode 100644 index 00000000..16559439 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gallery/PhotoCaptureStore.kt @@ -0,0 +1,120 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery + +import android.content.Context +import android.graphics.Bitmap +import android.graphics.BitmapFactory +import android.util.Log +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.flow.asStateFlow +import org.json.JSONArray +import org.json.JSONObject +import java.io.File +import java.io.FileOutputStream +import java.text.SimpleDateFormat +import java.util.Date +import java.util.Locale +import java.util.UUID + +object PhotoCaptureStore { + private const val TAG = "PhotoCaptureStore" + private const val MANIFEST_FILE = "manifest.json" + + private val _photos = MutableStateFlow>(emptyList()) + val photos: StateFlow> = _photos.asStateFlow() + + private fun capturesDir(context: Context): File { + val dir = File(context.filesDir, "captures") + if (!dir.exists()) dir.mkdirs() + return dir + } + + fun loadPhotos(context: Context) { + val manifestFile = File(capturesDir(context), MANIFEST_FILE) + if (!manifestFile.exists()) { + _photos.value = emptyList() + return + } + try { + val json = JSONArray(manifestFile.readText()) + val loaded = mutableListOf() + for (i in 0 until json.length()) { + val obj = json.getJSONObject(i) + val photo = CapturedPhoto( + id = obj.getString("id"), + filename = obj.getString("filename"), + timestamp = obj.getLong("timestamp"), + description = obj.optString("description", null) + ) + if (File(capturesDir(context), 
photo.filename).exists()) { + loaded.add(photo) + } + } + _photos.value = loaded + Log.d(TAG, "Loaded ${loaded.size} photos from manifest") + } catch (e: Exception) { + Log.e(TAG, "Failed to load manifest: ${e.message}") + _photos.value = emptyList() + } + } + + fun saveFrame(context: Context, bitmap: Bitmap, description: String?): CapturedPhoto? { + val formatter = SimpleDateFormat("yyyy-MM-dd_HH-mm-ss", Locale.US) + val filename = "capture_${formatter.format(Date())}.jpg" + val file = File(capturesDir(context), filename) + + return try { + FileOutputStream(file).use { out -> + bitmap.compress(Bitmap.CompressFormat.JPEG, 90, out) + } + val photo = CapturedPhoto( + id = UUID.randomUUID().toString(), + filename = filename, + timestamp = System.currentTimeMillis(), + description = description + ) + val current = _photos.value.toMutableList() + current.add(0, photo) + _photos.value = current + saveManifest(context) + Log.d(TAG, "Saved: $filename (${file.length()} bytes)") + photo + } catch (e: Exception) { + Log.e(TAG, "Failed to save photo: ${e.message}") + null + } + } + + fun deletePhoto(context: Context, photo: CapturedPhoto) { + File(capturesDir(context), photo.filename).delete() + _photos.value = _photos.value.filter { it.id != photo.id } + saveManifest(context) + Log.d(TAG, "Deleted: ${photo.filename}") + } + + fun getPhotoFile(context: Context, photo: CapturedPhoto): File { + return File(capturesDir(context), photo.filename) + } + + fun loadBitmap(context: Context, photo: CapturedPhoto): Bitmap? 
{ + val file = getPhotoFile(context, photo) + return if (file.exists()) BitmapFactory.decodeFile(file.absolutePath) else null + } + + private fun saveManifest(context: Context) { + try { + val json = JSONArray() + for (photo in _photos.value) { + json.put(JSONObject().apply { + put("id", photo.id) + put("filename", photo.filename) + put("timestamp", photo.timestamp) + if (photo.description != null) put("description", photo.description) + }) + } + File(capturesDir(context), MANIFEST_FILE).writeText(json.toString(2)) + } catch (e: Exception) { + Log.e(TAG, "Failed to save manifest: ${e.message}") + } + } +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index 696a0c8a..4d77273f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -103,7 +103,27 @@ sealed class OpenClawConnectionState { object ToolDeclarations { fun allDeclarationsJSON(): JSONArray { - return JSONArray().put(executeJSON()) + return JSONArray().apply { + put(executeJSON()) + put(capturePhotoJSON()) + } + } + + private fun capturePhotoJSON(): JSONObject { + return JSONObject().apply { + put("name", "capture_photo") + put("description", "Capture and save the current camera frame as a photo. 
Use when the user asks to take a photo, capture what they see, save a picture, or snap a photo.") + put("parameters", JSONObject().apply { + put("type", "object") + put("properties", JSONObject().apply { + put("description", JSONObject().apply { + put("type", "string") + put("description", "Brief description of what is in the photo") + }) + }) + put("required", JSONArray()) + }) + } } private fun executeJSON(): JSONObject { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index c151e9dd..8a2b4cfa 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -24,6 +24,9 @@ class ToolCallRouter( private const val JPEG_QUALITY_FOR_UPLOAD = 92 } + /** Callback for local capture_photo handling. */ + var onCapturePhoto: ((description: String?, completion: (ToolResult) -> Unit) -> Unit)? 
= null + private val inFlightJobs = mutableMapOf() fun handleToolCall( @@ -35,6 +38,20 @@ class ToolCallRouter( Log.d(TAG, "Received: $callName (id: $callId) args: ${call.args}") + // Local tool: capture_photo — handle on-device, don't send to OpenClaw + if (callName == "capture_photo") { + val description = call.args["description"]?.toString() + onCapturePhoto?.invoke(description) { result -> + Log.d(TAG, "capture_photo result: $result") + val response = buildToolResponse(callId, callName, result) + sendResponse(response) + } ?: run { + val response = buildToolResponse(callId, callName, ToolResult.Failure("capture_photo handler not configured")) + sendResponse(response) + } + return + } + val job = scope.launch { // Gemini가 tool-call args로 준 "정리된" task (이미 rewriting 된 텍스트) val rewrittenTask = call.args["task"]?.toString() ?: call.args.toString() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryDetailScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryDetailScreen.kt new file mode 100644 index 00000000..3a24e85a --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryDetailScreen.kt @@ -0,0 +1,127 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.ui + +import android.content.Context +import android.content.Intent +import android.graphics.Bitmap +import androidx.compose.foundation.Image +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.Spacer +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.height +import androidx.compose.foundation.layout.padding +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.automirrored.filled.ArrowBack +import 
androidx.compose.material.icons.filled.Delete +import androidx.compose.material.icons.filled.Share +import androidx.compose.material3.AlertDialog +import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Scaffold +import androidx.compose.material3.Text +import androidx.compose.material3.TextButton +import androidx.compose.material3.TopAppBar +import androidx.compose.runtime.Composable +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.asImageBitmap +import androidx.compose.ui.layout.ContentScale +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.unit.dp +import androidx.core.content.FileProvider +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPhoto +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.PhotoCaptureStore +import java.text.SimpleDateFormat +import java.util.Date +import java.util.Locale + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun GalleryDetailScreen(photo: CapturedPhoto, onBack: () -> Unit) { + val context = LocalContext.current + var bitmap by remember { mutableStateOf(null) } + var showDeleteDialog by remember { mutableStateOf(false) } + + LaunchedEffect(photo.id) { + bitmap = PhotoCaptureStore.loadBitmap(context, photo) + } + + val formattedDate = remember(photo.timestamp) { + SimpleDateFormat("MMM d, yyyy h:mm a", Locale.getDefault()).format(Date(photo.timestamp)) + } + + Scaffold( + topBar = { + TopAppBar( + title = { Text("Photo") }, + navigationIcon = { + IconButton(onClick = onBack) { + Icon(Icons.AutoMirrored.Filled.ArrowBack, contentDescription 
= "Back") + } + }, + actions = { + IconButton(onClick = { sharePhoto(context, photo) }) { + Icon(Icons.Default.Share, contentDescription = "Share") + } + IconButton(onClick = { showDeleteDialog = true }) { + Icon(Icons.Default.Delete, contentDescription = "Delete") + } + } + ) + } + ) { padding -> + Column(modifier = Modifier.fillMaxSize().padding(padding)) { + bitmap?.let { bmp -> + Image( + bitmap = bmp.asImageBitmap(), + contentDescription = photo.description ?: "Photo", + modifier = Modifier.fillMaxWidth().weight(1f), + contentScale = ContentScale.Fit + ) + } + Column(modifier = Modifier.fillMaxWidth().padding(16.dp)) { + Text(formattedDate, style = MaterialTheme.typography.bodyMedium, color = MaterialTheme.colorScheme.onSurfaceVariant) + if (!photo.description.isNullOrEmpty()) { + Spacer(Modifier.height(4.dp)) + Text(photo.description, style = MaterialTheme.typography.bodyLarge) + } + } + } + } + + if (showDeleteDialog) { + AlertDialog( + onDismissRequest = { showDeleteDialog = false }, + title = { Text("Delete photo?") }, + text = { Text("This action cannot be undone.") }, + confirmButton = { + TextButton(onClick = { + PhotoCaptureStore.deletePhoto(context, photo) + showDeleteDialog = false + onBack() + }) { Text("Delete") } + }, + dismissButton = { + TextButton(onClick = { showDeleteDialog = false }) { Text("Cancel") } + } + ) + } +} + +private fun sharePhoto(context: Context, photo: CapturedPhoto) { + val file = PhotoCaptureStore.getPhotoFile(context, photo) + if (!file.exists()) return + val uri = FileProvider.getUriForFile(context, "${context.packageName}.provider", file) + val intent = Intent(Intent.ACTION_SEND).apply { + type = "image/jpeg" + putExtra(Intent.EXTRA_STREAM, uri) + addFlags(Intent.FLAG_GRANT_READ_URI_PERMISSION) + } + context.startActivity(Intent.createChooser(intent, "Share photo")) +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryScreen.kt 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryScreen.kt new file mode 100644 index 00000000..f5d673e8 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/GalleryScreen.kt @@ -0,0 +1,126 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.ui + +import android.graphics.Bitmap +import androidx.compose.foundation.Image +import androidx.compose.foundation.clickable +import androidx.compose.foundation.layout.Arrangement +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.PaddingValues +import androidx.compose.foundation.layout.aspectRatio +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.lazy.grid.GridCells +import androidx.compose.foundation.lazy.grid.LazyVerticalGrid +import androidx.compose.foundation.lazy.grid.items +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.automirrored.filled.ArrowBack +import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Scaffold +import androidx.compose.material3.Text +import androidx.compose.material3.TopAppBar +import androidx.compose.runtime.Composable +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.runtime.collectAsState +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.asImageBitmap +import 
androidx.compose.ui.layout.ContentScale +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.unit.dp +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPhoto +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.PhotoCaptureStore + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun GalleryScreen( + onBack: () -> Unit, + onPhotoSelected: (CapturedPhoto) -> Unit +) { + val photos by PhotoCaptureStore.photos.collectAsState() + val context = LocalContext.current + + LaunchedEffect(Unit) { + PhotoCaptureStore.loadPhotos(context) + } + + Scaffold( + topBar = { + TopAppBar( + title = { Text("Gallery") }, + navigationIcon = { + IconButton(onClick = onBack) { + Icon(Icons.AutoMirrored.Filled.ArrowBack, contentDescription = "Back") + } + } + ) + } + ) { padding -> + if (photos.isEmpty()) { + Box( + modifier = Modifier.fillMaxSize().padding(padding), + contentAlignment = Alignment.Center + ) { + Column( + horizontalAlignment = Alignment.CenterHorizontally, + verticalArrangement = Arrangement.spacedBy(8.dp) + ) { + Text( + "No captured photos yet", + style = MaterialTheme.typography.titleMedium, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + Text( + "Ask the AI to take a photo while using the glasses", + style = MaterialTheme.typography.bodyMedium, + color = MaterialTheme.colorScheme.onSurfaceVariant.copy(alpha = 0.7f) + ) + } + } + } else { + LazyVerticalGrid( + columns = GridCells.Fixed(3), + modifier = Modifier.fillMaxSize().padding(padding), + contentPadding = PaddingValues(2.dp), + horizontalArrangement = Arrangement.spacedBy(2.dp), + verticalArrangement = Arrangement.spacedBy(2.dp) + ) { + items(photos, key = { it.id }) { photo -> + GalleryThumbnail(photo = photo, onClick = { onPhotoSelected(photo) }) + } + } + } + } +} + +@Composable +private fun GalleryThumbnail(photo: CapturedPhoto, onClick: () -> Unit) { + val context = LocalContext.current + var bitmap by remember { 
mutableStateOf(null) } + + LaunchedEffect(photo.id) { + bitmap = PhotoCaptureStore.loadBitmap(context, photo) + } + + Box( + modifier = Modifier.aspectRatio(1f).fillMaxWidth().clickable(onClick = onClick), + contentAlignment = Alignment.Center + ) { + bitmap?.let { bmp -> + Image( + bitmap = bmp.asImageBitmap(), + contentDescription = photo.description ?: "Photo", + modifier = Modifier.fillMaxSize(), + contentScale = ContentScale.Crop + ) + } + } +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 67d4682d..eeb349f5 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -19,14 +19,21 @@ import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size import androidx.compose.foundation.layout.width import androidx.compose.foundation.layout.widthIn import androidx.compose.foundation.layout.statusBarsPadding +import androidx.compose.foundation.shape.CircleShape +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.PhotoLibrary import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.FilterChip +import androidx.compose.material3.Icon +import androidx.compose.material3.IconButton import androidx.compose.material3.SegmentedButton import androidx.compose.material3.SegmentedButtonDefaults import androidx.compose.material3.SingleChoiceSegmentedButtonRow +import androidx.compose.material3.Surface import 
androidx.compose.material3.Text import androidx.compose.runtime.Composable import androidx.compose.runtime.DisposableEffect @@ -38,11 +45,13 @@ import androidx.compose.runtime.remember import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.Color import androidx.compose.ui.graphics.asImageBitmap import androidx.compose.ui.layout.ContentScale import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.res.stringResource import androidx.compose.ui.unit.dp +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPhoto import androidx.lifecycle.compose.LocalLifecycleOwner import androidx.lifecycle.compose.collectAsStateWithLifecycle import androidx.lifecycle.viewmodel.compose.viewModel @@ -75,9 +84,31 @@ fun StreamScreen( val streamUiState by streamViewModel.uiState.collectAsStateWithLifecycle() val geminiUiState by geminiViewModel.uiState.collectAsStateWithLifecycle() val webrtcUiState by webrtcViewModel.uiState.collectAsStateWithLifecycle() + val captureEvent by geminiViewModel.captureEvent.collectAsStateWithLifecycle() val lifecycleOwner = LocalLifecycleOwner.current val context = LocalContext.current + // Gallery navigation state + var showGallery by remember { mutableStateOf(false) } + var selectedGalleryPhoto by remember { mutableStateOf(null) } + + // Show toast when photo is captured via Gemini + LaunchedEffect(captureEvent) { + captureEvent?.let { + Toast.makeText(context, "Photo captured", Toast.LENGTH_SHORT).show() + } + } + + // Gallery screens + if (selectedGalleryPhoto != null) { + GalleryDetailScreen(photo = selectedGalleryPhoto!!, onBack = { selectedGalleryPhoto = null }) + return + } + if (showGallery) { + GalleryScreen(onBack = { showGallery = false }, onPhotoSelected = { selectedGalleryPhoto = it }) + return + } + // Wire Gemini VM to Stream VM for frame forwarding LaunchedEffect(geminiViewModel) { 
streamViewModel.geminiViewModel = geminiViewModel @@ -193,6 +224,23 @@ fun StreamScreen( modifier = Modifier.widthIn(min = 160.dp), ) + // Gallery button + Spacer(modifier = Modifier.width(4.dp)) + Surface( + shape = CircleShape, + color = Color.Black.copy(alpha = 0.5f), + modifier = Modifier.size(36.dp) + ) { + IconButton(onClick = { showGallery = true }) { + Icon( + Icons.Default.PhotoLibrary, + contentDescription = "Gallery", + tint = Color.White, + modifier = Modifier.size(18.dp) + ) + } + } + // Tab switcher (only when Gemini is active) if (geminiUiState.isGeminiActive) { Spacer(modifier = Modifier.width(8.dp)) diff --git a/samples/CameraAccessAndroid/app/src/main/res/xml/file_paths.xml b/samples/CameraAccessAndroid/app/src/main/res/xml/file_paths.xml index ee605e38..c773eee3 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/xml/file_paths.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/xml/file_paths.xml @@ -1,4 +1,5 @@ + From 6e3bd0d46ddd98f3be8d796eda8f225873cac230 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 19:03:58 -0600 Subject: [PATCH 14/68] Make image upload opt-in via include_image param on execute tool (Android) --- .../cameraaccess/openclaw/ToolCallModels.kt | 4 ++++ .../cameraaccess/openclaw/ToolCallRouter.kt | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index 4d77273f..e8e93bcc 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -137,6 +137,10 @@ object ToolDeclarations { put("type", "string") 
put("description", "Clear, detailed description of what to do. Include all relevant context: names, content, platforms, quantities, etc.") }) + put("include_image", JSONObject().apply { + put("type", "boolean") + put("description", "Set to true ONLY when the task requires the agent to see the current camera image (e.g. editing a photo, identifying a product by appearance, reading text from a sign). Do NOT set for tasks that can be described in text alone.") + }) }) put("required", JSONArray().put("task")) }) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 8a2b4cfa..05113ba2 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -61,10 +61,11 @@ class ToolCallRouter( ?.trim() ?.takeIf { it.isNotEmpty() } - // tool-call 시점에만 스냅샷 업로드 (원본 해상도 유지, JPEG로만 인코딩) - val bitmap = latestFrameProvider() - Log.d("ToolCallRouter", "toolcall bitmapNull=${latestFrameProvider()==null}") - val imageUrl: String? = if (SettingsManager.videoStreamingEnabled && bitmap != null) { + // Only upload image when Gemini explicitly requests it via include_image=true + val includeImage = call.args["include_image"] as? Boolean ?: false + val bitmap = if (includeImage) latestFrameProvider() else null + Log.d(TAG, "include_image=$includeImage, bitmapNull=${bitmap == null}") + val imageUrl: String? 
= if (includeImage && SettingsManager.videoStreamingEnabled && bitmap != null) { try { val baos = ByteArrayOutputStream() bitmap.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY_FOR_UPLOAD, baos) From b96874d6c5cc34dd64333c554e88831096fe1010 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 19:15:44 -0600 Subject: [PATCH 15/68] Fix system prompt to mention capture_photo tool (iOS + Android) --- samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift | 6 +++++- .../cameraaccess/settings/SettingsManager.kt | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift index 5c124f66..dda97c9e 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift @@ -19,7 +19,11 @@ enum GeminiConfig { CRITICAL: You have NO memory, NO storage, and NO ability to take actions on your own. You cannot remember things, keep lists, set reminders, search the web, send messages, or do anything persistent. You are ONLY a voice interface. - You have exactly ONE tool: execute. This connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. + You have two tools: execute and capture_photo. + + The capture_photo tool saves the current camera frame as a photo to the device gallery. Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. You can include an optional description of what is in the photo. + + The execute tool connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. 
ALWAYS use execute when the user asks you to: - Send a message to someone (any platform: WhatsApp, Telegram, iMessage, Slack, etc.) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index 7c5916c3..0664fdaa 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -65,7 +65,11 @@ You cannot access past conversations, saved data, notes, emails, calendars, or e You are ONLY a voice interface. -You have exactly ONE tool: execute. +You have two tools: execute and capture_photo. + +The capture_photo tool saves the current camera frame as a photo to the device gallery. +Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. +You can include an optional description of what is in the photo. The execute tool connects you to a powerful personal assistant that can: - Send messages (WhatsApp, Telegram, iMessage, Slack, etc.) 
From 52292f01567e6f02dca6ef05ecfb030c03e4b3f9 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 19:45:32 -0600 Subject: [PATCH 16/68] Add include_image guidance to system prompt (iOS + Android) --- samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift | 2 ++ .../externalsampleapps/cameraaccess/settings/SettingsManager.kt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift index dda97c9e..2b82edd8 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift @@ -23,6 +23,8 @@ enum GeminiConfig { The capture_photo tool saves the current camera frame as a photo to the device gallery. Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. You can include an optional description of what is in the photo. + When calling execute, set include_image=true if the task requires the assistant to SEE what you see (e.g. editing a photo, identifying a product by appearance, reading text from a sign, or any task where visual context is needed). Do NOT set it for tasks that can be fully described in text. + The execute tool connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. 
ALWAYS use execute when the user asks you to: diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index 0664fdaa..7ee4da08 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -71,6 +71,8 @@ The capture_photo tool saves the current camera frame as a photo to the device g Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. You can include an optional description of what is in the photo. +When calling execute, set include_image=true if the task requires the assistant to SEE what you see (e.g. editing a photo, identifying a product by appearance, reading text from a sign, or any task where visual context is needed). Do NOT set it for tasks that can be fully described in text. + The execute tool connects you to a powerful personal assistant that can: - Send messages (WhatsApp, Telegram, iMessage, Slack, etc.) 
- Search the web or look up information From b1f2079faa58193a1913add87307c6813aa69ad4 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 19:59:56 -0600 Subject: [PATCH 17/68] Send image inline as base64 instead of uploading to media server (Android) --- .../cameraaccess/openclaw/OpenClawBridge.kt | 33 ++++++++++++++++--- .../cameraaccess/openclaw/ToolCallRouter.kt | 18 ++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index d057ba18..5371fc70 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -172,16 +172,39 @@ class OpenClawBridge { suspend fun delegateTask( task: String, - toolName: String = "execute" + toolName: String = "execute", + imageBase64: String? 
= null ): ToolResult = withContext(Dispatchers.IO) { _lastToolCallStatus.value = ToolCallStatus.Executing(toolName) val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" - conversationHistory.add(JSONObject().apply { - put("role", "user") - put("content", task) - }) + // Build user message — text-only or multimodal (OpenAI vision format) + val userMessage = if (imageBase64 != null) { + Log.d(TAG, "Attaching image (${imageBase64.length / 1024} KB base64) to task") + JSONObject().apply { + put("role", "user") + put("content", org.json.JSONArray().apply { + put(JSONObject().apply { + put("type", "text") + put("text", task) + }) + put(JSONObject().apply { + put("type", "image_url") + put("image_url", JSONObject().apply { + put("url", "data:image/jpeg;base64,$imageBase64") + }) + }) + }) + } + } else { + JSONObject().apply { + put("role", "user") + put("content", task) + } + } + + conversationHistory.add(userMessage) if (conversationHistory.size > MAX_HISTORY_TURNS * 2) { val trimmed = conversationHistory.takeLast(MAX_HISTORY_TURNS * 2) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 05113ba2..287582b2 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -61,24 +61,25 @@ class ToolCallRouter( ?.trim() ?.takeIf { it.isNotEmpty() } - // Only upload image when Gemini explicitly requests it via include_image=true + // Only attach image when Gemini explicitly requests it via include_image=true val includeImage = call.args["include_image"] as? 
Boolean ?: false val bitmap = if (includeImage) latestFrameProvider() else null Log.d(TAG, "include_image=$includeImage, bitmapNull=${bitmap == null}") - val imageUrl: String? = if (includeImage && SettingsManager.videoStreamingEnabled && bitmap != null) { + + val imageBase64: String? = if (includeImage && bitmap != null) { try { val baos = ByteArrayOutputStream() bitmap.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY_FOR_UPLOAD, baos) - bridge.uploadToolCallImage(baos.toByteArray()) + android.util.Base64.encodeToString(baos.toByteArray(), android.util.Base64.NO_WRAP) } catch (e: Exception) { - Log.w(TAG, "Image upload failed for tool-call $callId: ${e.message}") + Log.w(TAG, "Image encoding failed for tool-call $callId: ${e.message}") null } } else { null } - // OpenClaw로 넘기는 최종 "명령 텍스트" 포맷 + // Build task payload with original instruction context val taskPayload = buildString { if (original != null) { append("[original_instruction]\n") @@ -87,14 +88,9 @@ class ToolCallRouter( } append("[gemini_rewritten_instruction]\n") append(rewrittenTask) - - if (!imageUrl.isNullOrEmpty()) { - append("\n\n[tool_call_image_url]\n") - append(imageUrl) - } } - val result = bridge.delegateTask(task = taskPayload, toolName = callName) + val result = bridge.delegateTask(task = taskPayload, toolName = callName, imageBase64 = imageBase64) // 취소된 경우 응답 보내지 않음 if (!isActive) { From 0ab0b832064c6418acc82d0eb0172830bfd4ff56 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:02:00 -0600 Subject: [PATCH 18/68] Make include_image prompt more explicit for photo sending tasks --- .../cameraaccess/settings/SettingsManager.kt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index 
7ee4da08..f73bbe35 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -71,7 +71,12 @@ The capture_photo tool saves the current camera frame as a photo to the device g Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. You can include an optional description of what is in the photo. -When calling execute, set include_image=true if the task requires the assistant to SEE what you see (e.g. editing a photo, identifying a product by appearance, reading text from a sign, or any task where visual context is needed). Do NOT set it for tasks that can be fully described in text. +When calling execute, you MUST set include_image=true whenever: +- The user asks to send, share, or forward a photo/image to anyone +- The task involves editing, processing, or analyzing an image +- The user says "send this to..." or "show this to..." referring to what they see +- The task requires the assistant to see the current camera view (e.g. identifying a product, reading text from a sign) +Only omit include_image (or set it to false) for purely text-based tasks like sending a text message, searching, or setting a reminder. The execute tool connects you to a powerful personal assistant that can: - Send messages (WhatsApp, Telegram, iMessage, Slack, etc.) 
From a3f1b06e36d24fc9cca46a161567fb7e70f57ee6 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:36:30 -0600 Subject: [PATCH 19/68] Route image tasks through WebSocket chat.send instead of HTTP (Android) --- .../gemini/GeminiSessionViewModel.kt | 1 + .../cameraaccess/openclaw/OpenClawBridge.kt | 75 ++++++++++----- .../openclaw/OpenClawEventClient.kt | 94 +++++++++++++++++-- 3 files changed, 139 insertions(+), 31 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 68a28aed..44042b4b 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -225,6 +225,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { viewModelScope.launch { openClawBridge.checkConnection() openClawBridge.resetSession() + openClawBridge.eventClient = eventClient toolCallRouter = ToolCallRouter( bridge = openClawBridge, diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index 5371fc70..0935de53 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -5,8 +5,10 @@ import android.util.Log import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiConfig import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.AtomicReference +import kotlinx.coroutines.CancellableContinuation import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.suspendCancellableCoroutine import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.withContext @@ -38,6 +40,9 @@ class OpenClawBridge { MutableStateFlow(OpenClawConnectionState.NotConfigured) val connectionState: StateFlow = _connectionState.asStateFlow() + /** Set by GeminiSessionViewModel so we can send image tasks via WebSocket */ + var eventClient: OpenClawEventClient? = null + fun setToolCallStatus(status: ToolCallStatus) { _lastToolCallStatus.value = status } @@ -177,31 +182,22 @@ class OpenClawBridge { ): ToolResult = withContext(Dispatchers.IO) { _lastToolCallStatus.value = ToolCallStatus.Executing(toolName) + // If image is provided, route through WebSocket chat.send (only working method) + if (imageBase64 != null) { + val ec = eventClient + if (ec == null) { + Log.w(TAG, "Image task but no event client, falling back to text-only HTTP") + } else { + Log.d(TAG, "Sending image task via WebSocket chat.send (${imageBase64.length / 1024} KB)") + return@withContext sendViaWebSocket(ec, task, imageBase64, toolName) + } + } + val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" - // Build user message — text-only or multimodal (OpenAI vision format) - val userMessage = if (imageBase64 != null) { - Log.d(TAG, "Attaching image (${imageBase64.length / 1024} KB base64) to task") - JSONObject().apply { - put("role", "user") - put("content", org.json.JSONArray().apply { - put(JSONObject().apply { - put("type", "text") - put("text", task) - }) - put(JSONObject().apply { - put("type", "image_url") - put("image_url", JSONObject().apply { - put("url", 
"data:image/jpeg;base64,$imageBase64") - }) - }) - }) - } - } else { - JSONObject().apply { - put("role", "user") - put("content", task) - } + val userMessage = JSONObject().apply { + put("role", "user") + put("content", task) } conversationHistory.add(userMessage) @@ -275,4 +271,37 @@ class OpenClawBridge { } } + /** + * Send a task with image via WebSocket chat.send RPC. + * This is the only method that reliably passes images to the OpenClaw agent. + */ + private suspend fun sendViaWebSocket( + eventClient: OpenClawEventClient, + task: String, + imageBase64: String, + toolName: String + ): ToolResult = suspendCancellableCoroutine { continuation -> + eventClient.sendChatMessage( + sessionKey = sessionKey, + message = task, + imageBase64 = imageBase64 + ) { reply -> + if (reply != null) { + conversationHistory.add(JSONObject().apply { + put("role", "user") + put("content", task) + }) + conversationHistory.add(JSONObject().apply { + put("role", "assistant") + put("content", reply) + }) + Log.d(TAG, "WebSocket chat.send result: ${reply.take(200)}") + _lastToolCallStatus.value = ToolCallStatus.Completed(toolName) + continuation.resume(ToolResult.Success(reply)) {} + } else { + _lastToolCallStatus.value = ToolCallStatus.Failed(toolName, "WebSocket chat.send failed") + continuation.resume(ToolResult.Failure("Failed to send image via WebSocket")) {} + } + } + } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 007df43c..da35be37 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -28,6 +28,9 @@ class 
OpenClawEventClient { private var reconnectDelayMs = 2_000L private val handler = Handler(Looper.getMainLooper()) + // Pending RPC responses keyed by request ID + private val pendingResponses = mutableMapOf Unit>() + private val client = OkHttpClient.Builder() .readTimeout(0, TimeUnit.MILLISECONDS) .pingInterval(10, TimeUnit.SECONDS) @@ -93,15 +96,22 @@ class OpenClawEventClient { when (type) { "event" -> handleEvent(json) "res" -> { - val ok = json.optBoolean("ok", false) - if (ok) { - Log.d(TAG, "Connected and authenticated") - isConnected = true - reconnectDelayMs = 2_000L + val id = json.optString("id", "") + val callback = pendingResponses.remove(id) + if (callback != null) { + callback(json) } else { - val error = json.optJSONObject("error") - val msg = error?.optString("message", "unknown") ?: "unknown" - Log.e(TAG, "Connect failed: $msg") + // Connect handshake response + val ok = json.optBoolean("ok", false) + if (ok) { + Log.d(TAG, "Connected and authenticated") + isConnected = true + reconnectDelayMs = 2_000L + } else { + val error = json.optJSONObject("error") + val msg = error?.optString("message", "unknown") ?: "unknown" + Log.e(TAG, "Connect failed: $msg") + } } } } @@ -171,6 +181,74 @@ class OpenClawEventClient { onNotification?.invoke("[Scheduled update] $summary") } + /** + * Send a chat message with optional image attachment via WebSocket chat.send RPC. + * This is the only way to reliably pass images to the OpenClaw agent. + * Returns the agent's reply text, or null on failure. + */ + fun sendChatMessage( + sessionKey: String, + message: String, + imageBase64: String? = null, + imageMimeType: String = "image/jpeg", + onResult: (String?) 
-> Unit + ) { + if (!isConnected || webSocket == null) { + Log.e(TAG, "Cannot send chat.send: not connected") + onResult(null) + return + } + + val reqId = UUID.randomUUID().toString() + + val params = JSONObject().apply { + put("sessionKey", sessionKey) + put("message", message) + put("idempotencyKey", reqId) + if (imageBase64 != null) { + put("attachments", JSONArray().put(JSONObject().apply { + put("mimeType", imageMimeType) + put("fileName", "camera_frame.jpg") + put("content", imageBase64) + })) + } + } + + val request = JSONObject().apply { + put("type", "req") + put("id", reqId) + put("method", "chat.send") + put("params", params) + } + + // Register callback for response + pendingResponses[reqId] = { response -> + val ok = response.optBoolean("ok", false) + if (ok) { + val payload = response.optJSONObject("payload") + val reply = payload?.optString("reply", null) + ?: payload?.optJSONObject("result")?.optString("text", null) + ?: payload?.toString() + Log.d(TAG, "chat.send success: ${reply?.take(200)}") + onResult(reply) + } else { + val error = response.optJSONObject("error") + val msg = error?.optString("message", "unknown") ?: "unknown" + Log.e(TAG, "chat.send failed: $msg") + onResult(null) + } + } + + val sent = webSocket?.send(request.toString()) ?: false + if (!sent) { + pendingResponses.remove(reqId) + Log.e(TAG, "Failed to send chat.send WebSocket message") + onResult(null) + } else { + Log.d(TAG, "chat.send sent (id=$reqId, hasImage=${imageBase64 != null})") + } + } + private fun scheduleReconnect() { if (!shouldReconnect) return Log.d(TAG, "Reconnecting in ${reconnectDelayMs}ms") From 279c7363c6ec25dc9d3be6644016d69ce4be8fd8 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:41:18 -0600 Subject: [PATCH 20/68] Wait for actual agent reply on chat.send instead of treating ack as result --- .../openclaw/OpenClawEventClient.kt | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index da35be37..faf5abbe 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -31,6 +31,9 @@ class OpenClawEventClient { // Pending RPC responses keyed by request ID private val pendingResponses = mutableMapOf Unit>() + // Pending chat.send results keyed by runId — waits for the "chat" event with state="final" + private val pendingChatResults = mutableMapOf Unit>() + private val client = OkHttpClient.Builder() .readTimeout(0, TimeUnit.MILLISECONDS) .pingInterval(10, TimeUnit.SECONDS) @@ -128,6 +131,44 @@ class OpenClawEventClient { "connect.challenge" -> sendConnectHandshake() "heartbeat" -> handleHeartbeatEvent(payload) "cron" -> handleCronEvent(payload) + "chat" -> handleChatEvent(payload) + } + } + + private fun handleChatEvent(payload: JSONObject) { + val state = payload.optString("state", "") + val runId = payload.optString("runId", "") + + if (state == "final" && runId.isNotEmpty()) { + val callback = pendingChatResults.remove(runId) + if (callback != null) { + // Extract reply text from message.content + val message = payload.optJSONObject("message") + val content = message?.opt("content") + val replyText = when { + content is String -> content + content is JSONArray -> { + val parts = mutableListOf() + for (i in 0 until content.length()) { + val part = content.optJSONObject(i) + if (part?.optString("type") == "text") { + parts.add(part.optString("text", "")) + } + } + parts.joinToString("\n").ifEmpty { null } + } + else -> null + } + Log.d(TAG, "chat final for 
$runId: ${replyText?.take(200)}") + callback(replyText ?: "Agent completed but returned no text.") + } + } else if (state == "error" && runId.isNotEmpty()) { + val callback = pendingChatResults.remove(runId) + if (callback != null) { + val errorMsg = payload.optString("errorMessage", "Agent error") + Log.e(TAG, "chat error for $runId: $errorMsg") + callback(null) + } } } @@ -221,20 +262,17 @@ class OpenClawEventClient { put("params", params) } - // Register callback for response + // Register callback for RPC ack — then wait for the actual chat event pendingResponses[reqId] = { response -> val ok = response.optBoolean("ok", false) if (ok) { - val payload = response.optJSONObject("payload") - val reply = payload?.optString("reply", null) - ?: payload?.optJSONObject("result")?.optString("text", null) - ?: payload?.toString() - Log.d(TAG, "chat.send success: ${reply?.take(200)}") - onResult(reply) + // RPC accepted — now wait for the "chat" event with state="final" + Log.d(TAG, "chat.send accepted, waiting for agent reply (runId=$reqId)") + pendingChatResults[reqId] = onResult } else { val error = response.optJSONObject("error") val msg = error?.optString("message", "unknown") ?: "unknown" - Log.e(TAG, "chat.send failed: $msg") + Log.e(TAG, "chat.send rejected: $msg") onResult(null) } } From 747ad65f5ffca90257c2ad1d8245718cf679a4bd Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:43:19 -0600 Subject: [PATCH 21/68] Make execute tool NON_BLOCKING with INTERRUPT scheduling for async responses --- .../cameraaccess/openclaw/ToolCallModels.kt | 2 +- .../cameraaccess/openclaw/ToolCallRouter.kt | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index e8e93bcc..7136ce22 100644 
--- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -144,7 +144,7 @@ object ToolDeclarations { }) put("required", JSONArray().put("task")) }) - put("behavior", "BLOCKING") + put("behavior", "NON_BLOCKING") } } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 287582b2..ea83e1b9 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -142,7 +142,9 @@ class ToolCallRouter( JSONObject().apply { put("id", callId) put("name", name) - put("response", result.toJSON()) + put("response", result.toJSON().apply { + put("scheduling", "INTERRUPT") + }) } ) ) From aee3b89d0f5abf4d3c7449ef8e7d08dbf8c0bd3e Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:46:46 -0600 Subject: [PATCH 22/68] Sync iOS with Android: NON_BLOCKING execute, WebSocket image sending, INTERRUPT scheduling - execute tool changed from BLOCKING to NON_BLOCKING (Gemini keeps talking) - Tool responses include scheduling=INTERRUPT (Gemini interrupts to speak result) - Image tasks routed through WebSocket chat.send with attachments (not HTTP) - OpenClawEventClient gains sendChatMessage + chat event handling - OpenClawBridge routes image tasks through eventClient WebSocket - System prompt updated with stronger include_image guidance --- .../CameraAccess/Gemini/GeminiConfig.swift | 7 +- .../Gemini/GeminiSessionViewModel.swift | 1 + 
.../OpenClaw/OpenClawBridge.swift | 62 ++++++--- .../OpenClaw/OpenClawEventClient.swift | 125 ++++++++++++++++-- .../OpenClaw/ToolCallModels.swift | 2 +- .../OpenClaw/ToolCallRouter.swift | 2 +- 6 files changed, 172 insertions(+), 27 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift index 2b82edd8..e1e82927 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiConfig.swift @@ -23,7 +23,12 @@ enum GeminiConfig { The capture_photo tool saves the current camera frame as a photo to the device gallery. Use it when the user asks to take a photo, capture what they see, save a picture, or snap a photo. You can include an optional description of what is in the photo. - When calling execute, set include_image=true if the task requires the assistant to SEE what you see (e.g. editing a photo, identifying a product by appearance, reading text from a sign, or any task where visual context is needed). Do NOT set it for tasks that can be fully described in text. + When calling execute, you MUST set include_image=true whenever: + - The user asks to send, share, or forward a photo/image to anyone + - The task involves editing, processing, or analyzing an image + - The user says "send this to..." or "show this to..." referring to what they see + - The task requires the assistant to see the current camera view (e.g. identifying a product, reading text from a sign) + Only omit include_image (or set it to false) for purely text-based tasks like sending a text message, searching, or setting a reminder. The execute tool connects you to a powerful personal assistant that can do anything -- send messages, search the web, manage lists, set reminders, create notes, research topics, control smart home devices, interact with apps, and much more. 
diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 3d0c39b3..4bdd7cc3 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -107,6 +107,7 @@ class GeminiSessionViewModel: ObservableObject { // Check OpenClaw connectivity and start fresh session await openClawBridge.checkConnection() openClawBridge.resetSession() + openClawBridge.eventClient = eventClient // Wire tool call handling toolCallRouter = ToolCallRouter(bridge: openClawBridge) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift index c2e05af2..0e64a91e 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift @@ -13,6 +13,9 @@ class OpenClawBridge: ObservableObject { @Published var lastToolCallStatus: ToolCallStatus = .idle @Published var connectionState: OpenClawConnectionState = .notConfigured + /// Set by GeminiSessionViewModel so we can send image tasks via WebSocket + var eventClient: OpenClawEventClient? 
+ private let session: URLSession private let pingSession: URLSession private var sessionKey: String @@ -75,27 +78,23 @@ class OpenClawBridge: ObservableObject { ) async -> ToolResult { lastToolCallStatus = .executing(toolName) + // If image is provided, route through WebSocket chat.send (only working method) + if let image = image, let jpegData = image.jpegData(compressionQuality: 0.8) { + let base64 = jpegData.base64EncodedString() + if let ec = eventClient { + NSLog("[OpenClaw] Sending image task via WebSocket chat.send (%d KB)", jpegData.count / 1024) + return await sendViaWebSocket(eventClient: ec, task: task, imageBase64: base64, toolName: toolName) + } else { + NSLog("[OpenClaw] Image task but no event client, falling back to text-only HTTP") + } + } + guard let url = URL(string: "\(GeminiConfig.openClawHost):\(GeminiConfig.openClawPort)/v1/chat/completions") else { lastToolCallStatus = .failed(toolName, "Invalid URL") return .failure("Invalid gateway URL") } - // Build user message — text-only or multimodal (OpenAI vision format) - let userMessage: [String: Any] - if let image = image, let jpegData = image.jpegData(compressionQuality: 0.8) { - let base64 = jpegData.base64EncodedString() - userMessage = [ - "role": "user", - "content": [ - ["type": "text", "text": task], - ["type": "image_url", "image_url": ["url": "data:image/jpeg;base64,\(base64)"]] - ] as [[String: Any]] - ] - NSLog("[OpenClaw] Attaching image (%d KB) to task", jpegData.count / 1024) - } else { - userMessage = ["role": "user", "content": task] - } - + let userMessage: [String: Any] = ["role": "user", "content": task] conversationHistory.append(userMessage) // Trim history to keep only the most recent turns (user+assistant pairs) @@ -154,4 +153,35 @@ class OpenClawBridge: ObservableObject { return .failure("Agent error: \(error.localizedDescription)") } } + + /// Send a task with image via WebSocket chat.send RPC. 
+ private func sendViaWebSocket( + eventClient: OpenClawEventClient, + task: String, + imageBase64: String, + toolName: String + ) async -> ToolResult { + await withCheckedContinuation { continuation in + eventClient.sendChatMessage( + sessionKey: sessionKey, + message: task, + imageBase64: imageBase64 + ) { [weak self] reply in + guard let self else { + continuation.resume(returning: .failure("Session ended")) + return + } + if let reply { + self.conversationHistory.append(["role": "user", "content": task]) + self.conversationHistory.append(["role": "assistant", "content": reply]) + NSLog("[OpenClaw] WebSocket chat.send result: %@", String(reply.prefix(200))) + self.lastToolCallStatus = .completed(toolName) + continuation.resume(returning: .success(reply)) + } else { + self.lastToolCallStatus = .failed(toolName, "WebSocket chat.send failed") + continuation.resume(returning: .failure("Failed to send image via WebSocket")) + } + } + } + } } diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift index 3f804b36..9cc68122 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift @@ -10,6 +10,10 @@ class OpenClawEventClient { private var reconnectDelay: TimeInterval = 2 private let maxReconnectDelay: TimeInterval = 30 + // Pending RPC responses and chat results + private var pendingResponses: [String: ([String: Any]) -> Void] = [:] + private var pendingChatResults: [String: (String?) -> Void] = [:] + func connect() { guard GeminiConfig.isOpenClawConfigured else { NSLog("[OpenClawWS] Not configured, skipping") @@ -85,15 +89,20 @@ class OpenClawEventClient { if type == "event" { handleEvent(json) } else if type == "res" { - let ok = json["ok"] as? Bool ?? 
false - if ok { - NSLog("[OpenClawWS] Connected and authenticated") - isConnected = true - reconnectDelay = 2 + let id = json["id"] as? String ?? "" + if let callback = pendingResponses.removeValue(forKey: id) { + callback(json) } else { - let error = json["error"] as? [String: Any] - let msg = error?["message"] as? String ?? "unknown" - NSLog("[OpenClawWS] Connect failed: %@", msg) + let ok = json["ok"] as? Bool ?? false + if ok { + NSLog("[OpenClawWS] Connected and authenticated") + isConnected = true + reconnectDelay = 2 + } else { + let error = json["error"] as? [String: Any] + let msg = error?["message"] as? String ?? "unknown" + NSLog("[OpenClawWS] Connect failed: %@", msg) + } } } } @@ -112,6 +121,9 @@ class OpenClawEventClient { case "cron": handleCronEvent(payload) + case "chat": + handleChatEvent(payload) + default: break } @@ -174,6 +186,103 @@ class OpenClawEventClient { onNotification?("[Scheduled update] \(summary)") } + private func handleChatEvent(_ payload: [String: Any]) { + let state = payload["state"] as? String ?? "" + let runId = payload["runId"] as? String ?? "" + guard !runId.isEmpty else { return } + + if state == "final" { + if let callback = pendingChatResults.removeValue(forKey: runId) { + let message = payload["message"] as? [String: Any] + let content = message?["content"] + let replyText: String? + if let text = content as? String { + replyText = text + } else if let parts = content as? [[String: Any]] { + replyText = parts.compactMap { ($0["type"] as? String == "text") ? $0["text"] as? String : nil }.joined(separator: "\n") + } else { + replyText = nil + } + NSLog("[OpenClawWS] chat final for %@: %@", runId, String((replyText ?? "nil").prefix(200))) + callback(replyText ?? "Agent completed but returned no text.") + } + } else if state == "error" { + if let callback = pendingChatResults.removeValue(forKey: runId) { + let errorMsg = payload["errorMessage"] as? String ?? 
"Agent error" + NSLog("[OpenClawWS] chat error for %@: %@", runId, errorMsg) + callback(nil) + } + } + } + + /// Send a chat message with optional image attachment via WebSocket chat.send RPC. + /// This is the only way to reliably pass images to the OpenClaw agent. + func sendChatMessage( + sessionKey: String, + message: String, + imageBase64: String? = nil, + completion: @escaping (String?) -> Void + ) { + guard isConnected, webSocketTask != nil else { + NSLog("[OpenClawWS] Cannot send chat.send: not connected") + completion(nil) + return + } + + let reqId = UUID().uuidString + var params: [String: Any] = [ + "sessionKey": sessionKey, + "message": message, + "idempotencyKey": reqId + ] + + if let imageBase64 { + params["attachments"] = [[ + "mimeType": "image/jpeg", + "fileName": "camera_frame.jpg", + "content": imageBase64 + ]] + } + + let request: [String: Any] = [ + "type": "req", + "id": reqId, + "method": "chat.send", + "params": params + ] + + // Register RPC ack callback — then wait for chat event + pendingResponses[reqId] = { [weak self] response in + let ok = response["ok"] as? Bool ?? false + if ok { + NSLog("[OpenClawWS] chat.send accepted, waiting for agent reply (runId=%@)", reqId) + self?.pendingChatResults[reqId] = completion + } else { + let error = response["error"] as? [String: Any] + let msg = error?["message"] as? String ?? "unknown" + NSLog("[OpenClawWS] chat.send rejected: %@", msg) + completion(nil) + } + } + + guard let data = try? JSONSerialization.data(withJSONObject: request), + let string = String(data: data, encoding: .utf8) else { + pendingResponses.removeValue(forKey: reqId) + completion(nil) + return + } + + webSocketTask?.send(.string(string)) { [weak self] error in + if let error { + NSLog("[OpenClawWS] chat.send send error: %@", error.localizedDescription) + self?.pendingResponses.removeValue(forKey: reqId) + completion(nil) + } else { + NSLog("[OpenClawWS] chat.send sent (id=%@, hasImage=%@)", reqId, imageBase64 != nil ? 
"true" : "false") + } + } + } + private func scheduleReconnect() { guard shouldReconnect else { return } NSLog("[OpenClawWS] Reconnecting in %.0fs", reconnectDelay) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index 6515bf37..4130e720 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -120,6 +120,6 @@ enum ToolDeclarations { ], "required": ["task"] ] as [String: Any], - "behavior": "BLOCKING" + "behavior": "NON_BLOCKING" ] } diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 14100d49..94907559 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -97,7 +97,7 @@ class ToolCallRouter { [ "id": callId, "name": name, - "response": result.responseValue + "response": result.responseValue.merging(["scheduling": "INTERRUPT"]) { _, new in new } ] ] ] From 2a8d4984e264268f96f48fdbfa19d39a4cdc773c Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 20:56:37 -0600 Subject: [PATCH 23/68] Auto-attach camera frame on every execute call when video is enabled (iOS + Android) --- .../CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift | 3 ++- .../cameraaccess/openclaw/ToolCallRouter.kt | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 94907559..b1bb1e91 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -42,7 +42,8 @@ class ToolCallRouter { let task = Task { @MainActor in let taskDesc = call.args["task"] as? String ?? 
String(describing: call.args) - let includeImage = call.args["include_image"] as? Bool ?? false + // Always attach latest frame when video streaming is enabled (like Oversite) + let includeImage = call.args["include_image"] as? Bool ?? SettingsManager.shared.videoStreamingEnabled let image: UIImage? = includeImage ? latestFrame : nil let result = await bridge.delegateTask(task: taskDesc, toolName: callName, image: image) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index ea83e1b9..210b3f35 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -61,8 +61,8 @@ class ToolCallRouter( ?.trim() ?.takeIf { it.isNotEmpty() } - // Only attach image when Gemini explicitly requests it via include_image=true - val includeImage = call.args["include_image"] as? Boolean ?: false + // Always attach latest frame when video streaming is enabled + val includeImage = call.args["include_image"] as? 
Boolean ?: SettingsManager.videoStreamingEnabled val bitmap = if (includeImage) latestFrameProvider() else null Log.d(TAG, "include_image=$includeImage, bitmapNull=${bitmap == null}") From a51de459c691089a7b86ae5f064a6904a793399d Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 21:29:51 -0600 Subject: [PATCH 24/68] Default include_image to false, Gemini must opt in (iOS + Android) --- .../CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift | 4 ++-- .../cameraaccess/openclaw/ToolCallRouter.kt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index b1bb1e91..12012052 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -42,8 +42,8 @@ class ToolCallRouter { let task = Task { @MainActor in let taskDesc = call.args["task"] as? String ?? String(describing: call.args) - // Always attach latest frame when video streaming is enabled (like Oversite) - let includeImage = call.args["include_image"] as? Bool ?? SettingsManager.shared.videoStreamingEnabled + // Attach image only when Gemini explicitly sets include_image=true + let includeImage = call.args["include_image"] as? Bool ?? false let image: UIImage? = includeImage ? 
latestFrame : nil let result = await bridge.delegateTask(task: taskDesc, toolName: callName, image: image) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 210b3f35..6c51b5b8 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -61,8 +61,8 @@ class ToolCallRouter( ?.trim() ?.takeIf { it.isNotEmpty() } - // Always attach latest frame when video streaming is enabled - val includeImage = call.args["include_image"] as? Boolean ?: SettingsManager.videoStreamingEnabled + // Attach image only when Gemini explicitly sets include_image=true + val includeImage = call.args["include_image"] as? 
Boolean ?: false val bitmap = if (includeImage) latestFrameProvider() else null Log.d(TAG, "include_image=$includeImage, bitmapNull=${bitmap == null}") From 7f2002ff0252a0bb6224bcbf38841031cbb9cfe0 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 21:37:23 -0600 Subject: [PATCH 25/68] Auto-save frame to gallery when image is attached to execute call (iOS + Android) --- .../CameraAccess/Gemini/GeminiSessionViewModel.swift | 8 ++++++++ .../CameraAccess/OpenClaw/ToolCallRouter.swift | 7 +++++++ .../cameraaccess/gemini/GeminiSessionViewModel.kt | 6 ++++++ .../cameraaccess/openclaw/ToolCallRouter.kt | 5 +++++ 4 files changed, 26 insertions(+) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 4bdd7cc3..fc8a4c8a 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -127,6 +127,14 @@ class GeminiSessionViewModel: ObservableObject { } } + // Auto-save to gallery when image is attached to execute call + toolCallRouter?.onAutoSaveFrame = { [weak self] image, description in + guard let self else { return } + if let photo = self.photoCaptureStore.saveFrame(image, description: description) { + self.lastCapturedPhoto = photo + } + } + geminiService.onToolCall = { [weak self] toolCall in guard let self else { return } Task { @MainActor in diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 12012052..2b9aaeaa 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -12,6 +12,9 @@ class ToolCallRouter { /// Latest camera frame for include_image on execute tool calls. var latestFrame: UIImage? 
+ /// Callback to auto-save frame to gallery when image is attached to execute call. + var onAutoSaveFrame: ((_ image: UIImage, _ description: String?) -> Void)? + init(bridge: OpenClawBridge) { self.bridge = bridge } @@ -45,6 +48,10 @@ class ToolCallRouter { // Attach image only when Gemini explicitly sets include_image=true let includeImage = call.args["include_image"] as? Bool ?? false let image: UIImage? = includeImage ? latestFrame : nil + // Auto-save to gallery when image is attached + if let image { + onAutoSaveFrame?(image, String(taskDesc.prefix(100))) + } let result = await bridge.delegateTask(task: taskDesc, toolName: callName, image: image) guard !Task.isCancelled else { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 44042b4b..d4beef6c 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -250,6 +250,12 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } } + // Auto-save to gallery when image is attached to execute call + toolCallRouter?.onAutoSaveFrame = { bitmap, description -> + PhotoCaptureStore.saveFrame(getApplication(), bitmap, description) + _captureEvent.value = PhotoCaptureStore.photos.value.firstOrNull() + } + // Load gallery PhotoCaptureStore.loadPhotos(getApplication()) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 
6c51b5b8..885ddbc9 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -27,6 +27,9 @@ class ToolCallRouter( /** Callback for local capture_photo handling. */ var onCapturePhoto: ((description: String?, completion: (ToolResult) -> Unit) -> Unit)? = null + /** Callback to auto-save frame to gallery when image is attached to execute call. */ + var onAutoSaveFrame: ((Bitmap, String?) -> Unit)? = null + private val inFlightJobs = mutableMapOf() fun handleToolCall( @@ -68,6 +71,8 @@ class ToolCallRouter( val imageBase64: String? = if (includeImage && bitmap != null) { try { + // Auto-save to gallery + onAutoSaveFrame?.invoke(bitmap, rewrittenTask.take(100)) val baos = ByteArrayOutputStream() bitmap.compress(Bitmap.CompressFormat.JPEG, JPEG_QUALITY_FOR_UPLOAD, baos) android.util.Base64.encodeToString(baos.toByteArray(), android.util.Base64.NO_WRAP) From 739c960aba47a11f6ce29701a67c036ea052c801 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 21:49:07 -0600 Subject: [PATCH 26/68] Ensure messages are cleared on session stop --- .../cameraaccess/gemini/GeminiSessionViewModel.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index d4beef6c..afad4f0b 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -434,6 +434,7 @@ class 
GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { netMonitorJob = null netMonitor.stop() + // Clear all messages — any in-progress tool calls are cancelled _uiState.value = GeminiUiState() lastUserOriginalInstruction = null latestFrameForToolCall = null From a843f56c0267368c424cad854e984214ab4df65d Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 21:53:08 -0600 Subject: [PATCH 27/68] Clear pending WebSocket callbacks on disconnect to prevent ghost tool calls (iOS + Android) --- .../CameraAccess/OpenClaw/OpenClawEventClient.swift | 3 +++ .../cameraaccess/openclaw/OpenClawEventClient.kt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift index 9cc68122..4202c9da 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift @@ -28,6 +28,9 @@ class OpenClawEventClient { func disconnect() { shouldReconnect = false isConnected = false + // Cancel all pending callbacks so they don't fire after session stops + pendingResponses.removeAll() + pendingChatResults.removeAll() webSocketTask?.cancel(with: .normalClosure, reason: nil) webSocketTask = nil session?.invalidateAndCancel() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index faf5abbe..7db0e49f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -53,6 +53,9 @@ class OpenClawEventClient { 
shouldReconnect = false isConnected = false handler.removeCallbacksAndMessages(null) + // Cancel all pending callbacks so they don't fire after session stops + pendingResponses.clear() + pendingChatResults.clear() webSocket?.close(1000, null) webSocket = null Log.d(TAG, "Disconnected") From 41492186b3cb8559ce96798920b306e46efb9de3 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 21:55:29 -0600 Subject: [PATCH 28/68] Increase Gemini WebSocket ping interval to 30s to prevent timeout during image upload --- .../externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt index 46ebc769..715db0a8 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt @@ -68,7 +68,7 @@ class GeminiLiveService { // If you want, change 10 -> 30/60. 
private val client = OkHttpClient.Builder() .readTimeout(0, TimeUnit.MILLISECONDS) - .pingInterval(10, TimeUnit.SECONDS) + .pingInterval(30, TimeUnit.SECONDS) .retryOnConnectionFailure(true) .build() From 9e9cec846feb4a8b55f6887a08607a8cc9238b0e Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 22:07:40 -0600 Subject: [PATCH 29/68] Show gallery as overlay instead of replacing StreamScreen to prevent session disconnect --- .../cameraaccess/ui/StreamScreen.kt | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index eeb349f5..fea13fe6 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -99,16 +99,6 @@ fun StreamScreen( } } - // Gallery screens - if (selectedGalleryPhoto != null) { - GalleryDetailScreen(photo = selectedGalleryPhoto!!, onBack = { selectedGalleryPhoto = null }) - return - } - if (showGallery) { - GalleryScreen(onBack = { showGallery = false }, onPhotoSelected = { selectedGalleryPhoto = it }) - return - } - // Wire Gemini VM to Stream VM for frame forwarding LaunchedEffect(geminiViewModel) { streamViewModel.geminiViewModel = geminiViewModel @@ -317,4 +307,19 @@ fun StreamScreen( ) } } + + // Gallery as full-screen overlay (not replacing StreamScreen, so session stays alive) + if (showGallery || selectedGalleryPhoto != null) { + if (selectedGalleryPhoto != null) { + GalleryDetailScreen( + photo = selectedGalleryPhoto!!, + onBack = { selectedGalleryPhoto = null } + ) + } else { + GalleryScreen( + onBack = { showGallery = false }, + onPhotoSelected = { 
selectedGalleryPhoto = it } + ) + } + } } \ No newline at end of file From 4888ef5902bb0739e7453b9583fa42fb13ef7789 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 22:15:45 -0600 Subject: [PATCH 30/68] Add session history with timestamps and dividers (iOS + Android) - Messages persist across session stop/start (not cleared) - Session divider with date/time inserted between sessions - Timestamps shown on messages when 2+ minutes apart or after divider - First message in each session always shows timestamp --- .../CameraAccess/Chat/ChatMessage.swift | 1 + .../Chat/ChatTranscriptView.swift | 101 +++++++++++++++--- .../Gemini/GeminiSessionViewModel.swift | 5 + .../cameraaccess/chat/ChatMessage.kt | 1 + .../gemini/GeminiSessionViewModel.kt | 12 ++- .../cameraaccess/ui/ChatTranscriptView.kt | 94 +++++++++++++--- 6 files changed, 181 insertions(+), 33 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift index f41dd9fa..694ab3c7 100644 --- a/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift +++ b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift @@ -20,6 +20,7 @@ enum ChatMessageRole: Equatable { case user case assistant case toolCall(String) // tool name + case sessionDivider // separator between sessions } enum ChatMessageStatus: Equatable { diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift b/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift index 1968d838..b749053c 100644 --- a/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift +++ b/samples/CameraAccess/CameraAccess/Chat/ChatTranscriptView.swift @@ -7,8 +7,9 @@ struct ChatTranscriptView: View { ScrollViewReader { proxy in ScrollView { LazyVStack(spacing: 4) { - ForEach(geminiVM.messages) { message in - MessageBubbleView(message: message) + ForEach(Array(geminiVM.messages.enumerated()), id: \.element.id) { index, message in + let showTime = 
shouldShowTimestamp(at: index, in: geminiVM.messages) + MessageBubbleView(message: message, showTimestamp: showTime) .id(message.id) } } @@ -26,8 +27,30 @@ struct ChatTranscriptView: View { } } +private func shouldShowTimestamp(at index: Int, in messages: [ChatMessage]) -> Bool { + let message = messages[index] + if message.role == .sessionDivider { return false } + if index == 0 { return true } + let prev = messages[index - 1] + if prev.role == .sessionDivider { return true } + // Show timestamp if 2+ minutes since previous message + return message.timestamp.timeIntervalSince(prev.timestamp) > 120 +} + struct MessageBubbleView: View { let message: ChatMessage + let showTimestamp: Bool + + init(message: ChatMessage, showTimestamp: Bool = false) { + self.message = message + self.showTimestamp = showTimestamp + } + + private var timeString: String { + let formatter = DateFormatter() + formatter.timeStyle = .short + return formatter.string(from: message.timestamp) + } var body: some View { switch message.role { @@ -37,19 +60,58 @@ struct MessageBubbleView: View { assistantBubble case .toolCall(let name): toolCallPill(name: name) + case .sessionDivider: + sessionDivider + } + } + + private var sessionDivider: some View { + HStack { + Rectangle() + .fill(Color.white.opacity(0.2)) + .frame(height: 0.5) + Text(formattedDate) + .font(.system(size: 11)) + .foregroundColor(.white.opacity(0.4)) + .fixedSize() + Rectangle() + .fill(Color.white.opacity(0.2)) + .frame(height: 0.5) + } + .padding(.horizontal, 24) + .padding(.vertical, 12) + } + + private var formattedDate: String { + let formatter = DateFormatter() + let calendar = Calendar.current + if calendar.isDateInToday(message.timestamp) { + formatter.timeStyle = .short + return "Today \(formatter.string(from: message.timestamp))" + } else { + formatter.dateStyle = .medium + formatter.timeStyle = .short + return formatter.string(from: message.timestamp) } } private var userBubble: some View { HStack { Spacer(minLength: 60) 
- Text(message.text) - .font(.system(size: 15)) - .foregroundColor(.white) - .padding(.horizontal, 14) - .padding(.vertical, 10) - .background(Color.blue) - .cornerRadius(18) + VStack(alignment: .trailing, spacing: 2) { + Text(message.text) + .font(.system(size: 15)) + .foregroundColor(.white) + .padding(.horizontal, 14) + .padding(.vertical, 10) + .background(Color.blue) + .cornerRadius(18) + if showTimestamp { + Text(timeString) + .font(.system(size: 10)) + .foregroundColor(.white.opacity(0.3)) + } + } } .padding(.horizontal, 16) .padding(.vertical, 2) @@ -57,13 +119,20 @@ struct MessageBubbleView: View { private var assistantBubble: some View { HStack { - VStack(alignment: .leading, spacing: 0) { - Text(message.text) - .font(.system(size: 15)) - .foregroundColor(.white.opacity(0.9)) - if message.status == .streaming { - TypingCursor() - .padding(.top, 2) + VStack(alignment: .leading, spacing: 2) { + VStack(alignment: .leading, spacing: 0) { + Text(message.text) + .font(.system(size: 15)) + .foregroundColor(.white.opacity(0.9)) + if message.status == .streaming { + TypingCursor() + .padding(.top, 2) + } + } + if showTimestamp { + Text(timeString) + .font(.system(size: 10)) + .foregroundColor(.white.opacity(0.3)) } } Spacer(minLength: 60) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index fc8a4c8a..622cf923 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -42,6 +42,11 @@ class GeminiSessionViewModel: ObservableObject { isGeminiActive = true RemoteLogger.shared.log("session:start") + // Insert session divider if there are previous messages + if !messages.isEmpty { + messages.append(ChatMessage(role: .sessionDivider, text: "")) + } + // Wire audio callbacks audioManager.onAudioCaptured = { [weak self] data in guard let self else { return } diff 
--git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt index 79c18b25..c5a9e5f4 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatMessage.kt @@ -14,6 +14,7 @@ sealed class ChatMessageRole { data object User : ChatMessageRole() data object Assistant : ChatMessageRole() data class ToolCall(val name: String) : ChatMessageRole() + data object SessionDivider : ChatMessageRole() } sealed class ChatMessageStatus { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index afad4f0b..e7d3842b 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -151,8 +151,14 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { micStateBeforeExecution = null micAutoMutedForExecution = false + // Insert session divider if there are previous messages + val currentMessages = _uiState.value.messages.toMutableList() + if (currentMessages.isNotEmpty()) { + currentMessages.add(ChatMessage(role = ChatMessageRole.SessionDivider, text = "")) + } + // Start with mic enabled by default - _uiState.value = _uiState.value.copy(isGeminiActive = true, isMicEnabled = true) + _uiState.value = _uiState.value.copy(isGeminiActive = true, isMicEnabled 
= true, messages = currentMessages) audioManager.setMicEnabled(true) RemoteLogger.log("session:start") @@ -434,8 +440,8 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { netMonitorJob = null netMonitor.stop() - // Clear all messages — any in-progress tool calls are cancelled - _uiState.value = GeminiUiState() + // Keep message history, just reset session state + _uiState.value = GeminiUiState(messages = _uiState.value.messages) lastUserOriginalInstruction = null latestFrameForToolCall = null micStateBeforeExecution = null diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt index 94356b3f..10eb43aa 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ChatTranscriptView.kt @@ -8,29 +8,34 @@ import androidx.compose.foundation.layout.Row import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding import androidx.compose.foundation.layout.size import androidx.compose.foundation.layout.width import androidx.compose.foundation.lazy.LazyColumn -import androidx.compose.foundation.lazy.items +import androidx.compose.foundation.lazy.itemsIndexed import androidx.compose.foundation.lazy.rememberLazyListState import androidx.compose.foundation.shape.RoundedCornerShape import androidx.compose.foundation.text.selection.SelectionContainer import androidx.compose.material3.CircularProgressIndicator -import androidx.compose.material3.Icon +import 
androidx.compose.material3.Divider +import androidx.compose.material3.HorizontalDivider import androidx.compose.material3.Text import androidx.compose.runtime.Composable import androidx.compose.runtime.LaunchedEffect import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.Color -import androidx.compose.ui.res.painterResource import androidx.compose.ui.text.font.FontFamily import androidx.compose.ui.unit.dp import androidx.compose.ui.unit.sp import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessage import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageRole import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessageStatus +import java.text.SimpleDateFormat +import java.util.Calendar +import java.util.Date +import java.util.Locale @Composable fun ChatTranscriptView( @@ -60,28 +65,74 @@ fun ChatTranscriptView( modifier = modifier.fillMaxSize().padding(horizontal = 16.dp), verticalArrangement = Arrangement.spacedBy(4.dp), ) { - items(messages, key = { it.id }) { message -> - MessageBubble(message = message) + itemsIndexed(messages, key = { _, msg -> msg.id }) { index, message -> + val showTime = shouldShowTimestamp(index, messages) + MessageBubble(message = message, showTimestamp = showTime) } } } } } +private fun shouldShowTimestamp(index: Int, messages: List): Boolean { + val message = messages[index] + if (message.role is ChatMessageRole.SessionDivider) return false + if (index == 0) return true + val prev = messages[index - 1] + if (prev.role is ChatMessageRole.SessionDivider) return true + return message.timestamp - prev.timestamp > 120_000 // 2+ minutes +} + +private fun formatTime(timestamp: Long): String { + return SimpleDateFormat("h:mm a", Locale.getDefault()).format(Date(timestamp)) +} + +private fun formatSessionDate(timestamp: Long): String { + val cal = Calendar.getInstance() + val today = Calendar.getInstance() + cal.timeInMillis = timestamp + 
return if (cal.get(Calendar.YEAR) == today.get(Calendar.YEAR) + && cal.get(Calendar.DAY_OF_YEAR) == today.get(Calendar.DAY_OF_YEAR)) { + "Today ${formatTime(timestamp)}" + } else { + SimpleDateFormat("MMM d, h:mm a", Locale.getDefault()).format(Date(timestamp)) + } +} + @Composable -fun MessageBubble(message: ChatMessage, modifier: Modifier = Modifier) { +fun MessageBubble(message: ChatMessage, showTimestamp: Boolean = false, modifier: Modifier = Modifier) { when (message.role) { - is ChatMessageRole.User -> UserBubble(message, modifier) - is ChatMessageRole.Assistant -> AssistantBubble(message, modifier) + is ChatMessageRole.User -> UserBubble(message, showTimestamp, modifier) + is ChatMessageRole.Assistant -> AssistantBubble(message, showTimestamp, modifier) is ChatMessageRole.ToolCall -> ToolCallBubble(message.role.name, message, modifier) + is ChatMessageRole.SessionDivider -> SessionDividerView(message, modifier) } } @Composable -private fun UserBubble(message: ChatMessage, modifier: Modifier = Modifier) { +private fun SessionDividerView(message: ChatMessage, modifier: Modifier = Modifier) { Row( + modifier = modifier + .fillMaxWidth() + .padding(vertical = 12.dp), + verticalAlignment = Alignment.CenterVertically, + ) { + HorizontalDivider(modifier = Modifier.weight(1f), color = Color.Black.copy(alpha = 0.15f)) + Text( + text = formatSessionDate(message.timestamp), + color = Color.Black.copy(alpha = 0.35f), + fontSize = 11.sp, + modifier = Modifier.padding(horizontal = 12.dp), + ) + HorizontalDivider(modifier = Modifier.weight(1f), color = Color.Black.copy(alpha = 0.15f)) + } +} + +@Composable +private fun UserBubble(message: ChatMessage, showTimestamp: Boolean, modifier: Modifier = Modifier) { + Column( modifier = modifier.fillMaxWidth().padding(vertical = 2.dp), - horizontalArrangement = Arrangement.End, + horizontalAlignment = Alignment.End, ) { Text( text = message.text, @@ -91,21 +142,36 @@ private fun UserBubble(message: ChatMessage, modifier: Modifier = 
Modifier) { .background(Color(0xFF2979FF), RoundedCornerShape(18.dp)) .padding(horizontal = 14.dp, vertical = 10.dp), ) + if (showTimestamp) { + Text( + text = formatTime(message.timestamp), + color = Color.Black.copy(alpha = 0.3f), + fontSize = 10.sp, + modifier = Modifier.padding(top = 2.dp, end = 4.dp), + ) + } } } @Composable -private fun AssistantBubble(message: ChatMessage, modifier: Modifier = Modifier) { - Row( +private fun AssistantBubble(message: ChatMessage, showTimestamp: Boolean, modifier: Modifier = Modifier) { + Column( modifier = modifier.fillMaxWidth().padding(vertical = 2.dp), - horizontalArrangement = Arrangement.Start, + horizontalAlignment = Alignment.Start, ) { Text( text = message.text, color = Color.Black.copy(alpha = 0.85f), fontSize = 15.sp, - modifier = Modifier.padding(horizontal = 0.dp), ) + if (showTimestamp) { + Text( + text = formatTime(message.timestamp), + color = Color.Black.copy(alpha = 0.3f), + fontSize = 10.sp, + modifier = Modifier.padding(top = 2.dp, start = 4.dp), + ) + } } } From 04ab7edabd728d6b728d3d69dd5be9d85687784e Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Thu, 26 Mar 2026 22:53:50 -0600 Subject: [PATCH 31/68] Persist chat history to disk across app restarts (iOS + Android) - ChatHistoryStore: JSON file persistence (max 500 messages) - Messages loaded on ViewModel init, saved on turn complete and session stop - Stale streaming status treated as complete on reload --- .../CameraAccess/Chat/ChatHistoryStore.swift | 88 +++++++++++++++++++ .../CameraAccess/Chat/ChatMessage.swift | 8 ++ .../Gemini/GeminiSessionViewModel.swift | 4 +- .../cameraaccess/chat/ChatHistoryStore.kt | 83 +++++++++++++++++ .../gemini/GeminiSessionViewModel.kt | 11 ++- 5 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 samples/CameraAccess/CameraAccess/Chat/ChatHistoryStore.swift create mode 100644 
samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatHistoryStore.swift b/samples/CameraAccess/CameraAccess/Chat/ChatHistoryStore.swift new file mode 100644 index 00000000..7477c2b9 --- /dev/null +++ b/samples/CameraAccess/CameraAccess/Chat/ChatHistoryStore.swift @@ -0,0 +1,88 @@ +import Foundation + +enum ChatHistoryStore { + private static let filename = "chat_history.json" + private static let maxMessages = 500 + + private static var fileURL: URL { + let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0] + return docs.appendingPathComponent(filename) + } + + static func save(_ messages: [ChatMessage]) { + let toSave = Array(messages.suffix(maxMessages)) + let records: [[String: Any]] = toSave.map { msg in + [ + "id": msg.id, + "role": serializeRole(msg.role), + "text": msg.text, + "timestamp": msg.timestamp.timeIntervalSince1970, + "status": serializeStatus(msg.status) + ] + } + guard let data = try? JSONSerialization.data(withJSONObject: records) else { return } + try? data.write(to: fileURL) + } + + static func load() -> [ChatMessage] { + guard FileManager.default.fileExists(atPath: fileURL.path), + let data = try? Data(contentsOf: fileURL), + let records = try? JSONSerialization.jsonObject(with: data) as? [[String: Any]] else { + return [] + } + return records.compactMap { obj in + guard let id = obj["id"] as? String, + let roleStr = obj["role"] as? String, + let timestamp = obj["timestamp"] as? TimeInterval else { return nil } + let text = obj["text"] as? String ?? "" + let statusStr = obj["status"] as? String ?? 
"complete" + return ChatMessage( + id: id, + role: deserializeRole(roleStr), + text: text, + timestamp: Date(timeIntervalSince1970: timestamp), + status: deserializeStatus(statusStr) + ) + } + } + + // MARK: - Serialization + + private static func serializeRole(_ role: ChatMessageRole) -> String { + switch role { + case .user: return "user" + case .assistant: return "assistant" + case .toolCall(let name): return "tool:\(name)" + case .sessionDivider: return "divider" + } + } + + private static func deserializeRole(_ s: String) -> ChatMessageRole { + switch s { + case "user": return .user + case "assistant": return .assistant + case "divider": return .sessionDivider + default: + if s.hasPrefix("tool:") { return .toolCall(String(s.dropFirst(5))) } + return .assistant + } + } + + private static func serializeStatus(_ status: ChatMessageStatus) -> String { + switch status { + case .streaming: return "streaming" + case .complete: return "complete" + case .error(let msg): return "error:\(msg)" + } + } + + private static func deserializeStatus(_ s: String) -> ChatMessageStatus { + switch s { + case "complete": return .complete + case "streaming": return .complete // treat stale streaming as complete + default: + if s.hasPrefix("error:") { return .error(String(s.dropFirst(6))) } + return .complete + } + } +} diff --git a/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift index 694ab3c7..c1109d45 100644 --- a/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift +++ b/samples/CameraAccess/CameraAccess/Chat/ChatMessage.swift @@ -14,6 +14,14 @@ struct ChatMessage: Identifiable, Equatable { self.timestamp = Date() self.status = status } + + init(id: String, role: ChatMessageRole, text: String, timestamp: Date, status: ChatMessageStatus = .complete) { + self.id = id + self.role = role + self.text = text + self.timestamp = timestamp + self.status = status + } } enum ChatMessageRole: Equatable { diff --git 
a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 622cf923..4ab674fe 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -9,7 +9,7 @@ class GeminiSessionViewModel: ObservableObject { @Published var errorMessage: String? @Published var userTranscript: String = "" @Published var aiTranscript: String = "" - @Published var messages: [ChatMessage] = [] + @Published var messages: [ChatMessage] = ChatHistoryStore.load() @Published var toolCallStatus: ToolCallStatus = .idle @Published var openClawConnectionState: OpenClawConnectionState = .notConfigured private let geminiService = GeminiLiveService() @@ -79,6 +79,7 @@ class GeminiSessionViewModel: ObservableObject { } self.finalizeCurrentBubbles() self.userTranscript = "" + ChatHistoryStore.save(self.messages) } } @@ -254,6 +255,7 @@ class GeminiSessionViewModel: ObservableObject { userTranscript = "" aiTranscript = "" toolCallStatus = .idle + ChatHistoryStore.save(messages) } func sendVideoFrameIfThrottled(image: UIImage) { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt new file mode 100644 index 00000000..c9c04741 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt @@ -0,0 +1,83 @@ +package com.meta.wearable.dat.externalsampleapps.cameraaccess.chat + +import android.content.Context +import android.util.Log +import org.json.JSONArray +import org.json.JSONObject +import java.io.File + +object ChatHistoryStore { + private const val TAG = "ChatHistoryStore" + private const val FILENAME = "chat_history.json" + 
private const val MAX_MESSAGES = 500 + + fun save(context: Context, messages: List) { + try { + val json = JSONArray() + for (msg in messages.takeLast(MAX_MESSAGES)) { + json.put(JSONObject().apply { + put("id", msg.id) + put("role", serializeRole(msg.role)) + put("text", msg.text) + put("timestamp", msg.timestamp) + put("status", serializeStatus(msg.status)) + }) + } + File(context.filesDir, FILENAME).writeText(json.toString()) + } catch (e: Exception) { + Log.e(TAG, "Failed to save: ${e.message}") + } + } + + fun load(context: Context): List { + val file = File(context.filesDir, FILENAME) + if (!file.exists()) return emptyList() + return try { + val json = JSONArray(file.readText()) + val messages = mutableListOf() + for (i in 0 until json.length()) { + val obj = json.getJSONObject(i) + messages.add(ChatMessage( + id = obj.getString("id"), + role = deserializeRole(obj.getString("role")), + text = obj.optString("text", ""), + timestamp = obj.getLong("timestamp"), + status = deserializeStatus(obj.optString("status", "complete")), + )) + } + Log.d(TAG, "Loaded ${messages.size} messages") + messages + } catch (e: Exception) { + Log.e(TAG, "Failed to load: ${e.message}") + emptyList() + } + } + + private fun serializeRole(role: ChatMessageRole): String = when (role) { + is ChatMessageRole.User -> "user" + is ChatMessageRole.Assistant -> "assistant" + is ChatMessageRole.ToolCall -> "tool:${role.name}" + is ChatMessageRole.SessionDivider -> "divider" + } + + private fun deserializeRole(s: String): ChatMessageRole = when { + s == "user" -> ChatMessageRole.User + s == "assistant" -> ChatMessageRole.Assistant + s == "divider" -> ChatMessageRole.SessionDivider + s.startsWith("tool:") -> ChatMessageRole.ToolCall(s.removePrefix("tool:")) + else -> ChatMessageRole.Assistant + } + + private fun serializeStatus(status: ChatMessageStatus): String = when (status) { + is ChatMessageStatus.Streaming -> "streaming" + is ChatMessageStatus.Complete -> "complete" + is 
ChatMessageStatus.Error -> "error:${status.message}" + } + + private fun deserializeStatus(s: String): ChatMessageStatus = when { + s == "complete" -> ChatMessageStatus.Complete + s == "streaming" -> ChatMessageStatus.Complete // treat stale streaming as complete + s.startsWith("error:") -> ChatMessageStatus.Error(s.removePrefix("error:")) + else -> ChatMessageStatus.Complete + } +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index e7d3842b..00f9e0c8 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -16,6 +16,7 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawEv import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallRouter import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallStatus import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult +import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatHistoryStore import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPhoto import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.PhotoCaptureStore import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager @@ -44,7 +45,9 @@ data class GeminiUiState( class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { - private val _uiState = MutableStateFlow(GeminiUiState()) + private val _uiState = MutableStateFlow(GeminiUiState( + messages = ChatHistoryStore.load(app) + )) val uiState: StateFlow = _uiState.asStateFlow() 
private val _captureEvent = MutableStateFlow(null) @@ -198,6 +201,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } finalizeCurrentBubbles() _uiState.value = _uiState.value.copy(userTranscript = "") + persistMessages() } geminiService.onInputTranscription = input@{ text -> @@ -442,6 +446,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { // Keep message history, just reset session state _uiState.value = GeminiUiState(messages = _uiState.value.messages) + persistMessages() lastUserOriginalInstruction = null latestFrameForToolCall = null micStateBeforeExecution = null @@ -495,6 +500,10 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { _uiState.value = _uiState.value.copy(errorMessage = null) } + private fun persistMessages() { + ChatHistoryStore.save(getApplication(), _uiState.value.messages) + } + // Chat message helpers private fun updateUserBubble(text: String) { From d1e5905fed2fc433ffc90135e8871fb29ea80ef4 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:00:42 -0600 Subject: [PATCH 32/68] Revert to BLOCKING execute tool to fix duplicate response display (iOS + Android) --- .../CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift | 2 +- .../CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift | 2 +- .../cameraaccess/openclaw/ToolCallModels.kt | 2 +- .../cameraaccess/openclaw/ToolCallRouter.kt | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index 4130e720..6515bf37 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -120,6 +120,6 @@ enum ToolDeclarations { ], "required": ["task"] ] as [String: Any], - "behavior": "NON_BLOCKING" + "behavior": "BLOCKING" ] } diff --git 
a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 2b9aaeaa..8e09453d 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -105,7 +105,7 @@ class ToolCallRouter { [ "id": callId, "name": name, - "response": result.responseValue.merging(["scheduling": "INTERRUPT"]) { _, new in new } + "response": result.responseValue ] ] ] diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index 7136ce22..e8e93bcc 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -144,7 +144,7 @@ object ToolDeclarations { }) put("required", JSONArray().put("task")) }) - put("behavior", "NON_BLOCKING") + put("behavior", "BLOCKING") } } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 885ddbc9..926ced99 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -147,9 +147,7 @@ class ToolCallRouter( JSONObject().apply { put("id", callId) put("name", name) - put("response", result.toJSON().apply { - put("scheduling", "INTERRUPT") - }) + 
put("response", result.toJSON()) } ) ) From b7fd5a2267a9ec8eafc313f5cc8973315e1d2d83 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:08:35 -0600 Subject: [PATCH 33/68] Fix background execution: indefinite WakeLock + WiFi lock for Gemini session (Android) - WakeLock no longer times out after 10 minutes (held for entire session) - WiFi lock prevents WiFi from sleeping when screen is off - Both released when streaming service stops - Added WIFI_STATE permissions to manifest --- .../app/src/main/AndroidManifest.xml | 2 ++ .../cameraaccess/stream/StreamingService.kt | 31 +++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml b/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml index 4e02e322..10a7c062 100644 --- a/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml +++ b/samples/CameraAccessAndroid/app/src/main/AndroidManifest.xml @@ -14,6 +14,8 @@ + + Date: Fri, 27 Mar 2026 00:32:07 -0600 Subject: [PATCH 34/68] Start foreground service when Gemini session starts to survive screen lock (Android) --- .../cameraaccess/gemini/GeminiSessionViewModel.kt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 00f9e0c8..85e20549 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -21,6 +21,7 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.CapturedPho import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.gallery.PhotoCaptureStore import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingMode +import com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.StreamingService import kotlinx.coroutines.Job import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableStateFlow @@ -160,6 +161,9 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { currentMessages.add(ChatMessage(role = ChatMessageRole.SessionDivider, text = "")) } + // Start foreground service to keep alive when screen is locked + StreamingService.start(getApplication()) + // Start with mic enabled by default _uiState.value = _uiState.value.copy(isGeminiActive = true, isMicEnabled = true, messages = currentMessages) audioManager.setMicEnabled(true) @@ -424,6 +428,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { fun stopSession() { RemoteLogger.log("session:end") + StreamingService.stop(getApplication()) userStopped = true reconnectJob?.cancel() reconnectJob = null From f10d08ac3e58c148798eebf891b9d82ccb1b6fd1 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:40:02 -0600 Subject: [PATCH 35/68] Show Camera/Chat tab switcher always, not just when Gemini is active (Android) --- .../externalsampleapps/cameraaccess/ui/StreamScreen.kt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index fea13fe6..71ddf731 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -231,10 +231,9 @@ fun StreamScreen( } } - // Tab switcher (only when Gemini is active) - if (geminiUiState.isGeminiActive) { - Spacer(modifier = Modifier.width(8.dp)) - SingleChoiceSegmentedButtonRow { + // Tab switcher (always visible for chat history access) + Spacer(modifier = Modifier.width(8.dp)) + SingleChoiceSegmentedButtonRow { tabOptions.forEachIndexed { index, label -> SegmentedButton( shape = SegmentedButtonDefaults.itemShape(index = index, count = tabOptions.size), @@ -245,7 +244,6 @@ fun StreamScreen( } } } - } } Spacer(modifier = Modifier.height(8.dp)) From 2c6556d8025f29e18666420ed8e6867f71aca81a Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:43:10 -0600 Subject: [PATCH 36/68] Add swipeable Camera/Chat pager - swipe anywhere on screen to switch tabs (Android) --- .../cameraaccess/ui/StreamScreen.kt | 77 +++++++++++-------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 71ddf731..d7fbb0f1 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -39,10 +39,13 @@ import androidx.compose.runtime.Composable import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.getValue -import androidx.compose.runtime.mutableIntStateOf +import androidx.compose.foundation.pager.HorizontalPager +import androidx.compose.foundation.pager.rememberPagerState import 
androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember +import androidx.compose.runtime.rememberCoroutineScope import androidx.compose.runtime.setValue +import kotlinx.coroutines.launch import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.Color @@ -111,12 +114,13 @@ fun StreamScreen( var videoStreamingEnabled by remember { mutableStateOf(SettingsManager.videoStreamingEnabled) } val tabOptions = listOf("Camera", "Chat") - var selectedTab by remember { mutableIntStateOf(0) } + val pagerState = rememberPagerState(pageCount = { 2 }) + val coroutineScope = rememberCoroutineScope() // Auto-switch to chat tab when Gemini starts in audio-only mode LaunchedEffect(geminiUiState.isGeminiActive) { if (geminiUiState.isGeminiActive && !SettingsManager.videoStreamingEnabled) { - selectedTab = 1 + pagerState.animateScrollToPage(1) } } @@ -163,35 +167,42 @@ fun StreamScreen( } Box(modifier = modifier.fillMaxSize()) { - if (selectedTab == 0) { - // --- Camera tab --- - streamUiState.videoFrame?.let { videoFrame -> - Image( - bitmap = videoFrame.asImageBitmap(), - contentDescription = stringResource(R.string.live_stream), - modifier = Modifier.fillMaxSize(), - contentScale = ContentScale.Crop, - ) - } + HorizontalPager( + state = pagerState, + modifier = Modifier.fillMaxSize(), + ) { page -> + Box(modifier = Modifier.fillMaxSize()) { + if (page == 0) { + // --- Camera tab --- + streamUiState.videoFrame?.let { videoFrame -> + Image( + bitmap = videoFrame.asImageBitmap(), + contentDescription = stringResource(R.string.live_stream), + modifier = Modifier.fillMaxSize(), + contentScale = ContentScale.Crop, + ) + } - if (streamUiState.videoFrame == null && !videoStreamingEnabled) { - Text( - text = "Audio-only mode\nAll video streaming is off.", - modifier = Modifier.align(Alignment.Center), - ) - } + if (streamUiState.videoFrame == null && !videoStreamingEnabled) { + Text( + text = "Audio-only mode\nAll 
video streaming is off.", + modifier = Modifier.align(Alignment.Center), + ) + } - if (streamUiState.streamSessionState == StreamSessionState.STARTING) { - CircularProgressIndicator( - modifier = Modifier.align(Alignment.Center), - ) + if (streamUiState.streamSessionState == StreamSessionState.STARTING) { + CircularProgressIndicator( + modifier = Modifier.align(Alignment.Center), + ) + } + } else { + // --- Chat tab --- + ChatTranscriptView( + messages = geminiUiState.messages, + modifier = Modifier.padding(top = 100.dp, bottom = 80.dp), + ) + } } - } else { - // --- Chat tab --- - ChatTranscriptView( - messages = geminiUiState.messages, - modifier = Modifier.padding(top = 100.dp, bottom = 80.dp), - ) } // Overlays + controls @@ -237,8 +248,8 @@ fun StreamScreen( tabOptions.forEachIndexed { index, label -> SegmentedButton( shape = SegmentedButtonDefaults.itemShape(index = index, count = tabOptions.size), - onClick = { selectedTab = index }, - selected = selectedTab == index, + onClick = { coroutineScope.launch { pagerState.animateScrollToPage(index) } }, + selected = pagerState.currentPage == index, ) { Text(label) } @@ -249,12 +260,12 @@ fun StreamScreen( Spacer(modifier = Modifier.height(8.dp)) // Gemini overlay (camera tab only) - if (geminiUiState.isGeminiActive && selectedTab == 0) { + if (geminiUiState.isGeminiActive && pagerState.currentPage == 0) { GeminiOverlay(uiState = geminiUiState) } // WebRTC overlay - if (webrtcUiState.isActive && selectedTab == 0) { + if (webrtcUiState.isActive && pagerState.currentPage == 0) { Spacer(modifier = Modifier.height(4.dp)) WebRTCOverlay(uiState = webrtcUiState) } From 8bf122ea1693edca937bc46285dc06f46b6ffe3c Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:45:12 -0600 Subject: [PATCH 37/68] Add swipeable Camera/Chat tabs always visible on iOS (matching Android) --- .../CameraAccess/Views/StreamView.swift | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git 
a/samples/CameraAccess/CameraAccess/Views/StreamView.swift b/samples/CameraAccess/CameraAccess/Views/StreamView.swift index 0028c892..26517bda 100644 --- a/samples/CameraAccess/CameraAccess/Views/StreamView.swift +++ b/samples/CameraAccess/CameraAccess/Views/StreamView.swift @@ -37,15 +37,18 @@ struct StreamView: View { Color.black .edgesIgnoringSafeArea(.all) - if selectedTab == .camera { + TabView(selection: $selectedTab) { // --- Camera tab --- cameraContent - } else { + .tag(StreamTab.camera) + // --- Chat tab --- ChatTranscriptView(geminiVM: geminiVM) .padding(.top, 60) .padding(.bottom, 80) + .tag(StreamTab.chat) } + .tabViewStyle(.page(indexDisplayMode: .never)) // Top bar VStack { @@ -62,15 +65,13 @@ struct StreamView: View { .background(Color.black.opacity(0.5)) .clipShape(Circle()) } - if geminiVM.isGeminiActive { - Picker("", selection: $selectedTab) { - ForEach(StreamTab.allCases, id: \.self) { tab in - Text(tab.rawValue).tag(tab) - } + Picker("", selection: $selectedTab) { + ForEach(StreamTab.allCases, id: \.self) { tab in + Text(tab.rawValue).tag(tab) } - .pickerStyle(.segmented) - .frame(width: 140) } + .pickerStyle(.segmented) + .frame(width: 140) } Spacer() } From a9eb3269638e12250854c98d85df42bb7570f924 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:47:38 -0600 Subject: [PATCH 38/68] Move debug menu from floating button to Settings screen (Android) --- .../cameraaccess/ui/CameraAccessScaffold.kt | 22 ++++++------------- .../cameraaccess/ui/SettingsScreen.kt | 9 ++++++++ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt index e8e611f9..ac09c736 100644 --- 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt @@ -84,6 +84,7 @@ fun CameraAccessScaffold( uiState.isSettingsVisible -> SettingsScreen( onBack = { viewModel.hideSettings() }, + onDebugMenu = if (BuildConfig.DEBUG) {{ viewModel.showDebugMenu() }} else null, ) uiState.isStreaming -> StreamScreen( @@ -126,22 +127,13 @@ fun CameraAccessScaffold( }, ) - if (BuildConfig.DEBUG) { - FloatingActionButton( - onClick = { viewModel.showDebugMenu() }, - modifier = Modifier.align(Alignment.CenterEnd), + if (BuildConfig.DEBUG && uiState.isDebugMenuVisible) { + ModalBottomSheet( + onDismissRequest = { viewModel.hideDebugMenu() }, + sheetState = bottomSheetState, + modifier = Modifier.fillMaxSize(), ) { - Icon(Icons.Default.BugReport, contentDescription = "Debug Menu") - } - - if (uiState.isDebugMenuVisible) { - ModalBottomSheet( - onDismissRequest = { viewModel.hideDebugMenu() }, - sheetState = bottomSheetState, - modifier = Modifier.fillMaxSize(), - ) { - MockDeviceKitScreen(modifier = Modifier.fillMaxSize()) - } + MockDeviceKitScreen(modifier = Modifier.fillMaxSize()) } } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index fb5aeeb4..b41173a6 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -42,6 +42,7 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsMa @Composable fun SettingsScreen( onBack: () -> Unit, + 
onDebugMenu: (() -> Unit)? = null, modifier: Modifier = Modifier, ) { var geminiAPIKey by remember { mutableStateOf(SettingsManager.geminiAPIKey) } @@ -207,6 +208,14 @@ fun SettingsScreen( ) } + // Debug menu (only in debug builds) + onDebugMenu?.let { onDebug -> + SectionHeader("Developer") + TextButton(onClick = onDebug) { + Text("Mock Device Kit") + } + } + // Reset TextButton(onClick = { showResetDialog = true }) { Text("Reset to Defaults", color = Color.Red) From 0b9d1e771b76729c449e05fd86cf9e6aa3e94c44 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:51:23 -0600 Subject: [PATCH 39/68] Remove Live streaming button from controls row (Android) --- .../cameraaccess/ui/ControlsRow.kt | 19 ------------------- .../cameraaccess/ui/StreamScreen.kt | 8 -------- 2 files changed, 27 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt index 9e3456de..beb82c44 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt @@ -12,7 +12,6 @@ import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.AutoAwesome import androidx.compose.material.icons.filled.Mic import androidx.compose.material.icons.filled.MicOff -import androidx.compose.material.icons.filled.Videocam import androidx.compose.material3.Button import androidx.compose.material3.ButtonDefaults import androidx.compose.material3.Icon @@ -30,8 +29,6 @@ fun ControlsRow( isAIActive: Boolean, onToggleMic: () -> Unit, isMicEnabled: Boolean, - onToggleLive: () -> Unit, - isLiveActive: Boolean, modifier: Modifier = Modifier, ) { Row( @@ -91,21 +88,5 @@ fun 
ControlsRow( ) } - // Live toggle button - Button( - onClick = onToggleLive, - modifier = Modifier.aspectRatio(1f), - colors = ButtonDefaults.buttonColors( - containerColor = if (isLiveActive) AppColor.Red else AppColor.DeepBlue, - ), - shape = CircleShape, - contentPadding = PaddingValues(0.dp), - ) { - Icon( - imageVector = Icons.Default.Videocam, - contentDescription = if (isLiveActive) "Stop Live" else "Start Live", - tint = Color.White, - ) - } } } \ No newline at end of file diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index d7fbb0f1..314a6084 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -290,14 +290,6 @@ fun StreamScreen( isAIActive = geminiUiState.isGeminiActive, onToggleMic = { geminiViewModel.toggleMic() }, isMicEnabled = geminiUiState.isMicEnabled, - onToggleLive = { - if (webrtcUiState.isActive) { - webrtcViewModel.stopSession() - } else { - webrtcViewModel.startSession() - } - }, - isLiveActive = webrtcUiState.isActive, modifier = Modifier.align(Alignment.BottomCenter), ) } From cfdb81e273d89a64992a39a13d9780e4070d61d8 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 00:58:11 -0600 Subject: [PATCH 40/68] Center tab switcher, move gallery to top right, remove capture + live buttons (Android) --- .../cameraaccess/ui/ControlsRow.kt | 5 --- .../cameraaccess/ui/StreamScreen.kt | 35 ++++++++++--------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt index beb82c44..1ff02090 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/ControlsRow.kt @@ -24,7 +24,6 @@ import androidx.compose.ui.unit.dp @Composable fun ControlsRow( onStopStream: () -> Unit, - onCapturePhoto: () -> Unit, onToggleAI: () -> Unit, isAIActive: Boolean, onToggleMic: () -> Unit, @@ -46,10 +45,6 @@ fun ControlsRow( modifier = Modifier.weight(1f), ) - CaptureButton( - onClick = onCapturePhoto, - ) - // AI toggle button Button( onClick = onToggleAI, diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 314a6084..5456048d 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -225,8 +225,24 @@ fun StreamScreen( modifier = Modifier.widthIn(min = 160.dp), ) - // Gallery button - Spacer(modifier = Modifier.width(4.dp)) + Spacer(modifier = Modifier.weight(1f)) + + // Tab switcher (centered) + SingleChoiceSegmentedButtonRow { + tabOptions.forEachIndexed { index, label -> + SegmentedButton( + shape = SegmentedButtonDefaults.itemShape(index = index, count = tabOptions.size), + onClick = { coroutineScope.launch { pagerState.animateScrollToPage(index) } }, + selected = pagerState.currentPage == index, + ) { + Text(label) + } + } + } + + Spacer(modifier = Modifier.weight(1f)) + + // Gallery button (top right) Surface( shape = CircleShape, color = 
Color.Black.copy(alpha = 0.5f), @@ -241,20 +257,6 @@ fun StreamScreen( ) } } - - // Tab switcher (always visible for chat history access) - Spacer(modifier = Modifier.width(8.dp)) - SingleChoiceSegmentedButtonRow { - tabOptions.forEachIndexed { index, label -> - SegmentedButton( - shape = SegmentedButtonDefaults.itemShape(index = index, count = tabOptions.size), - onClick = { coroutineScope.launch { pagerState.animateScrollToPage(index) } }, - selected = pagerState.currentPage == index, - ) { - Text(label) - } - } - } } Spacer(modifier = Modifier.height(8.dp)) @@ -279,7 +281,6 @@ fun StreamScreen( streamViewModel.stopStream() wearablesViewModel.navigateToDeviceSelection() }, - onCapturePhoto = { streamViewModel.capturePhoto() }, onToggleAI = { if (geminiUiState.isGeminiActive) { geminiViewModel.stopSession() From 6bef18d1f31e923080bceab574363c3f61b3ae81 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 01:05:13 -0600 Subject: [PATCH 41/68] Replace audio-only text chip with compact video toggle icon (Android) --- .../cameraaccess/ui/StreamScreen.kt | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 5456048d..2a0e67bb 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -26,6 +26,8 @@ import androidx.compose.foundation.layout.statusBarsPadding import androidx.compose.foundation.shape.CircleShape import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.PhotoLibrary +import androidx.compose.material.icons.filled.Videocam +import 
androidx.compose.material.icons.filled.VideocamOff import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.FilterChip import androidx.compose.material3.Icon @@ -210,20 +212,24 @@ fun StreamScreen( // Top overlays (below status bar) Column(modifier = Modifier.align(Alignment.TopStart).statusBarsPadding().padding(top = 8.dp)) { Row(verticalAlignment = Alignment.CenterVertically) { - FilterChip( - selected = !videoStreamingEnabled, - onClick = { + Surface( + shape = CircleShape, + color = Color.Black.copy(alpha = 0.5f), + modifier = Modifier.size(36.dp) + ) { + IconButton(onClick = { val newEnabled = !videoStreamingEnabled videoStreamingEnabled = newEnabled streamViewModel.setVideoStreamingEnabled(newEnabled, lifecycleOwner) - }, - label = { - Text( - if (videoStreamingEnabled) "Switch to audio-only" else "Enable video" + }) { + Icon( + imageVector = if (videoStreamingEnabled) Icons.Default.Videocam else Icons.Default.VideocamOff, + contentDescription = if (videoStreamingEnabled) "Switch to audio-only" else "Enable video", + tint = Color.White, + modifier = Modifier.size(18.dp) ) - }, - modifier = Modifier.widthIn(min = 160.dp), - ) + } + } Spacer(modifier = Modifier.weight(1f)) From 45643c554455e2ac07176baaeb48ff0c1adc98e2 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 11:51:47 -0600 Subject: [PATCH 42/68] Always connect OpenClaw event client for image sending, not just when notifications enabled (iOS + Android) --- .../Gemini/GeminiSessionViewModel.swift | 6 ++++-- .../gemini/GeminiSessionViewModel.kt | 18 +++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 4ab674fe..1cea3e3a 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ 
b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -227,7 +227,7 @@ class GeminiSessionViewModel: ObservableObject { return } - // Connect to OpenClaw event stream for proactive notifications + // Always connect event client — needed for image sending via chat.send if SettingsManager.shared.proactiveNotificationsEnabled { eventClient.onNotification = { [weak self] text in guard let self else { return } @@ -236,8 +236,10 @@ class GeminiSessionViewModel: ObservableObject { self.geminiService.sendTextMessage(text) } } - eventClient.connect() + } else { + eventClient.onNotification = nil } + eventClient.connect() } func stopSession() { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 85e20549..d54a11c1 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -459,16 +459,16 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } private fun syncProactiveNotifications() { - if (!SettingsManager.proactiveNotificationsEnabled) { - eventClient.disconnect() - return - } - - eventClient.onNotification = { text -> - val state = _uiState.value - if (state.isGeminiActive && state.connectionState == GeminiConnectionState.Ready) { - geminiService.sendTextMessage(text) + // Always connect event client — needed for image sending via chat.send + if (SettingsManager.proactiveNotificationsEnabled) { + eventClient.onNotification = { text -> + val state = _uiState.value + if (state.isGeminiActive && state.connectionState == GeminiConnectionState.Ready) { + 
geminiService.sendTextMessage(text) + } } + } else { + eventClient.onNotification = null } eventClient.connect() } From b1b98be56175cc288d7e052a8a4c44b061ed007c Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 11:57:41 -0600 Subject: [PATCH 43/68] Connect event client earlier at session start, not after Gemini connects (Android) --- .../cameraaccess/gemini/GeminiSessionViewModel.kt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index d54a11c1..12bf949c 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -241,6 +241,9 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { openClawBridge.resetSession() openClawBridge.eventClient = eventClient + // Connect event client early — needed for image sending via chat.send + syncProactiveNotifications() + toolCallRouter = ToolCallRouter( bridge = openClawBridge, scope = viewModelScope, @@ -339,7 +342,6 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { audioManager.startCapture() audioManager.setMicEnabled(_uiState.value.isMicEnabled) _uiState.value = _uiState.value.copy(errorMessage = null) - syncProactiveNotifications() } catch (e: Exception) { _uiState.value = _uiState.value.copy( errorMessage = "Mic capture failed: ${e.message}" From c6eaf38370ccb3ac3041a2447e5b26743ad80586 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 12:40:03 -0600 Subject: [PATCH 44/68] Add operator.admin scope to WebSocket connect 
handshake to fix chat.send permission (iOS + Android) --- .../CameraAccess/OpenClaw/OpenClawEventClient.swift | 3 ++- .../cameraaccess/openclaw/OpenClawEventClient.kt | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift index 4202c9da..4db2f272 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift @@ -149,7 +149,8 @@ class OpenClawEventClient { ], "auth": [ "token": GeminiConfig.openClawGatewayToken - ] + ], + "scopes": ["operator.admin"] ] as [String: Any] ] diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 7db0e49f..3d775914 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -193,6 +193,9 @@ class OpenClawEventClient { put("auth", JSONObject().apply { put("token", GeminiConfig.openClawGatewayToken) }) + put("scopes", JSONArray().apply { + put("operator.admin") + }) }) } webSocket?.send(connectMsg.toString()) From 2a69e76a635f1f53ab1e76d5a190fe2b0ac78299 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 12:49:09 -0600 Subject: [PATCH 45/68] Override Host header to localhost on WebSocket to fix scope issue through SSH tunnel (iOS + Android) --- .../CameraAccess/OpenClaw/OpenClawEventClient.swift | 4 +++- .../cameraaccess/openclaw/OpenClawEventClient.kt | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git 
a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift index 4db2f272..e8defb89 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawEventClient.swift @@ -53,7 +53,9 @@ class OpenClawEventClient { let config = URLSessionConfiguration.default config.timeoutIntervalForRequest = 30 session = URLSession(configuration: config) - webSocketTask = session?.webSocketTask(with: url) + var request = URLRequest(url: url) + request.setValue("localhost:\(port)", forHTTPHeaderField: "Host") + webSocketTask = session?.webSocketTask(with: request) webSocketTask?.resume() NSLog("[OpenClawWS] Connecting to %@", url.absoluteString) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 3d775914..8b14bfb8 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -70,7 +70,10 @@ class OpenClawEventClient { Log.d(TAG, "Connecting to $url") - val request = Request.Builder().url(url).build() + val request = Request.Builder() + .url(url) + .header("Host", "localhost:${GeminiConfig.openClawPort}") + .build() webSocket = client.newWebSocket(request, object : WebSocketListener() { override fun onOpen(webSocket: WebSocket, response: Response) { Log.d(TAG, "WebSocket opened") From dce3f1928cb46502d377b6615edcec68939ab1f4 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 14:17:11 -0600 Subject: [PATCH 46/68] Save image to Mac filesystem alongside 
chat.send for agent file access (iOS + Android) - upload_server.py: tiny HTTP server saves JPEGs to ~/.openclaw/media/visionclaw/ - OpenClawBridge uploads JPEG to upload server (port = gateway port + 3) - File path appended to task text as [image_file_path] so agent can read/copy/save - Agent can both SEE the image (via chat.send attachment) AND access the file on disk --- .../OpenClaw/OpenClawBridge.swift | 38 ++++++++++- samples/CameraAccess/scripts/upload_server.py | 65 +++++++++++++++++++ .../cameraaccess/openclaw/OpenClawBridge.kt | 44 ++++++++++++- 3 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 samples/CameraAccess/scripts/upload_server.py diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift index 0e64a91e..47e650df 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift @@ -154,17 +154,51 @@ class OpenClawBridge: ObservableObject { } } + /// Upload JPEG to the upload server so the agent can access the file on disk. + private func uploadImageFile(_ imageBase64: String) -> String? { + let uploadPort = GeminiConfig.openClawPort + 3 + guard let url = URL(string: "\(GeminiConfig.openClawHost):\(uploadPort)/upload") else { return nil } + guard let jpegData = Data(base64Encoded: imageBase64) else { return nil } + + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.setValue("image/jpeg", forHTTPHeaderField: "Content-Type") + request.httpBody = jpegData + request.timeoutInterval = 10 + + let semaphore = DispatchSemaphore(value: 0) + var filePath: String? + + let task = URLSession.shared.dataTask(with: request) { data, response, _ in + if let data, + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let path = json["path"] as? 
String { + filePath = path + NSLog("[OpenClaw] Image uploaded to: %@", path) + } + semaphore.signal() + } + task.resume() + semaphore.wait() + return filePath + } + /// Send a task with image via WebSocket chat.send RPC. + /// Also uploads the image file to disk so the agent can access it. private func sendViaWebSocket( eventClient: OpenClawEventClient, task: String, imageBase64: String, toolName: String ) async -> ToolResult { - await withCheckedContinuation { continuation in + // Upload image to disk so agent can read/copy/save the file + let filePath = uploadImageFile(imageBase64) + let taskWithPath = filePath != nil ? "\(task)\n\n[image_file_path]\n\(filePath!)" : task + + return await withCheckedContinuation { continuation in eventClient.sendChatMessage( sessionKey: sessionKey, - message: task, + message: taskWithPath, imageBase64: imageBase64 ) { [weak self] reply in guard let self else { diff --git a/samples/CameraAccess/scripts/upload_server.py b/samples/CameraAccess/scripts/upload_server.py new file mode 100644 index 00000000..d07bdd24 --- /dev/null +++ b/samples/CameraAccess/scripts/upload_server.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +Tiny image upload server for VisionClaw. +Accepts JPEG uploads and saves them to ~/.openclaw/media/visionclaw/. +Returns the file path so the agent can read/copy/upload the file. 
+ +Usage: python3 upload_server.py [port] +Default port: 18792 +""" + +import os +import sys +import json +import time +from http.server import HTTPServer, BaseHTTPRequestHandler +from pathlib import Path + +SAVE_DIR = Path.home() / ".openclaw" / "media" / "visionclaw" +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 18792 + +SAVE_DIR.mkdir(parents=True, exist_ok=True) + +class UploadHandler(BaseHTTPRequestHandler): + def do_POST(self): + if self.path != "/upload": + self.send_response(404) + self.end_headers() + return + + content_length = int(self.headers.get("Content-Length", 0)) + if content_length == 0 or content_length > 10 * 1024 * 1024: # 10MB max + self.send_response(400) + self.end_headers() + self.wfile.write(b'{"error":"invalid size"}') + return + + body = self.rfile.read(content_length) + filename = f"frame-{int(time.time() * 1000)}.jpg" + filepath = SAVE_DIR / filename + filepath.write_bytes(body) + + response = json.dumps({"path": str(filepath), "size": len(body)}) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(response.encode()) + print(f"Saved: {filepath} ({len(body)} bytes)") + + def do_GET(self): + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(b'{"status":"ok","service":"visionclaw-upload"}') + + def log_message(self, format, *args): + pass # suppress default logs + +if __name__ == "__main__": + server = HTTPServer(("0.0.0.0", PORT), UploadHandler) + print(f"VisionClaw upload server listening on port {PORT}") + print(f"Saving to: {SAVE_DIR}") + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopped") diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index 
0935de53..cc32c160 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -271,9 +271,43 @@ class OpenClawBridge { } } + /** + * Upload JPEG to the upload server so the agent can access the file on disk. + * Returns the saved file path, or null if upload fails. + */ + private fun uploadImageFile(imageBase64: String): String? { + val uploadPort = GeminiConfig.openClawPort + 3 // upload server runs on gateway port + 3 + val host = GeminiConfig.openClawHost.trimEnd('/') + val url = "$host:$uploadPort/upload" + return try { + val jpegBytes = android.util.Base64.decode(imageBase64, android.util.Base64.NO_WRAP) + val request = Request.Builder() + .url(url) + .post(jpegBytes.toRequestBody("image/jpeg".toMediaType())) + .build() + val response = pingClient.newCall(request).execute() + val body = response.body?.string() ?: "" + response.close() + if (response.code in 200..299) { + val json = JSONObject(body) + val path = json.optString("path", "") + if (path.isNotEmpty()) { + Log.d(TAG, "Image uploaded to: $path") + path + } else null + } else { + Log.w(TAG, "Image upload HTTP ${response.code}") + null + } + } catch (e: Exception) { + Log.w(TAG, "Image upload failed: ${e.message}") + null + } + } + /** * Send a task with image via WebSocket chat.send RPC. - * This is the only method that reliably passes images to the OpenClaw agent. + * Also uploads the image file to disk so the agent can access it. 
*/ private suspend fun sendViaWebSocket( eventClient: OpenClawEventClient, @@ -281,9 +315,15 @@ class OpenClawBridge { imageBase64: String, toolName: String ): ToolResult = suspendCancellableCoroutine { continuation -> + // Upload image to disk so agent can read/copy/save the file + val filePath = uploadImageFile(imageBase64) + val taskWithPath = if (filePath != null) { + "$task\n\n[image_file_path]\n$filePath" + } else task + eventClient.sendChatMessage( sessionKey = sessionKey, - message = task, + message = taskWithPath, imageBase64 = imageBase64 ) { reply -> if (reply != null) { From be4a5fe68fff9ca6aefe5804e5fcafcfb48ed889 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 14:21:18 -0600 Subject: [PATCH 47/68] Fix upload port offset to +6 to avoid OpenClaw internal port conflict --- samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift | 2 +- .../externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift index 47e650df..67697a0f 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift @@ -156,7 +156,7 @@ class OpenClawBridge: ObservableObject { /// Upload JPEG to the upload server so the agent can access the file on disk. private func uploadImageFile(_ imageBase64: String) -> String? 
{ - let uploadPort = GeminiConfig.openClawPort + 3 + let uploadPort = GeminiConfig.openClawPort + 6 // upload server runs on gateway port + 6 guard let url = URL(string: "\(GeminiConfig.openClawHost):\(uploadPort)/upload") else { return nil } guard let jpegData = Data(base64Encoded: imageBase64) else { return nil } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index cc32c160..3d236117 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -276,7 +276,7 @@ class OpenClawBridge { * Returns the saved file path, or null if upload fails. */ private fun uploadImageFile(imageBase64: String): String? 
{ - val uploadPort = GeminiConfig.openClawPort + 3 // upload server runs on gateway port + 3 + val uploadPort = GeminiConfig.openClawPort + 6 // upload server runs on gateway port + 6 val host = GeminiConfig.openClawHost.trimEnd('/') val url = "$host:$uploadPort/upload" return try { From 70105004f3b34bd2342dbffa614cd5e8f28ee2c3 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 14:40:47 -0600 Subject: [PATCH 48/68] Finalize AI bubble after tool response so post-tool text goes into new bubble (iOS + Android) --- .../CameraAccess/Gemini/GeminiSessionViewModel.swift | 2 ++ .../cameraaccess/gemini/GeminiSessionViewModel.kt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index 1cea3e3a..a0dc3833 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -160,6 +160,8 @@ class GeminiSessionViewModel: ObservableObject { self.messages[idx].status = .complete } RemoteLogger.shared.log("voice:tool_result", data: ["tool": call.name, "result": String(response.prefix(500))]) + // Reset active bubbles so post-tool AI text goes into a new bubble + self.finalizeCurrentBubbles() self.geminiService.sendToolResponse(response) } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 12bf949c..f521a40c 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ 
-297,6 +297,8 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { if (it.id == toolMsg.id) it.copy(text = "Done", status = ChatMessageStatus.Complete) else it } _uiState.value = _uiState.value.copy(messages = updated) + // Reset active bubbles so post-tool AI text goes into a new bubble + finalizeCurrentBubbles() geminiService.sendToolResponse(response) } } From 76bbcea342689768f17e5d5340e2414cb27489c8 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 14:43:56 -0600 Subject: [PATCH 49/68] Switch execute to NON_BLOCKING with INTERRUPT scheduling for async agent responses (iOS + Android) --- .../CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift | 2 +- .../CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift | 2 +- .../cameraaccess/openclaw/ToolCallModels.kt | 2 +- .../cameraaccess/openclaw/ToolCallRouter.kt | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift index 6515bf37..4130e720 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallModels.swift @@ -120,6 +120,6 @@ enum ToolDeclarations { ], "required": ["task"] ] as [String: Any], - "behavior": "BLOCKING" + "behavior": "NON_BLOCKING" ] } diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift index 8e09453d..2b9aaeaa 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/ToolCallRouter.swift @@ -105,7 +105,7 @@ class ToolCallRouter { [ "id": callId, "name": name, - "response": result.responseValue + "response": result.responseValue.merging(["scheduling": "INTERRUPT"]) { _, new in new } ] ] ] diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index e8e93bcc..7136ce22 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -144,7 +144,7 @@ object ToolDeclarations { }) put("required", JSONArray().put("task")) }) - put("behavior", "BLOCKING") + put("behavior", "NON_BLOCKING") } } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 926ced99..885ddbc9 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -147,7 +147,9 @@ class ToolCallRouter( JSONObject().apply { put("id", callId) put("name", name) - put("response", result.toJSON()) + put("response", result.toJSON().apply { + put("scheduling", "INTERRUPT") + }) } ) ) From 2f0fbba559f180a11a3acc44051a4355bc32e24b Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 14:46:18 -0600 Subject: [PATCH 50/68] Keep mic and transcription active during NON_BLOCKING tool execution (Android) --- .../cameraaccess/gemini/GeminiSessionViewModel.kt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index f521a40c..a4baca17 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -95,6 +95,9 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } private fun syncMicWithToolExecution(status: ToolCallStatus) { + // With NON_BLOCKING execute, keep mic on so user can keep talking + return + val executing = isToolExecuting(status) if (executing) { @@ -178,8 +181,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } audioManager.onAudioCaptured = lambda@{ data -> - // execute 중에는 mic 입력을 Gemini로 보내지 않음 - if (isToolExecuting(_uiState.value.toolCallStatus)) return@lambda + // NON_BLOCKING: keep sending audio during tool execution // streamingMode == PHONE 일때 모델이 말하는동안에는 입력을 막음(기존 로직) if (streamingMode == StreamingMode.PHONE && geminiService.isModelSpeaking.value) return@lambda @@ -209,7 +211,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { } geminiService.onInputTranscription = input@{ text -> - if (isToolExecuting(_uiState.value.toolCallStatus)) return@input + // NON_BLOCKING: keep accepting input during tool execution val newTranscript = _uiState.value.userTranscript + text lastUserOriginalInstruction = newTranscript From 5f78a5f4596fd8defa61dc6c96492fbb9e78fdee Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 15:45:48 -0600 Subject: [PATCH 51/68] Upload photo to Mac on capture_photo so agent can access file for Drive/save tasks (iOS + Android) --- 
.../Gemini/GeminiSessionViewModel.swift | 12 +++++++++++- .../CameraAccess/OpenClaw/OpenClawBridge.swift | 2 +- .../gemini/GeminiSessionViewModel.kt | 17 ++++++++++++++++- .../cameraaccess/openclaw/OpenClawBridge.kt | 2 ++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift index a0dc3833..b5b9d1e4 100644 --- a/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift +++ b/samples/CameraAccess/CameraAccess/Gemini/GeminiSessionViewModel.swift @@ -127,7 +127,17 @@ class GeminiSessionViewModel: ObservableObject { } if let photo = self.photoCaptureStore.saveFrame(frame, description: description) { self.lastCapturedPhoto = photo - completion(.success("Photo captured and saved: \(photo.filename)")) + // Also upload to Mac so agent can access the file + if let jpegData = frame.jpegData(compressionQuality: 0.9) { + let base64 = jpegData.base64EncodedString() + if let macPath = self.openClawBridge.uploadImageFile(base64) { + completion(.success("Photo captured and saved: \(photo.filename)\nAlso saved on Mac at: \(macPath)")) + } else { + completion(.success("Photo captured and saved: \(photo.filename)")) + } + } else { + completion(.success("Photo captured and saved: \(photo.filename)")) + } } else { completion(.failure("Failed to save photo")) } diff --git a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift index 67697a0f..befca4c1 100644 --- a/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift +++ b/samples/CameraAccess/CameraAccess/OpenClaw/OpenClawBridge.swift @@ -155,7 +155,7 @@ class OpenClawBridge: ObservableObject { } /// Upload JPEG to the upload server so the agent can access the file on disk. - private func uploadImageFile(_ imageBase64: String) -> String? 
{ + func uploadImageFile(_ imageBase64: String) -> String? { let uploadPort = GeminiConfig.openClawPort + 6 // upload server runs on gateway port + 6 guard let url = URL(string: "\(GeminiConfig.openClawHost):\(uploadPort)/upload") else { return nil } guard let jpegData = Data(base64Encoded: imageBase64) else { return nil } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index a4baca17..e50e9d2a 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -260,7 +260,22 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { val photo = PhotoCaptureStore.saveFrame(getApplication(), frame, description) if (photo != null) { _captureEvent.value = photo - completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) + // Also upload to Mac so agent can access the file + viewModelScope.launch { + try { + val baos = java.io.ByteArrayOutputStream() + frame.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, baos) + val base64 = android.util.Base64.encodeToString(baos.toByteArray(), android.util.Base64.NO_WRAP) + val macPath = openClawBridge.uploadImageFilePublic(base64) + if (macPath != null) { + completion(ToolResult.Success("Photo captured and saved: ${photo.filename}\nAlso saved on Mac at: $macPath")) + } else { + completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) + } + } catch (e: Exception) { + completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) + } + } } else { completion(ToolResult.Failure("Failed to save photo")) } diff 
--git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index 3d236117..55c462b6 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -275,6 +275,8 @@ class OpenClawBridge { * Upload JPEG to the upload server so the agent can access the file on disk. * Returns the saved file path, or null if upload fails. */ + fun uploadImageFilePublic(imageBase64: String): String? = uploadImageFile(imageBase64) + private fun uploadImageFile(imageBase64: String): String? { val uploadPort = GeminiConfig.openClawPort + 6 // upload server runs on gateway port + 6 val host = GeminiConfig.openClawHost.trimEnd('/') From 283466eb6d2a36c4600863ec266950ab4a899541 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Fri, 27 Mar 2026 15:49:23 -0600 Subject: [PATCH 52/68] Fix capture_photo upload race condition by running on IO dispatcher --- .../gemini/GeminiSessionViewModel.kt | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index e50e9d2a..70b67f4b 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -261,18 +261,16 @@ class 
GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { if (photo != null) { _captureEvent.value = photo // Also upload to Mac so agent can access the file - viewModelScope.launch { - try { + viewModelScope.launch(kotlinx.coroutines.Dispatchers.IO) { + val macPath = try { val baos = java.io.ByteArrayOutputStream() frame.compress(android.graphics.Bitmap.CompressFormat.JPEG, 90, baos) val base64 = android.util.Base64.encodeToString(baos.toByteArray(), android.util.Base64.NO_WRAP) - val macPath = openClawBridge.uploadImageFilePublic(base64) - if (macPath != null) { - completion(ToolResult.Success("Photo captured and saved: ${photo.filename}\nAlso saved on Mac at: $macPath")) - } else { - completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) - } - } catch (e: Exception) { + openClawBridge.uploadImageFilePublic(base64) + } catch (e: Exception) { null } + if (macPath != null) { + completion(ToolResult.Success("Photo captured and saved: ${photo.filename}\nAlso saved on Mac at: $macPath")) + } else { completion(ToolResult.Success("Photo captured and saved: ${photo.filename}")) } } From 727c2bb1ee2c0670bb486c44fee47c9f433c1837 Mon Sep 17 00:00:00 2001 From: "Xiaoan (Sean) Liu" Date: Sat, 28 Mar 2026 13:42:16 -0600 Subject: [PATCH 53/68] 1 --- .../cameraaccess/chat/ChatHistoryStore.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt index c9c04741..40a37e5a 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/chat/ChatHistoryStore.kt @@ -37,12 +37,16 @@ object ChatHistoryStore { 
val messages = mutableListOf() for (i in 0 until json.length()) { val obj = json.getJSONObject(i) + val rawStatus = obj.optString("status", "complete") + val text = obj.optString("text", "") + // Fix stale "Executing..." messages from interrupted sessions + val fixedText = if (rawStatus == "streaming" && text == "Executing...") "Cancelled" else text messages.add(ChatMessage( id = obj.getString("id"), role = deserializeRole(obj.getString("role")), - text = obj.optString("text", ""), + text = fixedText, timestamp = obj.getLong("timestamp"), - status = deserializeStatus(obj.optString("status", "complete")), + status = deserializeStatus(rawStatus), )) } Log.d(TAG, "Loaded ${messages.size} messages") From 9e7667f3c8a8fbd2b781a0da30c642dbcd1a6b49 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 09:42:10 +0900 Subject: [PATCH 54/68] Update Android OpenClaw websocket protocol --- .../cameraaccess/openclaw/OpenClawEventClient.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 8b14bfb8..234c10e0 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -184,8 +184,8 @@ class OpenClawEventClient { put("id", UUID.randomUUID().toString()) put("method", "connect") put("params", JSONObject().apply { - put("minProtocol", 3) - put("maxProtocol", 3) + put("minProtocol", 4) + put("maxProtocol", 4) put("client", JSONObject().apply { put("id", "gateway-client") put("displayName", "VisionClaw Glass") From aa648b8b3412801259b694db09821500d551d5c4 Mon Sep 17 
00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 09:51:31 +0900 Subject: [PATCH 55/68] Add Android demo speaker mode --- .../cameraaccess/gemini/AudioManager.kt | 106 ++++++++++++------ .../cameraaccess/settings/SettingsManager.kt | 4 + .../cameraaccess/ui/SettingsScreen.kt | 25 +++++ 3 files changed, 102 insertions(+), 33 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt index 5cc36b7e..715d9575 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt @@ -10,6 +10,7 @@ import android.media.AudioTrack import android.media.MediaRecorder import android.os.Build import android.util.Log +import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import java.io.ByteArrayOutputStream class AudioManager(private val appContext: Context) { @@ -59,40 +60,49 @@ class AudioManager(private val appContext: Context) { if (isCapturing) return val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + val demoSpeakerMode = SettingsManager.demoSpeakerModeEnabled - // ✅ BT 마이크가 있으면 그걸 우선 사용, 없으면 폰 마이크로 폴백 - preferredBtDevice = findBluetoothInputDeviceOrNull() - - if (preferredBtDevice != null) { - // 통화 모드로 전환 (SCO 입력 안정화에 도움) - sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION + if (demoSpeakerMode) { + sysAm.mode = android.media.AudioManager.MODE_NORMAL + commDeviceSet = false + scoStarted = false + preferredBtDevice = null + Log.d(TAG, "Demo speaker mode enabled -> use phone mic/media speaker route") + } else { + // ✅ BT 마이크가 있으면 그걸 우선 사용, 없으면 폰 마이크로 폴백 + preferredBtDevice = 
findBluetoothInputDeviceOrNull() + + if (preferredBtDevice != null) { + // 통화 모드로 전환 (SCO 입력 안정화에 도움) + sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION + + // Android 12+ : communication device 선택 시도 (실패해도 폴백 가능) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + try { + commDeviceSet = sysAm.setCommunicationDevice(preferredBtDevice!!) + Log.d(TAG, "setCommunicationDevice(BT) = $commDeviceSet, dev=${preferredBtDevice?.productName}") + } catch (t: Throwable) { + commDeviceSet = false + Log.w(TAG, "setCommunicationDevice failed: ${t.message}") + } + } - // Android 12+ : communication device 선택 시도 (실패해도 폴백 가능) - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + // 구형/일부 기기 fallback: SCO 시작 (BT 없으면 시작하지 않음) try { - commDeviceSet = sysAm.setCommunicationDevice(preferredBtDevice!!) - Log.d(TAG, "setCommunicationDevice(BT) = $commDeviceSet, dev=${preferredBtDevice?.productName}") + sysAm.startBluetoothSco() + sysAm.isBluetoothScoOn = true + scoStarted = true + Log.d(TAG, "Bluetooth SCO started") } catch (t: Throwable) { - commDeviceSet = false - Log.w(TAG, "setCommunicationDevice failed: ${t.message}") + scoStarted = false + Log.w(TAG, "startBluetoothSco failed: ${t.message}") } - } - - // 구형/일부 기기 fallback: SCO 시작 (BT 없으면 시작하지 않음) - try { - sysAm.startBluetoothSco() - sysAm.isBluetoothScoOn = true - scoStarted = true - Log.d(TAG, "Bluetooth SCO started") - } catch (t: Throwable) { + } else { + // ✅ BT가 없으면 강제 라우팅/모드 변경 안 함 (그냥 폰 마이크) + commDeviceSet = false scoStarted = false - Log.w(TAG, "startBluetoothSco failed: ${t.message}") + Log.d(TAG, "No BT mic -> fallback to phone mic") } - } else { - // ✅ BT가 없으면 강제 라우팅/모드 변경 안 함 (그냥 폰 마이크) - commDeviceSet = false - scoStarted = false - Log.d(TAG, "No BT mic -> fallback to phone mic") } val bufferSize = AudioRecord.getMinBufferSize( @@ -102,17 +112,18 @@ class AudioManager(private val appContext: Context) { ) audioRecord = AudioRecord( - MediaRecorder.AudioSource.VOICE_COMMUNICATION, + if 
(demoSpeakerMode) MediaRecorder.AudioSource.MIC else MediaRecorder.AudioSource.VOICE_COMMUNICATION, GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, bufferSize ) - preferredBtDevice?.let { dev -> + val preferredInputDevice = if (demoSpeakerMode) findBuiltInMicOrNull() else preferredBtDevice + preferredInputDevice?.let { dev -> try { val ok = audioRecord?.setPreferredDevice(dev) == true - Log.d(TAG, "AudioRecord.setPreferredDevice(BT) ok=$ok dev=${dev.productName}") + Log.d(TAG, "AudioRecord.setPreferredDevice ok=$ok dev=${dev.productName}") } catch (t: Throwable) { Log.w(TAG, "setPreferredDevice failed: ${t.message}") } @@ -124,7 +135,13 @@ class AudioManager(private val appContext: Context) { audioTrack = AudioTrack.Builder() .setAudioAttributes( AudioAttributes.Builder() - .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION) + .setUsage( + if (demoSpeakerMode) { + AudioAttributes.USAGE_MEDIA + } else { + AudioAttributes.USAGE_VOICE_COMMUNICATION + } + ) .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) .build() ) @@ -145,6 +162,17 @@ class AudioManager(private val appContext: Context) { ) .build() + if (demoSpeakerMode) { + findBuiltInSpeakerOrNull()?.let { dev -> + try { + val ok = audioTrack?.setPreferredDevice(dev) == true + Log.d(TAG, "AudioTrack.setPreferredDevice(speaker) ok=$ok dev=${dev.productName}") + } catch (t: Throwable) { + Log.w(TAG, "setPreferredDevice(speaker) failed: ${t.message}") + } + } + } + audioRecord?.startRecording() audioTrack?.play() isCapturing = true @@ -186,7 +214,7 @@ class AudioManager(private val appContext: Context) { "audio-capture" ).also { it.start() } - Log.d(TAG, "Audio capture started (16kHz mono PCM16)") + Log.d(TAG, "Audio capture started (16kHz mono PCM16, demoSpeakerMode=$demoSpeakerMode)") } private fun findBluetoothInputDeviceOrNull(): AudioDeviceInfo? 
{ @@ -206,6 +234,18 @@ class AudioManager(private val appContext: Context) { return null } + private fun findBuiltInMicOrNull(): AudioDeviceInfo? { + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + val inputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_INPUTS) + return inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_MIC } + } + + private fun findBuiltInSpeakerOrNull(): AudioDeviceInfo? { + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + val outputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_OUTPUTS) + return outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_SPEAKER } + } + fun playAudio(data: ByteArray) { if (!isCapturing || data.isEmpty()) return audioTrack?.write(data, 0, data.size) @@ -269,4 +309,4 @@ class AudioManager(private val appContext: Context) { Log.d(TAG, "Audio capture stopped") } -} \ No newline at end of file +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index f73bbe35..e86724e3 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -52,6 +52,10 @@ object SettingsManager { get() = prefs.getBoolean("proactiveNotificationsEnabled", true) set(value) = prefs.edit().putBoolean("proactiveNotificationsEnabled", value).apply() + var demoSpeakerModeEnabled: Boolean + get() = prefs.getBoolean("demoSpeakerModeEnabled", false) + set(value) = prefs.edit().putBoolean("demoSpeakerModeEnabled", value).apply() + fun resetAll() { prefs.edit().clear().apply() } diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index b41173a6..27f6171d 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -54,6 +54,7 @@ fun SettingsScreen( var webrtcSignalingURL by remember { mutableStateOf(SettingsManager.webrtcSignalingURL) } var videoStreamingEnabled by remember { mutableStateOf(SettingsManager.videoStreamingEnabled) } var proactiveNotificationsEnabled by remember { mutableStateOf(SettingsManager.proactiveNotificationsEnabled) } + var demoSpeakerModeEnabled by remember { mutableStateOf(SettingsManager.demoSpeakerModeEnabled) } var showResetDialog by remember { mutableStateOf(false) } fun save() { @@ -66,6 +67,7 @@ fun SettingsScreen( SettingsManager.webrtcSignalingURL = webrtcSignalingURL.trim() SettingsManager.videoStreamingEnabled = videoStreamingEnabled SettingsManager.proactiveNotificationsEnabled = proactiveNotificationsEnabled + SettingsManager.demoSpeakerModeEnabled = demoSpeakerModeEnabled } fun reload() { @@ -78,6 +80,7 @@ fun SettingsScreen( webrtcSignalingURL = SettingsManager.webrtcSignalingURL videoStreamingEnabled = SettingsManager.videoStreamingEnabled proactiveNotificationsEnabled = SettingsManager.proactiveNotificationsEnabled + demoSpeakerModeEnabled = SettingsManager.demoSpeakerModeEnabled } Column(modifier = modifier.fillMaxSize()) { @@ -129,6 +132,28 @@ fun SettingsScreen( ) } + SectionHeader("Audio") + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically, + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Demo speaker 
mode") + Spacer(modifier = Modifier.height(4.dp)) + Text( + "Routes Gemini audio as normal media to the phone speaker so scrcpy can mirror it to the Mac.", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Spacer(modifier = Modifier.width(12.dp)) + Switch( + checked = demoSpeakerModeEnabled, + onCheckedChange = { demoSpeakerModeEnabled = it }, + ) + } + // Gemini section SectionHeader("Gemini API") MonoTextField( From bd8e27e1493525f5ae76f48879c6cc300a356de7 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 10:00:30 +0900 Subject: [PATCH 56/68] Use phone-style voice processing in demo audio mode --- .../cameraaccess/gemini/AudioManager.kt | 68 +++++++++++++++---- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt index 715d9575..c6ae2929 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt @@ -8,6 +8,9 @@ import android.media.AudioFormat import android.media.AudioRecord import android.media.AudioTrack import android.media.MediaRecorder +import android.media.audiofx.AcousticEchoCanceler +import android.media.audiofx.AutomaticGainControl +import android.media.audiofx.NoiseSuppressor import android.os.Build import android.util.Log import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager @@ -23,6 +26,9 @@ class AudioManager(private val appContext: Context) { private var audioRecord: AudioRecord? = null private var audioTrack: AudioTrack? = null + private var echoCanceler: AcousticEchoCanceler? 
= null + private var noiseSuppressor: NoiseSuppressor? = null + private var automaticGainControl: AutomaticGainControl? = null private var captureThread: Thread? = null @Volatile @@ -63,11 +69,11 @@ class AudioManager(private val appContext: Context) { val demoSpeakerMode = SettingsManager.demoSpeakerModeEnabled if (demoSpeakerMode) { - sysAm.mode = android.media.AudioManager.MODE_NORMAL + sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION commDeviceSet = false scoStarted = false preferredBtDevice = null - Log.d(TAG, "Demo speaker mode enabled -> use phone mic/media speaker route") + Log.d(TAG, "Demo speaker mode enabled -> use phone-style communication input without BT SCO") } else { // ✅ BT 마이크가 있으면 그걸 우선 사용, 없으면 폰 마이크로 폴백 preferredBtDevice = findBluetoothInputDeviceOrNull() @@ -112,7 +118,7 @@ class AudioManager(private val appContext: Context) { ) audioRecord = AudioRecord( - if (demoSpeakerMode) MediaRecorder.AudioSource.MIC else MediaRecorder.AudioSource.VOICE_COMMUNICATION, + MediaRecorder.AudioSource.VOICE_COMMUNICATION, GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, @@ -132,6 +138,10 @@ class AudioManager(private val appContext: Context) { val routed = audioRecord?.routedDevice Log.d(TAG, "AudioRecord routedDevice: type=${routed?.type} name=${routed?.productName}") + if (demoSpeakerMode) { + enableVoiceProcessing(audioRecord?.audioSessionId ?: 0) + } + audioTrack = AudioTrack.Builder() .setAudioAttributes( AudioAttributes.Builder() @@ -162,17 +172,6 @@ class AudioManager(private val appContext: Context) { ) .build() - if (demoSpeakerMode) { - findBuiltInSpeakerOrNull()?.let { dev -> - try { - val ok = audioTrack?.setPreferredDevice(dev) == true - Log.d(TAG, "AudioTrack.setPreferredDevice(speaker) ok=$ok dev=${dev.productName}") - } catch (t: Throwable) { - Log.w(TAG, "setPreferredDevice(speaker) failed: ${t.message}") - } - } - } - audioRecord?.startRecording() audioTrack?.play() isCapturing = 
true @@ -246,6 +245,46 @@ class AudioManager(private val appContext: Context) { return outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_SPEAKER } } + private fun enableVoiceProcessing(audioSessionId: Int) { + if (audioSessionId == 0) return + + if (AcousticEchoCanceler.isAvailable()) { + try { + echoCanceler = AcousticEchoCanceler.create(audioSessionId)?.apply { enabled = true } + Log.d(TAG, "AcousticEchoCanceler enabled=${echoCanceler?.enabled}") + } catch (t: Throwable) { + Log.w(TAG, "AcousticEchoCanceler failed: ${t.message}") + } + } + + if (NoiseSuppressor.isAvailable()) { + try { + noiseSuppressor = NoiseSuppressor.create(audioSessionId)?.apply { enabled = true } + Log.d(TAG, "NoiseSuppressor enabled=${noiseSuppressor?.enabled}") + } catch (t: Throwable) { + Log.w(TAG, "NoiseSuppressor failed: ${t.message}") + } + } + + if (AutomaticGainControl.isAvailable()) { + try { + automaticGainControl = AutomaticGainControl.create(audioSessionId)?.apply { enabled = true } + Log.d(TAG, "AutomaticGainControl enabled=${automaticGainControl?.enabled}") + } catch (t: Throwable) { + Log.w(TAG, "AutomaticGainControl failed: ${t.message}") + } + } + } + + private fun releaseVoiceProcessing() { + echoCanceler?.release() + echoCanceler = null + noiseSuppressor?.release() + noiseSuppressor = null + automaticGainControl?.release() + automaticGainControl = null + } + fun playAudio(data: ByteArray) { if (!isCapturing || data.isEmpty()) return audioTrack?.write(data, 0, data.size) @@ -276,6 +315,7 @@ class AudioManager(private val appContext: Context) { } audioRecord?.stop() + releaseVoiceProcessing() audioRecord?.release() audioRecord = null From 30726e48ff0ef3c598f5c35892a95be09664ac95 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:17:35 +0900 Subject: [PATCH 57/68] Simplify Android launcher icon --- .../main/res/drawable/ic_visionclaw_mark.xml | 47 +++++++++++++++++++ .../res/mipmap-anydpi-v26/ic_launcher.xml | 3 +- 
.../app/src/main/res/values/colors.xml | 4 ++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml create mode 100644 samples/CameraAccessAndroid/app/src/main/res/values/colors.xml diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml new file mode 100644 index 00000000..3464da0a --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml index 6ed7537c..510fb4f5 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -1,4 +1,5 @@ - + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml new file mode 100644 index 00000000..6f7c0161 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml @@ -0,0 +1,4 @@ + + + #D8FF3D + From 0102acddb2a82963dd6982462105a82520fb13f2 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:19:58 +0900 Subject: [PATCH 58/68] Refine Android launcher icon contrast --- .../main/res/drawable/ic_visionclaw_mark.xml | 30 ++++++------ .../res/drawable/ic_visionclaw_monochrome.xml | 47 +++++++++++++++++++ .../res/mipmap-anydpi-v26/ic_launcher.xml | 1 + .../app/src/main/res/values/colors.xml | 2 +- 4 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml 
b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 3464da0a..b80be1d7 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -7,41 +7,41 @@ + android:strokeWidth="5" /> + android:strokeWidth="5" /> + android:strokeWidth="5" /> + android:strokeWidth="5" /> + android:strokeWidth="5" /> diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml new file mode 100644 index 00000000..5048ce55 --- /dev/null +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -0,0 +1,47 @@ + + + + + + + + + + + + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml index 510fb4f5..20fc149a 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -2,4 +2,5 @@ + diff --git a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml index 6f7c0161..d64eddc5 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml @@ -1,4 +1,4 @@ - #D8FF3D + #0B0C0F From 5ebbeda32aa94d12b183890c85666bbdcdef5d99 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:21:25 +0900 Subject: [PATCH 59/68] Match launcher icon color to Glyph --- .../app/src/main/res/drawable/ic_visionclaw_mark.xml | 10 +++++----- .../app/src/main/res/values/colors.xml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml 
b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index b80be1d7..5ffe9b41 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -8,7 +8,7 @@ @@ -16,7 +16,7 @@ @@ -24,7 +24,7 @@ @@ -32,7 +32,7 @@ @@ -40,7 +40,7 @@ diff --git a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml index d64eddc5..83d166a8 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml @@ -1,4 +1,4 @@ - #0B0C0F + #F2EAD7 From 9b3c25c5005ec41122d0245a61bed95cf0e6e8c0 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:23:46 +0900 Subject: [PATCH 60/68] Use VisionClaw cover mark for launcher icon --- .../main/res/drawable/ic_visionclaw_mark.xml | 38 +++++-------------- .../res/drawable/ic_visionclaw_monochrome.xml | 36 ++++-------------- .../app/src/main/res/values/colors.xml | 2 +- 3 files changed, 18 insertions(+), 58 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 5ffe9b41..6647e327 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -7,41 +7,21 @@ + android:strokeWidth="7" /> - - + android:fillColor="#F9F6EC" + android:pathData="M61,20 L66,30 L77,31 L69,38 L71,49 L61,43 L51,49 L53,38 L45,31 L56,30 Z" /> + android:fillColor="#F9F6EC" + android:pathData="M82,33 L92,51 L72,51 Z" /> + android:fillColor="#F9F6EC" + android:pathData="M76,62 m-6,0 a6,6 0,1 0,12 0 a6,6 0,1 0,-12 0" /> diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml 
b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index 5048ce55..f78437d4 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -7,41 +7,21 @@ + android:strokeWidth="7" /> - - + android:fillColor="#FFFFFFFF" + android:pathData="M61,20 L66,30 L77,31 L69,38 L71,49 L61,43 L51,49 L53,38 L45,31 L56,30 Z" /> + android:fillColor="#FFFFFFFF" + android:pathData="M82,33 L92,51 L72,51 Z" /> + android:fillColor="#FFFFFFFF" + android:pathData="M76,62 m-6,0 a6,6 0,1 0,12 0 a6,6 0,1 0,-12 0" /> diff --git a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml index 83d166a8..c2484b5c 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml @@ -1,4 +1,4 @@ - #F2EAD7 + #75644B From 8b97cc83c16ee9defd5ffaddcb30742969c46c25 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:26:33 +0900 Subject: [PATCH 61/68] Restore compact glasses launcher icon --- .../main/res/drawable/ic_visionclaw_mark.xml | 46 +++++++++++++++---- .../res/drawable/ic_visionclaw_monochrome.xml | 44 ++++++++++++++---- .../app/src/main/res/values/colors.xml | 2 +- 3 files changed, 74 insertions(+), 18 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 6647e327..a612ed5f 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -5,23 +5,51 @@ android:viewportWidth="108" android:viewportHeight="108"> + + + + + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M49,55 C52,53 56,53 
59,55" + android:strokeColor="#0B0C0F" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M27,55 L21,51" + android:strokeColor="#0B0C0F" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M81,55 L87,51" + android:strokeColor="#0B0C0F" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index f78437d4..75a08923 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -5,23 +5,51 @@ android:viewportWidth="108" android:viewportHeight="108"> + + + + + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M49,55 C52,53 56,53 59,55" + android:strokeColor="#FFFFFFFF" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M27,55 L21,51" + android:strokeColor="#FFFFFFFF" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + android:fillColor="@android:color/transparent" + android:pathData="M81,55 L87,51" + android:strokeColor="#FFFFFFFF" + android:strokeLineCap="round" + android:strokeLineJoin="round" + android:strokeWidth="5" /> + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml index c2484b5c..83d166a8 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml +++ 
b/samples/CameraAccessAndroid/app/src/main/res/values/colors.xml @@ -1,4 +1,4 @@ - #75644B + #F2EAD7 From 39e29ba6cb1d0543666b42e1daccf1f86e237d19 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:28:15 +0900 Subject: [PATCH 62/68] Restore glasses icon size --- .../app/src/main/res/drawable/ic_visionclaw_mark.xml | 8 -------- .../src/main/res/drawable/ic_visionclaw_monochrome.xml | 8 -------- 2 files changed, 16 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index a612ed5f..5ffe9b41 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -5,12 +5,6 @@ android:viewportWidth="108" android:viewportHeight="108"> - - - - diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index 75a08923..5048ce55 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -5,12 +5,6 @@ android:viewportWidth="108" android:viewportHeight="108"> - - - - From dbe7e66811e079ba99c1431ba89c8a8c9ddd61c4 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:29:24 +0900 Subject: [PATCH 63/68] Set launcher glasses icon to medium size --- .../app/src/main/res/drawable/ic_visionclaw_mark.xml | 8 ++++++++ .../src/main/res/drawable/ic_visionclaw_monochrome.xml | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 5ffe9b41..9bce715a 100644 --- 
a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -5,6 +5,12 @@ android:viewportWidth="108" android:viewportHeight="108"> + + + + diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index 5048ce55..a76d281a 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -5,6 +5,12 @@ android:viewportWidth="108" android:viewportHeight="108"> + + + + From 83f0e8d6aba1004e1ab7877b375714c95eeeadda Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:31:50 +0900 Subject: [PATCH 64/68] Use app glasses mark for launcher icon --- .../main/res/drawable/ic_visionclaw_mark.xml | 49 +++---------------- .../res/drawable/ic_visionclaw_monochrome.xml | 49 +++---------------- 2 files changed, 14 insertions(+), 84 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 9bce715a..9d808e32 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -6,50 +6,15 @@ android:viewportHeight="108"> + android:translateX="24" + android:translateY="24" + android:scaleX="2.5" + android:scaleY="2.5"> - - - - - - - - + android:fillColor="#0B0C0F" + android:fillType="evenOdd" + android:pathData="M9.8,8q-2.6,-0.8 -5.4,-0.6l-3.5,0.3a1,1 0,0 0,-0.9 1v1.1q0,0.8 0.7,1l0.9,3.4a4,4 0,0 0,3.1 2.3l0.4,0.1a5,5 0,0 0,5.4 -3.5l0.5,-2a1,1 0,0 1,2 0l0.6,2a5,5 0,0 0,5.3 3.5h0.4c1.3,-0.2 2.6,-1 3.1,-2.4a14,14 0,0 0,1 -3.4q0.5,-0.2 0.6,-1v-1q0,-1 -1,-1l-3.4,-0.4q-2.8,-0.2 -5.4,0.5l-0.4,0.1A7,7 
0,0 1,10 8zM2.8,10.7a1,1 0,0 1,0.9 -1.2h0.8q2,-0.2 3.7,0 1,0.4 0.8,1.4l-0.5,1.6a3,3 0,0 1,-3.1 2.1L5,14.6q-1.1,-0.2 -1.5,-1.2l-0.4,-1zM21.3,10.7a1,1 0,0 0,-1 -1.2h-0.8q-2,-0.2 -3.7,0A1,1 0,0 0,15 11l0.5,1.6a3,3 0,0 0,3.1 2.1h0.4q1.1,-0.2 1.5,-1.2l0.4,-1z" /> diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index a76d281a..efacfd81 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -6,50 +6,15 @@ android:viewportHeight="108"> + android:translateX="24" + android:translateY="24" + android:scaleX="2.5" + android:scaleY="2.5"> - - - - - - - - + android:fillColor="#FFFFFFFF" + android:fillType="evenOdd" + android:pathData="M9.8,8q-2.6,-0.8 -5.4,-0.6l-3.5,0.3a1,1 0,0 0,-0.9 1v1.1q0,0.8 0.7,1l0.9,3.4a4,4 0,0 0,3.1 2.3l0.4,0.1a5,5 0,0 0,5.4 -3.5l0.5,-2a1,1 0,0 1,2 0l0.6,2a5,5 0,0 0,5.3 3.5h0.4c1.3,-0.2 2.6,-1 3.1,-2.4a14,14 0,0 0,1 -3.4q0.5,-0.2 0.6,-1v-1q0,-1 -1,-1l-3.4,-0.4q-2.8,-0.2 -5.4,0.5l-0.4,0.1A7,7 0,0 1,10 8zM2.8,10.7a1,1 0,0 1,0.9 -1.2h0.8q2,-0.2 3.7,0 1,0.4 0.8,1.4l-0.5,1.6a3,3 0,0 1,-3.1 2.1L5,14.6q-1.1,-0.2 -1.5,-1.2l-0.4,-1zM21.3,10.7a1,1 0,0 0,-1 -1.2h-0.8q-2,-0.2 -3.7,0A1,1 0,0 0,15 11l0.5,1.6a3,3 0,0 0,3.1 2.1h0.4q1.1,-0.2 1.5,-1.2l0.4,-1z" /> From 34aef0ca533796a2c476b643a46dcff52b4ebbd3 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Sun, 24 May 2026 11:33:32 +0900 Subject: [PATCH 65/68] Lower launcher glasses mark slightly --- .../app/src/main/res/drawable/ic_visionclaw_mark.xml | 2 +- .../app/src/main/res/drawable/ic_visionclaw_monochrome.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml index 9d808e32..9d88fb81 
100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_mark.xml @@ -7,7 +7,7 @@ diff --git a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml index efacfd81..ae916dc4 100644 --- a/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml +++ b/samples/CameraAccessAndroid/app/src/main/res/drawable/ic_visionclaw_monochrome.xml @@ -7,7 +7,7 @@ From 5437c43af19e5e3186c7a1ed19cf9f7a75b5ef19 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Mon, 25 May 2026 10:11:00 +0900 Subject: [PATCH 66/68] Add OpenClaw progress speech and developer controls --- .../cameraaccess/gemini/AudioManager.kt | 82 ++++- .../cameraaccess/gemini/GeminiConfig.kt | 15 +- .../cameraaccess/gemini/GeminiLiveService.kt | 8 +- .../gemini/GeminiProgressSpeechService.kt | 168 ++++++++++ .../gemini/GeminiSessionViewModel.kt | 86 ++++- .../cameraaccess/openclaw/OpenClawBridge.kt | 134 ++++++-- .../openclaw/OpenClawEventClient.kt | 305 ++++++++++++++++++ .../cameraaccess/openclaw/ToolCallModels.kt | 6 +- .../cameraaccess/openclaw/ToolCallRouter.kt | 48 ++- .../cameraaccess/settings/SettingsManager.kt | 7 + .../cameraaccess/ui/CameraAccessScaffold.kt | 10 + .../cameraaccess/ui/SettingsScreen.kt | 45 +++ 12 files changed, 876 insertions(+), 38 deletions(-) create mode 100644 samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiProgressSpeechService.kt diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt index c6ae2929..283065e4 100644 --- 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt @@ -15,6 +15,8 @@ import android.os.Build import android.util.Log import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import java.io.ByteArrayOutputStream +import java.util.concurrent.Executors +import java.util.concurrent.RejectedExecutionException class AudioManager(private val appContext: Context) { companion object { @@ -30,10 +32,17 @@ class AudioManager(private val appContext: Context) { private var noiseSuppressor: NoiseSuppressor? = null private var automaticGainControl: AutomaticGainControl? = null private var captureThread: Thread? = null + private val playbackExecutor = Executors.newSingleThreadExecutor { runnable -> + Thread(runnable, "audio-playback").apply { isDaemon = true } + } + private val playbackLock = Any() @Volatile private var isCapturing = false + @Volatile + private var playbackGeneration = 0 + @Volatile private var micEnabled = true @@ -142,7 +151,7 @@ class AudioManager(private val appContext: Context) { enableVoiceProcessing(audioRecord?.audioSessionId ?: 0) } - audioTrack = AudioTrack.Builder() + val newAudioTrack = AudioTrack.Builder() .setAudioAttributes( AudioAttributes.Builder() .setUsage( @@ -173,7 +182,15 @@ class AudioManager(private val appContext: Context) { .build() audioRecord?.startRecording() - audioTrack?.play() + synchronized(playbackLock) { + playbackGeneration++ + audioTrack = newAudioTrack + try { + newAudioTrack.play() + } catch (t: Throwable) { + Log.w(TAG, "AudioTrack.play failed: ${t.message}") + } + } isCapturing = true synchronized(accumulateLock) { @@ -287,13 +304,48 @@ class AudioManager(private val appContext: Context) { fun playAudio(data: ByteArray) { if (!isCapturing || data.isEmpty()) return - audioTrack?.write(data, 0, 
data.size) + val generation = playbackGeneration + val chunk = data.copyOf() + try { + playbackExecutor.execute { + if (!isCapturing || generation != playbackGeneration) return@execute + synchronized(playbackLock) { + if (!isCapturing || generation != playbackGeneration) return@synchronized + val track = audioTrack ?: return@synchronized + try { + val written = track.write(chunk, 0, chunk.size) + if (written < 0) { + Log.w(TAG, "AudioTrack.write failed: $written") + } + } catch (t: Throwable) { + Log.w(TAG, "AudioTrack.write threw: ${t.message}") + } + } + } + } catch (t: RejectedExecutionException) { + Log.w(TAG, "Playback executor rejected audio: ${t.message}") + } } fun stopPlayback() { - audioTrack?.pause() - audioTrack?.flush() - audioTrack?.play() + val generation = playbackGeneration + try { + playbackExecutor.execute { + synchronized(playbackLock) { + if (generation != playbackGeneration) return@synchronized + val track = audioTrack ?: return@synchronized + try { + track.pause() + track.flush() + track.play() + } catch (t: Throwable) { + Log.w(TAG, "stopPlayback failed: ${t.message}") + } + } + } + } catch (t: RejectedExecutionException) { + Log.w(TAG, "Playback executor rejected stopPlayback: ${t.message}") + } } fun stopCapture() { @@ -319,9 +371,21 @@ class AudioManager(private val appContext: Context) { audioRecord?.release() audioRecord = null - audioTrack?.stop() - audioTrack?.release() - audioTrack = null + synchronized(playbackLock) { + playbackGeneration++ + val track = audioTrack + audioTrack = null + try { + track?.stop() + } catch (t: Throwable) { + Log.w(TAG, "AudioTrack.stop failed: ${t.message}") + } + try { + track?.release() + } catch (t: Throwable) { + Log.w(TAG, "AudioTrack.release failed: ${t.message}") + } + } val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt index 10ba908e..70a49cc7 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt @@ -16,7 +16,7 @@ object GeminiConfig { const val VIDEO_JPEG_QUALITY = 50 val systemInstruction: String - get() = SettingsManager.geminiSystemPrompt + get() = SettingsManager.geminiSystemPrompt.trimEnd() + TOOL_RESULT_ADDENDUM val apiKey: String get() = SettingsManager.geminiAPIKey @@ -45,4 +45,17 @@ object GeminiConfig { get() = openClawGatewayToken != "YOUR_OPENCLAW_GATEWAY_TOKEN" && openClawGatewayToken.isNotEmpty() && openClawHost != "http://YOUR_MAC_HOSTNAME.local" + + private const val TOOL_RESULT_ADDENDUM = """ + +-------------------------------- +TOOL RESULT HANDLING +-------------------------------- + +When execute returns a result, immediately answer the user using that result. +Do not end the turn with only an acknowledgment like "I'll check" or "確認します" after execute has returned. +If you did not manage to say the acknowledgment before calling execute, do not say it after the result arrives; use the result instead. +Keep the final answer concise and in the user's conversation language. +If the user's utterance contains Japanese, use Japanese for both the pre-tool acknowledgment and the final answer. 
+""" } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt index 715db0a8..28e56bca 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt @@ -209,8 +209,9 @@ class GeminiLiveService { fun sendToolResponse(response: JSONObject) { sendExecutor.execute { - Log.d("GeminiWS", "SEND_TOOL: " + response.toString().take(300)) - webSocket?.send(response.toString()) + val payload = response.toString() + val sent = webSocket?.send(payload) ?: false + Log.d("GeminiWS", "SEND_TOOL sent=$sent bytes=${payload.length}: ${payload.take(300)}") } } @@ -225,6 +226,7 @@ class GeminiLiveService { put("text", text) })) })) + put("turnComplete", true) }) } webSocket?.send(json.toString()) @@ -337,6 +339,7 @@ class GeminiLiveService { val serverContent = json.getJSONObject("serverContent") if (serverContent.optBoolean("interrupted", false)) { + Log.d(TAG, "serverContent interrupted") _isModelSpeaking.value = false onInterrupted?.invoke() return @@ -374,6 +377,7 @@ class GeminiLiveService { } if (serverContent.optBoolean("turnComplete", false)) { + Log.d(TAG, "serverContent turnComplete") _isModelSpeaking.value = false responseLatencyLogged = false onTurnComplete?.invoke() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiProgressSpeechService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiProgressSpeechService.kt new file mode 100644 index 00000000..cea3372b --- /dev/null +++ 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiProgressSpeechService.kt
@@ -0,0 +1,217 @@
+package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini
+
+import android.util.Base64
+import android.util.Log
+import java.util.concurrent.Executors
+import java.util.concurrent.TimeUnit
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.Response
+import okhttp3.WebSocket
+import okhttp3.WebSocketListener
+import okio.ByteString
+import org.json.JSONArray
+import org.json.JSONObject
+
+/**
+ * Secondary Gemini Live WebSocket session that turns short progress hints into speech.
+ *
+ * Connection state is touched from caller threads, OkHttp listener callbacks and
+ * [sendExecutor], so it is only read or written while holding [stateLock].
+ */
+class GeminiProgressSpeechService {
+    companion object {
+        private const val TAG = "GeminiProgressSpeech"
+
+        // Hints are only useful while fresh, so cap the backlog kept while the socket is not ready.
+        private const val MAX_PENDING_PHRASES = 4
+    }
+
+    /** Invoked with each decoded `audio/pcm` chunk received from the model. */
+    var onAudioReceived: ((ByteArray) -> Unit)? = null
+
+    private val client = OkHttpClient.Builder()
+        .readTimeout(0, TimeUnit.MILLISECONDS)
+        .pingInterval(30, TimeUnit.SECONDS)
+        .retryOnConnectionFailure(true)
+        .build()
+    private val sendExecutor = Executors.newSingleThreadExecutor()
+
+    // Guards webSocket, ready and pendingPhrases.
+    private val stateLock = Any()
+    private var webSocket: WebSocket? = null
+    private var ready = false
+    private val pendingPhrases = ArrayDeque<String>()
+
+    /** Opens the socket and sends the setup message; no-op while connected or connecting. */
+    fun connect() {
+        synchronized(stateLock) {
+            if (ready || webSocket != null) return
+            val url = GeminiConfig.websocketURL() ?: return
+            val request = Request.Builder().url(url).build()
+            webSocket = client.newWebSocket(request, object : WebSocketListener() {
+                override fun onOpen(webSocket: WebSocket, response: Response) {
+                    Log.d(TAG, "WebSocket opened")
+                    sendSetup(webSocket)
+                }
+
+                override fun onMessage(webSocket: WebSocket, text: String) {
+                    handleMessage(webSocket, text)
+                }
+
+                override fun onMessage(webSocket: WebSocket, bytes: ByteString) {
+                    handleMessage(webSocket, bytes.utf8())
+                }
+
+                override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
+                    Log.w(TAG, "WebSocket failure: ${t.message}")
+                    clearIfCurrent(webSocket)
+                }
+
+                override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
+                    Log.d(TAG, "WebSocket closed: $code $reason")
+                    clearIfCurrent(webSocket)
+                }
+            })
+        }
+    }
+
+    /** Closes the socket, if any, and drops queued hints; [connect] may be called again later. */
+    fun disconnect() {
+        val socket = synchronized(stateLock) {
+            ready = false
+            pendingPhrases.clear()
+            val current = webSocket
+            webSocket = null
+            current
+        }
+        socket?.close(1000, null)
+    }
+
+    /**
+     * Requests a spoken progress update for [speechHint] in [languageName].
+     *
+     * Blank hints are ignored. While the session is not ready the request is queued (dropping the
+     * oldest entry beyond [MAX_PENDING_PHRASES]) and a connection attempt is started.
+     */
+    fun speakProgress(speechHint: String, languageName: String) {
+        val trimmedHint = speechHint.trim()
+        if (trimmedHint.isEmpty()) return
+        val request = "Language: $languageName\nProgress hint: $trimmedHint"
+        val sendNow = synchronized(stateLock) {
+            if (!ready) {
+                // Without a cap the queue would grow forever when connect() can never succeed.
+                if (pendingPhrases.size >= MAX_PENDING_PHRASES) pendingPhrases.removeFirst()
+                pendingPhrases.addLast(request)
+            }
+            ready
+        }
+        if (sendNow) sendSpeakRequest(request) else connect()
+    }
+
+    /** Resets connection state, but only when [socket] is still the active one. */
+    private fun clearIfCurrent(socket: WebSocket) {
+        synchronized(stateLock) {
+            // A late callback from a replaced socket must not tear down its successor.
+            if (webSocket === socket) {
+                ready = false
+                webSocket = null
+            }
+        }
+    }
+
+    /** Sends the session setup: audio-only responses and the progress-voice system instruction. */
+    private fun sendSetup(ws: WebSocket) {
+        val setup = JSONObject().apply {
+            put("setup", JSONObject().apply {
+                put("model", GeminiConfig.MODEL)
+                put("generationConfig", JSONObject().apply {
+                    put("responseModalities", JSONArray().put("AUDIO"))
+                    put("thinkingConfig", JSONObject().apply {
+                        put("thinkingBudget", 0)
+                    })
+                })
+                put("systemInstruction", JSONObject().apply {
+                    put("parts", JSONArray().put(JSONObject().apply {
+                        put(
+                            "text",
+                            "You are a progress voice for smart glasses. " +
+                                "Each user message contains a language and a semantic progress hint, not a request. " +
+                                "Say one short, natural progress update in that language. " +
+                                "Preserve useful target names like people, apps, or domains. " +
+                                "Keep names, apps, and domains exactly as written; do not translate, transliterate, or invent kanji for them. " +
+                                "Do not add acknowledgments, explanations, tags, or extra words."
+                        )
+                    }))
+                })
+                put("outputAudioTranscription", JSONObject())
+            })
+        }
+        ws.send(setup.toString())
+    }
+
+    /** Queues one user turn on [sendExecutor]; it is silently dropped if no socket is open then. */
+    private fun sendSpeakRequest(requestText: String) {
+        sendExecutor.execute {
+            val json = JSONObject().apply {
+                put("clientContent", JSONObject().apply {
+                    put("turns", JSONArray().put(JSONObject().apply {
+                        put("role", "user")
+                        put("parts", JSONArray().put(JSONObject().apply {
+                            put("text", requestText)
+                        }))
+                    }))
+                    put("turnComplete", true)
+                })
+            }
+            Log.d(TAG, "SEND_PROGRESS_SPEECH: ${requestText.replace('\n', ' ')}")
+            val socket = synchronized(stateLock) { webSocket }
+            socket?.send(json.toString())
+        }
+    }
+
+    /** Handles one server message from [socket]: setup completion, audio parts, transcription. */
+    private fun handleMessage(socket: WebSocket, text: String) {
+        try {
+            val json = JSONObject(text)
+            if (json.has("setupComplete")) {
+                val queued = synchronized(stateLock) {
+                    if (webSocket === socket) {
+                        ready = true
+                        pendingPhrases.toList().also { pendingPhrases.clear() }
+                    } else {
+                        // Stale socket: marking ready here would block future connect() calls.
+                        emptyList<String>()
+                    }
+                }
+                Log.d(TAG, "setupComplete")
+                queued.forEach { sendSpeakRequest(it) }
+                return
+            }
+
+            val serverContent = json.optJSONObject("serverContent") ?: return
+            val modelTurn = serverContent.optJSONObject("modelTurn")
+            val parts = modelTurn?.optJSONArray("parts")
+            if (parts != null) {
+                for (i in 0 until parts.length()) {
+                    val part = parts.optJSONObject(i) ?: continue
+                    val inlineData = part.optJSONObject("inlineData") ?: continue
+                    val mimeType = inlineData.optString("mimeType", "")
+                    if (!mimeType.startsWith("audio/pcm")) continue
+                    val base64Data = inlineData.optString("data", "")
+                    if (base64Data.isNotEmpty()) {
+                        onAudioReceived?.invoke(Base64.decode(base64Data, Base64.DEFAULT))
+                    }
+                }
+            }
+
+            val transcription = serverContent.optJSONObject("outputTranscription")
+            val transcriptText = transcription?.optString("text", "").orEmpty()
+            if (transcriptText.isNotEmpty()) {
+                Log.d(TAG, "Progress voice: $transcriptText")
+            }
+        } catch (e: Exception) {
+            Log.w(TAG, "Parse error: ${e.message}")
+        }
+    }
+}
diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt index 70b67f4b..7acd0def 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiSessionViewModel.kt @@ -3,6 +3,7 @@ package com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini import android.app.Application import android.graphics.Bitmap +import android.util.Log import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.viewModelScope import com.meta.wearable.dat.externalsampleapps.cameraaccess.chat.ChatMessage @@ -13,6 +14,8 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.net.NetworkTypeMoni import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawBridge import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawConnectionState import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawEventClient +import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawProgress +import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.OpenClawProgressKind import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallRouter import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolCallStatus import com.meta.wearable.dat.externalsampleapps.cameraaccess.openclaw.ToolResult @@ -55,6 +58,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { val captureEvent: StateFlow = _captureEvent.asStateFlow() private val geminiService = GeminiLiveService() + private val progressSpeechService = GeminiProgressSpeechService() private val openClawBridge = OpenClawBridge() private val eventClient = OpenClawEventClient() private var toolCallRouter: ToolCallRouter? 
= null @@ -85,6 +89,8 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { private var activeAIBubbleId: String? = null private var lastUserText: String = "" private var lastAIText: String = "" + private var lastSpokenProgressKind: OpenClawProgressKind? = null + private var lastSpokenProgressAtMs: Long = 0 // execute 시작 시 mic 상태를 저장해뒀다가 끝나면 복원 private var micStateBeforeExecution: Boolean? = null @@ -126,7 +132,6 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { fun toggleMic() { if (!_uiState.value.isGeminiActive) return - if (isToolExecuting(_uiState.value.toolCallStatus)) return val newEnabled = !_uiState.value.isMicEnabled _uiState.value = _uiState.value.copy(isMicEnabled = newEnabled) @@ -135,7 +140,6 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { fun setMicEnabled(enabled: Boolean) { if (!_uiState.value.isGeminiActive) return - if (isToolExecuting(_uiState.value.toolCallStatus)) return _uiState.value = _uiState.value.copy(isMicEnabled = enabled) audioManager.setMicEnabled(enabled) @@ -157,6 +161,8 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { reconnectJob = null micStateBeforeExecution = null micAutoMutedForExecution = false + lastSpokenProgressKind = null + lastSpokenProgressAtMs = 0 // Insert session divider if there are previous messages val currentMessages = _uiState.value.messages.toMutableList() @@ -192,6 +198,9 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { geminiService.onAudioReceived = { data -> audioManager.playAudio(data) } + progressSpeechService.onAudioReceived = { data -> + audioManager.playAudio(data) + } geminiService.onInterrupted = { audioManager.stopPlayback() @@ -223,7 +232,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { updateUserBubble(newTranscript) } - geminiService.onOutputTranscription = { text -> + geminiService.onOutputTranscription = output@{ text -> val newAI = 
_uiState.value.aiTranscript + text _uiState.value = _uiState.value.copy(aiTranscript = newAI) updateAIBubble(newAI) @@ -237,6 +246,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { scheduleReconnect(reason) } } + progressSpeechService.connect() viewModelScope.launch { openClawBridge.checkConnection() @@ -295,6 +305,11 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { for (call in toolCall.functionCalls) { val taskDesc = (call.args["task"] as? String) ?: "" RemoteLogger.log("voice:tool_call", mapOf("tool" to call.name, "task" to taskDesc)) + if (call.name == "execute") { + lastSpokenProgressKind = null + lastSpokenProgressAtMs = 0 + eventClient.resetProgressState() + } finalizeCurrentBubbles() val toolMsg = ChatMessage( @@ -459,6 +474,7 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { openClawBridge.cancelInFlight("user stopSession") audioManager.stopCapture() + progressSpeechService.disconnect() geminiService.disconnect() stateObservationJob?.cancel() @@ -479,6 +495,10 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { private fun syncProactiveNotifications() { // Always connect event client — needed for image sending via chat.send + eventClient.onProgress = { progress -> + openClawBridge.setToolCallProgress(progress.displayText) + maybeSpeakProgress(progress) + } if (SettingsManager.proactiveNotificationsEnabled) { eventClient.onNotification = { text -> val state = _uiState.value @@ -492,6 +512,36 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { eventClient.connect() } + private fun maybeSpeakProgress(progress: OpenClawProgress) { + val status = openClawBridge.lastToolCallStatus.value + if (status !is ToolCallStatus.Executing) { + Log.d("GeminiProgress", "Skip speech: not executing kind=${progress.kind} tool=${progress.toolName}") + return + } + + val now = System.currentTimeMillis() + if (lastSpokenProgressAtMs != 0L && now - 
lastSpokenProgressAtMs < 8_000) { + Log.d("GeminiProgress", "Skip speech: throttle kind=${progress.kind} tool=${progress.toolName}") + return + } + + lastSpokenProgressKind = progress.kind + lastSpokenProgressAtMs = now + val languageName = progressLanguageName() + Log.d("GeminiProgress", "Send speech hint: ${progress.speechHint} language=$languageName kind=${progress.kind} tool=${progress.toolName}") + progressSpeechService.speakProgress(progress.speechHint, languageName) + } + + private fun progressLanguageName(): String { + return if (containsJapanese(lastUserOriginalInstruction.orEmpty())) "Japanese" else "English" + } + + private fun containsJapanese(text: String): Boolean { + return text.any { ch -> + (ch in '\u3040'..'\u30ff') || (ch in '\u3400'..'\u9fff') + } + } + fun sendVideoFrameIfThrottled(bitmap: Bitmap) { if (!SettingsManager.videoStreamingEnabled) return if (!_uiState.value.isGeminiActive) return @@ -520,6 +570,36 @@ class GeminiSessionViewModel(app: Application) : AndroidViewModel(app) { lastVideoFrameTime = 0 } + suspend fun runOpenClawDeveloperCommand(command: String): String { + val result = openClawBridge.sendSessionCommand(command) + return when (result) { + is ToolResult.Success -> { + if (command.trim() == "/new") { + finalizeCurrentBubbles() + val msgs = _uiState.value.messages.toMutableList() + if (msgs.isNotEmpty()) { + msgs.add(ChatMessage(role = ChatMessageRole.SessionDivider, text = "")) + } + _uiState.value = _uiState.value.copy( + userTranscript = "", + aiTranscript = "", + messages = msgs, + ) + persistMessages() + lastUserOriginalInstruction = null + lastSpokenProgressKind = null + lastSpokenProgressAtMs = 0 + } + result.result.ifBlank { "OpenClaw command completed." 
} + } + is ToolResult.Failure -> { + val message = result.error + _uiState.value = _uiState.value.copy(errorMessage = message) + message + } + } + } + fun clearError() { _uiState.value = _uiState.value.copy(errorMessage = null) } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt index 55c462b6..b540bbd4 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawBridge.kt @@ -40,13 +40,20 @@ class OpenClawBridge { MutableStateFlow(OpenClawConnectionState.NotConfigured) val connectionState: StateFlow = _connectionState.asStateFlow() - /** Set by GeminiSessionViewModel so we can send image tasks via WebSocket */ + /** Set by GeminiSessionViewModel so we can send tasks via WebSocket chat.send */ var eventClient: OpenClawEventClient? 
= null fun setToolCallStatus(status: ToolCallStatus) { _lastToolCallStatus.value = status } + fun setToolCallProgress(progressText: String) { + val current = _lastToolCallStatus.value + if (current is ToolCallStatus.Executing) { + _lastToolCallStatus.value = current.copy(progressText = progressText) + } + } + private val client = OkHttpClient.Builder() .connectTimeout(15, TimeUnit.SECONDS) .readTimeout(300, TimeUnit.SECONDS) @@ -86,6 +93,7 @@ class OpenClawBridge { .get() .addHeader("Authorization", "Bearer ${GeminiConfig.openClawGatewayToken}") .addHeader("x-openclaw-message-channel", "glass") + .addHeader("x-openclaw-scopes", "operator.write") .build() val response = pingClient.newCall(request).execute() @@ -109,6 +117,85 @@ class OpenClawBridge { Log.d(TAG, "Session reset (key retained: $sessionKey)") } + suspend fun sendSessionCommand(command: String): ToolResult = withContext(Dispatchers.IO) { + val normalized = command.trim() + if (normalized != "/new" && normalized != "/compact") { + return@withContext ToolResult.Failure("Unsupported OpenClaw command: $normalized") + } + + _lastToolCallStatus.value = ToolCallStatus.Executing("OpenClaw", "Sending $normalized") + + val ec = eventClient + if (ec != null) { + val wsResult = sendViaWebSocket(ec, normalized, imageBase64 = null, toolName = "OpenClaw") + if (wsResult is ToolResult.Success) { + if (normalized == "/new") resetSession() + return@withContext wsResult + } + _lastToolCallStatus.value = ToolCallStatus.Executing("OpenClaw", "Sending $normalized") + } + + if (!GeminiConfig.isOpenClawConfigured) { + _lastToolCallStatus.value = ToolCallStatus.Failed("OpenClaw", "Not configured") + return@withContext ToolResult.Failure("OpenClaw is not configured") + } + + val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" + val messagesArray = JSONArray().put(JSONObject().apply { + put("role", "user") + put("content", normalized) + }) + val body = JSONObject().apply { + put("model", 
"openclaw") + put("messages", messagesArray) + put("stream", false) + } + val request = Request.Builder() + .url(url) + .post(body.toString().toRequestBody("application/json".toMediaType())) + .addHeader("Authorization", "Bearer ${GeminiConfig.openClawGatewayToken}") + .addHeader("Content-Type", "application/json") + .addHeader("x-openclaw-session-key", sessionKey) + .addHeader("x-openclaw-message-channel", "glass") + .addHeader("x-openclaw-scopes", "operator.write") + .build() + + val call = client.newCall(request) + inFlightCallRef.set(call) + try { + val response = call.execute() + val responseBody = response.body?.string() ?: "" + val statusCode = response.code + response.close() + + if (statusCode !in 200..299) { + _lastToolCallStatus.value = ToolCallStatus.Failed("OpenClaw", "HTTP $statusCode") + return@withContext ToolResult.Failure("OpenClaw command returned HTTP $statusCode") + } + + if (normalized == "/new") resetSession() + + val content = try { + JSONObject(responseBody).optJSONArray("choices") + ?.optJSONObject(0) + ?.optJSONObject("message") + ?.optString("content", "") + ?.takeIf { it.isNotBlank() } + } catch (_: Exception) { + responseBody.takeIf { it.isNotBlank() } + } + + _lastToolCallStatus.value = ToolCallStatus.Completed("OpenClaw") + ToolResult.Success(content ?: "OpenClaw command completed.") + } catch (e: Exception) { + Log.e(TAG, "OpenClaw command error: ${e::class.java.name}: ${e.message}") + _lastToolCallStatus.value = ToolCallStatus.Failed("OpenClaw", e.message ?: "Unknown") + ToolResult.Failure("OpenClaw command failed: ${e.message}") + } finally { + inFlightCallRef.compareAndSet(call, null) + } + } + /** * Upload JPEG bytes to OpenClaw media upload API (write-only port 18081). * Returns a read-only URL on port 18080. @@ -180,17 +267,20 @@ class OpenClawBridge { toolName: String = "execute", imageBase64: String? 
= null ): ToolResult = withContext(Dispatchers.IO) { - _lastToolCallStatus.value = ToolCallStatus.Executing(toolName) - - // If image is provided, route through WebSocket chat.send (only working method) - if (imageBase64 != null) { - val ec = eventClient - if (ec == null) { - Log.w(TAG, "Image task but no event client, falling back to text-only HTTP") - } else { - Log.d(TAG, "Sending image task via WebSocket chat.send (${imageBase64.length / 1024} KB)") - return@withContext sendViaWebSocket(ec, task, imageBase64, toolName) + _lastToolCallStatus.value = ToolCallStatus.Executing(toolName, "OpenClaw is working") + + val ec = eventClient + if (ec != null) { + val imageSize = imageBase64?.let { "${it.length / 1024} KB" } ?: "none" + Log.d(TAG, "Sending task via WebSocket chat.send (image=$imageSize)") + val wsResult = sendViaWebSocket(ec, task, imageBase64, toolName) + if (wsResult is ToolResult.Success || imageBase64 != null) { + return@withContext wsResult } + Log.w(TAG, "WebSocket chat.send failed for text task; falling back to HTTP") + _lastToolCallStatus.value = ToolCallStatus.Executing(toolName, "OpenClaw is working") + } else if (imageBase64 != null) { + Log.w(TAG, "Image task but no event client, falling back to text-only HTTP") } val url = "${GeminiConfig.openClawHost}:${GeminiConfig.openClawPort}/v1/chat/completions" @@ -224,6 +314,7 @@ class OpenClawBridge { .addHeader("Content-Type", "application/json") .addHeader("x-openclaw-session-key", sessionKey) .addHeader("x-openclaw-message-channel", "glass") + .addHeader("x-openclaw-scopes", "operator.write") .build() val call = client.newCall(request) @@ -308,20 +399,25 @@ class OpenClawBridge { } /** - * Send a task with image via WebSocket chat.send RPC. - * Also uploads the image file to disk so the agent can access it. + * Send a task via WebSocket chat.send RPC. + * Also uploads the image file to disk when present so the agent can access it. 
*/ private suspend fun sendViaWebSocket( eventClient: OpenClawEventClient, task: String, - imageBase64: String, + imageBase64: String?, toolName: String ): ToolResult = suspendCancellableCoroutine { continuation -> - // Upload image to disk so agent can read/copy/save the file - val filePath = uploadImageFile(imageBase64) - val taskWithPath = if (filePath != null) { - "$task\n\n[image_file_path]\n$filePath" - } else task + val taskWithPath = if (imageBase64 != null) { + val filePath = uploadImageFile(imageBase64) + if (filePath != null) { + "$task\n\n[image_file_path]\n$filePath" + } else { + task + } + } else { + task + } eventClient.sendChatMessage( sessionKey = sessionKey, diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt index 234c10e0..ec542b06 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/OpenClawEventClient.kt @@ -4,6 +4,7 @@ import android.os.Handler import android.os.Looper import android.util.Log import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiConfig +import java.net.URI import java.util.UUID import java.util.concurrent.TimeUnit import okhttp3.OkHttpClient @@ -14,6 +15,31 @@ import okhttp3.WebSocketListener import org.json.JSONArray import org.json.JSONObject +enum class OpenClawProgressKind(val displayText: String) { + Memory("Searching memory"), + Calendar("Checking calendar"), + SlackLookup("Checking Slack recipient"), + SlackSend("Sending Slack message"), + Slack("Checking Slack"), + Email("Checking email"), + Browser("Using browser"), + Web("Searching web"), + File("Reading files"), + 
Tool("Running tool") +} + +data class OpenClawProgress( + val kind: OpenClawProgressKind, + val toolName: String, + val phase: String, + val detail: String, + val speechHint: String, + val stableKey: String +) { + val displayText: String + get() = speechHint.replaceFirstChar { it.uppercase() } +} + class OpenClawEventClient { companion object { private const val TAG = "OpenClawEventClient" @@ -21,12 +47,15 @@ class OpenClawEventClient { } var onNotification: ((String) -> Unit)? = null + var onProgress: ((OpenClawProgress) -> Unit)? = null private var webSocket: WebSocket? = null private var isConnected = false private var shouldReconnect = false private var reconnectDelayMs = 2_000L private val handler = Handler(Looper.getMainLooper()) + private var lastProgressText: String? = null + private var lastProgressAtMs: Long = 0 // Pending RPC responses keyed by request ID private val pendingResponses = mutableMapOf Unit>() @@ -39,6 +68,11 @@ class OpenClawEventClient { .pingInterval(10, TimeUnit.SECONDS) .build() + fun resetProgressState() { + lastProgressText = null + lastProgressAtMs = 0 + } + fun connect() { if (!GeminiConfig.isOpenClawConfigured) { Log.d(TAG, "Not configured, skipping") @@ -116,6 +150,7 @@ class OpenClawEventClient { Log.d(TAG, "Connected and authenticated") isConnected = true reconnectDelayMs = 2_000L + subscribeSessionEvents() } else { val error = json.optJSONObject("error") val msg = error?.optString("message", "unknown") ?: "unknown" @@ -138,6 +173,250 @@ class OpenClawEventClient { "heartbeat" -> handleHeartbeatEvent(payload) "cron" -> handleCronEvent(payload) "chat" -> handleChatEvent(payload) + "agent", "session.tool" -> handleAgentProgressEvent(event, payload) + } + } + + private fun handleAgentProgressEvent(event: String, payload: JSONObject) { + val data = payload.optJSONObject("data") ?: payload + val stream = payload.optString("stream", if (event == "session.tool") "tool" else "") + val phase = data.optString("phase", 
data.optString("status", "")) + + if (phase != "start" && phase != "update") { + Log.d(TAG, "Progress skip: phase=$phase event=$event") + return + } + + val name = data.optString("name", data.optString("title", "")) + val argsText = data.opt("args")?.toString() + ?: data.opt("arguments")?.toString() + ?: "" + if (stream == "item" && argsText.isBlank() && looksLikeToolItem(name)) { + Log.d(TAG, "Progress skip: waiting for richer tool event name=$name") + return + } + if (isNoisyInternalBashRead(name, argsText)) { + Log.d(TAG, "Progress skip: internal bash read name=$name args=${argsText.take(160)}") + return + } + val detail = buildString { + append(name) + append(" ") + append(data.optString("progressText", "")) + append(" ") + append(data.optString("partialResult", "")) + append(" ") + append(argsText) + } + + val progress = progressFor(name = name, detail = detail, argsText = argsText, stream = stream, phase = phase) + if (progress == null) { + Log.d(TAG, "Progress skip: unclassified event=$event stream=$stream phase=$phase name=$name detail=${detail.take(220)}") + return + } + emitProgress(progress) + } + + private fun looksLikeToolItem(name: String): Boolean { + val text = name.lowercase() + return text == "bash" || + text == "browser" || + text.contains("_") || + text.contains("memory") || + text.contains("search") || + text.contains("mail") || + text.contains("slack") || + text.contains("calendar") + } + + private fun progressFor( + name: String, + detail: String, + argsText: String, + stream: String, + phase: String + ): OpenClawProgress? { + val text = "$name $detail".lowercase() + + // These fire constantly and are too noisy for glasses. 
+ if (text.contains("reasoning") + || text.contains("codex_app_server") + || stream == "lifecycle") { + return null + } + + val kind = when { + text.contains("memory") -> OpenClawProgressKind.Memory + text.contains("calendar") -> OpenClawProgressKind.Calendar + text.contains("slack") && (text.contains("lookup") || text.contains("user")) -> OpenClawProgressKind.SlackLookup + text.contains("slack") && (text.contains("send") || text.contains("message")) -> OpenClawProgressKind.SlackSend + text.contains("slack") -> OpenClawProgressKind.Slack + text.contains("mail") || text.contains("email") || text.contains("gmail") -> OpenClawProgressKind.Email + text.contains("browser") -> OpenClawProgressKind.Browser + text.contains("web") || text.contains("search") -> OpenClawProgressKind.Web + text.contains("file") || text.contains("read") -> OpenClawProgressKind.File + else -> OpenClawProgressKind.Tool + } + + // Raw bash can be very noisy, but many OpenClaw skills currently arrive + // as bash with the real service encoded in the command/cwd. 
+ if (name == "bash" && kind == OpenClawProgressKind.Tool) { + return null + } + + val target = progressTarget(kind, name, argsText, detail) + val speechHint = progressSpeechHint(kind, target) + val stableKey = listOf(kind.name.lowercase(), target.lowercase()) + .filter { it.isNotBlank() } + .joinToString(":") + + return OpenClawProgress( + kind = kind, + toolName = name.ifBlank { stream.ifBlank { "tool" } }, + phase = phase, + detail = detail.trim(), + speechHint = speechHint, + stableKey = stableKey + ) + } + + private fun progressSpeechHint(kind: OpenClawProgressKind, target: String): String { + return when (kind) { + OpenClawProgressKind.Memory -> listOf("searching memory", target).joinNonBlank(" for ") + OpenClawProgressKind.Calendar -> listOf("checking calendar", target).joinNonBlank(" for ") + OpenClawProgressKind.SlackLookup -> listOf("checking Slack recipient", target).joinNonBlank(" for ") + OpenClawProgressKind.SlackSend -> listOf("sending Slack message", target).joinNonBlank(" to ") + OpenClawProgressKind.Slack -> listOf("checking Slack", target).joinNonBlank(" for ") + OpenClawProgressKind.Email -> listOf("checking email", target).joinNonBlank(" for ") + OpenClawProgressKind.Browser -> if (target.isBlank()) "using browser" else "opening $target" + OpenClawProgressKind.Web -> listOf("searching web", target).joinNonBlank(" for ") + OpenClawProgressKind.File -> listOf("reading files", target).joinNonBlank(" for ") + OpenClawProgressKind.Tool -> listOf("running tool", target).joinNonBlank(" for ") + } + } + + private fun List.joinNonBlank(separator: String): String { + return filter { it.isNotBlank() }.joinToString(separator) + } + + private fun progressTarget( + kind: OpenClawProgressKind, + name: String, + argsText: String, + detail: String + ): String { + val args = parseJsonObject(argsText) + val query = args?.optString("query", "")?.takeIf { it.isNotBlank() } + val url = args?.optString("url", "")?.takeIf { it.isNotBlank() } + val path = 
args?.optString("path", "")?.takeIf { it.isNotBlank() } + val command = args?.optString("command", "")?.takeIf { it.isNotBlank() } + val action = args?.optString("action", "")?.takeIf { it.isNotBlank() } + + return when (kind) { + OpenClawProgressKind.Memory -> query ?: path?.substringAfterLast("/") ?: commandSearchTarget(command) ?: "" + OpenClawProgressKind.Calendar -> commandSearchTarget(command) ?: query ?: "" + OpenClawProgressKind.SlackLookup, + OpenClawProgressKind.SlackSend, + OpenClawProgressKind.Slack -> commandSearchTarget(command) ?: query ?: "" + OpenClawProgressKind.Email -> commandSearchTarget(command) ?: query ?: "" + OpenClawProgressKind.Browser -> domainFromUrl(url) ?: browserActionTarget(action, detail) + OpenClawProgressKind.Web -> domainFromUrl(url) ?: query ?: commandSearchTarget(command) ?: "" + OpenClawProgressKind.File -> path?.substringAfterLast("/") ?: commandSearchTarget(command) ?: "" + OpenClawProgressKind.Tool -> query ?: commandSearchTarget(command) ?: name + }.sanitizeTarget() + } + + private fun parseJsonObject(text: String): JSONObject? { + if (text.isBlank()) return null + return try { + JSONObject(text) + } catch (_: Exception) { + null + } + } + + private fun domainFromUrl(url: String?): String? { + if (url.isNullOrBlank()) return null + return try { + URI(url).host?.removePrefix("www.") + } catch (_: Exception) { + null + } + } + + private fun browserActionTarget(action: String?, detail: String): String { + if (!action.isNullOrBlank() && action != "act") return action + return when { + detail.contains("amazon", ignoreCase = true) -> "Amazon" + detail.contains("apple.com", ignoreCase = true) -> "apple.com" + else -> "" + } + } + + private fun commandSearchTarget(command: String?): String? 
{ + if (command.isNullOrBlank()) return null + val quoted = Regex("\"([^\"]{2,80})\"|'([^']{2,80})'").findAll(command) + .mapNotNull { it.groups[1]?.value ?: it.groups[2]?.value } + .firstOrNull { candidate -> isUsefulCommandCandidate(candidate) } + return quoted + } + + private fun isNoisyInternalBashRead(name: String, argsText: String): Boolean { + if (name != "bash") return false + val command = parseJsonObject(argsText) + ?.optString("command", "") + ?.takeIf { it.isNotBlank() } + ?: return false + + val lower = command.lowercase() + val looksReadOnly = listOf("sed ", "sed -n", "cat ", "head ", "tail ", "grep ", "rg ", "find ", "ls ", "for ") + .any { lower.contains(it) } + if (!looksReadOnly) return false + + return lower.contains("/skills/") || + lower.contains("skill.md") || + lower.contains("agents.md") || + lower.contains(".openclaw/workspace/memory") || + lower.contains("memory/2026") || + lower.contains("memory/2025") + } + + private fun isUsefulCommandCandidate(candidate: String): Boolean { + val text = candidate.trim() + if (text.isBlank()) return false + if (!text.any { it.isLetterOrDigit() || it.code > 127 }) return false + if (text.contains("/") || text.contains("--")) return false + if (text.startsWith("###")) return false + if (Regex("^\\d+,\\d+p$").matches(text)) return false + if (Regex("^\\d+,\\${'$'}p${'$'}").matches(text)) return false + if (text.startsWith("memory/", ignoreCase = true)) return false + if (text.endsWith(".md", ignoreCase = true)) return false + return true + } + + private fun String.sanitizeTarget(): String { + return trim() + .replace(Regex("\\s+"), " ") + .replace(Regex("[\\r\\n]"), " ") + .take(80) + } + + private fun emitProgress(progress: OpenClawProgress) { + val now = System.currentTimeMillis() + if (progress.stableKey == lastProgressText && now - lastProgressAtMs < 12_000) { + Log.d(TAG, "Progress skip: duplicate key=${progress.stableKey} tool=${progress.toolName}") + return + } + if (lastProgressText != null && now - 
lastProgressAtMs < 2_000) { + Log.d(TAG, "Progress skip: throttle display=${progress.displayText} tool=${progress.toolName}") + return + } + + lastProgressText = progress.stableKey + lastProgressAtMs = now + Log.d(TAG, "Progress: ${progress.speechHint} (${progress.toolName}) detail=${progress.detail.take(220)}") + handler.post { + onProgress?.invoke(progress) } } @@ -204,6 +483,32 @@ class OpenClawEventClient { webSocket?.send(connectMsg.toString()) } + private fun subscribeSessionEvents() { + val reqId = UUID.randomUUID().toString() + pendingResponses[reqId] = { response -> + val ok = response.optBoolean("ok", false) + if (ok) { + val subscribed = response.optJSONObject("result")?.optBoolean("subscribed", false) ?: false + Log.d(TAG, "sessions.subscribe ok subscribed=$subscribed") + } else { + val error = response.optJSONObject("error") + val msg = error?.optString("message", "unknown") ?: "unknown" + Log.w(TAG, "sessions.subscribe failed: $msg") + } + } + val request = JSONObject().apply { + put("type", "req") + put("id", reqId) + put("method", "sessions.subscribe") + put("params", JSONObject()) + } + val sent = webSocket?.send(request.toString()) ?: false + if (!sent) { + pendingResponses.remove(reqId) + Log.w(TAG, "sessions.subscribe send failed") + } + } + private fun handleHeartbeatEvent(payload: JSONObject) { val status = payload.optString("status", "") if (status != "sent") return diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt index 7136ce22..b9e2479b 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallModels.kt @@ -72,7 +72,7 @@ 
sealed class ToolResult { sealed class ToolCallStatus { data object Idle : ToolCallStatus() - data class Executing(val name: String) : ToolCallStatus() + data class Executing(val name: String, val progressText: String? = null) : ToolCallStatus() data class Completed(val name: String) : ToolCallStatus() data class Failed(val name: String, val error: String) : ToolCallStatus() data class Cancelled(val name: String) : ToolCallStatus() @@ -80,7 +80,7 @@ sealed class ToolCallStatus { val displayText: String get() = when (this) { is Idle -> "" - is Executing -> "Running: $name..." + is Executing -> progressText ?: "OpenClaw is working" is Completed -> "Done: $name" is Failed -> "Failed: $name - $error" is Cancelled -> "Cancelled: $name" @@ -129,6 +129,7 @@ object ToolDeclarations { private fun executeJSON(): JSONObject { return JSONObject().apply { put("name", "execute") + put("behavior", "NON_BLOCKING") put("description", "Your only way to take action. You have no memory, storage, or ability to do anything on your own -- use this tool for everything: sending messages, searching the web, adding to lists, setting reminders, creating notes, research, drafts, scheduling, smart home control, app interactions, or any request that goes beyond answering a question. 
When in doubt, use this tool.") put("parameters", JSONObject().apply { put("type", "object") @@ -144,7 +145,6 @@ object ToolDeclarations { }) put("required", JSONArray().put("task")) }) - put("behavior", "NON_BLOCKING") } } } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt index 885ddbc9..fb295bde 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/openclaw/ToolCallRouter.kt @@ -31,6 +31,14 @@ class ToolCallRouter( var onAutoSaveFrame: ((Bitmap, String?) -> Unit)? = null private val inFlightJobs = mutableMapOf() + private val pendingDuplicateExecuteResponses = mutableListOf() + private var activeExecuteCallId: String? 
= null + + private data class PendingDuplicateExecute( + val callId: String, + val callName: String, + val sendResponse: (JSONObject) -> Unit + ) fun handleToolCall( call: GeminiFunctionCall, @@ -55,6 +63,22 @@ class ToolCallRouter( return } + if (callName == "execute" && activeExecuteCallId != null) { + Log.w(TAG, "Coalescing duplicate execute call $callId into active call $activeExecuteCallId") + pendingDuplicateExecuteResponses.add( + PendingDuplicateExecute( + callId = callId, + callName = callName, + sendResponse = sendResponse + ) + ) + return + } + + if (callName == "execute") { + activeExecuteCallId = callId + } + val job = scope.launch { // Gemini가 tool-call args로 준 "정리된" task (이미 rewriting 된 텍스트) val rewrittenTask = call.args["task"]?.toString() ?: call.args.toString() @@ -105,6 +129,21 @@ class ToolCallRouter( val response = buildToolResponse(callId, callName, result) sendResponse(response) + + if (callName == "execute") { + val duplicates = pendingDuplicateExecuteResponses.toList() + pendingDuplicateExecuteResponses.clear() + activeExecuteCallId = null + for (duplicate in duplicates) { + duplicate.sendResponse( + buildToolResponse( + callId = duplicate.callId, + name = duplicate.callName, + result = result + ) + ) + } + } inFlightJobs.remove(callId) } @@ -121,6 +160,11 @@ class ToolCallRouter( } bridge.cancelInFlight("tool cancellation ids=$ids") bridge.setToolCallStatus(ToolCallStatus.Cancelled(ids.firstOrNull() ?: "unknown")) + pendingDuplicateExecuteResponses.removeAll { it.callId in ids } + if (activeExecuteCallId in ids) { + activeExecuteCallId = null + pendingDuplicateExecuteResponses.clear() + } } fun cancelAll() { @@ -129,6 +173,8 @@ class ToolCallRouter( job.cancel() } inFlightJobs.clear() + activeExecuteCallId = null + pendingDuplicateExecuteResponses.clear() bridge.cancelInFlight("cancelAll") } @@ -157,4 +203,4 @@ class ToolCallRouter( ) } } -} \ No newline at end of file +} diff --git 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index e86724e3..9429ff6c 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -146,6 +146,13 @@ When calling execute: The assistant works best with complete instructions. +When execute returns: + +- Base your answer on the execute result. +- Do not add extra facts that are not supported by the execute result. +- If the execute result is already concise, relay it naturally and briefly. +- Do not call execute again for the same user request unless the result explicitly says more checking is needed. + -------------------------------- RESPONSE STYLE -------------------------------- diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt index ac09c736..816f9d8f 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt @@ -54,9 +54,11 @@ import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.unit.dp import androidx.lifecycle.compose.collectAsStateWithLifecycle +import androidx.lifecycle.viewmodel.compose.viewModel as composeViewModel import com.meta.wearable.dat.core.types.Permission import 
com.meta.wearable.dat.core.types.PermissionStatus import com.meta.wearable.dat.externalsampleapps.cameraaccess.BuildConfig +import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiSessionViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel @OptIn(ExperimentalMaterial3Api::class) @@ -67,6 +69,7 @@ fun CameraAccessScaffold( modifier: Modifier = Modifier, ) { val uiState by viewModel.uiState.collectAsStateWithLifecycle() + val geminiViewModel: GeminiSessionViewModel = composeViewModel() val snackbarHostState = remember { SnackbarHostState() } val bottomSheetState = rememberModalBottomSheetState(skipPartiallyExpanded = true) @@ -85,11 +88,18 @@ fun CameraAccessScaffold( SettingsScreen( onBack = { viewModel.hideSettings() }, onDebugMenu = if (BuildConfig.DEBUG) {{ viewModel.showDebugMenu() }} else null, + onOpenClawNewSession = if (BuildConfig.DEBUG) { + { geminiViewModel.runOpenClawDeveloperCommand("/new") } + } else null, + onOpenClawCompactSession = if (BuildConfig.DEBUG) { + { geminiViewModel.runOpenClawDeveloperCommand("/compact") } + } else null, ) uiState.isStreaming -> StreamScreen( wearablesViewModel = viewModel, isPhoneMode = uiState.isPhoneMode, + geminiViewModel = geminiViewModel, ) uiState.isRegistered -> NonStreamScreen( diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index 27f6171d..fb2a44b5 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -29,6 +29,7 @@ import androidx.compose.runtime.Composable import androidx.compose.runtime.getValue import 
androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember +import androidx.compose.runtime.rememberCoroutineScope import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier @@ -37,14 +38,18 @@ import androidx.compose.ui.text.font.FontFamily import androidx.compose.ui.text.input.KeyboardType import androidx.compose.ui.unit.dp import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager +import kotlinx.coroutines.launch @OptIn(ExperimentalMaterial3Api::class) @Composable fun SettingsScreen( onBack: () -> Unit, onDebugMenu: (() -> Unit)? = null, + onOpenClawNewSession: (suspend () -> String)? = null, + onOpenClawCompactSession: (suspend () -> String)? = null, modifier: Modifier = Modifier, ) { + val coroutineScope = rememberCoroutineScope() var geminiAPIKey by remember { mutableStateOf(SettingsManager.geminiAPIKey) } var systemPrompt by remember { mutableStateOf(SettingsManager.geminiSystemPrompt) } var openClawHost by remember { mutableStateOf(SettingsManager.openClawHost) } @@ -56,6 +61,7 @@ fun SettingsScreen( var proactiveNotificationsEnabled by remember { mutableStateOf(SettingsManager.proactiveNotificationsEnabled) } var demoSpeakerModeEnabled by remember { mutableStateOf(SettingsManager.demoSpeakerModeEnabled) } var showResetDialog by remember { mutableStateOf(false) } + var developerStatus by remember { mutableStateOf(null) } fun save() { SettingsManager.geminiAPIKey = geminiAPIKey.trim() @@ -239,6 +245,45 @@ fun SettingsScreen( TextButton(onClick = onDebug) { Text("Mock Device Kit") } + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy(8.dp), + verticalAlignment = Alignment.CenterVertically, + ) { + TextButton( + onClick = { + save() + developerStatus = "Sending /new..." + coroutineScope.launch { + developerStatus = onOpenClawNewSession?.invoke() + ?: "OpenClaw command is unavailable." 
+ } + }, + enabled = onOpenClawNewSession != null, + ) { + Text("New OpenClaw Session") + } + TextButton( + onClick = { + save() + developerStatus = "Sending /compact..." + coroutineScope.launch { + developerStatus = onOpenClawCompactSession?.invoke() + ?: "OpenClaw command is unavailable." + } + }, + enabled = onOpenClawCompactSession != null, + ) { + Text("Compact") + } + } + developerStatus?.let { status -> + Text( + text = status, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } } // Reset From de1152357375ae9f944b7846e95fef0f4b27d04a Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Tue, 26 May 2026 14:57:19 +0900 Subject: [PATCH 67/68] Stabilize Android glasses streaming restart --- .../cameraaccess/MainActivity.kt | 16 ----- .../cameraaccess/gemini/GeminiConfig.kt | 2 +- .../cameraaccess/stream/StreamViewModel.kt | 66 ++++++++++++------- .../cameraaccess/stream/StreamingService.kt | 55 +++++++++++----- .../cameraaccess/ui/CameraAccessScaffold.kt | 4 -- .../cameraaccess/ui/NonStreamScreen.kt | 5 +- .../cameraaccess/ui/StreamScreen.kt | 4 +- .../wearables/WearablesViewModel.kt | 36 ++-------- 8 files changed, 91 insertions(+), 97 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt index 9175c2ae..ed17fffd 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/MainActivity.kt @@ -82,26 +82,10 @@ class MainActivity : ComponentActivity() { setContent { CameraAccessScaffold( viewModel = viewModel, - onRequestWearablesPermission = ::requestWearablesPermission, ) } } - override fun onPause() { - super.onPause() - 
android.util.Log.d("MainActivity", "BGTEST onPause") - } - - override fun onStop() { - super.onStop() - android.util.Log.d("MainActivity", "BGTEST onStop") - } - - override fun onResume() { - super.onResume() - android.util.Log.d("MainActivity", "BGTEST onResume") - } - fun checkPermissions(onPermissionsGranted: () -> Unit) { registerForActivityResult(RequestMultiplePermissions()) { permissionsResult -> val granted = permissionsResult.entries.all { it.value } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt index 70a49cc7..49c8b632 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiConfig.kt @@ -53,7 +53,7 @@ TOOL RESULT HANDLING -------------------------------- When execute returns a result, immediately answer the user using that result. -Do not end the turn with only an acknowledgment like "I'll check" or "確認します" after execute has returned. +Do not end the turn with only a brief acknowledgment after execute has returned. If you did not manage to say the acknowledgment before calling execute, do not say it after the result arrives; use the result instead. Keep the final answer concise and in the user's conversation language. If the user's utterance contains Japanese, use Japanese for both the pre-tool acknowledgment and the final answer. 
diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt index d4a70271..3a860707 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamViewModel.kt @@ -99,7 +99,7 @@ class StreamViewModel( } private fun stopActiveVideoSource(preserveMode: Boolean) { - Log.d(TAG, "BGTEST stopActiveVideoSource called preserveMode=$preserveMode") + Log.d(TAG, "Stopping active video source preserveMode=$preserveMode") StreamingService.stop(getApplication()) @@ -133,42 +133,65 @@ class StreamViewModel( return } + if (streamSession != null) { + Log.d(TAG, "Ignoring startStream because a stream session already exists") + return + } + videoJob?.cancel() stateJob?.cancel() StreamingService.start(getApplication()) val streamSession = - Wearables.startStreamSession( - getApplication(), - deviceSelector, - StreamConfiguration(videoQuality = VideoQuality.MEDIUM, 24), - ).also { streamSession = it } + try { + Wearables.startStreamSession( + getApplication(), + deviceSelector, + StreamConfiguration(videoQuality = VideoQuality.MEDIUM, 24), + ).also { streamSession = it } + } catch (t: Throwable) { + Log.e(TAG, "Failed to start stream session", t) + StreamingService.stop(getApplication()) + _uiState.update { it.copy(streamSessionState = StreamSessionState.STOPPED) } + return + } - _uiState.update { it.copy(streamingMode = StreamingMode.GLASSES) } + _uiState.update { + it.copy( + streamingMode = StreamingMode.GLASSES, + streamSessionState = StreamSessionState.STARTING, + ) + } videoJob = viewModelScope.launch { - streamSession.videoStream.collect { frame -> -// Log.d( -// TAG, -// "BGTEST 
frame received w=${frame.width} h=${frame.height} t=${System.currentTimeMillis()}" -// ) - handleVideoFrame(frame) - } + streamSession.videoStream.collect { frame -> handleVideoFrame(frame) } } stateJob = viewModelScope.launch { - Log.d(TAG, "BGTEST stateJob launched") + Log.d(TAG, "Stream state collector launched") + var sawStartedState = false streamSession.state.collect { currentState -> - Log.d(TAG, "BGTEST stream state = $currentState") + Log.d(TAG, "Stream state = $currentState") val prevState = _uiState.value.streamSessionState _uiState.update { it.copy(streamSessionState = currentState) } - if (currentState != prevState && currentState == StreamSessionState.STOPPED) { - Log.d(TAG, "BGTEST state became STOPPED -> stopStream()") + if ( + currentState == StreamSessionState.STARTING || + currentState == StreamSessionState.STREAMING + ) { + sawStartedState = true + } + + if ( + sawStartedState && + currentState != prevState && + currentState == StreamSessionState.STOPPED + ) { + Log.d(TAG, "Stream state became STOPPED; stopping stream") stopStream() wearablesViewModel.navigateToDeviceSelection() } @@ -206,7 +229,7 @@ class StreamViewModel( } fun stopStream() { - Log.d(TAG, "BGTEST stopStream called") + Log.d(TAG, "Stopping stream") stopActiveVideoSource(preserveMode = false) } @@ -283,11 +306,6 @@ class StreamViewModel( } private fun handleVideoFrame(videoFrame: VideoFrame) { -// Log.d( -// TAG, -// "BGTEST handleVideoFrame entered w=${videoFrame.width} h=${videoFrame.height} t=${System.currentTimeMillis()}" -// ) - // VideoFrame contains raw I420 video data in a ByteBuffer val buffer = videoFrame.buffer val dataSize = buffer.remaining() diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt index d0c066ff..9358e361 100644 --- 
a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/stream/StreamingService.kt @@ -33,10 +33,15 @@ class StreamingService : Service() { private const val CHANNEL_NAME = "Camera Streaming" private const val NOTIFICATION_ID = 1001 private const val WAKELOCK_TAG = "VisionClaw::StreamingWakeLock" + private const val ACTION_START = "com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.START" + private const val ACTION_STOP = "com.meta.wearable.dat.externalsampleapps.cameraaccess.stream.STOP" fun start(context: Context) { val intent = - Intent(context, StreamingService::class.java).apply { `package` = context.packageName } + Intent(context, StreamingService::class.java).apply { + `package` = context.packageName + action = ACTION_START + } if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { context.startForegroundService(intent) } else { @@ -46,8 +51,16 @@ class StreamingService : Service() { fun stop(context: Context) { val intent = - Intent(context, StreamingService::class.java).apply { `package` = context.packageName } - context.stopService(intent) + Intent(context, StreamingService::class.java).apply { + `package` = context.packageName + action = ACTION_STOP + } + try { + context.startService(intent) + } catch (e: IllegalStateException) { + Log.w(TAG, "Unable to send stop command; stopping service directly", e) + context.stopService(intent) + } } } @@ -63,25 +76,18 @@ class StreamingService : Service() { } override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { - Log.d(TAG, "Service started") + Log.d(TAG, "Service command: ${intent?.action ?: ACTION_START}") - val notification = createNotification() - - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) { - startForeground( - NOTIFICATION_ID, - notification, - 
ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE or - ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE, - ) - } else { - startForeground(NOTIFICATION_ID, notification) + if (intent?.action == ACTION_STOP) { + stopSelf(startId) + return START_NOT_STICKY } + startInForeground() acquireWakeLock() acquireWifiLock() - return START_STICKY + return START_NOT_STICKY } override fun onDestroy() { @@ -109,6 +115,21 @@ class StreamingService : Service() { } } + private fun startInForeground() { + val notification = createNotification() + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.Q) { + startForeground( + NOTIFICATION_ID, + notification, + ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE or + ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE, + ) + } else { + startForeground(NOTIFICATION_ID, notification) + } + } + private fun createNotification(): Notification { val pendingIntent = PendingIntent.getActivity( @@ -174,4 +195,4 @@ class StreamingService : Service() { } wifiLock = null } -} \ No newline at end of file +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt index 816f9d8f..efed6064 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/CameraAccessScaffold.kt @@ -55,8 +55,6 @@ import androidx.compose.ui.Modifier import androidx.compose.ui.unit.dp import androidx.lifecycle.compose.collectAsStateWithLifecycle import androidx.lifecycle.viewmodel.compose.viewModel as composeViewModel -import com.meta.wearable.dat.core.types.Permission -import com.meta.wearable.dat.core.types.PermissionStatus import 
com.meta.wearable.dat.externalsampleapps.cameraaccess.BuildConfig import com.meta.wearable.dat.externalsampleapps.cameraaccess.gemini.GeminiSessionViewModel import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel @@ -65,7 +63,6 @@ import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.Wearables @Composable fun CameraAccessScaffold( viewModel: WearablesViewModel, - onRequestWearablesPermission: suspend (Permission) -> PermissionStatus, modifier: Modifier = Modifier, ) { val uiState by viewModel.uiState.collectAsStateWithLifecycle() @@ -104,7 +101,6 @@ fun CameraAccessScaffold( uiState.isRegistered -> NonStreamScreen( viewModel = viewModel, - onRequestWearablesPermission = onRequestWearablesPermission, ) else -> HomeScreen( diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/NonStreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/NonStreamScreen.kt index 394c5625..75580704 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/NonStreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/NonStreamScreen.kt @@ -56,8 +56,6 @@ import androidx.compose.ui.text.font.FontWeight import androidx.compose.ui.text.style.TextAlign import androidx.compose.ui.unit.dp import androidx.lifecycle.compose.collectAsStateWithLifecycle -import com.meta.wearable.dat.core.types.Permission -import com.meta.wearable.dat.core.types.PermissionStatus import com.meta.wearable.dat.core.types.RegistrationState import com.meta.wearable.dat.externalsampleapps.cameraaccess.R import com.meta.wearable.dat.externalsampleapps.cameraaccess.wearables.WearablesViewModel @@ -67,7 +65,6 @@ import kotlinx.coroutines.launch @Composable fun NonStreamScreen( viewModel: WearablesViewModel, - 
onRequestWearablesPermission: suspend (Permission) -> PermissionStatus, modifier: Modifier = Modifier, ) { val uiState by viewModel.uiState.collectAsStateWithLifecycle() @@ -183,7 +180,7 @@ fun NonStreamScreen( // Start Streaming Button (glasses) SwitchButton( label = stringResource(R.string.stream_button_title), - onClick = { viewModel.navigateToStreaming(onRequestWearablesPermission) }, + onClick = { viewModel.navigateToStreaming() }, enabled = uiState.hasActiveDevice, ) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt index 2a0e67bb..ab8e3f90 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/StreamScreen.kt @@ -219,8 +219,8 @@ fun StreamScreen( ) { IconButton(onClick = { val newEnabled = !videoStreamingEnabled + SettingsManager.videoStreamingEnabled = newEnabled videoStreamingEnabled = newEnabled - streamViewModel.setVideoStreamingEnabled(newEnabled, lifecycleOwner) }) { Icon( imageVector = if (videoStreamingEnabled) Icons.Default.Videocam else Icons.Default.VideocamOff, @@ -330,4 +330,4 @@ fun StreamScreen( ) } } -} \ No newline at end of file +} diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt index f3bd2176..b4c409f7 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt +++ 
b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt @@ -24,10 +24,9 @@ import com.meta.wearable.dat.core.Wearables import com.meta.wearable.dat.core.selectors.AutoDeviceSelector import com.meta.wearable.dat.core.selectors.DeviceSelector import com.meta.wearable.dat.core.types.DeviceIdentifier -import com.meta.wearable.dat.core.types.Permission -import com.meta.wearable.dat.core.types.PermissionStatus import com.meta.wearable.dat.core.types.RegistrationState import com.meta.wearable.dat.mockdevice.MockDeviceKit +import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import kotlinx.collections.immutable.toImmutableList import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow @@ -124,37 +123,16 @@ class WearablesViewModel(application: Application) : AndroidViewModel(applicatio Wearables.startUnregistration(activity) } - fun navigateToStreaming(onRequestWearablesPermission: suspend (Permission) -> PermissionStatus) { - viewModelScope.launch { - val permission = Permission.CAMERA // Camera permission is required for streaming - val result = Wearables.checkPermissionStatus(permission) - - // Handle the result - result.onFailure { error, _ -> - setRecentError("Permission check error: ${error.description}") - return@launch - } - - val permissionStatus = result.getOrNull() - if (permissionStatus == PermissionStatus.Granted) { - _uiState.update { it.copy(isStreaming = true) } - return@launch - } - - // Request permission - val requestedPermissionStatus = onRequestWearablesPermission(permission) - when (requestedPermissionStatus) { - PermissionStatus.Denied -> { - setRecentError("Permission denied") - } - PermissionStatus.Granted -> { - _uiState.update { it.copy(isStreaming = true) } - } - } + fun navigateToStreaming() { + if (_uiState.value.isStreaming) { + return } + SettingsManager.videoStreamingEnabled = true + _uiState.update { 
it.copy(isStreaming = true, isPhoneMode = false) } } fun navigateToPhoneMode() { + SettingsManager.videoStreamingEnabled = true _uiState.update { it.copy(isStreaming = true, isPhoneMode = true) } } From 8bdc5b5f9b2af2767407000b6ef49d3fa4eb2582 Mon Sep 17 00:00:00 2001 From: Ryo Suzuki Date: Fri, 5 Jun 2026 16:29:56 +0900 Subject: [PATCH 68/68] Stabilize Android audio routing settings --- .../cameraaccess/gemini/AudioManager.kt | 491 +++++++++++++++--- .../cameraaccess/gemini/GeminiLiveService.kt | 1 + .../cameraaccess/settings/SettingsManager.kt | 2 +- .../cameraaccess/ui/SettingsScreen.kt | 15 +- .../wearables/WearablesViewModel.kt | 3 - 5 files changed, 439 insertions(+), 73 deletions(-) diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt index 283065e4..b97e94f4 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/AudioManager.kt @@ -52,6 +52,18 @@ class AudioManager(private val appContext: Context) { private var commDeviceSet = false private var scoStarted = false private var preferredBtDevice: AudioDeviceInfo? = null + private var preferredBtInputDevice: AudioDeviceInfo? = null + private var preferredBtOutputDevice: AudioDeviceInfo? = null + private var lastInputLevelLogMs = 0L + private var lastPlaybackLevelLogMs = 0L + private var silentInputLevelLogs = 0 + private var fellBackToBuiltInMic = false + + private data class BluetoothAudioRoute( + val communicationDevice: AudioDeviceInfo?, + val inputDevice: AudioDeviceInfo?, + val outputDevice: AudioDeviceInfo?, + ) /** * "Mic mute" without tearing down the whole Gemini session. 
@@ -76,48 +88,66 @@ class AudioManager(private val appContext: Context) { val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager val demoSpeakerMode = SettingsManager.demoSpeakerModeEnabled + var useBluetoothMediaOutputOnly = false if (demoSpeakerMode) { sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION commDeviceSet = false scoStarted = false preferredBtDevice = null + preferredBtInputDevice = null + preferredBtOutputDevice = null Log.d(TAG, "Demo speaker mode enabled -> use phone-style communication input without BT SCO") } else { - // ✅ BT 마이크가 있으면 그걸 우선 사용, 없으면 폰 마이크로 폴백 - preferredBtDevice = findBluetoothInputDeviceOrNull() - - if (preferredBtDevice != null) { - // 통화 모드로 전환 (SCO 입력 안정화에 도움) + val bluetoothRoute = findBluetoothAudioRoute(sysAm) + preferredBtDevice = bluetoothRoute.communicationDevice + preferredBtInputDevice = bluetoothRoute.inputDevice + preferredBtOutputDevice = bluetoothRoute.outputDevice + + if (bluetoothRoute.outputDevice != null && isBluetoothMediaOutput(bluetoothRoute.outputDevice)) { + useBluetoothMediaOutputOnly = true + preferredBtInputDevice = null + commDeviceSet = false + scoStarted = false + try { + sysAm.mode = android.media.AudioManager.MODE_NORMAL + } catch (t: Throwable) { + Log.w(TAG, "MODE_NORMAL for Bluetooth media output failed: ${t.message}") + } + Log.d( + TAG, + "Bluetooth media output detected -> use built-in mic with media output, " + + "output=${describeDevice(bluetoothRoute.outputDevice)}" + ) + } else if (bluetoothRoute.communicationDevice != null || bluetoothRoute.inputDevice != null) { sysAm.mode = android.media.AudioManager.MODE_IN_COMMUNICATION - // Android 12+ : communication device 선택 시도 (실패해도 폴백 가능) - if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && bluetoothRoute.communicationDevice != null) { try { - commDeviceSet = sysAm.setCommunicationDevice(preferredBtDevice!!) 
- Log.d(TAG, "setCommunicationDevice(BT) = $commDeviceSet, dev=${preferredBtDevice?.productName}") + commDeviceSet = sysAm.setCommunicationDevice(bluetoothRoute.communicationDevice) + Log.d(TAG, "setCommunicationDevice(BT) = $commDeviceSet, dev=${describeDevice(bluetoothRoute.communicationDevice)}") } catch (t: Throwable) { commDeviceSet = false Log.w(TAG, "setCommunicationDevice failed: ${t.message}") } } - // 구형/일부 기기 fallback: SCO 시작 (BT 없으면 시작하지 않음) try { sysAm.startBluetoothSco() sysAm.isBluetoothScoOn = true scoStarted = true + waitForBluetoothSco(sysAm) Log.d(TAG, "Bluetooth SCO started") } catch (t: Throwable) { scoStarted = false Log.w(TAG, "startBluetoothSco failed: ${t.message}") } } else { - // ✅ BT가 없으면 강제 라우팅/모드 변경 안 함 (그냥 폰 마이크) commDeviceSet = false scoStarted = false Log.d(TAG, "No BT mic -> fallback to phone mic") } + logBluetoothRoute("selected", bluetoothRoute) } val bufferSize = AudioRecord.getMinBufferSize( @@ -126,23 +156,9 @@ class AudioManager(private val appContext: Context) { AudioFormat.ENCODING_PCM_16BIT ) - audioRecord = AudioRecord( - MediaRecorder.AudioSource.VOICE_COMMUNICATION, - GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) - - val preferredInputDevice = if (demoSpeakerMode) findBuiltInMicOrNull() else preferredBtDevice - preferredInputDevice?.let { dev -> - try { - val ok = audioRecord?.setPreferredDevice(dev) == true - Log.d(TAG, "AudioRecord.setPreferredDevice ok=$ok dev=${dev.productName}") - } catch (t: Throwable) { - Log.w(TAG, "setPreferredDevice failed: ${t.message}") - } - } + val preferredInputDevice = + if (demoSpeakerMode || useBluetoothMediaOutputOnly) findBuiltInMicOrNull() else preferredBtInputDevice + audioRecord = buildAudioRecord(preferredInputDevice, bufferSize) val routed = audioRecord?.routedDevice Log.d(TAG, "AudioRecord routedDevice: type=${routed?.type} name=${routed?.productName}") @@ -151,35 +167,10 @@ class AudioManager(private val 
appContext: Context) { enableVoiceProcessing(audioRecord?.audioSessionId ?: 0) } - val newAudioTrack = AudioTrack.Builder() - .setAudioAttributes( - AudioAttributes.Builder() - .setUsage( - if (demoSpeakerMode) { - AudioAttributes.USAGE_MEDIA - } else { - AudioAttributes.USAGE_VOICE_COMMUNICATION - } - ) - .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) - .build() - ) - .setAudioFormat( - AudioFormat.Builder() - .setSampleRate(GeminiConfig.OUTPUT_AUDIO_SAMPLE_RATE) - .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) - .setEncoding(AudioFormat.ENCODING_PCM_16BIT) - .build() - ) - .setTransferMode(AudioTrack.MODE_STREAM) - .setBufferSizeInBytes( - AudioTrack.getMinBufferSize( - GeminiConfig.OUTPUT_AUDIO_SAMPLE_RATE, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT - ) * 2 - ) - .build() + val newAudioTrack = buildAudioTrack( + useMediaOutput = demoSpeakerMode || useBluetoothMediaOutputOnly, + preferredOutputDevice = if (demoSpeakerMode) null else preferredBtOutputDevice, + ) audioRecord?.startRecording() synchronized(playbackLock) { @@ -196,6 +187,8 @@ class AudioManager(private val appContext: Context) { synchronized(accumulateLock) { accumulatedData.reset() } + silentInputLevelLogs = 0 + fellBackToBuiltInMic = false captureThread = Thread( { @@ -204,6 +197,8 @@ class AudioManager(private val appContext: Context) { while (isCapturing) { val read = audioRecord?.read(buffer, 0, buffer.size) ?: break if (read > 0) { + logInputLevelIfNeeded(buffer, read) + if (!micEnabled) { // Mic muted: discard data and clear any partial buffer. 
synchronized(accumulateLock) { @@ -230,24 +225,325 @@ class AudioManager(private val appContext: Context) { "audio-capture" ).also { it.start() } - Log.d(TAG, "Audio capture started (16kHz mono PCM16, demoSpeakerMode=$demoSpeakerMode)") + Log.d( + TAG, + "Audio capture started (16kHz mono PCM16, demoSpeakerMode=$demoSpeakerMode, " + + "useBluetoothMediaOutputOnly=$useBluetoothMediaOutputOnly)" + ) } - private fun findBluetoothInputDeviceOrNull(): AudioDeviceInfo? { + private fun logInputLevelIfNeeded(buffer: ByteArray, byteCount: Int) { + val now = System.currentTimeMillis() + if (now - lastInputLevelLogMs < 1000) return + lastInputLevelLogMs = now + + var sumSquares = 0.0 + var peak = 0 + var i = 0 + while (i + 1 < byteCount) { + val sample = ((buffer[i + 1].toInt() shl 8) or (buffer[i].toInt() and 0xff)).toShort().toInt() + val abs = kotlin.math.abs(sample) + if (abs > peak) peak = abs + sumSquares += sample.toDouble() * sample.toDouble() + i += 2 + } + val samples = byteCount / 2 + if (samples == 0) return + + val rms = kotlin.math.sqrt(sumSquares / samples) + Log.d( + TAG, + "Input level rms=${rms.toInt()} peak=$peak device=${describeDevice(audioRecord?.routedDevice)}" + ) + + if (preferredBtInputDevice != null && !fellBackToBuiltInMic && rms < 1.0 && peak == 0) { + silentInputLevelLogs++ + if (silentInputLevelLogs >= 3) { + switchToBuiltInMicAfterSilentBluetooth() + } + } else { + silentInputLevelLogs = 0 + } + } + + private fun switchToBuiltInMicAfterSilentBluetooth() { + val builtInMic = findBuiltInMicOrNull() + if (builtInMic == null) { + Log.w(TAG, "Bluetooth input is silent, but no built-in mic fallback was found") + return + } + + try { + leaveBluetoothCommunicationRoute() + val ok = rebuildAudioRecord(builtInMic) + fellBackToBuiltInMic = true + silentInputLevelLogs = 0 + Log.w( + TAG, + "Bluetooth input stayed silent; rebuilt capture for built-in mic " + + "preferredOk=$ok dev=${describeDevice(builtInMic)} 
routed=${describeDevice(audioRecord?.routedDevice)}" + ) + if (ok) { + switchPlaybackToMediaBluetoothAfterMicFallback() + } + } catch (t: Throwable) { + Log.w(TAG, "Built-in mic fallback failed: ${t.message}") + } + } + + private fun switchPlaybackToMediaBluetoothAfterMicFallback() { + leaveBluetoothCommunicationRoute() + + try { + Thread.sleep(150) + val mediaOutput = findBluetoothMediaOutputDeviceOrNull() + preferredBtOutputDevice = mediaOutput ?: preferredBtOutputDevice + + val newTrack = buildAudioTrack( + useMediaOutput = true, + preferredOutputDevice = preferredBtOutputDevice, + ) + + synchronized(playbackLock) { + playbackGeneration++ + val oldTrack = audioTrack + audioTrack = newTrack + try { + newTrack.play() + } catch (t: Throwable) { + Log.w(TAG, "Fallback AudioTrack.play failed: ${t.message}") + } + try { + oldTrack?.stop() + } catch (_: Throwable) { + } + try { + oldTrack?.release() + } catch (_: Throwable) { + } + } + Log.w(TAG, "Switched playback to media Bluetooth output dev=${describeDevice(preferredBtOutputDevice)}") + } catch (t: Throwable) { + Log.w(TAG, "Media Bluetooth playback fallback failed: ${t.message}") + } + } + + private fun leaveBluetoothCommunicationRoute() { val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager - // 입력 디바이스 목록에서 BT 계열 우선 탐색 + try { + if (scoStarted) { + sysAm.stopBluetoothSco() + sysAm.isBluetoothScoOn = false + scoStarted = false + } + } catch (t: Throwable) { + Log.w(TAG, "stopBluetoothSco during fallback failed: ${t.message}") + } + + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && commDeviceSet) { + try { + sysAm.clearCommunicationDevice() + commDeviceSet = false + } catch (t: Throwable) { + Log.w(TAG, "clearCommunicationDevice during fallback failed: ${t.message}") + } + } + + try { + sysAm.mode = android.media.AudioManager.MODE_NORMAL + } catch (t: Throwable) { + Log.w(TAG, "MODE_NORMAL during fallback failed: ${t.message}") + } + } + + private fun buildAudioRecord( 
+ preferredInputDevice: AudioDeviceInfo?, + bufferSize: Int, + ): AudioRecord { + val record = AudioRecord( + MediaRecorder.AudioSource.VOICE_COMMUNICATION, + GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize + ) + + preferredInputDevice?.let { dev -> + try { + val ok = record.setPreferredDevice(dev) + Log.d(TAG, "AudioRecord.setPreferredDevice ok=$ok dev=${describeDevice(dev)}") + } catch (t: Throwable) { + Log.w(TAG, "setPreferredDevice failed: ${t.message}") + } + } + + return record + } + + private fun rebuildAudioRecord(preferredInputDevice: AudioDeviceInfo): Boolean { + val bufferSize = AudioRecord.getMinBufferSize( + GeminiConfig.INPUT_AUDIO_SAMPLE_RATE, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT + ) + + val oldRecord = audioRecord + audioRecord = null + try { + oldRecord?.stop() + } catch (_: Throwable) { + } + try { + oldRecord?.release() + } catch (_: Throwable) { + } + + val newRecord = buildAudioRecord(preferredInputDevice, bufferSize) + audioRecord = newRecord + newRecord.startRecording() + + synchronized(accumulateLock) { + accumulatedData.reset() + } + + return newRecord.preferredDevice?.id == preferredInputDevice.id + } + + private fun buildAudioTrack( + useMediaOutput: Boolean, + preferredOutputDevice: AudioDeviceInfo?, + ): AudioTrack { + val track = AudioTrack.Builder() + .setAudioAttributes( + AudioAttributes.Builder() + .setUsage( + if (useMediaOutput) { + AudioAttributes.USAGE_MEDIA + } else { + AudioAttributes.USAGE_VOICE_COMMUNICATION + } + ) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .build() + ) + .setAudioFormat( + AudioFormat.Builder() + .setSampleRate(GeminiConfig.OUTPUT_AUDIO_SAMPLE_RATE) + .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) + .setEncoding(AudioFormat.ENCODING_PCM_16BIT) + .build() + ) + .setTransferMode(AudioTrack.MODE_STREAM) + .setBufferSizeInBytes( + AudioTrack.getMinBufferSize( + GeminiConfig.OUTPUT_AUDIO_SAMPLE_RATE, + 
AudioFormat.CHANNEL_OUT_MONO, + AudioFormat.ENCODING_PCM_16BIT + ) * 2 + ) + .build() + + preferredOutputDevice?.let { dev -> + try { + val ok = track.setPreferredDevice(dev) + Log.d(TAG, "AudioTrack.setPreferredDevice ok=$ok dev=${describeDevice(dev)} media=$useMediaOutput") + } catch (t: Throwable) { + Log.w(TAG, "AudioTrack.setPreferredDevice failed: ${t.message}") + } + } + + return track + } + + private fun waitForBluetoothSco(sysAm: android.media.AudioManager) { + repeat(12) { + if (sysAm.isBluetoothScoOn) return + Thread.sleep(100) + } + } + + private fun findBluetoothAudioRoute(sysAm: android.media.AudioManager): BluetoothAudioRoute { val inputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_INPUTS) + val outputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_OUTPUTS) - // 1순위: SCO (통화용) - inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO }?.let { return it } + val communicationDevice = + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { + sysAm.availableCommunicationDevices.firstOrNull { isBluetoothDevice(it) } + } else { + null + } + val inputDevice = + inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO } + ?: inputs.firstOrNull { isBluetoothDevice(it) } + ?: communicationDevice?.takeIf { it.isSource } + val outputDevice = + outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_A2DP } + ?: outputs.firstOrNull { + Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && + (it.type == AudioDeviceInfo.TYPE_BLE_HEADSET || + it.type == AudioDeviceInfo.TYPE_BLE_SPEAKER) + } + ?: outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO } + ?: communicationDevice?.takeIf { it.isSink } - // 2순위: BLE Headset (기기/OS에 따라 여기로 잡히는 경우가 있음) + logAudioDevices(sysAm, inputs, outputs) + + return BluetoothAudioRoute( + communicationDevice = communicationDevice, + inputDevice = inputDevice, + outputDevice = outputDevice, + ) + } + + private fun isBluetoothDevice(device: AudioDeviceInfo): Boolean { + 
return when (device.type) { + AudioDeviceInfo.TYPE_BLUETOOTH_SCO, + AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> true + else -> + Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && + (device.type == AudioDeviceInfo.TYPE_BLE_HEADSET || + device.type == AudioDeviceInfo.TYPE_BLE_SPEAKER) + } + } + + private fun isBluetoothMediaOutput(device: AudioDeviceInfo): Boolean { + return when (device.type) { + AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> true + else -> + Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && + (device.type == AudioDeviceInfo.TYPE_BLE_HEADSET || + device.type == AudioDeviceInfo.TYPE_BLE_SPEAKER) + } + } + + private fun logAudioDevices( + sysAm: android.media.AudioManager, + inputs: Array, + outputs: Array, + ) { + Log.d(TAG, "Audio inputs: ${inputs.joinToString { describeDevice(it) }}") + Log.d(TAG, "Audio outputs: ${outputs.joinToString { describeDevice(it) }}") if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) { - inputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLE_HEADSET }?.let { return it } + Log.d( + TAG, + "Communication devices: ${sysAm.availableCommunicationDevices.joinToString { describeDevice(it) }}" + ) } + } - return null + private fun logBluetoothRoute(label: String, route: BluetoothAudioRoute) { + Log.d( + TAG, + "Bluetooth route $label: communication=${describeDevice(route.communicationDevice)}, " + + "input=${describeDevice(route.inputDevice)}, output=${describeDevice(route.outputDevice)}" + ) + } + + private fun describeDevice(device: AudioDeviceInfo?): String { + return device?.let { + "type=${it.type}, name=${it.productName}, source=${it.isSource}, sink=${it.isSink}" + } ?: "none" } private fun findBuiltInMicOrNull(): AudioDeviceInfo? { @@ -262,6 +558,18 @@ class AudioManager(private val appContext: Context) { return outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BUILTIN_SPEAKER } } + private fun findBluetoothMediaOutputDeviceOrNull(): AudioDeviceInfo? 
{ + val sysAm = appContext.getSystemService(Context.AUDIO_SERVICE) as android.media.AudioManager + val outputs = sysAm.getDevices(android.media.AudioManager.GET_DEVICES_OUTPUTS) + return outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_A2DP } + ?: outputs.firstOrNull { + Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && + (it.type == AudioDeviceInfo.TYPE_BLE_HEADSET || + it.type == AudioDeviceInfo.TYPE_BLE_SPEAKER) + } + ?: outputs.firstOrNull { it.type == AudioDeviceInfo.TYPE_BLUETOOTH_SCO } + } + private fun enableVoiceProcessing(audioSessionId: Int) { if (audioSessionId == 0) return @@ -313,9 +621,12 @@ class AudioManager(private val appContext: Context) { if (!isCapturing || generation != playbackGeneration) return@synchronized val track = audioTrack ?: return@synchronized try { + ensurePreferredPlaybackDevice(track) val written = track.write(chunk, 0, chunk.size) if (written < 0) { Log.w(TAG, "AudioTrack.write failed: $written") + } else { + logPlaybackLevelIfNeeded(track, chunk, written) } } catch (t: Throwable) { Log.w(TAG, "AudioTrack.write threw: ${t.message}") @@ -327,6 +638,53 @@ class AudioManager(private val appContext: Context) { } } + private fun ensurePreferredPlaybackDevice(track: AudioTrack) { + val routed = track.routedDevice + if (routed != null && isBluetoothMediaOutput(routed)) return + + val mediaOutput = findBluetoothMediaOutputDeviceOrNull() ?: preferredBtOutputDevice ?: return + preferredBtOutputDevice = mediaOutput + + try { + val ok = track.setPreferredDevice(mediaOutput) + Log.w( + TAG, + "Reassert playback Bluetooth output ok=$ok dev=${describeDevice(mediaOutput)} " + + "previous=${describeDevice(routed)}" + ) + } catch (t: Throwable) { + Log.w(TAG, "Reassert playback Bluetooth output threw: ${t.message}") + } + } + + private fun logPlaybackLevelIfNeeded(track: AudioTrack, buffer: ByteArray, byteCount: Int) { + val now = System.currentTimeMillis() + if (now - lastPlaybackLevelLogMs < 1000) return + lastPlaybackLevelLogMs 
= now + + val level = computePcmLevel(buffer, byteCount) + Log.d( + TAG, + "Playback write bytes=$byteCount rms=${level.first} peak=${level.second} device=${describeDevice(track.routedDevice)}" + ) + } + + private fun computePcmLevel(buffer: ByteArray, byteCount: Int): Pair { + var sumSquares = 0.0 + var peak = 0 + var i = 0 + while (i + 1 < byteCount) { + val sample = ((buffer[i + 1].toInt() shl 8) or (buffer[i].toInt() and 0xff)).toShort().toInt() + val abs = kotlin.math.abs(sample) + if (abs > peak) peak = abs + sumSquares += sample.toDouble() * sample.toDouble() + i += 2 + } + val samples = byteCount / 2 + if (samples == 0) return 0 to 0 + return kotlin.math.sqrt(sumSquares / samples).toInt() to peak + } + fun stopPlayback() { val generation = playbackGeneration try { @@ -407,8 +765,9 @@ class AudioManager(private val appContext: Context) { } preferredBtDevice = null + preferredBtInputDevice = null + preferredBtOutputDevice = null - // 필요하면 모드 원복 (기기 따라 유지해도 되지만 안전하게 NORMAL 추천) sysAm.mode = android.media.AudioManager.MODE_NORMAL Log.d(TAG, "Audio capture stopped") diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt index 28e56bca..81a7e7f0 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/gemini/GeminiLiveService.kt @@ -358,6 +358,7 @@ class GeminiLiveService { val base64Data = inlineData.optString("data", "") if (base64Data.isNotEmpty()) { val audioData = Base64.decode(base64Data, Base64.DEFAULT) + Log.d(TAG, "Audio received bytes=${audioData.size}") if (!_isModelSpeaking.value) { _isModelSpeaking.value = true if (lastUserSpeechEnd > 0 && 
!responseLatencyLogged) { diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt index 9429ff6c..7c8261c5 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/settings/SettingsManager.kt @@ -45,7 +45,7 @@ object SettingsManager { set(value) = prefs.edit().putString("webrtcSignalingURL", value).apply() var videoStreamingEnabled: Boolean - get() = prefs.getBoolean("videoStreamingEnabled", true) + get() = prefs.getBoolean("videoStreamingEnabled", false) set(value) = prefs.edit().putBoolean("videoStreamingEnabled", value).apply() var proactiveNotificationsEnabled: Boolean diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt index fb2a44b5..1c58add3 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/ui/SettingsScreen.kt @@ -134,7 +134,10 @@ fun SettingsScreen( Spacer(modifier = Modifier.width(12.dp)) Switch( checked = videoStreamingEnabled, - onCheckedChange = { videoStreamingEnabled = it }, + onCheckedChange = { + videoStreamingEnabled = it + SettingsManager.videoStreamingEnabled = it + }, ) } @@ -156,7 +159,10 @@ fun SettingsScreen( Spacer(modifier = Modifier.width(12.dp)) Switch( checked = demoSpeakerModeEnabled, - onCheckedChange = { demoSpeakerModeEnabled = it }, + 
onCheckedChange = { + demoSpeakerModeEnabled = it + SettingsManager.demoSpeakerModeEnabled = it + }, ) } @@ -235,7 +241,10 @@ fun SettingsScreen( Spacer(modifier = Modifier.width(12.dp)) Switch( checked = proactiveNotificationsEnabled, - onCheckedChange = { proactiveNotificationsEnabled = it }, + onCheckedChange = { + proactiveNotificationsEnabled = it + SettingsManager.proactiveNotificationsEnabled = it + }, ) } diff --git a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt index b4c409f7..0feb7772 100644 --- a/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt +++ b/samples/CameraAccessAndroid/app/src/main/java/com/meta/wearable/dat/externalsampleapps/cameraaccess/wearables/WearablesViewModel.kt @@ -26,7 +26,6 @@ import com.meta.wearable.dat.core.selectors.DeviceSelector import com.meta.wearable.dat.core.types.DeviceIdentifier import com.meta.wearable.dat.core.types.RegistrationState import com.meta.wearable.dat.mockdevice.MockDeviceKit -import com.meta.wearable.dat.externalsampleapps.cameraaccess.settings.SettingsManager import kotlinx.collections.immutable.toImmutableList import kotlinx.coroutines.Job import kotlinx.coroutines.flow.MutableStateFlow @@ -127,12 +126,10 @@ class WearablesViewModel(application: Application) : AndroidViewModel(applicatio if (_uiState.value.isStreaming) { return } - SettingsManager.videoStreamingEnabled = true _uiState.update { it.copy(isStreaming = true, isPhoneMode = false) } } fun navigateToPhoneMode() { - SettingsManager.videoStreamingEnabled = true _uiState.update { it.copy(isStreaming = true, isPhoneMode = true) } }