Commit 4091520

Merge pull request #22 from DeepFundAI/ls-dev
Ls dev
2 parents 3e1ddf5 + 26a3ff8 commit 4091520

18 files changed: +381 -73 lines changed


assets/entitlements.mac.plist

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
   <true/>

   <!-- Allows access to microphone for speech recognition -->
-  <key>com.apple.security.device.microphone</key>
+  <key>com.apple.security.device.audio-input</key>
   <true/>

   <!-- Allows access to camera -->
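For background, com.apple.security.device.audio-input is the Hardened Runtime entitlement that governs microphone access in notarized macOS builds, while com.apple.security.device.microphone is the App Sandbox counterpart; switching to the former is presumably what allows the new voice-input feature to record audio with this packaging.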

electron/main/index.ts

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ import { EkoService } from "./services/eko-service";
 import { ServerManager } from "./services/server-manager";
 import { MainWindowManager } from "./windows/main-window";
 import { taskScheduler } from "./services/task-scheduler";
-import { windowContextManager, type WindowContext } from "./services/window-context-manager";
+import { windowContextManager } from "./services/window-context-manager";
 import { cwd } from "node:process";
 import { registerAllIpcHandlers } from "./ipc";

@@ -145,7 +145,7 @@ function setupMainWindowCloseHandler(window: BrowserWindow, service: EkoService)
     const allTaskIds = service['eko']?.getAllTaskId() || [];
     await service.abortAllTasks();

-    allTaskIds.forEach(taskId => {
+    allTaskIds.forEach((taskId: string) => {
       window.webContents.send('task-aborted-by-system', {
         taskId,
         reason: 'User closed window, task terminated',

electron/main/services/task-window-manager.ts

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ export class TaskWindowManager {
       throw new Error(`Maximum concurrent tasks reached (${this.maxConcurrentTasks})`);
     }

-    const taskWindow = createWindow(`http://localhost:5173/main?taskId=${taskId}&executionId=${executionId}`)
+    const taskWindow = await createWindow(`http://localhost:5173/main?taskId=${taskId}&executionId=${executionId}`)
     const detailView = createView(`https://www.google.com`, "view", '2');

     taskWindow.contentView.addChildView(detailView);
@@ -109,7 +109,7 @@ export class TaskWindowManager {
     const allTaskIds = ekoService['eko']?.getAllTaskId() || [];
     await ekoService.abortAllTasks();

-    allTaskIds.forEach(tid => {
+    allTaskIds.forEach((tid: string) => {
       taskWindow.webContents.send('task-aborted-by-system', {
         taskId: tid,
         reason: 'User closed scheduled task window, task terminated',

electron/main/ui/window.ts

Lines changed: 27 additions & 2 deletions
@@ -1,9 +1,22 @@
-import { app, BrowserWindow } from 'electron';
+import { app, BrowserWindow, systemPreferences } from 'electron';
 import path from 'node:path';
 import { isDev } from '../utils/constants';
 import { store } from '../utils/store';

-export function createWindow(rendererURL: string) {
+async function setupMacPermissions() {
+  // macOS requires explicit microphone permission request
+  if (process.platform === 'darwin') {
+    const status = systemPreferences.getMediaAccessStatus('microphone');
+    console.log('[Window] Current microphone permission status:', status);
+
+    if (status !== 'granted') {
+      const result = await systemPreferences.askForMediaAccess('microphone');
+      console.log('[Window] Permission request result:', result);
+    }
+  }
+}
+
+export async function createWindow(rendererURL: string) {
   const preloadPath = isDev
     ? path.join(app.getAppPath(), '..', 'preload', 'index.cjs')
     : path.join(app.getAppPath(), 'dist', 'electron', 'preload', 'index.cjs');
@@ -23,6 +36,18 @@ export function createWindow(rendererURL: string) {
     },
   });

+  win.webContents.session.setPermissionRequestHandler((_webContents, permission, callback) => {
+    // Allow media permissions (includes microphone and camera)
+    if (permission === 'media') {
+      console.log(`[Window] Granting ${permission} permission`);
+      setupMacPermissions();
+      callback(true);
+    } else {
+      console.log(`[Window] Denying ${permission} permission`);
+      callback(false);
+    }
+  });
+
   win.loadURL(rendererURL).catch(err => {
     console.error('[Window] Failed to load URL:', err);
   });
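The handler above only decides whether Electron forwards a media request; the renderer still has to open the microphone itself. As a minimal sketch (not part of this commit, and the repo's speech-recognition service may acquire audio differently), the renderer side would look roughly like this:

// Renderer-side sketch: requesting a microphone stream. This call is what
// triggers the 'media' permission request that setPermissionRequestHandler
// approves in the main process.
async function getMicrophoneStream(): Promise<MediaStream> {
  return navigator.mediaDevices.getUserMedia({ audio: true, video: false });
}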

package.json

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,6 @@
 {
   "name": "ai-browser",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "description": "DeepFundAI Browser - AI-Powered Intelligent Browser",
   "author": "Shuai Liu <lsustc@mail.ustc.edu.cn>",
   "license": "MIT",
@@ -29,8 +29,8 @@
   "dependencies": {
     "@ant-design/cssinjs": "^1.23.0",
     "@ant-design/icons": "5.x",
-    "@jarvis-agent/core": "^0.1.4",
-    "@jarvis-agent/electron": "^0.1.9",
+    "@jarvis-agent/core": "^0.1.5",
+    "@jarvis-agent/electron": "^0.1.10",
     "@jest/globals": "^30.1.2",
     "@react-spring/web": "^10.0.1",
     "antd": "^5.26.5",

pnpm-lock.yaml

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default.
39.3 MB
Binary file not shown.

src/components/chat/ChatInputArea.tsx

Lines changed: 32 additions & 1 deletion
@@ -1,7 +1,10 @@
 import React from 'react';
-import { Input, Button } from 'antd';
+import { Input, Button, App } from 'antd';
+import { AudioOutlined, AudioMutedOutlined } from '@ant-design/icons';
 import { SendMessage, CancleTask } from '@/icons/deepfundai-icons';
 import { useTranslation } from 'react-i18next';
+import { useVoiceInput } from '@/hooks/useVoiceInput';
+import { logger } from '@/utils/logger';

 interface ChatInputAreaProps {
   query: string;
@@ -23,6 +26,7 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
   onCancel,
 }) => {
   const { t } = useTranslation('main');
+  const { message: antdMessage } = App.useApp();

   const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
     if (e.key === 'Enter' && !e.shiftKey) {
@@ -31,6 +35,18 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
     }
   };

+  // Voice input hook
+  const { isRecording, toggleRecording } = useVoiceInput({
+    onTextRecognized: (text) => {
+      // Append recognized text to input
+      onQueryChange(query ? `${query} ${text}` : text);
+    },
+    onError: (error) => {
+      antdMessage.error(t('voice_input_error'));
+      logger.error('Voice input error', error, 'ChatInputArea');
+    },
+  });
+
   return (
     <div className='h-30 gradient-border relative'>
       <Input.TextArea
@@ -48,6 +64,21 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
         }}
       />
       <div className='absolute bottom-4 right-4 flex items-center gap-2'>
+        {!isCurrentTaskRunning && (
+          <Button
+            type='text'
+            onClick={(e) => {
+              e.preventDefault();
+              e.stopPropagation();
+              toggleRecording();
+            }}
+            disabled={isCurrentTaskRunning}
+            className='!p-0 !w-8 !h-8 !min-w-0 flex items-center justify-center text-lg'
+            title={isRecording ? t('voice_input_stop') : t('voice_input_start')}
+          >
+            {isRecording ? <AudioOutlined /> : <AudioMutedOutlined />}
+          </Button>
+        )}
         {isCurrentTaskRunning ? (
           <Button
             type='text'
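The new button and error toast reference three keys in the 'main' i18n namespace: voice_input_start, voice_input_stop, and voice_input_error. The locale files are not part of this diff; a hypothetical English entry might look like the sketch below (the wording is illustrative, not the project's actual copy):

// Hypothetical excerpt of the English 'main' namespace resources; the real
// locale files are not shown in this commit and may use different wording.
const mainEn = {
  voice_input_start: 'Start voice input',
  voice_input_stop: 'Stop voice input',
  voice_input_error: 'Voice recognition failed, please try again',
};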

src/hooks/useVoiceInput.ts

Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
import { useState, useRef, useCallback, useEffect } from 'react';
import {
  initSpeechRecognitionWithProvider,
  startSpeechRecognition,
  stopSpeechRecognition,
  cleanupSpeechRecognition
} from '@/services/speech-recognition';
import { SpeechRecognitionConfig } from '@/models/speech-recognition/speech-recognition-base';
import { logger } from '@/utils/logger';
import { useLanguageStore } from '@/stores/languageStore';

export type VoiceInputStatus = 'idle' | 'recording' | 'error';

interface UseVoiceInputOptions {
  onTextRecognized?: (text: string) => void;
  onError?: (error: string) => void;
}

/**
 * Hook for voice input functionality
 * Manages speech recognition state and provides control methods
 */
export const useVoiceInput = ({ onTextRecognized, onError }: UseVoiceInputOptions = {}) => {
  const [status, setStatus] = useState<VoiceInputStatus>('idle');
  const isInitialized = useRef(false);
  const isRecordingRef = useRef(false); // Track actual recording state
  const onTextRecognizedRef = useRef(onTextRecognized);
  const onErrorRef = useRef(onError);
  const { language } = useLanguageStore();

  // Update refs when callbacks change
  useEffect(() => {
    onTextRecognizedRef.current = onTextRecognized;
    onErrorRef.current = onError;
  }, [onTextRecognized, onError]);

  // Initialize speech recognition and reinitialize when language changes
  useEffect(() => {
    let mounted = true;
    let initializationStarted = false;

    // Async initialization
    (async () => {
      try {
        // Cleanup previous instance and wait for it to complete
        await cleanupSpeechRecognition();

        // Check if still mounted after cleanup
        if (!mounted) return;

        // Select model based on current language
        const modelType = language === 'zh-CN' ? 'small-cn' : 'small-en';

        const config: SpeechRecognitionConfig = {
          provider: 'vosk',
          modelType
        };

        initializationStarted = true;
        await initSpeechRecognitionWithProvider(config, (text: string) => {
          if (onTextRecognizedRef.current) {
            onTextRecognizedRef.current(text);
          }
        });

        if (mounted) {
          isInitialized.current = true;
        }
      } catch (error) {
        logger.error('Failed to initialize speech recognition', error, 'VoiceInput');
        if (mounted && onErrorRef.current) {
          onErrorRef.current('Speech recognition initialization failed');
        }
      }
    })();

    // Cleanup on unmount or language change
    return () => {
      mounted = false;
      isRecordingRef.current = false;
      if (initializationStarted) {
        cleanupSpeechRecognition();
      }
    };
  }, [language]); // Reinitialize when language changes

  /**
   * Start voice recording
   */
  const startRecording = useCallback(async () => {
    if (isRecordingRef.current) return;

    if (!isInitialized.current) {
      const errorMsg = 'Speech recognition not initialized';
      logger.error(errorMsg, undefined, 'VoiceInput');
      if (onErrorRef.current) {
        onErrorRef.current(errorMsg);
      }
      setStatus('error');
      return;
    }

    try {
      isRecordingRef.current = true;
      setStatus('recording');
      await startSpeechRecognition();
    } catch (error) {
      logger.error('Failed to start recording', error, 'VoiceInput');
      isRecordingRef.current = false;
      setStatus('idle');
      if (onErrorRef.current) {
        onErrorRef.current('Failed to start voice recording');
      }
    }
  }, []);

  /**
   * Stop voice recording
   */
  const stopRecording = useCallback(async () => {
    if (!isRecordingRef.current) return;

    try {
      isRecordingRef.current = false;
      await stopSpeechRecognition();
      setStatus('idle');
    } catch (error) {
      logger.error('Failed to stop recording', error, 'VoiceInput');
      isRecordingRef.current = false;
      setStatus('idle');
      if (onErrorRef.current) {
        onErrorRef.current('Failed to stop voice recording');
      }
    }
  }, []);

  /**
   * Toggle recording state
   */
  const toggleRecording = useCallback(async () => {
    if (isRecordingRef.current) {
      await stopRecording();
    } else {
      await startRecording();
    }
  }, [startRecording, stopRecording]);

  return {
    status,
    isRecording: status === 'recording',
    startRecording,
    stopRecording,
    toggleRecording,
  };
};
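useVoiceInput delegates all audio work to '@/services/speech-recognition', which is not included in this diff. Inferred purely from how the hook calls it, that module's surface is roughly the sketch below; the actual signatures, and the SpeechRecognitionConfig definition imported from '@/models/speech-recognition/speech-recognition-base', may differ.

// Assumed API of '@/services/speech-recognition', reconstructed from usage in
// useVoiceInput.ts; not the repository's actual definitions.
export interface SpeechRecognitionConfig {
  provider: 'vosk';                    // only 'vosk' is used by the hook
  modelType: 'small-cn' | 'small-en';  // chosen from the active UI language
}

// Loads the model and registers a callback invoked with each recognized phrase.
export declare function initSpeechRecognitionWithProvider(
  config: SpeechRecognitionConfig,
  onText: (text: string) => void
): Promise<void>;

export declare function startSpeechRecognition(): Promise<void>;   // begin capturing audio
export declare function stopSpeechRecognition(): Promise<void>;    // stop capturing audio
export declare function cleanupSpeechRecognition(): Promise<void>; // release model and audio resources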

src/icons/deepfundai-icons.tsx

Lines changed: 1 addition & 1 deletion
@@ -215,7 +215,7 @@ const StepUpDownSvg = () => (

 // Send message
 const SendMessageSvg = () => (
-  <svg xmlns="http://www.w3.org/2000/svg" width="24px" height="24px" viewBox="0 0 24 24" version="1.1">
+  <svg xmlns="http://www.w3.org/2000/svg" width="20px" height="20px" viewBox="0 0 24 24" version="1.1">
   <g id="version-8-26" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
     <g id="group-50backup-4" transform="translate(-2.000000, -2.000000)">
       <g id="group-48" transform="translate(2.000000, 2.000000)">
