Commit 4091520

Merge pull request #22 from DeepFundAI/ls-dev
Ls dev
2 parents 3e1ddf5 + 26a3ff8 commit 4091520

18 files changed: +381 -73 lines changed


assets/entitlements.mac.plist

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
   <true/>

   <!-- Allows access to microphone for speech recognition -->
-  <key>com.apple.security.device.microphone</key>
+  <key>com.apple.security.device.audio-input</key>
   <true/>

   <!-- Allows access to camera -->
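For background, com.apple.security.device.audio-input is the Hardened Runtime entitlement that governs microphone access in notarized macOS builds, while com.apple.security.device.microphone is the App Sandbox counterpart; switching to the former is presumably what allows the new voice-input feature to record audio with this packaging.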

electron/main/index.ts

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ import { EkoService } from "./services/eko-service";
 import { ServerManager } from "./services/server-manager";
 import { MainWindowManager } from "./windows/main-window";
 import { taskScheduler } from "./services/task-scheduler";
-import { windowContextManager, type WindowContext } from "./services/window-context-manager";
+import { windowContextManager } from "./services/window-context-manager";
 import { cwd } from "node:process";
 import { registerAllIpcHandlers } from "./ipc";

@@ -145,7 +145,7 @@ function setupMainWindowCloseHandler(window: BrowserWindow, service: EkoService)
     const allTaskIds = service['eko']?.getAllTaskId() || [];
     await service.abortAllTasks();

-    allTaskIds.forEach(taskId => {
+    allTaskIds.forEach((taskId: string) => {
       window.webContents.send('task-aborted-by-system', {
         taskId,
         reason: 'User closed window, task terminated',

electron/main/services/task-window-manager.ts

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ export class TaskWindowManager {
       throw new Error(`Maximum concurrent tasks reached (${this.maxConcurrentTasks})`);
     }

-    const taskWindow = createWindow(`http://localhost:5173/main?taskId=${taskId}&executionId=${executionId}`)
+    const taskWindow = await createWindow(`http://localhost:5173/main?taskId=${taskId}&executionId=${executionId}`)
     const detailView = createView(`https://www.google.com`, "view", '2');

     taskWindow.contentView.addChildView(detailView);
@@ -109,7 +109,7 @@ export class TaskWindowManager {
     const allTaskIds = ekoService['eko']?.getAllTaskId() || [];
     await ekoService.abortAllTasks();

-    allTaskIds.forEach(tid => {
+    allTaskIds.forEach((tid: string) => {
       taskWindow.webContents.send('task-aborted-by-system', {
         taskId: tid,
         reason: 'User closed scheduled task window, task terminated',

electron/main/ui/window.ts

Lines changed: 27 additions & 2 deletions
@@ -1,9 +1,22 @@
-import { app, BrowserWindow } from 'electron';
+import { app, BrowserWindow, systemPreferences } from 'electron';
 import path from 'node:path';
 import { isDev } from '../utils/constants';
 import { store } from '../utils/store';

-export function createWindow(rendererURL: string) {
+async function setupMacPermissions() {
+  // macOS requires explicit microphone permission request
+  if (process.platform === 'darwin') {
+    const status = systemPreferences.getMediaAccessStatus('microphone');
+    console.log('[Window] Current microphone permission status:', status);
+
+    if (status !== 'granted') {
+      const result = await systemPreferences.askForMediaAccess('microphone');
+      console.log('[Window] Permission request result:', result);
+    }
+  }
+}
+
+export async function createWindow(rendererURL: string) {
   const preloadPath = isDev
     ? path.join(app.getAppPath(), '..', 'preload', 'index.cjs')
     : path.join(app.getAppPath(), 'dist', 'electron', 'preload', 'index.cjs');
@@ -23,6 +36,18 @@ export function createWindow(rendererURL: string) {
     },
   });

+  win.webContents.session.setPermissionRequestHandler((_webContents, permission, callback) => {
+    // Allow media permissions (includes microphone and camera)
+    if (permission === 'media') {
+      console.log(`[Window] Granting ${permission} permission`);
+      setupMacPermissions();
+      callback(true);
+    } else {
+      console.log(`[Window] Denying ${permission} permission`);
+      callback(false);
+    }
+  });
+
   win.loadURL(rendererURL).catch(err => {
     console.error('[Window] Failed to load URL:', err);
   });
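The handler above only decides whether Electron forwards a media request; the renderer still has to open the microphone itself. As a minimal sketch (not part of this commit, and the repo's speech-recognition service may acquire audio differently), the renderer side would look roughly like this:

// Renderer-side sketch: requesting a microphone stream. This call is what
// triggers the 'media' permission request that setPermissionRequestHandler
// approves in the main process.
async function getMicrophoneStream(): Promise<MediaStream> {
  return navigator.mediaDevices.getUserMedia({ audio: true, video: false });
}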

package.json

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,6 @@
 {
   "name": "ai-browser",
-  "version": "0.0.11",
+  "version": "0.0.12",
   "description": "DeepFundAI Browser - AI-Powered Intelligent Browser",
   "author": "Shuai Liu <lsustc@mail.ustc.edu.cn>",
   "license": "MIT",
@@ -29,8 +29,8 @@
   "dependencies": {
     "@ant-design/cssinjs": "^1.23.0",
     "@ant-design/icons": "5.x",
-    "@jarvis-agent/core": "^0.1.4",
-    "@jarvis-agent/electron": "^0.1.9",
+    "@jarvis-agent/core": "^0.1.5",
+    "@jarvis-agent/electron": "^0.1.10",
     "@jest/globals": "^30.1.2",
     "@react-spring/web": "^10.0.1",
     "antd": "^5.26.5",

pnpm-lock.yaml

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default.
39.3 MB
Binary file not shown.

src/components/chat/ChatInputArea.tsx

Lines changed: 32 additions & 1 deletion
@@ -1,7 +1,10 @@
 import React from 'react';
-import { Input, Button } from 'antd';
+import { Input, Button, App } from 'antd';
+import { AudioOutlined, AudioMutedOutlined } from '@ant-design/icons';
 import { SendMessage, CancleTask } from '@/icons/deepfundai-icons';
 import { useTranslation } from 'react-i18next';
+import { useVoiceInput } from '@/hooks/useVoiceInput';
+import { logger } from '@/utils/logger';

 interface ChatInputAreaProps {
   query: string;
@@ -23,6 +26,7 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
   onCancel,
 }) => {
   const { t } = useTranslation('main');
+  const { message: antdMessage } = App.useApp();

   const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
     if (e.key === 'Enter' && !e.shiftKey) {
@@ -31,6 +35,18 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
     }
   };

+  // Voice input hook
+  const { isRecording, toggleRecording } = useVoiceInput({
+    onTextRecognized: (text) => {
+      // Append recognized text to input
+      onQueryChange(query ? `${query} ${text}` : text);
+    },
+    onError: (error) => {
+      antdMessage.error(t('voice_input_error'));
+      logger.error('Voice input error', error, 'ChatInputArea');
+    },
+  });
+
   return (
     <div className='h-30 gradient-border relative'>
       <Input.TextArea
@@ -48,6 +64,21 @@ export const ChatInputArea: React.FC<ChatInputAreaProps> = ({
         }}
       />
       <div className='absolute bottom-4 right-4 flex items-center gap-2'>
+        {!isCurrentTaskRunning && (
+          <Button
+            type='text'
+            onClick={(e) => {
+              e.preventDefault();
+              e.stopPropagation();
+              toggleRecording();
+            }}
+            disabled={isCurrentTaskRunning}
+            className='!p-0 !w-8 !h-8 !min-w-0 flex items-center justify-center text-lg'
+            title={isRecording ? t('voice_input_stop') : t('voice_input_start')}
+          >
+            {isRecording ? <AudioOutlined /> : <AudioMutedOutlined />}
+          </Button>
+        )}
         {isCurrentTaskRunning ? (
           <Button
             type='text'
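The new button and error toast reference three keys in the 'main' i18n namespace: voice_input_start, voice_input_stop, and voice_input_error. The locale files are not part of this diff; a hypothetical English entry might look like the sketch below (the wording is illustrative, not the project's actual copy):

// Hypothetical excerpt of the English 'main' namespace resources; the real
// locale files are not shown in this commit and may use different wording.
const mainEn = {
  voice_input_start: 'Start voice input',
  voice_input_stop: 'Stop voice input',
  voice_input_error: 'Voice recognition failed, please try again',
};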

src/hooks/useVoiceInput.ts

Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
import { useState, useRef, useCallback, useEffect } from 'react';
import {
  initSpeechRecognitionWithProvider,
  startSpeechRecognition,
  stopSpeechRecognition,
  cleanupSpeechRecognition
} from '@/services/speech-recognition';
import { SpeechRecognitionConfig } from '@/models/speech-recognition/speech-recognition-base';
import { logger } from '@/utils/logger';
import { useLanguageStore } from '@/stores/languageStore';

export type VoiceInputStatus = 'idle' | 'recording' | 'error';

interface UseVoiceInputOptions {
  onTextRecognized?: (text: string) => void;
  onError?: (error: string) => void;
}

/**
 * Hook for voice input functionality
 * Manages speech recognition state and provides control methods
 */
export const useVoiceInput = ({ onTextRecognized, onError }: UseVoiceInputOptions = {}) => {
  const [status, setStatus] = useState<VoiceInputStatus>('idle');
  const isInitialized = useRef(false);
  const isRecordingRef = useRef(false); // Track actual recording state
  const onTextRecognizedRef = useRef(onTextRecognized);
  const onErrorRef = useRef(onError);
  const { language } = useLanguageStore();

  // Update refs when callbacks change
  useEffect(() => {
    onTextRecognizedRef.current = onTextRecognized;
    onErrorRef.current = onError;
  }, [onTextRecognized, onError]);

  // Initialize speech recognition and reinitialize when language changes
  useEffect(() => {
    let mounted = true;
    let initializationStarted = false;

    // Async initialization
    (async () => {
      try {
        // Cleanup previous instance and wait for it to complete
        await cleanupSpeechRecognition();

        // Check if still mounted after cleanup
        if (!mounted) return;

        // Select model based on current language
        const modelType = language === 'zh-CN' ? 'small-cn' : 'small-en';

        const config: SpeechRecognitionConfig = {
          provider: 'vosk',
          modelType
        };

        initializationStarted = true;
        await initSpeechRecognitionWithProvider(config, (text: string) => {
          if (onTextRecognizedRef.current) {
            onTextRecognizedRef.current(text);
          }
        });

        if (mounted) {
          isInitialized.current = true;
        }
      } catch (error) {
        logger.error('Failed to initialize speech recognition', error, 'VoiceInput');
        if (mounted && onErrorRef.current) {
          onErrorRef.current('Speech recognition initialization failed');
        }
      }
    })();

    // Cleanup on unmount or language change
    return () => {
      mounted = false;
      isRecordingRef.current = false;
      if (initializationStarted) {
        cleanupSpeechRecognition();
      }
    };
  }, [language]); // Reinitialize when language changes

  /**
   * Start voice recording
   */
  const startRecording = useCallback(async () => {
    if (isRecordingRef.current) return;

    if (!isInitialized.current) {
      const errorMsg = 'Speech recognition not initialized';
      logger.error(errorMsg, undefined, 'VoiceInput');
      if (onErrorRef.current) {
        onErrorRef.current(errorMsg);
      }
      setStatus('error');
      return;
    }

    try {
      isRecordingRef.current = true;
      setStatus('recording');
      await startSpeechRecognition();
    } catch (error) {
      logger.error('Failed to start recording', error, 'VoiceInput');
      isRecordingRef.current = false;
      setStatus('idle');
      if (onErrorRef.current) {
        onErrorRef.current('Failed to start voice recording');
      }
    }
  }, []);

  /**
   * Stop voice recording
   */
  const stopRecording = useCallback(async () => {
    if (!isRecordingRef.current) return;

    try {
      isRecordingRef.current = false;
      await stopSpeechRecognition();
      setStatus('idle');
    } catch (error) {
      logger.error('Failed to stop recording', error, 'VoiceInput');
      isRecordingRef.current = false;
      setStatus('idle');
      if (onErrorRef.current) {
        onErrorRef.current('Failed to stop voice recording');
      }
    }
  }, []);

  /**
   * Toggle recording state
   */
  const toggleRecording = useCallback(async () => {
    if (isRecordingRef.current) {
      await stopRecording();
    } else {
      await startRecording();
    }
  }, [startRecording, stopRecording]);

  return {
    status,
    isRecording: status === 'recording',
    startRecording,
    stopRecording,
    toggleRecording,
  };
};
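useVoiceInput delegates all audio work to '@/services/speech-recognition', which is not included in this diff. Inferred purely from how the hook calls it, that module's surface is roughly the sketch below; the actual signatures, and the SpeechRecognitionConfig definition imported from '@/models/speech-recognition/speech-recognition-base', may differ.

// Assumed API of '@/services/speech-recognition', reconstructed from usage in
// useVoiceInput.ts; not the repository's actual definitions.
export interface SpeechRecognitionConfig {
  provider: 'vosk';                    // only 'vosk' is used by the hook
  modelType: 'small-cn' | 'small-en';  // chosen from the active UI language
}

// Loads the model and registers a callback invoked with each recognized phrase.
export declare function initSpeechRecognitionWithProvider(
  config: SpeechRecognitionConfig,
  onText: (text: string) => void
): Promise<void>;

export declare function startSpeechRecognition(): Promise<void>;   // begin capturing audio
export declare function stopSpeechRecognition(): Promise<void>;    // stop capturing audio
export declare function cleanupSpeechRecognition(): Promise<void>; // release model and audio resources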

src/icons/deepfundai-icons.tsx

Lines changed: 1 addition & 1 deletion
@@ -215,7 +215,7 @@ const StepUpDownSvg = () => (

 // Send message
 const SendMessageSvg = () => (
-  <svg xmlns="http://www.w3.org/2000/svg" width="24px" height="24px" viewBox="0 0 24 24" version="1.1">
+  <svg xmlns="http://www.w3.org/2000/svg" width="20px" height="20px" viewBox="0 0 24 24" version="1.1">
   <g id="version-8-26" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
     <g id="group-50backup-4" transform="translate(-2.000000, -2.000000)">
       <g id="group-48" transform="translate(2.000000, 2.000000)">
