Skip to content

Commit 47adf9c

Browse files
committed
feat: support language-based speech recognition model switching
- Add English speech model (vosk-model-small-en-us-0.15.zip)
- Auto-switch models when language changes (small-cn for Chinese, small-en for English)
- Add language-specific text processing (remove spaces for Chinese, keep for English)
- Fix race condition by serializing cleanup and initialization operations
- Clean up debug logging while preserving dynamic prompts
1 parent e9fff48 commit 47adf9c

File tree

5 files changed

+49
-27
lines changed

5 files changed

+49
-27
lines changed
39.3 MB
Binary file not shown.

src/hooks/useVoiceInput.ts

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
} from '@/services/speech-recognition';
88
import { SpeechRecognitionConfig } from '@/models/speech-recognition/speech-recognition-base';
99
import { logger } from '@/utils/logger';
10+
import { useLanguageStore } from '@/stores/languageStore';
1011

1112
export type VoiceInputStatus = 'idle' | 'recording' | 'error';
1213

@@ -25,27 +26,37 @@ export const useVoiceInput = ({ onTextRecognized, onError }: UseVoiceInputOption
2526
const isRecordingRef = useRef(false); // Track actual recording state
2627
const onTextRecognizedRef = useRef(onTextRecognized);
2728
const onErrorRef = useRef(onError);
29+
const { language } = useLanguageStore();
2830

2931
// Update refs when callbacks change
3032
useEffect(() => {
3133
onTextRecognizedRef.current = onTextRecognized;
3234
onErrorRef.current = onError;
3335
}, [onTextRecognized, onError]);
3436

35-
// Initialize speech recognition on mount (only once)
37+
// Initialize speech recognition and reinitialize when language changes
3638
useEffect(() => {
37-
if (isInitialized.current) return;
38-
39-
const config: SpeechRecognitionConfig = {
40-
provider: 'vosk',
41-
modelType: 'small-cn'
42-
};
43-
4439
let mounted = true;
40+
let initializationStarted = false;
4541

4642
// Async initialization
4743
(async () => {
4844
try {
45+
// Cleanup previous instance and wait for it to complete
46+
await cleanupSpeechRecognition();
47+
48+
// Check if still mounted after cleanup
49+
if (!mounted) return;
50+
51+
// Select model based on current language
52+
const modelType = language === 'zh-CN' ? 'small-cn' : 'small-en';
53+
54+
const config: SpeechRecognitionConfig = {
55+
provider: 'vosk',
56+
modelType
57+
};
58+
59+
initializationStarted = true;
4960
await initSpeechRecognitionWithProvider(config, (text: string) => {
5061
if (onTextRecognizedRef.current) {
5162
onTextRecognizedRef.current(text);
@@ -63,14 +74,15 @@ export const useVoiceInput = ({ onTextRecognized, onError }: UseVoiceInputOption
6374
}
6475
})();
6576

66-
// Cleanup on unmount
77+
// Cleanup on unmount or language change
6778
return () => {
6879
mounted = false;
6980
isRecordingRef.current = false;
70-
cleanupSpeechRecognition();
71-
isInitialized.current = false;
81+
if (initializationStarted) {
82+
cleanupSpeechRecognition();
83+
}
7284
};
73-
}, []); // Empty dependency array - only run once
85+
}, [language]); // Reinitialize when language changes
7486

7587
/**
7688
* Start voice recording

src/models/speech-recognition/speech-recognition-base.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export interface SpeechRecognitionConfig {
99
appId?: string;
1010
apiSecret?: string;
1111
xfApiKey?: string;
12-
modelType?: 'small-cn' | 'standard-cn';
12+
modelType?: 'small-cn' | 'small-en';
1313
}
1414

1515
export interface SpeechRecognitionError {

src/models/speech-recognition/speech-recognition-vosk.ts

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { SpeechRecognitionBase, SpeechRecognitionConfig } from "./speech-recogni
33
// Model configuration
44
const MODEL_CONFIG = {
55
'small-cn': '/models/vosk-model-small-cn-0.22.tar.gz',
6-
'standard-cn': '/models/vosk-model-cn-0.22.tar.gz'
6+
'small-en': '/models/vosk-model-small-en-us-0.15.zip'
77
} as const;
88

99
// Audio configuration
@@ -51,9 +51,8 @@ export class SpeechRecognitionVosk implements SpeechRecognitionBase {
5151
return;
5252
}
5353

54-
// 3. Load model
55-
const modelPath = MODEL_CONFIG[this.config.modelType || 'small-cn'];
56-
console.log(`🎤 Loading speech model: ${modelPath}`);
54+
// Load model based on config
55+
const modelPath = MODEL_CONFIG[this.config.modelType || 'small-en'];
5756

5857
// Ensure Vosk is globally available
5958
const Vosk = (window as any).Vosk;
@@ -121,12 +120,11 @@ export class SpeechRecognitionVosk implements SpeechRecognitionBase {
121120
// Set recognition result callback
122121
try {
123122
this.recognizer.on('result', (message: any) => {
124-
console.log('🎤 Received recognition result event:', message);
125123
const text = message.result?.text;
126124
if (text && text.trim()) {
127-
// Remove all spaces for Chinese text
128-
const cleanedText = text.replace(/\s+/g, '');
129-
console.log('🎤 Speech recognition result:', cleanedText);
125+
// Remove spaces only for Chinese, keep spaces for English
126+
const isChinese = this.config.modelType === 'small-cn';
127+
const cleanedText = isChinese ? text.replace(/\s+/g, '') : text.trim();
130128
if (cleanedText && this.onRecognizedCallback) {
131129
this.onRecognizedCallback(cleanedText);
132130
}

src/services/speech-recognition.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ import { logger } from "@/utils/logger";
77

88
let speechRecognition: SpeechRecognitionBase | null = null;
99
let initializationPromise: Promise<void> | null = null;
10+
let currentCleanup: Promise<void> | null = null;
1011

1112
// New initialization function, supports multiple providers (async to wait for Vosk model loading)
1213
export async function initSpeechRecognitionWithProvider(config: SpeechRecognitionConfig, onRecognized?: (text: string) => void): Promise<void> {
13-
// Return existing initialization promise if already initializing
14-
if (initializationPromise) {
15-
return initializationPromise;
14+
// Wait for ongoing cleanup to complete
15+
if (currentCleanup) {
16+
await currentCleanup;
1617
}
1718

1819
initializationPromise = (async () => {
@@ -78,7 +79,18 @@ export async function stopSpeechRecognition() {
7879

7980
// Cleanup resources
8081
export async function cleanupSpeechRecognition() {
81-
await speechRecognition?.cleanup();
82-
speechRecognition = null;
83-
initializationPromise = null;
82+
if (currentCleanup) {
83+
return currentCleanup;
84+
}
85+
86+
currentCleanup = (async () => {
87+
if (speechRecognition) {
88+
await speechRecognition.cleanup();
89+
speechRecognition = null;
90+
}
91+
initializationPromise = null;
92+
})();
93+
94+
await currentCleanup;
95+
currentCleanup = null;
8496
}

0 commit comments

Comments
 (0)