diff --git a/scripts/fill-missing-translations.mjs b/scripts/fill-missing-translations.mjs new file mode 100644 index 0000000..310ebd9 --- /dev/null +++ b/scripts/fill-missing-translations.mjs @@ -0,0 +1,55 @@ +// One-shot: fill any keys missing from a locale with the English value +// (i18next falls back to en at runtime anyway; this satisfies check:translations). +// Run: node scripts/fill-missing-translations.mjs +import { readFileSync, writeFileSync, readdirSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const localesDir = join( + dirname(fileURLToPath(import.meta.url)), + "..", + "src", + "i18n", + "locales", +); + +const en = JSON.parse( + readFileSync(join(localesDir, "en", "translation.json"), "utf8"), +); + +// Deep-merge: add keys from `ref` that are absent in `target`. Returns count added. +function fill(ref, target) { + let added = 0; + for (const key of Object.keys(ref)) { + if ( + typeof ref[key] === "object" && + ref[key] !== null && + !Array.isArray(ref[key]) + ) { + if (typeof target[key] !== "object" || target[key] === null) { + target[key] = {}; + } + added += fill(ref[key], target[key]); + } else if (!(key in target)) { + target[key] = ref[key]; + added += 1; + } + } + return added; +} + +for (const lang of readdirSync(localesDir)) { + if (lang === "en") continue; + const file = join(localesDir, lang, "translation.json"); + let data; + try { + data = JSON.parse(readFileSync(file, "utf8")); + } catch { + continue; + } + const added = fill(en, data); + if (added > 0) { + writeFileSync(file, JSON.stringify(data, null, 2) + "\n", "utf8"); + console.log(`${lang}: filled ${added} keys`); + } +} diff --git a/src-tauri/src/assistant/mod.rs b/src-tauri/src/assistant/mod.rs new file mode 100644 index 0000000..d90b5c8 --- /dev/null +++ b/src-tauri/src/assistant/mod.rs @@ -0,0 +1,58 @@ +use tauri::AppHandle; + +pub const DEFAULT_ASSISTANT_SYSTEM_PROMPT: &str = "You are Echo, a concise, friendly voice assistant. Answer in the same language the user used (Russian or English). Keep replies short and speakable — no markdown, no code fences, no bullet lists unless asked."; + +pub async fn ask_assistant(app: &AppHandle, user_text: String) -> Result { + let settings = crate::settings::get_settings(app); + + let provider = settings + .active_post_process_provider() + .ok_or_else(|| "no LLM provider configured".to_string())? + .clone(); + + let api_key = settings + .post_process_api_keys + .get(&provider.id) + .cloned() + .unwrap_or_default(); + let model = settings + .post_process_models + .get(&provider.id) + .cloned() + .unwrap_or_default(); + + if model.is_empty() { + return Err("no LLM model configured for the active provider".to_string()); + } + + let system = if settings.assistant_system_prompt.is_empty() { + DEFAULT_ASSISTANT_SYSTEM_PROMPT.to_string() + } else { + settings.assistant_system_prompt.clone() + }; + + let reply = crate::llm_client::send_chat_completion_with_schema( + &provider, + api_key, + &model, + user_text, + Some(system), + None, + None, + None, + ) + .await?; + + reply.ok_or_else(|| "empty reply from assistant".to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_prompt() { + assert!(!DEFAULT_ASSISTANT_SYSTEM_PROMPT.is_empty()); + assert!(DEFAULT_ASSISTANT_SYSTEM_PROMPT.contains("Echo")); + } +} diff --git a/src-tauri/src/commands/assistant.rs b/src-tauri/src/commands/assistant.rs new file mode 100644 index 0000000..7c027af --- /dev/null +++ b/src-tauri/src/commands/assistant.rs @@ -0,0 +1,5 @@ +#[tauri::command] +#[specta::specta] +pub async fn assistant_ask(app: tauri::AppHandle, text: String) -> Result { + crate::assistant::ask_assistant(&app, text).await +} diff --git a/src-tauri/src/commands/mod.rs b/src-tauri/src/commands/mod.rs index fa6ed90..1d7ac41 100644 --- a/src-tauri/src/commands/mod.rs +++ b/src-tauri/src/commands/mod.rs @@ -1,4 +1,5 @@ pub mod agent_bridge; +pub mod assistant; pub mod audio; pub mod coach; pub mod history; @@ -6,6 +7,7 @@ pub mod models; pub mod transcribe; pub mod transcription; pub mod tts; +pub mod tutor; use crate::settings::{get_settings, write_settings, AppSettings, LogLevel}; use crate::utils::cancel_current_operation; diff --git a/src-tauri/src/commands/tts.rs b/src-tauri/src/commands/tts.rs index 58392cd..d6cac4e 100644 --- a/src-tauri/src/commands/tts.rs +++ b/src-tauri/src/commands/tts.rs @@ -14,8 +14,9 @@ pub fn tts_speak( tts_manager: State>, text: String, voice_id: Option, + rate: Option, ) -> Result<(), String> { - tts_manager.speak(text, voice_id) + tts_manager.speak(text, voice_id, rate.unwrap_or(1.0)) } #[tauri::command] diff --git a/src-tauri/src/commands/tutor.rs b/src-tauri/src/commands/tutor.rs new file mode 100644 index 0000000..039c0a8 --- /dev/null +++ b/src-tauri/src/commands/tutor.rs @@ -0,0 +1,5 @@ +#[tauri::command] +#[specta::specta] +pub fn tutor_score(reference: String, spoken: String) -> crate::tutor::ScoreReport { + crate::tutor::score_pronunciation(&reference, &spoken) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 100fd18..3f6f764 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -2,6 +2,7 @@ mod actions; mod agent_bridge; #[cfg(all(target_os = "macos", target_arch = "aarch64"))] mod apple_intelligence; +mod assistant; pub mod audio_toolkit; mod capture; pub mod cli; @@ -28,6 +29,7 @@ mod transcript_format; mod transcription_coordinator; mod translate; mod tts; +mod tutor; mod utils; mod voice_commands; @@ -191,10 +193,17 @@ fn initialize_core_logic(app_handle: &AppHandle) { let sink: crate::agent_bridge::server::AskSink = Arc::new(move |ev| { use tauri::Emitter; if ev.speak { - if let Some(tts) = - evt_handle.try_state::>() - { - let _ = tts.speak(ev.question.clone(), None); + let s = crate::settings::get_settings(&evt_handle); + if s.tts_enabled { + if let Some(tts) = + evt_handle.try_state::>() + { + let _ = tts.speak( + ev.question.clone(), + s.tts_voice_id.clone(), + s.tts_rate, + ); + } } } crate::agent_bridge::window::show_panel(&evt_handle); @@ -541,6 +550,8 @@ pub fn run(cli_args: CliArgs) { commands::tts::tts_list_voices, commands::tts::tts_speak, commands::tts::tts_stop, + commands::assistant::assistant_ask, + commands::tutor::tutor_score, commands::agent_bridge::agent_bridge_answer, commands::agent_bridge::agent_bridge_dismiss, commands::agent_bridge::agent_bridge_answers, diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 6a43d3d..af378f3 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -392,6 +392,12 @@ pub struct AppSettings { pub agent_bridge_enabled: bool, #[serde(default = "default_agent_bridge_port")] pub agent_bridge_port: u16, + #[serde(default = "default_tts_enabled")] + pub tts_enabled: bool, + #[serde(default)] + pub tts_voice_id: Option, + #[serde(default = "default_tts_rate")] + pub tts_rate: f32, #[serde(default)] pub custom_words: Vec, #[serde(default)] @@ -489,12 +495,26 @@ pub struct AppSettings { pub spoken_lists_enabled: bool, #[serde(default)] pub dev_dictionary_enabled: bool, + #[serde(default = "default_assistant_enabled")] + pub assistant_enabled: bool, + #[serde(default)] + pub assistant_system_prompt: String, + #[serde(default = "default_tutor_enabled")] + pub tutor_enabled: bool, +} + +pub fn default_tutor_enabled() -> bool { + false } pub fn default_spoken_lists_enabled() -> bool { true } +pub fn default_assistant_enabled() -> bool { + false +} + pub fn default_auto_punctuate() -> bool { true } @@ -591,6 +611,14 @@ fn default_agent_bridge_port() -> u16 { 4123 } +fn default_tts_enabled() -> bool { + true +} + +fn default_tts_rate() -> f32 { + 1.0 +} + fn default_word_correction_threshold() -> f64 { 0.18 } @@ -933,6 +961,9 @@ pub fn get_default_settings() -> AppSettings { overlay_position: default_overlay_position(), agent_bridge_enabled: default_agent_bridge_enabled(), agent_bridge_port: default_agent_bridge_port(), + tts_enabled: default_tts_enabled(), + tts_voice_id: None, + tts_rate: default_tts_rate(), debug_mode: false, log_level: default_log_level(), custom_words: Vec::new(), @@ -984,6 +1015,9 @@ pub fn get_default_settings() -> AppSettings { self_correction_enabled: false, spoken_lists_enabled: default_spoken_lists_enabled(), dev_dictionary_enabled: false, + assistant_enabled: default_assistant_enabled(), + assistant_system_prompt: String::new(), + tutor_enabled: default_tutor_enabled(), } } diff --git a/src-tauri/src/tts/mod.rs b/src-tauri/src/tts/mod.rs index 2a6b7f6..ed88baa 100644 --- a/src-tauri/src/tts/mod.rs +++ b/src-tauri/src/tts/mod.rs @@ -16,7 +16,29 @@ pub struct VoiceInfo { pub trait TtsEngine: Send + Sync { fn list_voices(&self) -> Result, String>; - fn synthesize(&self, text: &str, voice_id: Option<&str>) -> Result, String>; + fn synthesize(&self, text: &str, voice_id: Option<&str>, rate: f32) -> Result, String>; +} + +/// Pick a voice whose language matches the text's script. If >=30% of the +/// alphabetic characters are Cyrillic, prefer a `ru*` voice; otherwise an +/// `en*` voice. Falls back to the first available voice. +pub fn pick_voice_for_text<'a>(text: &str, voices: &'a [VoiceInfo]) -> Option<&'a VoiceInfo> { + let alpha: Vec = text.chars().filter(|c| c.is_alphabetic()).collect(); + if alpha.is_empty() { + return voices.first(); + } + + let cyrillic = alpha + .iter() + .filter(|&&c| ('\u{0400}'..='\u{04FF}').contains(&c)) + .count(); + let is_russian = (cyrillic as f32 / alpha.len() as f32) >= 0.3; + + let prefix = if is_russian { "ru" } else { "en" }; + voices + .iter() + .find(|v| v.language.to_lowercase().starts_with(prefix)) + .or_else(|| voices.first()) } pub struct TtsManager { @@ -49,7 +71,7 @@ impl TtsManager { .list_voices() } - pub fn speak(&self, text: String, voice_id: Option) -> Result<(), String> { + pub fn speak(&self, text: String, voice_id: Option, rate: f32) -> Result<(), String> { let engine = self .engine .as_ref() @@ -58,7 +80,7 @@ impl TtsManager { // Stop current playback self.stop()?; - let wav_bytes = engine.synthesize(&text, voice_id.as_deref())?; + let wav_bytes = engine.synthesize(&text, voice_id.as_deref(), rate)?; let current_sink = self.current_sink.clone(); @@ -104,6 +126,47 @@ impl TtsManager { } } +#[cfg(test)] +mod voice_pick_tests { + use super::*; + + fn voices() -> Vec { + vec![ + VoiceInfo { + id: "en-1".into(), + display_name: "David".into(), + language: "en-US".into(), + }, + VoiceInfo { + id: "ru-1".into(), + display_name: "Irina".into(), + language: "ru-RU".into(), + }, + ] + } + + #[test] + fn russian_text_picks_ru_voice() { + let v = voices(); + let picked = pick_voice_for_text("Привет, как дела?", &v).unwrap(); + assert_eq!(picked.id, "ru-1"); + } + + #[test] + fn english_text_picks_en_voice() { + let v = voices(); + let picked = pick_voice_for_text("Hello, how are you?", &v).unwrap(); + assert_eq!(picked.id, "en-1"); + } + + #[test] + fn no_alpha_falls_back_to_first() { + let v = voices(); + let picked = pick_voice_for_text("12345 !!!", &v).unwrap(); + assert_eq!(picked.id, "en-1"); + } +} + #[cfg(all(test, windows))] mod tests { use super::*; @@ -123,7 +186,7 @@ mod tests { ); let wav = engine - .synthesize("Echo speech engine online. Эхо на связи.", None) + .synthesize("Echo speech engine online. Эхо на связи.", None, 1.0) .expect("synthesize failed"); assert!(wav.len() > 44, "WAV too small: {} bytes", wav.len()); assert_eq!(&wav[0..4], b"RIFF", "not a WAV container"); diff --git a/src-tauri/src/tts/windows.rs b/src-tauri/src/tts/windows.rs index 3fdf0ce..1d18a71 100644 --- a/src-tauri/src/tts/windows.rs +++ b/src-tauri/src/tts/windows.rs @@ -29,10 +29,25 @@ impl TtsEngine for WindowsTts { Ok(out) } - fn synthesize(&self, text: &str, voice_id: Option<&str>) -> Result, String> { + fn synthesize(&self, text: &str, voice_id: Option<&str>, rate: f32) -> Result, String> { let synth = SpeechSynthesizer::new().map_err(|e| e.to_string())?; - if let Some(id) = voice_id { + // WinRT SpeakingRate is a multiplier; valid range is 0.5..=6.0. + let rate = rate.clamp(0.5, 6.0) as f64; + synth + .Options() + .map_err(|e| e.to_string())? + .SetSpeakingRate(rate) + .map_err(|e| e.to_string())?; + + // Resolve the target voice id: explicit when given, otherwise + // auto-selected by the text's script (Cyrillic -> ru, else en). + let target_id: Option = match voice_id { + Some(id) => Some(id.to_string()), + None => super::pick_voice_for_text(text, &self.list_voices()?).map(|v| v.id.clone()), + }; + + if let Some(id) = target_id { let voices = SpeechSynthesizer::AllVoices().map_err(|e| e.to_string())?; let target = voices .into_iter() diff --git a/src-tauri/src/tutor/mod.rs b/src-tauri/src/tutor/mod.rs new file mode 100644 index 0000000..f336183 --- /dev/null +++ b/src-tauri/src/tutor/mod.rs @@ -0,0 +1,158 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use specta::Type; +use strsim::levenshtein; + +static PUNCTUATION: Lazy = Lazy::new(|| Regex::new(r"[^\p{L}\s]").unwrap()); +static WHITESPACE: Lazy = Lazy::new(|| Regex::new(r"\s+").unwrap()); + +#[derive(Serialize, Deserialize, Debug, Clone, Type)] +pub struct WordScore { + pub reference: String, + pub spoken: Option, + pub matched: bool, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Type)] +pub struct ScoreReport { + pub overall: u8, // 0..=100 + pub words: Vec, // per reference word, aligned + pub reference_word_count: usize, + pub matched_word_count: usize, + pub note: String, // short human feedback, e.g. "Great!" / "Watch: " +} + +fn normalize(text: &str) -> Vec { + let text = text.to_lowercase(); + let text = PUNCTUATION.replace_all(&text, ""); + let text = WHITESPACE.replace_all(&text, " "); + text.trim() + .split_whitespace() + .map(|s| s.to_string()) + .collect() +} + +pub fn score_pronunciation(reference: &str, spoken: &str) -> ScoreReport { + let ref_words = normalize(reference); + let spoken_words = normalize(spoken); + + if ref_words.is_empty() { + return ScoreReport { + overall: 0, + words: Vec::new(), + reference_word_count: 0, + matched_word_count: 0, + note: "Empty reference phrase.".to_string(), + }; + } + + let mut word_scores = Vec::with_capacity(ref_words.len()); + let mut matched_count = 0; + let mut spoken_idx = 0; + + for ref_word in &ref_words { + let mut best_match: Option<(usize, bool)> = None; + + // Greedy search for the best match in the remaining spoken words + // We look ahead a bit to allow for some misrecognitions or skipped words + let lookahead = 3; + let end = (spoken_idx + lookahead).min(spoken_words.len()); + + for i in spoken_idx..end { + let spoken_word = &spoken_words[i]; + let is_match = if ref_word == spoken_word { + true + } else { + let dist = levenshtein(ref_word, spoken_word); + dist <= 1.max(ref_word.len() / 4) + }; + + if is_match { + best_match = Some((i, true)); + break; + } + } + + if let Some((idx, is_match)) = best_match { + word_scores.push(WordScore { + reference: ref_word.clone(), + spoken: Some(spoken_words[idx].clone()), + matched: is_match, + }); + matched_count += 1; + spoken_idx = idx + 1; + } else { + word_scores.push(WordScore { + reference: ref_word.clone(), + spoken: None, + matched: false, + }); + } + } + + let overall = (100 * matched_count / ref_words.len()) as u8; + + let unmatched_words: Vec = word_scores + .iter() + .filter(|w| !w.matched) + .map(|w| w.reference.clone()) + .take(3) + .collect(); + + let note = if overall >= 90 { + "Great pronunciation!".to_string() + } else if overall >= 70 { + format!("Good — review: {}", unmatched_words.join(", ")) + } else { + format!("Keep practicing: {}", unmatched_words.join(", ")) + }; + + ScoreReport { + overall, + words: word_scores, + reference_word_count: ref_words.len(), + matched_word_count: matched_count, + note, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_exact_match() { + let report = score_pronunciation("Hello world", "hello world"); + assert_eq!(report.overall, 100); + assert_eq!(report.matched_word_count, 2); + } + + #[test] + fn test_one_wrong() { + let report = score_pronunciation("one two three four", "one two skip four"); + assert_eq!(report.overall, 75); + assert_eq!(report.matched_word_count, 3); + } + + #[test] + fn test_empty_reference() { + let report = score_pronunciation("", "something"); + assert_eq!(report.overall, 0); + } + + #[test] + fn test_russian() { + let report = score_pronunciation("привет как дела", "привет как дела"); + assert_eq!(report.overall, 100); + } + + #[test] + fn test_levenshtein_tolerance() { + // "pronunciation" len 13, 13/4 = 3. + // "pronunshation" dist 2 + let report = score_pronunciation("pronunciation", "pronunshation"); + assert_eq!(report.overall, 100); + assert!(report.words[0].matched); + } +} diff --git a/src/bindings.ts b/src/bindings.ts index 9badac5..10bde30 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -971,9 +971,9 @@ async ttsListVoices() : Promise> { else return { status: "error", error: e as any }; } }, -async ttsSpeak(text: string, voiceId: string | null) : Promise> { +async ttsSpeak(text: string, voiceId: string | null, rate: number | null) : Promise> { try { - return { status: "ok", data: await TAURI_INVOKE("tts_speak", { text, voiceId }) }; + return { status: "ok", data: await TAURI_INVOKE("tts_speak", { text, voiceId, rate }) }; } catch (e) { if(e instanceof Error) throw e; else return { status: "error", error: e as any }; @@ -987,6 +987,17 @@ async ttsStop() : Promise> { else return { status: "error", error: e as any }; } }, +async assistantAsk(text: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("assistant_ask", { text }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, +async tutorScore(reference: string, spoken: string) : Promise { + return await TAURI_INVOKE("tutor_score", { reference, spoken }); +}, async agentBridgeAnswer(id: number, answer: string) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("agent_bridge_answer", { id, answer }) }; @@ -1063,7 +1074,7 @@ historyUpdatePayload: "history-update-payload" /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; agent_bridge_enabled?: boolean; agent_bridge_port?: number; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; translate_enabled?: boolean; translate_target?: Lang; translate_model?: string; translate_base_url?: string; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; capture_folder?: string; capture_trigger_phrases?: string; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number; auto_punctuate?: boolean; auto_capitalize?: boolean; subtitle_overlay?: boolean; subtitle_font_size?: SubtitleFontSize; subtitle_max_chars?: number; subtitle_refresh_ms?: number; command_mode_enabled?: boolean; coach_toast_enabled?: boolean; snippets?: Snippet[]; self_correction_enabled?: boolean; spoken_lists_enabled?: boolean; dev_dictionary_enabled?: boolean } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; agent_bridge_enabled?: boolean; agent_bridge_port?: number; tts_enabled?: boolean; tts_voice_id?: string | null; tts_rate?: number; assistant_enabled?: boolean; assistant_system_prompt?: string; tutor_enabled?: boolean; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; auto_submit?: boolean; auto_submit_key?: AutoSubmitKey; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: SecretMap; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; translate_enabled?: boolean; translate_target?: Lang; translate_model?: string; translate_base_url?: string; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; lazy_stream_close?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; paste_delay_ms?: number; typing_tool?: TypingTool; external_script_path: string | null; capture_folder?: string; capture_trigger_phrases?: string; custom_filler_words?: string[] | null; whisper_accelerator?: WhisperAcceleratorSetting; ort_accelerator?: OrtAcceleratorSetting; whisper_gpu_device?: number; extra_recording_buffer_ms?: number; auto_punctuate?: boolean; auto_capitalize?: boolean; subtitle_overlay?: boolean; subtitle_font_size?: SubtitleFontSize; subtitle_max_chars?: number; subtitle_refresh_ms?: number; command_mode_enabled?: boolean; coach_toast_enabled?: boolean; snippets?: Snippet[]; self_correction_enabled?: boolean; spoken_lists_enabled?: boolean; dev_dictionary_enabled?: boolean } export type AudioDevice = { index: string; name: string; is_default: boolean } export type AutoSubmitKey = "enter" | "ctrl_enter" | "cmd_enter" export type AvailableAccelerators = { whisper: string[]; ort: string[]; gpu_devices: GpuDeviceOption[] } @@ -1123,6 +1134,8 @@ export type TrendPoint = { day: number; avg_wpm: number; avg_filler_rate: number export type TrendWindow = "Days7" | "Days30" | "All" export type TypingTool = "auto" | "wtype" | "kwtype" | "dotool" | "ydotool" | "xdotool" export type VoiceInfo = { id: string; display_name: string; language: string } +export type WordScore = { reference: string; spoken: string | null; matched: boolean } +export type ScoreReport = { overall: number; words: WordScore[]; reference_word_count: number; matched_word_count: number; note: string } export type WhisperAcceleratorSetting = "auto" | "cpu" | "gpu" export type WindowsMicrophonePermissionStatus = { supported: boolean; overall_access: PermissionAccess; device_access: PermissionAccess; app_access: PermissionAccess; desktop_app_access: PermissionAccess } diff --git a/src/components/Sidebar.tsx b/src/components/Sidebar.tsx index 45f5550..94542b2 100644 --- a/src/components/Sidebar.tsx +++ b/src/components/Sidebar.tsx @@ -8,7 +8,10 @@ import { Sparkles, Cpu, LineChart, + MessageCircle, FileAudio, + Volume2, + GraduationCap, } from "lucide-react"; import EchoTextLogo from "./icons/EchoTextLogo"; import EchoHand from "./icons/EchoHand"; @@ -18,6 +21,9 @@ import { AdvancedSettings, HistorySettings, CoachSettings, + AssistantSettings, + TtsSettings, + TutorSettings, DebugSettings, AboutSettings, PostProcessingSettings, @@ -79,6 +85,24 @@ export const SECTIONS_CONFIG = { component: CoachSettings, enabled: () => true, }, + assistant: { + labelKey: "sidebar.assistant", + icon: MessageCircle, + component: AssistantSettings, + enabled: () => true, + }, + tts: { + labelKey: "sidebar.tts", + icon: Volume2, + component: TtsSettings, + enabled: () => true, + }, + tutor: { + labelKey: "sidebar.tutor", + icon: GraduationCap, + component: TutorSettings, + enabled: () => true, + }, postprocessing: { labelKey: "sidebar.postProcessing", icon: Sparkles, diff --git a/src/components/settings/assistant/AssistantSettings.tsx b/src/components/settings/assistant/AssistantSettings.tsx new file mode 100644 index 0000000..1027645 --- /dev/null +++ b/src/components/settings/assistant/AssistantSettings.tsx @@ -0,0 +1,124 @@ +import React, { useState } from "react"; +import { useTranslation } from "react-i18next"; +import { commands } from "@/bindings"; +import { useSettings } from "@/hooks/useSettings"; +import { + SettingContainer, + SettingsGroup, + ToggleSwitch, + Textarea, +} from "@/components/ui"; +import { Input } from "@/components/ui/Input"; +import { Button } from "@/components/ui/Button"; + +export const AssistantSettings: React.FC = () => { + const { t } = useTranslation(); + const { getSetting, updateSetting } = useSettings(); + + const [testInput, setTestInput] = useState(""); + const [testReply, setTestReply] = useState(""); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = useState(""); + + const assistantEnabled = getSetting("assistant_enabled") ?? false; + const assistantSystemPrompt = getSetting("assistant_system_prompt") ?? ""; + + const handleAsk = async () => { + if (!testInput.trim()) return; + + setIsLoading(true); + setError(""); + setTestReply(""); + + try { + const result = await commands.assistantAsk(testInput); + if (result.status === "ok") { + setTestReply(result.data); + } else { + setError(result.error); + } + } catch (err) { + setError(String(err)); + } finally { + setIsLoading(false); + } + }; + + const handleSpeak = async () => { + if (!testReply) return; + try { + await commands.ttsSpeak(testReply, null, 1.0); + } catch (err) { + console.error("Failed to speak reply:", err); + } + }; + + return ( +
+ + updateSetting("assistant_enabled", checked)} + label={t("settings.assistant.enable.label")} + description={t("settings.assistant.enable.description")} + descriptionMode="inline" + grouped={true} + /> + + +