Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions scripts/fill-missing-translations.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// One-shot: fill any keys missing from a locale with the English value
// (i18next falls back to en at runtime anyway; this satisfies check:translations).
// Run: node scripts/fill-missing-translations.mjs
import { readFileSync, writeFileSync, readdirSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";

// Directory that holds this script; the locale folders are resolved relative to it.
const scriptDir = dirname(fileURLToPath(import.meta.url));

// ../src/i18n/locales — each entry is looked up as <lang>/translation.json.
const localesDir = join(scriptDir, "..", "src", "i18n", "locales");

// English is the reference locale: its keys are what every other locale is filled from.
const enPath = join(localesDir, "en", "translation.json");
const en = JSON.parse(readFileSync(enPath, "utf8"));

/**
 * Deep-merge helper: copies into `target` every key of `ref` that `target`
 * does not define itself, recursing into nested plain objects.
 *
 * Existing leaf values in `target` are never overwritten. When `ref` holds a
 * plain object under a key but `target` holds anything else there (a string,
 * `null`, an array, or nothing), that slot is replaced by a fresh object
 * before recursing so the nested keys have somewhere to go.
 *
 * @param {object} ref    Reference tree (the English translations).
 * @param {object} target Tree to complete; mutated in place.
 * @returns {number} Number of leaf values that were added to `target`.
 */
function fill(ref, target) {
  // Own-property test: `key in target` would also match inherited members
  // such as `toString` or `constructor`, so translation keys with those names
  // would wrongly be treated as already present.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  let added = 0;
  for (const key of Object.keys(ref)) {
    const refValue = ref[key];
    const isBranch =
      typeof refValue === "object" &&
      refValue !== null &&
      !Array.isArray(refValue);

    if (isBranch) {
      const existing = hasOwn(target, key) ? target[key] : undefined;
      // Arrays are excluded too: keys written onto an array are dropped by
      // JSON.stringify, so they would be counted but never persisted.
      if (
        typeof existing !== "object" ||
        existing === null ||
        Array.isArray(existing)
      ) {
        target[key] = {};
      }
      added += fill(refValue, target[key]);
    } else if (!hasOwn(target, key)) {
      target[key] = refValue;
      added += 1;
    }
  }
  return added;
}

// Back-fill every locale except the English reference and rewrite only the
// files that actually gained keys.
for (const lang of readdirSync(localesDir)) {
  if (lang === "en") continue;
  const file = join(localesDir, lang, "translation.json");

  let data;
  try {
    data = JSON.parse(readFileSync(file, "utf8"));
  } catch (err) {
    // An entry without a translation.json (stray file, empty folder) is
    // expected and skipped quietly. Anything else — malformed JSON, a
    // permission problem — is reported so a broken locale is not silently
    // left incomplete.
    const code = err && err.code;
    if (code !== "ENOENT" && code !== "ENOTDIR") {
      const reason = err && err.message ? err.message : String(err);
      console.warn(`${lang}: skipped, could not load ${file}: ${reason}`);
    }
    continue;
  }

  // fill() needs a plain object to write into; any other JSON root cannot
  // hold translation keys.
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
    console.warn(`${lang}: skipped, ${file} does not contain a JSON object`);
    continue;
  }

  const added = fill(en, data);
  if (added > 0) {
    writeFileSync(file, JSON.stringify(data, null, 2) + "\n", "utf8");
    console.log(`${lang}: filled ${added} keys`);
  }
}
58 changes: 58 additions & 0 deletions src-tauri/src/assistant/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use tauri::AppHandle;

/// Built-in system prompt for the assistant.
///
/// `ask_assistant` falls back to this text when the user has not set
/// `assistant_system_prompt` in the settings.
pub const DEFAULT_ASSISTANT_SYSTEM_PROMPT: &str = "You are Echo, a concise, friendly voice assistant. Answer in the same language the user used (Russian or English). Keep replies short and speakable — no markdown, no code fences, no bullet lists unless asked.";

/// Sends `user_text` to the active post-processing LLM provider and returns its reply.
///
/// The provider, its API key and its model name are read from the application
/// settings. The system prompt is the user's `assistant_system_prompt`, or
/// [`DEFAULT_ASSISTANT_SYSTEM_PROMPT`] when that setting is empty or contains
/// only whitespace.
///
/// # Errors
///
/// Returns an error string when
/// - no post-processing provider is active,
/// - no model (or only whitespace) is configured for that provider,
/// - `send_chat_completion_with_schema` fails (its error is propagated), or
/// - the provider returns no reply or a reply that is blank.
pub async fn ask_assistant(app: &AppHandle, user_text: String) -> Result<String, String> {
    let settings = crate::settings::get_settings(app);

    let provider = settings
        .active_post_process_provider()
        .ok_or_else(|| "no LLM provider configured".to_string())?
        .clone();

    // A missing key is passed on as an empty string, exactly as a missing
    // model would be before the check below rejects it.
    let api_key = settings
        .post_process_api_keys
        .get(&provider.id)
        .cloned()
        .unwrap_or_default();
    let model = settings
        .post_process_models
        .get(&provider.id)
        .cloned()
        .unwrap_or_default();

    // Whitespace-only counts as "not configured": it cannot name a model.
    if model.trim().is_empty() {
        return Err("no LLM model configured for the active provider".to_string());
    }

    // A prompt consisting only of whitespace would replace the default with
    // nothing useful, so it is treated the same as an unset prompt.
    let system = if settings.assistant_system_prompt.trim().is_empty() {
        DEFAULT_ASSISTANT_SYSTEM_PROMPT.to_string()
    } else {
        settings.assistant_system_prompt.clone()
    };

    // The three trailing optional arguments are left unset.
    let reply = crate::llm_client::send_chat_completion_with_schema(
        &provider,
        api_key,
        &model,
        user_text,
        Some(system),
        None,
        None,
        None,
    )
    .await?;

    // `Some("")` / `Some("  ")` carries no answer either; report it like `None`.
    reply
        .filter(|text| !text.trim().is_empty())
        .ok_or_else(|| "empty reply from assistant".to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in prompt must be non-empty and introduce the assistant by name.
    #[test]
    fn test_default_prompt() {
        let prompt = DEFAULT_ASSISTANT_SYSTEM_PROMPT;
        let is_blank = prompt.is_empty();
        let names_echo = prompt.contains("Echo");

        assert!(!is_blank);
        assert!(names_echo);
    }
}
5 changes: 5 additions & 0 deletions src-tauri/src/commands/assistant.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// Tauri command that forwards `text` to [`crate::assistant::ask_assistant`].
///
/// Returns the assistant's reply, or the error string produced by
/// `ask_assistant`, unchanged.
#[tauri::command]
#[specta::specta]
pub async fn assistant_ask(app: tauri::AppHandle, text: String) -> Result<String, String> {
    let handle = &app;
    crate::assistant::ask_assistant(handle, text).await
}
2 changes: 2 additions & 0 deletions src-tauri/src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
pub mod agent_bridge;
pub mod assistant;
pub mod audio;
pub mod coach;
pub mod history;
pub mod models;
pub mod transcribe;
pub mod transcription;
pub mod tts;
pub mod tutor;

use crate::settings::{get_settings, write_settings, AppSettings, LogLevel};
use crate::utils::cancel_current_operation;
Expand Down
3 changes: 2 additions & 1 deletion src-tauri/src/commands/tts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ pub fn tts_speak(
tts_manager: State<Arc<TtsManager>>,
text: String,
voice_id: Option<String>,
rate: Option<f32>,
) -> Result<(), String> {
tts_manager.speak(text, voice_id)
tts_manager.speak(text, voice_id, rate.unwrap_or(1.0))
}

#[tauri::command]
Expand Down
5 changes: 5 additions & 0 deletions src-tauri/src/commands/tutor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// Tauri command that wraps [`crate::tutor::score_pronunciation`].
///
/// Borrows `reference` and `spoken` for the call and returns the resulting
/// `ScoreReport` as-is.
#[tauri::command]
#[specta::specta]
pub fn tutor_score(reference: String, spoken: String) -> crate::tutor::ScoreReport {
    let expected = &reference;
    let actual = &spoken;
    crate::tutor::score_pronunciation(expected, actual)
}
19 changes: 15 additions & 4 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod actions;
mod agent_bridge;
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
mod apple_intelligence;
mod assistant;
pub mod audio_toolkit;
mod capture;
pub mod cli;
Expand All @@ -28,6 +29,7 @@ mod transcript_format;
mod transcription_coordinator;
mod translate;
mod tts;
mod tutor;
mod utils;
mod voice_commands;

Expand Down Expand Up @@ -191,10 +193,17 @@ fn initialize_core_logic(app_handle: &AppHandle) {
let sink: crate::agent_bridge::server::AskSink = Arc::new(move |ev| {
use tauri::Emitter;
if ev.speak {
if let Some(tts) =
evt_handle.try_state::<Arc<crate::tts::TtsManager>>()
{
let _ = tts.speak(ev.question.clone(), None);
let s = crate::settings::get_settings(&evt_handle);
if s.tts_enabled {
if let Some(tts) =
evt_handle.try_state::<Arc<crate::tts::TtsManager>>()
{
let _ = tts.speak(
ev.question.clone(),
s.tts_voice_id.clone(),
s.tts_rate,
);
}
}
}
crate::agent_bridge::window::show_panel(&evt_handle);
Expand Down Expand Up @@ -541,6 +550,8 @@ pub fn run(cli_args: CliArgs) {
commands::tts::tts_list_voices,
commands::tts::tts_speak,
commands::tts::tts_stop,
commands::assistant::assistant_ask,
commands::tutor::tutor_score,
commands::agent_bridge::agent_bridge_answer,
commands::agent_bridge::agent_bridge_dismiss,
commands::agent_bridge::agent_bridge_answers,
Expand Down
34 changes: 34 additions & 0 deletions src-tauri/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,12 @@ pub struct AppSettings {
pub agent_bridge_enabled: bool,
#[serde(default = "default_agent_bridge_port")]
pub agent_bridge_port: u16,
#[serde(default = "default_tts_enabled")]
pub tts_enabled: bool,
#[serde(default)]
pub tts_voice_id: Option<String>,
#[serde(default = "default_tts_rate")]
pub tts_rate: f32,
#[serde(default)]
pub custom_words: Vec<String>,
#[serde(default)]
Expand Down Expand Up @@ -489,12 +495,26 @@ pub struct AppSettings {
pub spoken_lists_enabled: bool,
#[serde(default)]
pub dev_dictionary_enabled: bool,
#[serde(default = "default_assistant_enabled")]
pub assistant_enabled: bool,
#[serde(default)]
pub assistant_system_prompt: String,
#[serde(default = "default_tutor_enabled")]
pub tutor_enabled: bool,
}

/// Serde default for `AppSettings::tutor_enabled`: `false`, so the tutor
/// stays off when the field is absent from stored settings.
pub fn default_tutor_enabled() -> bool {
false
}

/// Default for `AppSettings::spoken_lists_enabled`: `true`.
/// `get_default_settings` uses it to initialise that field.
pub fn default_spoken_lists_enabled() -> bool {
true
}

/// Serde default for `AppSettings::assistant_enabled`: `false`, so the
/// assistant stays off when the field is absent from stored settings.
pub fn default_assistant_enabled() -> bool {
false
}

/// Default for the auto-punctuate setting: `true`.
// NOTE(review): the field that uses this default is outside the visible
// hunk — presumably `AppSettings::auto_punctuate`; confirm.
pub fn default_auto_punctuate() -> bool {
true
}
Expand Down Expand Up @@ -591,6 +611,14 @@ fn default_agent_bridge_port() -> u16 {
4123
}

/// Serde default for `AppSettings::tts_enabled`: `true`, so text-to-speech
/// is on when the field is absent from stored settings.
fn default_tts_enabled() -> bool {
true
}

/// Serde default for `AppSettings::tts_rate`: `1.0`.
// NOTE(review): 1.0 is presumably the engine's normal speaking speed; the
// unit is not visible here — confirm against the TTS engine.
fn default_tts_rate() -> f32 {
1.0
}

/// Default word-correction threshold: `0.18`.
// NOTE(review): the field using this default and the scale of the value are
// outside the visible hunk — presumably a serde default on `AppSettings`; confirm.
fn default_word_correction_threshold() -> f64 {
0.18
}
Expand Down Expand Up @@ -933,6 +961,9 @@ pub fn get_default_settings() -> AppSettings {
overlay_position: default_overlay_position(),
agent_bridge_enabled: default_agent_bridge_enabled(),
agent_bridge_port: default_agent_bridge_port(),
tts_enabled: default_tts_enabled(),
tts_voice_id: None,
tts_rate: default_tts_rate(),
debug_mode: false,
log_level: default_log_level(),
custom_words: Vec::new(),
Expand Down Expand Up @@ -984,6 +1015,9 @@ pub fn get_default_settings() -> AppSettings {
self_correction_enabled: false,
spoken_lists_enabled: default_spoken_lists_enabled(),
dev_dictionary_enabled: false,
assistant_enabled: default_assistant_enabled(),
assistant_system_prompt: String::new(),
tutor_enabled: default_tutor_enabled(),
}
}

Expand Down
71 changes: 67 additions & 4 deletions src-tauri/src/tts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,29 @@ pub struct VoiceInfo {

pub trait TtsEngine: Send + Sync {
fn list_voices(&self) -> Result<Vec<VoiceInfo>, String>;
fn synthesize(&self, text: &str, voice_id: Option<&str>) -> Result<Vec<u8>, String>;
fn synthesize(&self, text: &str, voice_id: Option<&str>, rate: f32) -> Result<Vec<u8>, String>;
}

/// Chooses the voice that best fits the script `text` is written in.
///
/// The alphabetic characters of `text` are counted; when at least 30% of them
/// fall in the Cyrillic block (U+0400..=U+04FF) a voice whose language starts
/// with `ru` is wanted, otherwise one whose language starts with `en`. The
/// language comparison ignores case.
///
/// Returns the first voice in `voices` when `text` has no alphabetic
/// characters or when no voice has the wanted language, and `None` only when
/// `voices` is empty.
pub fn pick_voice_for_text<'a>(text: &str, voices: &'a [VoiceInfo]) -> Option<&'a VoiceInfo> {
    let fallback = voices.first();

    // One pass over the text: total letters and how many of them are Cyrillic.
    let (letters, cyrillic) = text
        .chars()
        .filter(|ch| ch.is_alphabetic())
        .fold((0usize, 0usize), |(letters, cyrillic), ch| {
            let is_cyrillic = matches!(ch, '\u{0400}'..='\u{04FF}');
            (letters + 1, cyrillic + usize::from(is_cyrillic))
        });

    if letters == 0 {
        return fallback;
    }

    let cyrillic_share = (cyrillic as f32) / (letters as f32);
    let wanted = if cyrillic_share >= 0.3 { "ru" } else { "en" };

    voices
        .iter()
        .find(|voice| voice.language.to_lowercase().starts_with(wanted))
        .or(fallback)
}

pub struct TtsManager {
Expand Down Expand Up @@ -49,7 +71,7 @@ impl TtsManager {
.list_voices()
}

pub fn speak(&self, text: String, voice_id: Option<String>) -> Result<(), String> {
pub fn speak(&self, text: String, voice_id: Option<String>, rate: f32) -> Result<(), String> {
let engine = self
.engine
.as_ref()
Expand All @@ -58,7 +80,7 @@ impl TtsManager {
// Stop current playback
self.stop()?;

let wav_bytes = engine.synthesize(&text, voice_id.as_deref())?;
let wav_bytes = engine.synthesize(&text, voice_id.as_deref(), rate)?;

let current_sink = self.current_sink.clone();

Expand Down Expand Up @@ -104,6 +126,47 @@ impl TtsManager {
}
}

#[cfg(test)]
mod voice_pick_tests {
    use super::*;

    /// Builds one fixture voice from its id, display name and language tag.
    fn voice(id: &'static str, display_name: &'static str, language: &'static str) -> VoiceInfo {
        VoiceInfo {
            id: id.into(),
            display_name: display_name.into(),
            language: language.into(),
        }
    }

    /// Fixture catalogue: an English voice first, then a Russian one.
    fn voices() -> Vec<VoiceInfo> {
        vec![
            voice("en-1", "David", "en-US"),
            voice("ru-1", "Irina", "ru-RU"),
        ]
    }

    /// Cyrillic text selects the `ru` voice even though it is not listed first.
    #[test]
    fn russian_text_picks_ru_voice() {
        let catalog = voices();
        let choice = pick_voice_for_text("Привет, как дела?", &catalog).unwrap();
        assert_eq!(choice.id, "ru-1");
    }

    /// Latin text selects the `en` voice.
    #[test]
    fn english_text_picks_en_voice() {
        let catalog = voices();
        let choice = pick_voice_for_text("Hello, how are you?", &catalog).unwrap();
        assert_eq!(choice.id, "en-1");
    }

    /// Text without any letters falls back to the first voice in the list.
    #[test]
    fn no_alpha_falls_back_to_first() {
        let catalog = voices();
        let choice = pick_voice_for_text("12345 !!!", &catalog).unwrap();
        assert_eq!(choice.id, "en-1");
    }
}

#[cfg(all(test, windows))]
mod tests {
use super::*;
Expand All @@ -123,7 +186,7 @@ mod tests {
);

let wav = engine
.synthesize("Echo speech engine online. Эхо на связи.", None)
.synthesize("Echo speech engine online. Эхо на связи.", None, 1.0)
.expect("synthesize failed");
assert!(wav.len() > 44, "WAV too small: {} bytes", wav.len());
assert_eq!(&wav[0..4], b"RIFF", "not a WAV container");
Expand Down
Loading
Loading