Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,4 @@ OPENCODE_MODEL_ID=big-pickle
# STT_API_KEY=
# STT_MODEL=
# STT_LANGUAGE=
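# STT_PROMPT_INJECTION=true # Set to "true" or "1" to prepend a default note warning the LLM about possible transcription errors; "false" or "0" (or leaving it unset) disables it; any other value is prepended as a custom note.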
18 changes: 17 additions & 1 deletion src/bot/handlers/voice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,24 @@ export async function handleVoiceMessage(ctx: Context, deps: VoiceMessageDeps):

logger.info(`[Voice] Transcribed audio: ${recognizedText.length} chars`);

// Optionally prepend a note about possible transcription errors (controlled by STT_PROMPT_INJECTION) to the text sent to the LLM.
let textForLLM = recognizedText;
const injectionSetting = config.stt.promptInjection;

if (injectionSetting) {
const normalizedSetting = injectionSetting.trim().toLowerCase();
if (normalizedSetting === "true" || normalizedSetting === "1") {
const defaultPrompt =
"[Note: The following text is transcribed from voice. There may be homophone or mispronunciation errors. Please interpret the intended meaning based on context.]";
textForLLM = `${defaultPrompt}\n${recognizedText}`;
} else if (normalizedSetting !== "false" && normalizedSetting !== "0") {
textForLLM = `${injectionSetting}\n${recognizedText}`;
}
}

// Process the recognized text as a prompt
await processPrompt(ctx, recognizedText, deps);
await processPrompt(ctx, textForLLM, deps);

} catch (err) {
const errorMessage = err instanceof Error ? err.message : "unknown error";
logger.error("[Voice] Error processing voice message:", err);
Expand Down
1 change: 1 addition & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,5 +131,6 @@ export const config = {
apiKey: getEnvVar("STT_API_KEY", false),
model: getEnvVar("STT_MODEL", false) || "whisper-large-v3-turbo",
language: getEnvVar("STT_LANGUAGE", false),
promptInjection: getEnvVar("STT_PROMPT_INJECTION", false),
},
};