From 0bc2509e41af14b4481b4081760cb42d030156c3 Mon Sep 17 00:00:00 2001 From: ysslang Date: Sun, 1 Mar 2026 05:50:00 +0000 Subject: [PATCH] feat: add STT prompt injection configurable via STT_PROMPT_INJECTION --- .env.example | 1 + src/bot/handlers/voice.ts | 18 +++++++++++++++++- src/config.ts | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 439abdd..02bdb91 100644 --- a/.env.example +++ b/.env.example @@ -72,3 +72,4 @@ OPENCODE_MODEL_ID=big-pickle # STT_API_KEY= # STT_MODEL= # STT_LANGUAGE= +# STT_PROMPT_INJECTION=true # Set to "true" or "1" to prepend a default note telling the LLM that the text is a voice transcription and may contain recognition errors; set any other string to prepend it as a custom note instead. Leave unset, or set to "false" or "0", to disable. diff --git a/src/bot/handlers/voice.ts b/src/bot/handlers/voice.ts index 67d9f68..8a73671 100644 --- a/src/bot/handlers/voice.ts +++ b/src/bot/handlers/voice.ts @@ -219,8 +219,24 @@ export async function handleVoiceMessage(ctx: Context, deps: VoiceMessageDeps): logger.info(`[Voice] Transcribed audio: ${recognizedText.length} chars`); + // --- NEW LOGIC: Inject Prompt for LLM --- + let textForLLM = recognizedText; + const injectionSetting = config.stt.promptInjection; + + if (injectionSetting) { + const normalizedSetting = injectionSetting.trim().toLowerCase(); + if (normalizedSetting === "true" || normalizedSetting === "1") { + const defaultPrompt = + "[Note: The following text is transcribed from voice. There may be homophone or mispronunciation errors. Please interpret the intended meaning based on context.]"; + textForLLM = `${defaultPrompt}\n${recognizedText}`; + } else if (normalizedSetting !== "false" && normalizedSetting !== "0") { + textForLLM = `${injectionSetting}\n${recognizedText}`; + } + } + // Process the recognized text as a prompt - await processPrompt(ctx, recognizedText, deps); + await processPrompt(ctx, textForLLM, deps); + } catch (err) { const errorMessage = err instanceof Error ? 
err.message : "unknown error"; logger.error("[Voice] Error processing voice message:", err); diff --git a/src/config.ts b/src/config.ts index 87931a9..0a8474f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -131,5 +131,6 @@ export const config = { apiKey: getEnvVar("STT_API_KEY", false), model: getEnvVar("STT_MODEL", false) || "whisper-large-v3-turbo", language: getEnvVar("STT_LANGUAGE", false), + promptInjection: getEnvVar("STT_PROMPT_INJECTION", false), }, };