@@ -10,28 +10,36 @@ const voiceMode = ref<'mixed' | 'continuous'>('mixed')
模式:
- {{ voiceMode === 'mixed' ? '语音识别结果追加到输入框,可继续编辑' : '持续识别语音并自动替换内容' }}
+ {{
+ voiceMode === 'append'
+ ? '追加模式:每次语音识别结果会追加到输入框末尾,适合混合输入'
+ : '替换模式:每次语音识别会替换输入框全部内容,适合纯语音输入'
+ }}
diff --git a/packages/components/src/sender-actions/voice-button/index.vue b/packages/components/src/sender-actions/voice-button/index.vue
index 1cd7d5f84..e2a7a6683 100644
--- a/packages/components/src/sender-actions/voice-button/index.vue
+++ b/packages/components/src/sender-actions/voice-button/index.vue
@@ -34,16 +34,24 @@ const insertTranscript = (transcript: string) => {
return
}
- // 在单次录音会话期间,持续替换当前的语音插入范围
- const range = speechRange.value ?? {
- from: editorInstance.state.selection.from,
- to: editorInstance.state.selection.to,
+ // autoReplace 模式:替换整个输入框内容
+ if (speechRange.value === null) {
+ // 首次插入,记录起始位置为 0
+ speechRange.value = {
+ from: 0,
+ to: 0,
+ }
}
- const tr = editorInstance.state.tr.insertText(transcript, range.from, range.to)
+
+ // 替换从起始位置到当前内容末尾的所有文本
+ const docSize = editorInstance.state.doc.content.size
+ const tr = editorInstance.state.tr.insertText(transcript, speechRange.value.from, docSize)
editorInstance.view.dispatch(tr)
+
+ // 更新范围,保持起始位置不变,更新结束位置
speechRange.value = {
- from: range.from,
- to: range.from + transcript.length,
+ from: speechRange.value.from,
+ to: speechRange.value.from + transcript.length,
}
editorInstance.commands.focus('end')
}
@@ -62,7 +70,9 @@ const speechOptions = {
emit('speech-interim', transcript)
},
onFinal: (transcript: string) => {
- insertTranscript(transcript)
+ if (!props.speechConfig?.autoReplace) {
+ insertTranscript(transcript)
+ }
emit('speech-final', transcript)
},
onEnd: (transcript?: string) => {
From 986c63cb3d320070e98fb66e9252d78b5908ee28 Mon Sep 17 00:00:00 2001
From: SonyLeo <746591437@qq.com>
Date: Wed, 15 Apr 2026 21:02:00 -0700
Subject: [PATCH 4/6] feat(voice-button): improve speech handling with
autoReplace and continuous recognition support
---
docs/demos/sender/voice-input.vue | 6 +-
docs/src/components/sender.md | 6 +-
.../src/sender-actions/voice-button/index.vue | 112 +++++++++++++-----
.../voice-button/speech.types.ts | 4 +-
.../voice-button/webSpeechHandler.ts | 68 +++++++++--
5 files changed, 146 insertions(+), 50 deletions(-)
diff --git a/docs/demos/sender/voice-input.vue b/docs/demos/sender/voice-input.vue
index d3e617d50..b820a3885 100644
--- a/docs/demos/sender/voice-input.vue
+++ b/docs/demos/sender/voice-input.vue
@@ -22,7 +22,7 @@ const voiceMode = ref<'append' | 'replace'>('append')
{{
voiceMode === 'append'
? '追加模式:每次语音识别结果会追加到输入框末尾,适合混合输入'
- : '替换模式:每次语音识别会替换输入框全部内容,适合纯语音输入'
+ : '替换模式:在同一次录音会话内持续识别,并用最新结果更新本次语音输入内容'
}}
('append')
:placeholder="
voiceMode === 'append'
? '可以打字或点击麦克风说话,语音内容会追加...'
- : '点击麦克风说话,每次识别会替换全部内容...'
+ : '点击麦克风连续说话,本次语音内容会持续更新...'
"
>
@@ -39,7 +39,7 @@ const voiceMode = ref<'append' | 'replace'>('append')
:speech-config="
voiceMode === 'append'
? { autoReplace: false, interimResults: true }
- : { autoReplace: true, interimResults: true }
+ : { autoReplace: true, continuous: true, interimResults: true }
"
/>
diff --git a/docs/src/components/sender.md b/docs/src/components/sender.md
index a7bcfc7f7..f1a1f3b3d 100644
--- a/docs/src/components/sender.md
+++ b/docs/src/components/sender.md
@@ -155,9 +155,9 @@ TrSender.Suggestion.configure({ items: suggestions, filterFn: customFilter })
#### 基础语音识别
-使用浏览器内置的语音识别功能,支持混合输入和连续识别两种模式。可通过 `speechConfig.lang` 显式指定识别语言。
+使用浏览器内置的语音识别功能,支持追加写入和替换写入两种体验。可通过 `speechConfig.lang` 显式指定识别语言,并结合 `speechConfig.continuous` 控制是否持续识别。
-
+
:::tip lang 语言说明
`lang` 用于指定语音识别语言,建议显式传入,并与页面的 `html lang` 保持一致,避免页面语言和浏览器环境语言不一致时出现识别偏差。
@@ -613,7 +613,7 @@ interface SpeechConfig {
lang?: string // 内置 Web Speech 的识别语言;未传入时使用 navigator.language
continuous?: boolean // 内置 Web Speech 是否持续识别
interimResults?: boolean // 内置 Web Speech 是否返回中间结果
- autoReplace?: boolean // 是否在本次录音期间用最新识别结果替换当前语音插入内容
+ autoReplace?: boolean // 是否在本次录音期间仅用最新识别结果替换语音写入的内容区间
}
// 模板项(联合类型)
diff --git a/packages/components/src/sender-actions/voice-button/index.vue b/packages/components/src/sender-actions/voice-button/index.vue
index e2a7a6683..211d5b957 100644
--- a/packages/components/src/sender-actions/voice-button/index.vue
+++ b/packages/components/src/sender-actions/voice-button/index.vue
@@ -16,75 +16,123 @@ const emit = defineEmits()
// 从 Context 获取最小依赖:只需要 editor 和 disabled
const { editor, disabled: contextDisabled } = useSenderContext()
const isDisabled = computed(() => props.disabled || contextDisabled.value)
+const isAutoReplace = computed(() => props.speechConfig?.autoReplace ?? false)
const speechRange = ref<{ from: number; to: number } | null>(null)
+const committedTranscript = ref('')
+const speechPrefix = ref('')
-const resetSpeechRange = () => {
+const resetSpeechSession = () => {
speechRange.value = null
+ committedTranscript.value = ''
+ speechPrefix.value = ''
}
-const insertTranscript = (transcript: string) => {
- if (!props.autoInsert || !editor.value || !transcript) return
+const ensureSpeechRange = () => {
+ if (speechRange.value || !editor.value) {
+ return speechRange.value
+ }
+
+ const { from, to } = editor.value.state.selection
+ const previousText = from === to ? (editor.value.state.doc.resolve(from).nodeBefore?.textContent ?? '') : ''
+
+ speechPrefix.value = previousText && /\S$/.test(previousText) ? ' ' : ''
+ speechRange.value = {
+ from,
+ to,
+ }
+
+ return speechRange.value
+}
- const editorInstance = editor.value
- const autoReplace = props.speechConfig?.autoReplace ?? false
+const focusEditor = () => {
+ if (!editor.value) return
- if (!autoReplace) {
- editorInstance.commands.insertContent(transcript + ' ')
- editorInstance.commands.focus('end')
+ if (isAutoReplace.value && speechRange.value) {
+ editor.value.commands.focus(speechRange.value.to)
return
}
- // autoReplace 模式:替换整个输入框内容
- if (speechRange.value === null) {
- // 首次插入,记录起始位置为 0
- speechRange.value = {
- from: 0,
- to: 0,
- }
+ editor.value.commands.focus('end')
+}
+
+const appendTranscript = (transcript: string) => {
+ if (!props.autoInsert || !editor.value || !transcript) return
+
+ editor.value.commands.insertContent(transcript + ' ')
+ focusEditor()
+}
+
+const replaceTranscript = (transcript: string) => {
+ if (!props.autoInsert || !editor.value || !transcript) return
+
+ const range = ensureSpeechRange()
+ const nextTranscript = `${speechPrefix.value}${transcript}`
+
+ if (!range) {
+ return
}
- // 替换从起始位置到当前内容末尾的所有文本
- const docSize = editorInstance.state.doc.content.size
- const tr = editorInstance.state.tr.insertText(transcript, speechRange.value.from, docSize)
- editorInstance.view.dispatch(tr)
+ const tr = editor.value.state.tr.insertText(nextTranscript, range.from, range.to)
+ editor.value.view.dispatch(tr)
- // 更新范围,保持起始位置不变,更新结束位置
speechRange.value = {
- from: speechRange.value.from,
- to: speechRange.value.from + transcript.length,
+ from: range.from,
+ to: range.from + nextTranscript.length,
+ }
+
+ focusEditor()
+}
+
+const mergeCommittedTranscript = (transcript: string) => {
+ if (!transcript) {
+ return committedTranscript.value
+ }
+
+ if (!committedTranscript.value || transcript.startsWith(committedTranscript.value)) {
+ committedTranscript.value = transcript
+ return committedTranscript.value
}
- editorInstance.commands.focus('end')
+
+ if (committedTranscript.value !== transcript && !committedTranscript.value.endsWith(transcript)) {
+ committedTranscript.value += transcript
+ }
+
+ return committedTranscript.value
}
// 语音配置 - 使用普通对象而不是 computed,避免每次都创建新对象
const speechOptions = {
...props.speechConfig,
onStart: () => {
- resetSpeechRange()
+ resetSpeechSession()
+ if (isAutoReplace.value) {
+ ensureSpeechRange()
+ }
emit('speech-start')
},
onInterim: (transcript: string) => {
- if (props.speechConfig?.autoReplace) {
- insertTranscript(transcript)
+ if (isAutoReplace.value) {
+ replaceTranscript(transcript)
}
emit('speech-interim', transcript)
},
onFinal: (transcript: string) => {
- if (!props.speechConfig?.autoReplace) {
- insertTranscript(transcript)
+ if (isAutoReplace.value) {
+ replaceTranscript(mergeCommittedTranscript(transcript))
+ } else {
+ appendTranscript(transcript)
}
emit('speech-final', transcript)
},
onEnd: (transcript?: string) => {
- // 结束后聚焦编辑器,确保光标可见
if (editor.value) {
- editor.value.commands.focus('end')
+ focusEditor()
}
- resetSpeechRange()
+ resetSpeechSession()
emit('speech-end', transcript)
},
onError: (error: Error) => {
- resetSpeechRange()
+ resetSpeechSession()
emit('speech-error', error)
},
}
diff --git a/packages/components/src/sender-actions/voice-button/speech.types.ts b/packages/components/src/sender-actions/voice-button/speech.types.ts
index a182d1aae..17b2de995 100644
--- a/packages/components/src/sender-actions/voice-button/speech.types.ts
+++ b/packages/components/src/sender-actions/voice-button/speech.types.ts
@@ -1,4 +1,4 @@
-/**
+/**
* 语音识别相关类型定义
*/
// 语音回调函数集合
@@ -27,7 +27,7 @@ export interface SpeechConfig {
lang?: string // 识别语言,默认浏览器语言
continuous?: boolean // 是否持续识别
interimResults?: boolean // 是否返回中间结果
- autoReplace?: boolean // 是否在本次录音期间自动替换语音插入内容
+ autoReplace?: boolean // 是否在本次录音期间仅替换语音写入的内容区间
}
// 语音识别状态
diff --git a/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts b/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts
index 39a61638a..9075133fb 100644
--- a/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts
+++ b/packages/components/src/sender-actions/voice-button/webSpeechHandler.ts
@@ -1,5 +1,35 @@
import type { SpeechCallbacks, SpeechHandler, SpeechConfig } from './speech.types'
+interface ParsedSpeechResult {
+ finalTranscript: string
+ interimTranscript: string
+}
+
+export function parseSpeechRecognitionResult(event: SpeechRecognitionEvent): ParsedSpeechResult {
+ let finalTranscript = ''
+ let interimTranscript = ''
+
+ for (let index = event.resultIndex; index < event.results.length; index++) {
+ const result = event.results[index]
+ const transcript = result[0]?.transcript ?? ''
+
+ if (!transcript) {
+ continue
+ }
+
+ if (result.isFinal) {
+ finalTranscript += transcript
+ } else {
+ interimTranscript += transcript
+ }
+ }
+
+ return {
+ finalTranscript,
+ interimTranscript,
+ }
+}
+
/**
* 内置 Web Speech API 处理器
* 基于浏览器原生 Web Speech API 实现的语音识别
@@ -7,6 +37,11 @@ import type { SpeechCallbacks, SpeechHandler, SpeechConfig } from './speech.type
export class WebSpeechHandler implements SpeechHandler {
private recognition?: SpeechRecognition
private options: SpeechConfig
+ private finalizedTranscript: string = ''
+
+ private resetSessionTranscript(): void {
+ this.finalizedTranscript = ''
+ }
/**
* 初始化语音识别实例
@@ -45,25 +80,33 @@ export class WebSpeechHandler implements SpeechHandler {
*/
private setupEventHandlers(callbacks: SpeechCallbacks): void {
if (!this.recognition || !callbacks) return
+
this.recognition.onstart = () => {
+ this.resetSessionTranscript()
callbacks.onStart()
}
+
this.recognition.onend = () => {
- callbacks.onEnd()
+ callbacks.onEnd(this.finalizedTranscript || undefined)
+ this.resetSessionTranscript()
}
+
this.recognition.onresult = (event: SpeechRecognitionEvent) => {
- const transcript = Array.from(event.results)
- .map((result) => result[0].transcript)
- .join('')
- const current = event.results[event.resultIndex]
- if (current?.isFinal) {
- callbacks.onFinal(transcript)
- } else {
- callbacks.onInterim(transcript)
+ const { finalTranscript, interimTranscript } = parseSpeechRecognitionResult(event)
+
+ if (finalTranscript) {
+ this.finalizedTranscript += finalTranscript
+ callbacks.onFinal(finalTranscript)
+ }
+
+ if (interimTranscript) {
+ callbacks.onInterim(this.finalizedTranscript + interimTranscript)
}
}
+
this.recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
callbacks.onError(new Error(event.error))
+ this.resetSessionTranscript()
this.cleanup()
}
}
@@ -88,8 +131,10 @@ export class WebSpeechHandler implements SpeechHandler {
callbacks.onError(new Error('浏览器不支持语音识别'))
return
}
- // 绑定事件处理器
+
+ this.resetSessionTranscript()
this.setupEventHandlers(callbacks)
+
try {
this.recognition.start()
} catch (error) {
@@ -102,7 +147,10 @@ export class WebSpeechHandler implements SpeechHandler {
*/
stop(): void {
if (!this.recognition) return
+
this.cleanup()
+ this.resetSessionTranscript()
+
try {
this.recognition.stop()
} catch (error) {
From f0a4ca2f023dda2a97ba846615b7661bd7defe08 Mon Sep 17 00:00:00 2001
From: SonyLeo <746591437@qq.com>
Date: Wed, 15 Apr 2026 23:51:11 -0700
Subject: [PATCH 5/6] docs(sender): update voice input descriptions for clarity
on speech modes
---
docs/demos/sender/voice-input.vue | 4 +-
docs/src/components/sender.md | 11 ++--
.../src/sender-actions/voice-button/index.vue | 50 ++-----------------
.../voice-button/speech.types.ts | 2 +-
4 files changed, 11 insertions(+), 56 deletions(-)
diff --git a/docs/demos/sender/voice-input.vue b/docs/demos/sender/voice-input.vue
index b820a3885..48e868e0a 100644
--- a/docs/demos/sender/voice-input.vue
+++ b/docs/demos/sender/voice-input.vue
@@ -22,7 +22,7 @@ const voiceMode = ref<'append' | 'replace'>('append')
{{
voiceMode === 'append'
? '追加模式:每次语音识别结果会追加到输入框末尾,适合混合输入'
- : '替换模式:在同一次录音会话内持续识别,并用最新结果更新本次语音输入内容'
+ : '替换模式:在录音期间使用最新识别结果直接替换整个输入框内容'
}}
('append')
:placeholder="
voiceMode === 'append'
? '可以打字或点击麦克风说话,语音内容会追加...'
- : '点击麦克风连续说话,本次语音内容会持续更新...'
+ : '点击麦克风连续说话,输入框内容会被语音结果持续替换...'
"
>
diff --git a/docs/src/components/sender.md b/docs/src/components/sender.md
index f1a1f3b3d..428d93c97 100644
--- a/docs/src/components/sender.md
+++ b/docs/src/components/sender.md
@@ -1,4 +1,4 @@
----
+---
outline: [1, 3]
---
@@ -155,9 +155,9 @@ TrSender.Suggestion.configure({ items: suggestions, filterFn: customFilter })
#### 基础语音识别
-使用浏览器内置的语音识别功能,支持追加写入和替换写入两种体验。可通过 `speechConfig.lang` 显式指定识别语言,并结合 `speechConfig.continuous` 控制是否持续识别。
+使用浏览器内置的语音识别功能,支持追加写入和整框替换两种体验。可通过 `speechConfig.lang` 显式指定识别语言,并结合 `speechConfig.continuous` 控制是否持续识别。
-
+
:::tip lang 语言说明
`lang` 用于指定语音识别语言,建议显式传入,并与页面的 `html lang` 保持一致,避免页面语言和浏览器环境语言不一致时出现识别偏差。
@@ -170,7 +170,6 @@ TrSender.Suggestion.configure({ items: suggestions, filterFn: customFilter })
| `zh` | 中文 |
| `zh-CN` | 简体中文 |
| `en-US` | 美式英语 |
-:::
#### 自定义语音服务
@@ -613,7 +612,7 @@ interface SpeechConfig {
lang?: string // 内置 Web Speech 的识别语言;未传入时使用 navigator.language
continuous?: boolean // 内置 Web Speech 是否持续识别
interimResults?: boolean // 内置 Web Speech 是否返回中间结果
- autoReplace?: boolean // 是否在本次录音期间仅用最新识别结果替换语音写入的内容区间
+ autoReplace?: boolean // 是否在录音期间用识别结果替换整个输入框内容
}
// 模板项(联合类型)
@@ -801,4 +800,4 @@ Sender 组件提供了丰富的 CSS 变量用于自定义样式。
|-------|----------|
| startSpeech | 使用 `VoiceButton.start()` |
| stopSpeech | 使用 `VoiceButton.stop()` |
-| activateTemplateFirstField | 自动处理,无需调用 |
\ No newline at end of file
+| activateTemplateFirstField | 自动处理,无需调用 |
diff --git a/packages/components/src/sender-actions/voice-button/index.vue b/packages/components/src/sender-actions/voice-button/index.vue
index 211d5b957..2908aeaa2 100644
--- a/packages/components/src/sender-actions/voice-button/index.vue
+++ b/packages/components/src/sender-actions/voice-button/index.vue
@@ -1,5 +1,5 @@