Skip to content

Commit a4e78aa

Browse files
committed
feat: make LLM and TTS timeouts optional, improve Float32Array audio input handling, and refine human turn metric recording
1 parent 6e00e50 commit a4e78aa

5 files changed

Lines changed: 56 additions & 18 deletions

File tree

packages/talkio/src/agent/actors/llm.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,10 @@ export const llmActor = fromCallback<
5454
}
5555

5656
// Set up timeout
57-
const timeoutMs = config.timeout?.llmMs ?? 30000;
57+
const timeoutMs = config.timeout?.llmMs;
5858
let timeoutId: ReturnType<typeof setTimeout> | null = null;
5959

60-
if (timeoutMs > 0) {
60+
if (timeoutMs && timeoutMs > 0) {
6161
timeoutId = setTimeout(() => {
6262
if (!isAborted) {
6363
isAborted = true;

packages/talkio/src/agent/actors/tts.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,10 @@ export const ttsActor = fromCallback<
5252
}
5353

5454
// Set up timeout
55-
const timeoutMs = config.timeout?.ttsMs ?? 10000;
55+
const timeoutMs = config.timeout?.ttsMs;
5656
let timeoutId: ReturnType<typeof setTimeout> | null = null;
5757

58-
if (timeoutMs > 0) {
58+
if (timeoutMs && timeoutMs > 0) {
5959
timeoutId = setTimeout(() => {
6060
if (!isAborted) {
6161
isAborted = true;

packages/talkio/src/agent/create-agent.ts

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import { nanoid } from "nanoid";
2323
import { createActor } from "xstate";
2424

25+
import { float32ToLinear16 } from "../audio/conversions";
2526
import type { AudioInput } from "../audio/preprocessor";
2627
import { normalizeFormat, type AudioFormat } from "../audio/types";
2728
import type { STTProvider, TTSProvider } from "../providers/types";
@@ -268,13 +269,23 @@ function isRunningState(value: string | Record<string, unknown>): boolean {
268269
* For Blob inputs, this will throw - use the async version or
269270
* convert the Blob to ArrayBuffer before calling sendAudio.
270271
*/
271-
function toArrayBuffer(input: AudioInput): ArrayBuffer {
272+
function toArrayBuffer(input: AudioInput, encoding: AudioFormat["encoding"]): ArrayBuffer {
272273
if (input instanceof ArrayBuffer) {
273274
return input;
274275
}
275276

276-
if (input instanceof Float32Array || input instanceof Int16Array || input instanceof Uint8Array) {
277-
// Create a copy to ensure we have a standalone ArrayBuffer
277+
if (input instanceof Float32Array) {
278+
if (encoding === "linear16") {
279+
return float32ToLinear16(input);
280+
}
281+
if (encoding === "float32") {
282+
const buffer = new ArrayBuffer(input.byteLength);
283+
new Float32Array(buffer).set(input);
284+
return buffer;
285+
}
286+
}
287+
288+
if (input instanceof Int16Array || input instanceof Uint8Array) {
278289
const buffer = new ArrayBuffer(input.byteLength);
279290
new Uint8Array(buffer).set(new Uint8Array(input.buffer, input.byteOffset, input.byteLength));
280291
return buffer;
@@ -465,7 +476,7 @@ export function createAgent<
465476
sendAudio(audio: AudioInput) {
466477
if (isStopped) return;
467478
// Convert input to ArrayBuffer synchronously
468-
const arrayBuffer = toArrayBuffer(audio);
479+
const arrayBuffer = toArrayBuffer(audio, inputFormat.encoding);
469480
actor.send({ type: "_audio:input", audio: arrayBuffer, timestamp: Date.now() });
470481
},
471482

packages/talkio/src/agent/machine.ts

Lines changed: 35 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ const agentMachineSetup = setup({
8080
},
8181

8282
sttSpeechDurationMet: ({ context }) => {
83-
if (!context.speechStartedAt) return false;
83+
if (context.speechStartedAt === null) return false;
8484
const minDuration = context.config.interruption?.minDurationMs ?? 200;
8585
return Date.now() - context.speechStartedAt >= minDuration;
8686
},
@@ -390,11 +390,14 @@ const agentMachineSetup = setup({
390390
}),
391391
}),
392392
recordHumanTurnStart: assign({
393-
metrics: ({ context }) => ({
394-
...context.metrics,
395-
humanTurnStartTime: Date.now(),
396-
totalTurns: context.metrics.totalTurns + 1,
397-
}),
393+
metrics: ({ context }) => {
394+
if (context.metrics.humanTurnStartTime !== null) return context.metrics;
395+
return {
396+
...context.metrics,
397+
humanTurnStartTime: Date.now(),
398+
totalTurns: context.metrics.totalTurns + 1,
399+
};
400+
},
398401
}),
399402

400403
recordHumanTurnEnd: assign({
@@ -467,6 +470,17 @@ const agentMachineSetup = setup({
467470
};
468471
},
469472
}),
473+
recordResponseLengthFromComplete: assign({
474+
metrics: ({ context, event }) => {
475+
if (event.type !== "_llm:complete") return context.metrics;
476+
const responseLength = event.fullText.length;
477+
if (responseLength <= context.metrics.currentResponseLength) return context.metrics;
478+
return {
479+
...context.metrics,
480+
currentResponseLength: responseLength,
481+
};
482+
},
483+
}),
470484

471485
recordFirstSentence: assign({
472486
metrics: ({ context }) => {
@@ -808,6 +822,7 @@ export const agentMachine = agentMachineSetup.createMachine({
808822
actions: [
809823
{ type: "setHumanTurnStarted" },
810824
{ type: "emitHumanTurnStarted" },
825+
{ type: "recordHumanTurnStart" },
811826
{ type: "debugLogEvent" },
812827
{ type: "emitHumanTurnTranscript" },
813828
{ type: "updatePartialTranscriptFromEvent" },
@@ -832,6 +847,7 @@ export const agentMachine = agentMachineSetup.createMachine({
832847
actions: [
833848
{ type: "setHumanTurnStarted" },
834849
{ type: "emitHumanTurnStarted" },
850+
{ type: "recordHumanTurnStart" },
835851
{ type: "debugLogEvent" },
836852
{ type: "emitHumanTurnTranscript" },
837853
{ type: "forwardToTurnDetector" },
@@ -860,6 +876,7 @@ export const agentMachine = agentMachineSetup.createMachine({
860876
actions: [
861877
{ type: "setHumanTurnStarted" },
862878
{ type: "emitHumanTurnStarted" },
879+
{ type: "recordHumanTurnStart" },
863880
{ type: "debugLogEvent" },
864881
{ type: "recordHumanTurnEnd" },
865882
{ type: "emitHumanTurnTranscript" },
@@ -920,6 +937,7 @@ export const agentMachine = agentMachineSetup.createMachine({
920937
actions: [
921938
{ type: "setHumanTurnStarted" },
922939
{ type: "emitHumanTurnStarted" },
940+
{ type: "recordHumanTurnStart" },
923941
{ type: "debugLogEvent" },
924942
{ type: "recordHumanTurnEnd" },
925943
{ type: "emitHumanTurnTranscript" },
@@ -1042,14 +1060,19 @@ export const agentMachine = agentMachineSetup.createMachine({
10421060
guard: "aiTurnHadNoAudio",
10431061
actions: [
10441062
{ type: "clearLLMRef" },
1063+
{ type: "recordResponseLengthFromComplete" },
10451064
{ type: "recordAITurnComplete" },
10461065
{ type: "emitAITurnEnded" },
10471066
{ type: "addAssistantMessageFromEvent" },
10481067
{ type: "resetTurnMetrics" },
10491068
],
10501069
},
10511070
{
1052-
actions: [{ type: "clearLLMRef" }, { type: "addAssistantMessageFromEvent" }],
1071+
actions: [
1072+
{ type: "clearLLMRef" },
1073+
{ type: "recordResponseLengthFromComplete" },
1074+
{ type: "addAssistantMessageFromEvent" },
1075+
],
10531076
},
10541077
],
10551078
},
@@ -1177,7 +1200,11 @@ export const agentMachine = agentMachineSetup.createMachine({
11771200
},
11781201
},
11791202
stopped: {
1180-
entry: [{ type: "abortSessionController" }, { type: "emitAgentStopped" }],
1203+
entry: [
1204+
{ type: "abortSessionController" },
1205+
{ type: "abortTurnController" },
1206+
{ type: "emitAgentStopped" },
1207+
],
11811208
type: "final",
11821209
},
11831210
},

packages/talkio/src/types/config.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,10 +79,10 @@ export interface InterruptionConfig {
7979
* ```
8080
*/
8181
export interface TimeoutConfig {
82-
/** Timeout for LLM generation in milliseconds. @default 30000 */
82+
/** Timeout for LLM generation in milliseconds. Undefined disables the timeout. */
8383
llmMs?: number;
8484

85-
/** Timeout for TTS synthesis in milliseconds. @default 10000 */
85+
/** Timeout for TTS synthesis in milliseconds. Undefined disables the timeout. */
8686
ttsMs?: number;
8787
}
8888

0 commit comments

Comments
 (0)