diff --git a/examples/network/services/chataudioio/main.js b/examples/network/services/chataudioio/main.js
index 789e22d45..b93ab77cf 100644
--- a/examples/network/services/chataudioio/main.js
+++ b/examples/network/services/chataudioio/main.js
@@ -16,7 +16,7 @@ import ChatAudioIO from "ChatAudioIO";
const chat = new ChatAudioIO({
specifier: "humeAIEVI",
- voiceName: "Sunny",
+ voiceID: "Sunny",
instructions: "You're a hostile fisherman with a salty sense of humor. You dislike people and care even less for fish.",
onStateChanged(state) {
trace(`State: ${ChatAudioIO.states[state]} ${this.error ?? ""}\n`);
diff --git a/modules/network/services/chatAudioIO/ChatAudioIO.js b/modules/network/services/chatAudioIO/ChatAudioIO.js
index 28f6152cd..5a24f0e0e 100644
--- a/modules/network/services/chatAudioIO/ChatAudioIO.js
+++ b/modules/network/services/chatAudioIO/ChatAudioIO.js
@@ -78,7 +78,7 @@ class ChatAudioIO {
this.onOutputTranscript = options.onOutputTranscript ?? callback;
this.onStateChanged = options.onStateChanged ?? callback;
- this.createWorker(options.specifier, options.instructions, options.functions, options.voiceID, options.providerID, options.modelID);
+ this.createWorker(options.specifier, options.instructions, options.functions, options.voiceID, options.providerID, options.modelID, options.apiKey);
}
close() {
this.worker?.terminate();
@@ -96,7 +96,7 @@ class ChatAudioIO {
if (this.output)
this.output.volume = volume;
}
- createWorker(specifier, instructions, functions, voiceID, providerID, modelID) {
+ createWorker(specifier, instructions, functions, voiceID, providerID, modelID, apiKey) {
this.worker = new Worker(specifier, {
static: 512 * 1024,
chunk: {
@@ -118,7 +118,8 @@ class ChatAudioIO {
functions,
voiceID,
providerID,
- modelID
+ modelID,
+ apiKey
})
this.ensureInput();
}
diff --git a/modules/network/services/chatAudioIO/readme.md b/modules/network/services/chatAudioIO/readme.md
index be4e6c43f..66c524220 100644
--- a/modules/network/services/chatAudioIO/readme.md
+++ b/modules/network/services/chatAudioIO/readme.md
@@ -4,11 +4,11 @@ Updated December 3, 2025
## Architecture
-The conversation module uses a JavaScript [worker](https://moddable.com/documentation/base/worker). The worker is in charge of networks protocols, communicating with the AI cloud services, and encoding/decoding audio samples.
+The conversation module uses a JavaScript [worker](https://moddable.com/documentation/base/worker). The worker is in charge of network protocols, communicating with the AI cloud services, and encoding/decoding audio samples.
The conversation module and its worker communicate with [marshalled messages](https://www.moddable.com/documentation/xs/XS%20Marshalling). They share input and output audio buffers for efficiency. This document describes the [messages](#Messages).
-Because audio samples are transmitted as Base64 encoded data embedded in JSON, workers use a special parser to optimize memory usage and throughput. This document also describes the [`JSONBase64Parser`](#JSONBase64Parser).
+Because audio samples are transmitted as Base64 encoded data embedded in JSON, workers use a special parser to optimize memory usage and throughput. This document also describes the [`JSONBase64Parser`](#JSONBase64Parser).
The conversation library implements support for various AI cloud services using this worker architecture:
@@ -32,6 +32,7 @@ The `options` object selects and configures a service. Its properties are:
- `voiceID`: *string*, the identifier of the voice, optional
- `providerID`: *string*, the identifier of the language model provider, optional
- `modelID`: *string*, the identifier of the language model, optional
+- `apiKey`: *string*, the API key for the AI cloud service, optional
The `options` object can also provides callbacks. All callbacks are optional.
@@ -49,7 +50,7 @@ The `options` object can also provides callbacks. All callbacks are optional.
static CONNECTING = 2;
static CONNECTED = 3;
static SPEAKING = 4; // user is speaking (sending audio to cloud)
- static LISTENING = 5; // user is listening (receiving audio from cloud)
+ static LISTENING = 5; // user is listening (receiving audio from cloud)
static WAITING = 6;
```
@@ -94,14 +95,15 @@ Use `sendText` to inform the service about user interactions that did not involv
- `voiceID`: *string*, the identifier of the voice, optional
- `providerID`: *string*, the identifier of the language model provider, optional
- `modelID`: *string*, the identifier of the language model, optional
+- `apiKey`: *string*, the API key for the AI cloud service, optional
The `voiceID`, `providerID` and `modelID` are specific to each service. Look at [ConversationalAI assets](https://github.com/Moddable-OpenSource/moddable/blob/public/contributed/conversationalAI/assets.js) to get voice, provider and model identifiers, names and descriptions by service.
The format of function descriptions is a JSON schema that is more or less common to all services.
```javascript
-{
- id:"configure",
+{
+ id:"configure",
instructions: "You are a helpful lighting system bot. You can turn lights on and off. Do not perform any other tasks.",
functions: [
{
@@ -117,7 +119,7 @@ The format of function descriptions is a JSON schema that is more or less common
},
required: ["light_name"],
}
- },
+ },
{
name: "turn_light_off",
description: "Turn the light off. Call this whenever you need to turn off a light, for example when a customer tells 'turn bedroom light off.'",
@@ -131,7 +133,7 @@ The format of function descriptions is a JSON schema that is more or less common
},
required: ["light_name"],
}
- }
+ }
]
}
```
@@ -261,7 +263,7 @@ Tell the application that the worker is connected and has configured the service
#### disconnected
-Tell the application that the worker is disconnected from the service.
+Tell the application that the worker is disconnected from the service.
```javascript
{
@@ -293,7 +295,7 @@ Tell the application that the worker is receiving audio samples from the service
}
```
-The application creates an audio output object which reads audio samples from the output buffer.
+The application creates an audio output object which reads audio samples from the output buffer.
#### receiveAudio
diff --git a/modules/network/services/chatAudioIO/workers/deepgramAgent.js b/modules/network/services/chatAudioIO/workers/deepgramAgent.js
index 92593d003..f4ddd1be5 100644
--- a/modules/network/services/chatAudioIO/workers/deepgramAgent.js
+++ b/modules/network/services/chatAudioIO/workers/deepgramAgent.js
@@ -28,9 +28,6 @@ class DeepgramVoiceAgentModel extends ChatWebSocketWorker {
super(options);
this.host = "agent.deepgram.com";
this.path = `/v1/agent/converse`;
- this.headers = [
- ["Authorization", `Token ${config.deepgramKey}`],
- ];
this.keepAliveTimer = null;
}
close() {
@@ -40,6 +37,10 @@ class DeepgramVoiceAgentModel extends ChatWebSocketWorker {
configure(message) {
const prompt = message.instructions ?? "";
const functions = message.functions ?? [];
+ const apiKey = message.apiKey ?? config.deepgramKey;
+ this.headers = [
+ ["Authorization", `Token ${apiKey}`],
+ ];
this.setup = {
type: "Settings",
experimental: true,
diff --git a/modules/network/services/chatAudioIO/workers/elevenLabsAgent.js b/modules/network/services/chatAudioIO/workers/elevenLabsAgent.js
index 3711c4d71..a0808c85b 100644
--- a/modules/network/services/chatAudioIO/workers/elevenLabsAgent.js
+++ b/modules/network/services/chatAudioIO/workers/elevenLabsAgent.js
@@ -45,6 +45,11 @@ class ElevenLabsModel extends ChatWebSocketWorker {
this.setup = {
type: "conversation_initiation_client_data",
}
+ const apiKey = message.apiKey ?? config.elevenLabsKey;
+ this.headers = new Map([
+ [ "xi-api-key", apiKey ],
+ [ "Content-Type", "application/json" ],
+ ]);
this.body = {
conversation_config: {
asr: {
@@ -80,10 +85,7 @@ class ElevenLabsModel extends ChatWebSocketWorker {
...device.network.https,
host: this.host
});
- const headers = new Map([
- [ "xi-api-key", config.elevenLabsKey ],
- [ "Content-Type", "application/json" ],
- ]);
+		const headers = this.headers;
const request = (method, path, body) => {
let buffer = null;
let length = 0;
@@ -150,6 +152,8 @@ class ElevenLabsModel extends ChatWebSocketWorker {
case 3:
client.close();
if (json?.agent_id) {
+ this.headers.delete("content-length");
+ this.headers.delete("Content-Type");
this.path = `/v1/convai/conversation?agent_id=${json.agent_id}`;
super.connect(message);
}
diff --git a/modules/network/services/chatAudioIO/workers/googleGeminiLive.js b/modules/network/services/chatAudioIO/workers/googleGeminiLive.js
index 5c510b1a2..297897ffa 100644
--- a/modules/network/services/chatAudioIO/workers/googleGeminiLive.js
+++ b/modules/network/services/chatAudioIO/workers/googleGeminiLive.js
@@ -28,7 +28,6 @@ class GoogleGeminiLiveModel extends ChatWebSocketWorker {
constructor(options) {
super(options);
this.host = "generativelanguage.googleapis.com";
- this.path = `/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${config.geminiAPIKey}`;
this.headers = null;
this.audioPrefix = audioPrefix;
this.audioSuffix = audioSuffix;
@@ -39,6 +38,8 @@ class GoogleGeminiLiveModel extends ChatWebSocketWorker {
const tools = message.functions ?? [];
const voiceName = message.voiceID ?? "aoede";
const model = message.modelID ?? "gemini-2.5-flash-native-audio-preview-12-2025";
+ const apiKey = message.apiKey ?? config.geminiAPIKey;
+ this.path = `/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
this.setup = {
model: `models/${model}`,
generationConfig: {
diff --git a/modules/network/services/chatAudioIO/workers/humeAIEVI.js b/modules/network/services/chatAudioIO/workers/humeAIEVI.js
index 9ecaef183..792b0731a 100644
--- a/modules/network/services/chatAudioIO/workers/humeAIEVI.js
+++ b/modules/network/services/chatAudioIO/workers/humeAIEVI.js
@@ -28,7 +28,6 @@ class HumeAIEVIModel extends ChatWebSocketWorker {
constructor(options) {
super(options);
this.host = "api.hume.ai";
- this.path = `/v0/evi/chat?api_key=${config.humeAIKey}`;
this.headers = null;
this.audioPrefix = audioPrefix;
this.audioSuffix = audioSuffix;
@@ -39,10 +38,7 @@ class HumeAIEVIModel extends ChatWebSocketWorker {
...device.network.https,
host: this.host
});
- const headers = new Map([
- [ "X-Hume-Api-Key", config.humeAIKey ],
- [ "Content-Type", "application/json" ],
- ]);
+		const headers = this.headers;
const request = (method, path, body) => {
let buffer = null;
let length = 0;
@@ -105,6 +101,8 @@ class HumeAIEVIModel extends ChatWebSocketWorker {
case 3:
client.close();
this.path += `&config_id=${json.id}`;
+ this.headers.delete("content-length");
+ this.headers.delete("Content-Type");
super.connect(message);
return;
}
@@ -129,6 +127,12 @@ class HumeAIEVIModel extends ChatWebSocketWorker {
system_prompt: instructions,
tools,
};
+ const apiKey = message.apiKey ?? config.humeAIKey;
+ this.path = `/v0/evi/chat?api_key=${apiKey}`;
+ this.headers = new Map([
+ [ "X-Hume-Api-Key", apiKey ],
+ [ "Content-Type", "application/json" ],
+ ]);
this.body = {
evi_version: "4-mini",
name: "Moddable",
diff --git a/modules/network/services/chatAudioIO/workers/openAIRealtime.js b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
index fc89a6212..6ca4eb12f 100644
--- a/modules/network/services/chatAudioIO/workers/openAIRealtime.js
+++ b/modules/network/services/chatAudioIO/workers/openAIRealtime.js
@@ -29,9 +29,6 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
constructor(options) {
super(options);
this.host = "api.openai.com";
- this.headers = [
- ["Authorization", `Bearer ${config.openAIKey}`]
- ];
this.audioPrefix = audioPrefix;
this.audioSuffix = audioSuffix;
}
@@ -45,6 +42,10 @@ class OpenAIRealTimeModel extends ChatWebSocketWorker {
tool.type = "function";
tool.parameters.additionalProperties = false;
});
+ const apiKey = message.apiKey ?? config.openAIKey;
+ this.headers = [
+ ["Authorization", `Bearer ${apiKey}`]
+ ];
this.session = {
type: 'realtime',
audio: {
diff --git a/typings/ChatAudioIO.d.ts b/typings/ChatAudioIO.d.ts
index 5435e7617..6f49c832d 100644
--- a/typings/ChatAudioIO.d.ts
+++ b/typings/ChatAudioIO.d.ts
@@ -20,9 +20,10 @@
declare module "ChatAudioIO" {
type ChatAudioIOOptions = {
- specifier: "googleGeminiLive" | "openAIRealtime" | "humeAIEVI" | "elevenLabsAgent";
+ specifier: "googleGeminiLive" | "openAIRealtime" | "humeAIEVI" | "elevenLabsAgent" | "deepgramAgent";
-		voiceName?: string;
+		voiceID?: string;
instructions?: string;
+ apiKey?: string;
onStateChanged?: (this: ChatAudioIO, state: number) => void;
onInputTranscript?: (this: ChatAudioIO, text: string, more: boolean) => void;