Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "assemblyai",
"version": "4.18.5",
"version": "4.19.0",
"description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
"engines": {
"node": ">=18"
Expand Down
28 changes: 27 additions & 1 deletion src/types/openapi.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,20 @@ export type SpeakerOptions = {
max_speakers_expected?: number | null;
};

/**
 * Code switching language detection result.
 *
 * One entry of `Transcript.code_switching_languages`, which is populated
 * when `LanguageDetectionOptions.code_switching` is enabled on the request.
 */
export type CodeSwitchingLanguage = {
  /**
   * The language code detected
   */
  language_code: string;
  /**
   * The confidence score for this language detection, between 0.0 and 1.0
   */
  confidence: number;
};

/**
* Options for controlling the behavior of Automatic Language Detection
*/
Expand All @@ -1668,6 +1682,14 @@ export type LanguageDetectionOptions = {
* The language to fallback to in case the language detection does not predict any of the expected ones.
*/
fallback_language?: string | null;
/**
* Should code switching be enabled for this transcription.
*/
code_switching?: boolean | null;
/**
* The confidence threshold for the automatically detected code switching language.
*/
code_switching_confidence_threshold?: number | null;
};

/**
Expand Down Expand Up @@ -2716,6 +2738,10 @@ export type Transcript = {
* List of language codes detected in the audio file when language detection is enabled
*/
language_codes: LiteralUnion<TranscriptLanguageCode, string>[] | null;
/**
* List of detected languages with confidence scores when code switching is enabled
*/
code_switching_languages?: CodeSwitchingLanguage[] | null;
/**
* The confidence threshold for the automatically detected language.
* An error will be returned if the language confidence is below this threshold.
Expand Down Expand Up @@ -3270,7 +3296,7 @@ export type TranscriptOptionalParams = {
/**
* Options for controlling the behavior of Automatic Language Detection
*/
language_detection_options?: LanguageDetectionOptions;
language_detection_options?: LanguageDetectionOptions | null;
/**
* Enable {@link https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription | Multichannel } transcription, can be true or false.
* @defaultValue false
Expand Down
146 changes: 146 additions & 0 deletions tests/unit/language-detection-options.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import fetchMock from "jest-fetch-mock";
import { LanguageDetectionOptions } from "../../src";
import { createClient, requestMatches } from "./utils";

// Route all fetch calls through jest-fetch-mock for the whole file.
fetchMock.enableMocks();

// Shared fixtures used by every test in this file.
const assembly = createClient();
const transcriptId = "transcript_123";
const remoteAudioURL = "https://assembly.ai/espn.m4a";

beforeEach(() => {
  // Reset call history and re-arm mocking so each test starts clean
  // and `fetchMock.mock.calls[0]` always refers to the current test.
  jest.clearAllMocks();
  fetchMock.resetMocks();
  fetchMock.doMock();
});

describe("language detection options", () => {
  /**
   * Arm a one-shot mock for `POST /v2/transcript` that answers with a
   * queued transcript, submit a transcription request carrying the given
   * `language_detection_options`, and return both the SDK's response and
   * the JSON body the SDK actually sent on the wire.
   *
   * Extracted because every test in this suite repeated the same
   * mock-arm / submit / capture boilerplate.
   *
   * @param languageDetectionOptions - options to forward, or null to
   *   exercise the nullable form of the field.
   * @returns the transcript returned by `submit` and the parsed request body.
   */
  const submitAndCaptureBody = async (
    languageDetectionOptions: LanguageDetectionOptions | null,
  ) => {
    fetchMock.doMockOnceIf(
      requestMatches({ url: "/v2/transcript", method: "POST" }),
      JSON.stringify({ id: transcriptId, status: "queued" }),
    );

    const transcript = await assembly.transcripts.submit({
      audio_url: remoteAudioURL,
      language_detection: true,
      language_detection_options: languageDetectionOptions,
    });

    // Every test asserts the mocked transcript came back; do it once here.
    expect(transcript.id).toBe(transcriptId);

    // `beforeEach` resets mocks, so calls[0] is this test's only request.
    const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
    return { transcript, requestBody };
  };

  it("should create transcript with all language_detection_options", async () => {
    const languageDetectionOptions: LanguageDetectionOptions = {
      expected_languages: ["en", "es"],
      fallback_language: "en",
      code_switching: true,
      code_switching_confidence_threshold: 0.8,
    };

    const { transcript, requestBody } = await submitAndCaptureBody(
      languageDetectionOptions,
    );

    expect(transcript.status).toBe("queued");

    // Verify the request body included language_detection_options
    expect(requestBody.language_detection).toBe(true);
    expect(requestBody.language_detection_options).toEqual(
      languageDetectionOptions,
    );
  });

  it("should create transcript with only code_switching enabled", async () => {
    const { requestBody } = await submitAndCaptureBody({
      code_switching: true,
    });

    expect(requestBody.language_detection_options.code_switching).toBe(true);
    // Omitted fields must not be serialized into the request.
    expect(
      requestBody.language_detection_options
        .code_switching_confidence_threshold,
    ).toBeUndefined();
  });

  it("should create transcript with code_switching and confidence threshold", async () => {
    const { requestBody } = await submitAndCaptureBody({
      code_switching: true,
      code_switching_confidence_threshold: 0.75,
    });

    expect(requestBody.language_detection_options.code_switching).toBe(true);
    expect(
      requestBody.language_detection_options
        .code_switching_confidence_threshold,
    ).toBe(0.75);
  });

  it("should create transcript with only confidence threshold", async () => {
    const { requestBody } = await submitAndCaptureBody({
      code_switching_confidence_threshold: 0.9,
    });

    expect(
      requestBody.language_detection_options.code_switching,
    ).toBeUndefined();
    expect(
      requestBody.language_detection_options
        .code_switching_confidence_threshold,
    ).toBe(0.9);
  });

  it("should handle null language_detection_options", async () => {
    const { requestBody } = await submitAndCaptureBody(null);

    // null must pass through as-is, not be dropped or coerced.
    expect(requestBody.language_detection_options).toBe(null);
  });
});