Skip to content

Commit 36697aa

Browse files
authored
Merge pull request #39 from DeepLcom/aj/ACL-1878
Add Voice API endpoints and AsyncAPI specifications
2 parents 926b656 + ee9414c commit 36697aa

File tree

5 files changed

+1356
-2
lines changed

5 files changed

+1356
-2
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ The major and minor version numbers reflect changes to the DeepL API
99
number is used only for corrections to the OpenAPI specification, for example:
1010
typos, schema fixes, or adding examples.
1111

12+
## [3.7.0] - 2025-11-10
13+
### Added
14+
* Add new endpoints `/v1/voice/realtime` (REST) and `/v1/voice/realtime/connect` (WebSocket) which enable real-time voice transcription and translation via WebSocket streaming
15+
* Add AsyncAPI specification - `voice.asyncapi.yaml` and `voice.asyncapi.json` - documenting the Voice WebSocket streaming protocol
16+
1217
## [3.6.1] - 2025-11-06
1318
### Changed
1419
* Hebrew (`HE`), Thai (`TH`), and Vietnamese (`VI`) are now included in the `/v2/languages` endpoint response as they now support document translation in addition to text translation
@@ -253,6 +258,7 @@ keys within an organization.
253258
## [2.0.0]
254259
Initial release of the OpenAPI specification.
255260

261+
[3.7.0]: https://github.com/DeepLcom/openapi/compare/v3.6.1...v3.7.0
256262
[3.6.1]: https://github.com/DeepLcom/openapi/compare/v3.6.0...v3.6.1
257263
[3.6.0]: https://github.com/DeepLcom/openapi/compare/v3.5.0...v3.6.0
258264
[3.5.0]: https://github.com/DeepLcom/openapi/compare/v3.4.2...v3.5.0

openapi.json

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"name": "DeepL - Contact us",
99
"url": "https://www.deepl.com/contact-us"
1010
},
11-
"version": "3.6.1"
11+
"version": "3.7.0"
1212
},
1313
"externalDocs": {
1414
"description": "DeepL Pro - Plans and pricing",
@@ -48,6 +48,10 @@
4848
{
4949
"name": "MetaInformation",
5050
"description": "Information about API usage and value ranges"
51+
},
52+
{
53+
"name": "VoiceAPI",
54+
"description": "The Voice API provides real-time voice transcription and translation services.\nUse a two-step flow: first request a streaming URL via REST, then establish a WebSocket connection for streaming audio and receiving transcriptions."
5155
}
5256
],
5357
"paths": {
@@ -2662,6 +2666,119 @@
26622666
}
26632667
]
26642668
}
2669+
},
2670+
"/v1/voice/realtime": {
2671+
"post": {
2672+
"tags": [
2673+
"VoiceAPI"
2674+
],
2675+
"summary": "Get Streaming URL",
2676+
"operationId": "getVoiceStreamingUrl",
2677+
"requestBody": {
2678+
"required": true,
2679+
"content": {
2680+
"application/json": {
2681+
"schema": {
2682+
"type": "object",
2683+
"required": [
2684+
"media_content_type"
2685+
],
2686+
"properties": {
2687+
"media_content_type": {
2688+
"$ref": "#/components/schemas/VoiceMediaContentType"
2689+
},
2690+
"source_language": {
2691+
"$ref": "#/components/schemas/VoiceSourceLanguage"
2692+
},
2693+
"source_language_mode": {
2694+
"$ref": "#/components/schemas/VoiceSourceLanguageMode"
2695+
},
2696+
"target_languages": {
2697+
"$ref": "#/components/schemas/VoiceTargetLanguages"
2698+
},
2699+
"glossary_id": {
2700+
"$ref": "#/components/schemas/GlossaryId"
2701+
},
2702+
"formality": {
2703+
"$ref": "#/components/schemas/Formality"
2704+
}
2705+
}
2706+
},
2707+
"examples": {
2708+
"basic": {
2709+
"summary": "Basic configuration",
2710+
"value": {
2711+
"media_content_type": "audio/ogg; codecs=opus",
2712+
"source_language": "en",
2713+
"source_language_mode": "auto",
2714+
"target_languages": [
2715+
"de",
2716+
"fr",
2717+
"es"
2718+
]
2719+
}
2720+
},
2721+
"with_glossary": {
2722+
"summary": "With glossary and formality",
2723+
"value": {
2724+
"media_content_type": "audio/pcm; encoding=s16le; rate=16000",
2725+
"source_language": "en",
2726+
"source_language_mode": "fixed",
2727+
"target_languages": [
2728+
"de",
2729+
"fr"
2730+
],
2731+
"glossary_id": "def3a26b-3e84-45b3-84ae-0c0aaf3525f7",
2732+
"formality": "more"
2733+
}
2734+
}
2735+
}
2736+
}
2737+
}
2738+
},
2739+
"responses": {
2740+
"200": {
2741+
"description": "Successfully obtained streaming URL and token",
2742+
"content": {
2743+
"application/json": {
2744+
"schema": {
2745+
"$ref": "#/components/schemas/VoiceStreamingResponse"
2746+
},
2747+
"example": {
2748+
"streaming_url": "wss://api.deepl.com/v1/voice/realtime/connect",
2749+
"token": "VGhpcyBpcyBhIGZha2UgdG9rZW4K"
2750+
}
2751+
}
2752+
}
2753+
},
2754+
"400": {
2755+
"$ref": "#/components/responses/BadRequest"
2756+
},
2757+
"401": {
2758+
"$ref": "#/components/responses/Unauthorized"
2759+
},
2760+
"403": {
2761+
"$ref": "#/components/responses/Forbidden"
2762+
},
2763+
"429": {
2764+
"$ref": "#/components/responses/TooManyRequests"
2765+
},
2766+
"456": {
2767+
"$ref": "#/components/responses/QuotaExceeded"
2768+
},
2769+
"500": {
2770+
"$ref": "#/components/responses/InternalServerError"
2771+
},
2772+
"503": {
2773+
"$ref": "#/components/responses/ServiceUnavailable"
2774+
}
2775+
},
2776+
"security": [
2777+
{
2778+
"auth_header": []
2779+
}
2780+
]
2781+
}
26652782
}
26662783
},
26672784
"components": {
@@ -3591,6 +3708,96 @@
35913708
],
35923709
"example": "de"
35933710
},
3711+
"VoiceMediaContentType": {
3712+
"type": "string",
3713+
"description": "The audio format for streaming. Specifies container, codec, and encoding parameters.\nSupported formats include PCM (recommended), OPUS (recommended for low bandwidth), FLAC, MP3, and AAC.\nPCM formats require explicit sample rate. Mono audio only.",
3714+
"enum": [
3715+
"audio/auto",
3716+
"audio/flac",
3717+
"audio/mpeg",
3718+
"audio/ogg",
3719+
"audio/webm",
3720+
"audio/x-matroska",
3721+
"audio/ogg; codecs=flac",
3722+
"audio/ogg; codecs=opus",
3723+
"audio/pcm; encoding=s16le; rate=8000",
3724+
"audio/pcm; encoding=s16le; rate=16000",
3725+
"audio/pcm; encoding=s16le; rate=44100",
3726+
"audio/pcm; encoding=s16le; rate=48000",
3727+
"audio/webm; codecs=opus",
3728+
"audio/x-matroska; codecs=aac",
3729+
"audio/x-matroska; codecs=flac",
3730+
"audio/x-matroska; codecs=mp3",
3731+
"audio/x-matroska; codecs=opus"
3732+
],
3733+
"example": "audio/ogg; codecs=opus"
3734+
},
3735+
"VoiceSourceLanguage": {
3736+
"type": "string",
3737+
"description": "Source language of the audio stream. Must be one of the supported Voice API source languages.\nLanguage identifier must comply with IETF BCP 47 language tags.",
3738+
"enum": [
3739+
"zh",
3740+
"nl",
3741+
"en",
3742+
"fr",
3743+
"de",
3744+
"id",
3745+
"it",
3746+
"ja",
3747+
"ko",
3748+
"pl",
3749+
"pt",
3750+
"ro",
3751+
"ru",
3752+
"es",
3753+
"sv",
3754+
"tr",
3755+
"uk"
3756+
],
3757+
"example": "en"
3758+
},
3759+
"VoiceSourceLanguageMode": {
3760+
"type": "string",
3761+
"description": "Controls how the source_language value is used.\n- `auto`: Treats source language as a hint; server can override\n- `fixed`: Treats source language as mandatory; server must use this language",
3762+
"enum": [
3763+
"auto",
3764+
"fixed"
3765+
],
3766+
"default": "auto",
3767+
"example": "auto"
3768+
},
3769+
"VoiceStreamingResponse": {
3770+
"type": "object",
3771+
"required": [
3772+
"streaming_url",
3773+
"token"
3774+
],
3775+
"properties": {
3776+
"streaming_url": {
3777+
"type": "string",
3778+
"description": "The WebSocket URL to use for establishing the streaming connection. This URL is ephemeral and valid for one-time use only.",
3779+
"example": "wss://api.deepl.com/v1/voice/realtime/connect"
3780+
},
3781+
"token": {
3782+
"type": "string",
3783+
"description": "A unique ephemeral token for authentication with the streaming endpoint. Pass this as a query parameter when connecting to the WebSocket URL.",
3784+
"example": "VGhpcyBpcyBhIGZha2UgdG9rZW4K"
3785+
}
3786+
}
3787+
},
3788+
"VoiceTargetLanguages": {
3789+
"type": "array",
3790+
"description": "List of target languages for translation. The stream will emit translations for each language.\nMaximum 5 target languages per stream. Language identifiers must comply with IETF BCP 47.",
3791+
"items": {
3792+
"type": "string"
3793+
},
3794+
"maxItems": 5,
3795+
"example": [
3796+
"de",
3797+
"fr",
3798+
"es"
3799+
]
3800+
},
35943801
"WritingStyle": {
35953802
"type": "string",
35963803
"description": "Specify a style to rephrase your text in a way that fits your audience and goals.\nThe `prefer_` prefix allows falling back to the default style if the language does not yet support styles.",

0 commit comments

Comments
 (0)