From 9bed98fb9f338a0fa4c02d578bce14894a63ee33 Mon Sep 17 00:00:00 2001 From: James Walker Date: Wed, 18 Mar 2026 13:16:32 +0000 Subject: [PATCH] Point quickstart to new Python client --- .../realtime/assets/sm-rt-example.py | 60 ++++++ docs/speech-to-text/realtime/quickstart.mdx | 196 +++++++++++++----- 2 files changed, 204 insertions(+), 52 deletions(-) create mode 100644 docs/speech-to-text/realtime/assets/sm-rt-example.py diff --git a/docs/speech-to-text/realtime/assets/sm-rt-example.py b/docs/speech-to-text/realtime/assets/sm-rt-example.py new file mode 100644 index 00000000..e2defd3d --- /dev/null +++ b/docs/speech-to-text/realtime/assets/sm-rt-example.py @@ -0,0 +1,60 @@ +import asyncio +from speechmatics.rt import ( + AudioEncoding, AudioFormat, AuthenticationError, + Microphone, ServerMessageType, TranscriptResult, + TranscriptionConfig, AsyncClient, +) + +API_KEY = "YOUR_API_KEY" + +# Set up config and format for transcription +audio_format = AudioFormat( + encoding=AudioEncoding.PCM_S16LE, + sample_rate=16000, + chunk_size=4096, +) +config = TranscriptionConfig( + language="en", + max_delay=0.7, +) + +async def main(): + + # Set up microphone + mic = Microphone( + sample_rate=audio_format.sample_rate, + chunk_size=audio_format.chunk_size + ) + if not mic.start(): + print("Mic not started — please install PyAudio"); return + + try: + async with AsyncClient(api_key=API_KEY) as client: + # Handle ADD_TRANSCRIPT message + @client.on(ServerMessageType.ADD_TRANSCRIPT) + def handle_finals(msg): + if final := TranscriptResult.from_message(msg).metadata.transcript: + print(f"[Final]: {final}") + + try: + # Begin transcribing + await client.start_session( + transcription_config=config, + audio_format=audio_format ) + while True: + await client.send_audio( + await mic.read( + chunk_size=audio_format.chunk_size ) ) + except KeyboardInterrupt: + pass + finally: + mic.stop() + + except AuthenticationError as e: + print(f"Auth error: {e}") + +if __name__ == "__main__": 
asyncio.run(main()) \ No newline at end of file diff --git a/docs/speech-to-text/realtime/quickstart.mdx b/docs/speech-to-text/realtime/quickstart.mdx index b131207a..55fc31ee 100644 --- a/docs/speech-to-text/realtime/quickstart.mdx +++ b/docs/speech-to-text/realtime/quickstart.mdx @@ -1,98 +1,190 @@ --- -description: Learn how to convert streaming audio to text. +pagination_prev: null +pagination_next: null +description: Learn how to transcribe streaming audio to text in real time. --- import Admonition from '@theme/Admonition'; import CodeBlock from '@theme/CodeBlock'; import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; +import { Grid } from '@radix-ui/themes'; +import { LinkCard } from "@site/src/theme/LinkCard"; +import { Users, BookMarked, Zap, Mic, Radio, Clock } from 'lucide-react'; import javascriptRadioExample from "./assets/javascript-radio-example.js?raw" -import pythonRadioExample from "./assets/url-example.py?raw" +import pythonRtExample from "./assets/sm-rt-example.py?raw" # Quickstart :::tip -The easiest way to try Realtime transcription is via the [web portal](https://portal.speechmatics.com/jobs/create/real-time). +The quickest way to try real-time transcription is via the [web portal](https://portal.speechmatics.com/jobs/create/real-time) — no code required. ::: -## Using the Realtime SaaS webSocket API +## Using the Realtime API + +The Realtime API streams audio over a WebSocket connection and returns transcript results as you speak. Unlike the [Batch API](/speech-to-text/batch/quickstart), results arrive continuously — within milliseconds of the spoken words. ### 1. Create an API key -[Create an API key in the portal here](https://portal.speechmatics.com/settings/api-keys), which you'll use to securely access the API. -Store the key as a managed secret. +[Create an API key in the portal](https://portal.speechmatics.com/settings/api-keys), which you'll use to securely access the API. Store the key as a managed secret. 
:::info Enterprise customers may need to speak to [Support](https://support.speechmatics.com) to get your API keys. ::: -### 2. Pick and install a library - -Check out our [JavaScript client](https://www.npmjs.com/package/@speechmatics/real-time-client) or [Python client](https://pypi.org/project/speechmatics-python/) to get started. +### 2. Install the library - - ``` - npm install @speechmatics/real-time-client @speechmatics/auth - ``` - - ``` - pip3 install speechmatics-python - ``` + Install using pip: + ``` + pip install speechmatics-rt pyaudio + ``` + :::note + `pyaudio` is required for microphone input in this quickstart. + ::: + + + Install using npm: + ``` + npm install @speechmatics/real-time-client @speechmatics/auth + ``` +### 3. Run the example -### 3. Insert your API key - -Paste your API key into `YOUR_API_KEY` in the code. +Replace `YOUR_API_KEY` with your key, then run the script. + + + {pythonRtExample} + + Speak into your microphone. You should see output like: + ``` + [Final]: Hello, welcome to Speechmatics. + [Final]: This is a real-time transcription example. + ``` + Press `Ctrl+C` to stop. + {javascriptRadioExample} - - - - {pythonRadioExample} - + This example transcribes a live radio stream. You should see a rolling transcript printed to the console. + Press `Ctrl+C` to stop. +## Understanding the output +The API returns two types of transcript results. You can use either or both depending on your use case. -## Transcript outputs +| Type | Latency | Stability | Best for | +|------|---------|-----------|----------| +| **Final** | ~0.7–2s | Definitive, never revised | Accurate transcripts, subtitles | +| **Partial** | <500ms | May be revised | Live captions, voice interfaces | -The API returns transcripts in JSON format. You can receive two types of output: [Final](#final-transcripts) and [Partial](#partial-transcripts) transcripts. Choose the type based on your latency and accuracy needs. 
+**Finals** represent the best transcription for a span of audio and are never updated once emitted. You can tune their latency using [`max_delay`](/speech-to-text/realtime/output#latency) — lower values reduce delay at the cost of slightly reduced accuracy. -### Final transcripts +**Partials** are emitted immediately as audio arrives and may be revised as more context is processed. A common pattern is to display partials immediately, then replace them with finals as they arrive. -Final transcripts are the definitive result. -- They reflect the best transcription for the spoken audio. -- Once displayed, they are not updated. -- Words arrive incrementally, with some delay. +To receive partials, set `enable_partials=True` in your `TranscriptionConfig` and register a handler for `ADD_PARTIAL_TRANSCRIPT`: -You control the latency and accuracy tradeoff [using the `max_delay` setting](/speech-to-text/realtime/output#latency) in your `transcription_config`. -Larger values of `max_delay` increase accuracy by giving the system more time to process audio context. - -:::tip -Best for accurate, completed transcripts where some delay is acceptable -::: -### Partial transcripts -Partial transcripts are low-latency and can update later as more conversation context arrives. -- You must enable them using `enable_partials` in your `transcription_config`. -- Partials are emitted quickly (typically less than 500ms). -- The engine may revise them as more audio is processed. -You can combine partials with finals for a responsive user experience — show partials first, then replace them with finals as they arrive. -You control the latency and accuracy tradeoff using the [`max_delay` setting](/speech-to-text/realtime/output#latency) in your `transcription_config`. 
+ + + ```python + config = TranscriptionConfig( + language="en", + max_delay=0.7, + enable_partials=True, # Enable partial transcripts + ) + + async with AsyncClient(api_key=API_KEY) as client: + @client.on(ServerMessageType.ADD_PARTIAL_TRANSCRIPT) + def handle_partials(msg): + if partial := TranscriptResult.from_message(msg).metadata.transcript: + print(f"[Partial]: {partial}") + + @client.on(ServerMessageType.ADD_TRANSCRIPT) + def handle_finals(msg): + if final := TranscriptResult.from_message(msg).metadata.transcript: + print(f"[Final]: {final}") + ``` + With both handlers registered, you'll see partials arrive first, then be superseded by the final result: + ``` + [Partial]: Hello wel + [Partial]: Hello welcome to + [Final]: Hello, welcome to Speechmatics. + ``` + + + ```javascript + await client.start(jwt, { + transcription_config: { + language: "en", + enable_partials: true, // Enable partial transcripts + }, + }); + + client.addEventListener("receiveMessage", ({ data }) => { + if (data.message === "AddPartialTranscript") { + process.stdout.write(`[Partial]: ${data.metadata.transcript}\r`); + } else if (data.message === "AddTranscript") { + console.log(`[Final]: ${data.metadata.transcript}`); + } + }); + ``` + With both handlers registered, you'll see partials arrive first, then be superseded by the final result: + ``` + [Partial]: Hello wel + [Partial]: Hello welcome to + [Final]: Hello, welcome to Speechmatics. + ``` + + -:::tip -Use partials for: real-time captions, voice interfaces, or any case where speed matters -::: +## Next steps + +Now that you have real-time transcription working, explore these features to build more powerful applications. + + + } + /> + } + /> + } + /> + } + /> + } + /> + } + /> +