6 changes: 6 additions & 0 deletions livekit-plugins/livekit-plugins-qwen3/.env.example
@@ -0,0 +1,6 @@
# DashScope API key (required)
# Get your key at: https://dashscope.console.aliyun.com/
DASHSCOPE_API_KEY=your_api_key_here

# Optional: Use international endpoint (default is China mainland)
# DASHSCOPE_BASE_URL=wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime
170 changes: 170 additions & 0 deletions livekit-plugins/livekit-plugins-qwen3/README.md
@@ -0,0 +1,170 @@
# livekit-plugins-qwen3

[![PyPI version](https://badge.fury.io/py/livekit-plugins-qwen3.svg)](https://pypi.org/project/livekit-plugins-qwen3/)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

A [LiveKit Agents](https://docs.livekit.io/agents/) plugin for Qwen3 TTS and STT (Alibaba Cloud DashScope).

## Features

- **Text-to-Speech (TTS)**: Real-time streaming speech synthesis with multiple voices and languages
- **Speech-to-Text (STT)**: Real-time streaming speech recognition with interim results

## Installation

```bash
pip install livekit-plugins-qwen3
```

## Configuration

Set your DashScope API key as an environment variable:

```bash
export DASHSCOPE_API_KEY=your_api_key
```

Or copy `.env.example` to `.env` and fill in your credentials.

Get your API key at: https://dashscope.console.aliyun.com/
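
If you want to load a `.env` file without adding a dependency, a minimal stdlib sketch follows (the `python-dotenv` package handles quoting, export prefixes, and other edge cases far more robustly — `load_env_file` is an illustration, not part of the plugin):

```python
import os

def load_env_file(path: str = ".env") -> None:
    """Naively load KEY=VALUE lines into os.environ, skipping comments and blanks.

    Existing environment variables are not overwritten.
    """
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            os.environ.setdefault(key.strip(), value.strip())
```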

### International Users

By default, the plugin uses the China mainland endpoint. For international access, set:

```bash
export DASHSCOPE_BASE_URL=wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime
```

## Usage

### Text-to-Speech (TTS)

```python
from livekit.plugins import qwen3

tts = qwen3.TTS(
voice="Kiki", # or "Rocky", "Cherry", etc.
language="auto", # or "chinese", "english", "cantonese", etc.
)

# Streaming TTS
stream = tts.stream()
stream.push_text("Hello, world!")
stream.flush()
stream.end_input()  # signal that no more text is coming so iteration can finish

async for event in stream:
    # each event carries synthesized audio; forward it to your audio sink
    pass
```

### Speech-to-Text (STT)

```python
from livekit.agents.stt import SpeechEventType
from livekit.plugins import qwen3

stt = qwen3.STT(
    language="zh",  # or "en", "yue" (Cantonese), etc.
)

# Streaming STT
stream = stt.stream()

# Push audio frames as they arrive
stream.push_frame(audio_frame)

# Consume transcription events
async for event in stream:
    if event.type == SpeechEventType.INTERIM_TRANSCRIPT:
        print(f"Interim: {event.alternatives[0].text}")
    elif event.type == SpeechEventType.FINAL_TRANSCRIPT:
        print(f"Final: {event.alternatives[0].text}")
```

### With LiveKit Agent

```python
from livekit.agents import Agent
from livekit.plugins import qwen3

agent = Agent(
    instructions="You are a helpful voice assistant.",
    stt=qwen3.STT(language="zh"),
    tts=qwen3.TTS(voice="Kiki"),
)
```

## Available Options

### TTS Voices

| Category | Voices |
|----------|--------|
| Female | Kiki, Cherry, Jennifer |
| Male | Rocky, Ethan, Ryan |
| Regional | Sichuan-Sunny, Shanghai-Jada, Beijing-Yunxi |
| Cantonese | Cantonese_ProfessionalHost |
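
The table above can double as a quick client-side sanity check before opening a session. `KNOWN_VOICES` below simply mirrors the table and is not exported by the plugin:

```python
KNOWN_VOICES: dict[str, tuple[str, ...]] = {
    "female": ("Kiki", "Cherry", "Jennifer"),
    "male": ("Rocky", "Ethan", "Ryan"),
    "regional": ("Sichuan-Sunny", "Shanghai-Jada", "Beijing-Yunxi"),
    "cantonese": ("Cantonese_ProfessionalHost",),
}

def is_known_voice(voice: str) -> bool:
    """True if the (case-sensitive) voice name appears in any category above."""
    return any(voice in voices for voices in KNOWN_VOICES.values())
```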

### TTS Languages

`auto`, `chinese`, `english`, `cantonese`, `german`, `italian`, `portuguese`, `spanish`, `japanese`, `korean`, `french`, `russian`

### STT Languages

`zh` (Chinese), `en` (English), `yue` (Cantonese), and more.

### Models

| Feature | Model |
|---------|-------|
| TTS | `qwen3-tts-flash-realtime` |
| STT | `qwen3-asr-flash-realtime` |

## API Reference

### TTS

```python
qwen3.TTS(
model: str = "qwen3-tts-flash-realtime",
voice: str = "Kiki",
language: str = "auto",
mode: str = "server_commit",
sample_rate: int = 24000,
api_key: str | None = None,
base_url: str | None = None,
)
```
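
Since the realtime TTS returns raw PCM at the `sample_rate` above, received byte counts map directly to playback time. A convenience sketch, assuming 16-bit mono samples (not a plugin function):

```python
def pcm_duration_seconds(num_bytes: int, sample_rate: int = 24000,
                         bytes_per_sample: int = 2, channels: int = 1) -> float:
    """Duration of a raw PCM payload, assuming 16-bit mono at 24 kHz by default."""
    return num_bytes / (sample_rate * bytes_per_sample * channels)
```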

### STT

```python
qwen3.STT(
model: str = "qwen3-asr-flash-realtime",
language: str = "zh",
sample_rate: int = 16000,
api_key: str | None = None,
base_url: str | None = None,
)
```
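
Realtime STT works best with a steady stream of small frames. A hypothetical helper that slices captured 16 kHz mono PCM into ~100 ms chunks (constructing the actual `AudioFrame` objects passed to `push_frame` is LiveKit-specific and omitted here):

```python
def chunk_pcm(data: bytes, sample_rate: int = 16000, frame_ms: int = 100,
              bytes_per_sample: int = 2) -> list[bytes]:
    """Split raw mono PCM bytes into fixed-duration chunks (the last may be short)."""
    step = sample_rate * bytes_per_sample * frame_ms // 1000
    return [data[i:i + step] for i in range(0, len(data), step)]
```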

## Links

- [Qwen3 TTS API Reference](https://www.alibabacloud.com/help/en/model-studio/qwen-tts-api)
- [Qwen3 TTS Guide](https://www.alibabacloud.com/help/en/model-studio/qwen-tts)
- [Qwen3 ASR Guide](https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition)
- [LiveKit Agents Documentation](https://docs.livekit.io/agents/)

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Run linting: `ruff check --fix && ruff format`
4. Commit your changes (`git commit -m 'Add amazing feature'`)
5. Push to the branch (`git push origin feature/amazing-feature`)
6. Open a Pull Request

## License

This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,73 @@
"""Qwen3 TTS and STT plugins for LiveKit Agents

This plugin provides Text-to-Speech and Speech-to-Text capabilities using
Alibaba Cloud's Qwen3 (DashScope) service via WebSocket streaming.

See https://www.alibabacloud.com/help/en/model-studio/qwen-tts for TTS information.
See https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition for ASR information.
"""

from .models import (
DEFAULT_BASE_URL,
DEFAULT_LANGUAGE,
DEFAULT_MODE,
DEFAULT_MODEL,
DEFAULT_SAMPLE_RATE,
DEFAULT_VOICE,
INTL_BASE_URL,
TTSLanguage,
TTSMode,
TTSModel,
TTSVoice,
)
from .stt import (
DEFAULT_STT_LANGUAGE,
DEFAULT_STT_MODEL,
DEFAULT_STT_SAMPLE_RATE,
STT,
)
from .tts import TTS
from .version import __version__

__all__ = [
# TTS
"TTS",
"__version__",
"TTSModel",
"TTSVoice",
"TTSLanguage",
"TTSMode",
"DEFAULT_MODEL",
"DEFAULT_VOICE",
"DEFAULT_LANGUAGE",
"DEFAULT_MODE",
"DEFAULT_SAMPLE_RATE",
"DEFAULT_BASE_URL",
"INTL_BASE_URL",
# STT
"STT",
"DEFAULT_STT_MODEL",
"DEFAULT_STT_LANGUAGE",
"DEFAULT_STT_SAMPLE_RATE",
]

from livekit.agents import Plugin

from .log import logger


class Qwen3Plugin(Plugin):
def __init__(self) -> None:
super().__init__(__name__, __version__, __package__, logger)


Plugin.register_plugin(Qwen3Plugin())

# pdoc configuration to hide internal modules
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

__pdoc__ = {}

for n in NOT_IN_ALL:
__pdoc__[n] = False
@@ -0,0 +1,3 @@
import logging

logger = logging.getLogger(__name__)
@@ -0,0 +1,61 @@
from typing import Literal

# Qwen3 TTS Model
TTSModel = Literal["qwen3-tts-flash-realtime"]

# Available voices for Qwen3 TTS
# See: https://www.alibabacloud.com/help/en/model-studio/qwen-tts
TTSVoice = Literal[
# Female voices
"Kiki",
"Cherry",
"Jennifer",
# Male voices
"Rocky",
"Ethan",
"Ryan",
# Regional variants
"Sichuan-Sunny",
"Shanghai-Jada",
"Beijing-Yunxi",
# Cantonese voices
"Cantonese_ProfessionalHost",
]

# Supported languages (lowercase as required by API)
TTSLanguage = Literal[
    "auto",
    "chinese",
    "english",
    "cantonese",
    "german",
    "italian",
    "portuguese",
    "spanish",
    "japanese",
    "korean",
    "french",
    "russian",
]

# Session modes
TTSMode = Literal[
"server_commit", # Auto-triggers synthesis after pause (recommended for streaming)
"commit", # Manual trigger via commit_text_buffer
]

# Audio format (only PCM supported for realtime)
TTSAudioFormat = Literal["pcm"]

# Sample rate
TTSSampleRate = Literal[24000]

# Default values
DEFAULT_MODEL: TTSModel = "qwen3-tts-flash-realtime"
DEFAULT_VOICE: TTSVoice = "Kiki"
DEFAULT_LANGUAGE: TTSLanguage = "auto"
DEFAULT_MODE: TTSMode = "server_commit"
DEFAULT_SAMPLE_RATE: TTSSampleRate = 24000

# Base URLs
DEFAULT_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
INTL_BASE_URL = "wss://dashscope-intl.aliyuncs.com/api-ws/v1/realtime"