-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_qwen_reference.py
More file actions
80 lines (62 loc) · 2.63 KB
/
generate_qwen_reference.py
File metadata and controls
80 lines (62 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from __future__ import annotations
import argparse
import json
import urllib.error
import urllib.request
from pathlib import Path
import config
# Default value of the --text option in main(): a short Chinese sample
# script. The two adjacent literals are concatenated into a single string.
DEFAULT_TEXT = (
    "你好,这是一段用于本地配音模型参考音色的示例语音。"
    "请保持发音清晰、语气自然、节奏平稳,让整体听感更接近正式播报。"
)
def qwen_tts_api_url() -> str:
    """Return the full URL of the Qwen TTS generation endpoint.

    The base is read from ``config.QWEN_TTS_BASE_URL``; when that value is
    falsy the public DashScope base URL is used instead. Trailing slashes
    on the base are dropped before the endpoint path is appended.
    """
    configured = config.QWEN_TTS_BASE_URL
    if not configured:
        configured = "https://dashscope.aliyuncs.com/api/v1"
    root = configured.rstrip("/")
    return root + "/services/aigc/multimodal-generation/generation"
def _extract_audio_url(body: object) -> str:
    """Return ``body["output"]["audio"]["url"]``, or ``""`` if it is unavailable.

    Every level is type-checked, so a non-object body or a ``null``
    ``output`` / ``audio`` member yields ``""`` instead of raising
    ``AttributeError``.
    """
    node = body
    for key in ("output", "audio", "url"):
        if not isinstance(node, dict):
            return ""
        node = node.get(key)
    return node if isinstance(node, str) else ""


def qwen_tts_download(text: str, voice: str, out_file: Path) -> None:
    """Synthesize *text* with Qwen TTS and save the resulting audio to *out_file*.

    A JSON request is POSTed to ``qwen_tts_api_url()`` using the model and
    API key from ``config``. The response is expected to carry an audio URL
    at ``output.audio.url``; that URL is then fetched and its bytes written
    to *out_file*.

    Args:
        text: Text to synthesize. Line breaks are replaced by single spaces
            and surrounding whitespace is stripped before sending.
        voice: Voice name placed in the request's ``input.voice`` field.
        out_file: Destination path; missing parent directories are created.

    Raises:
        RuntimeError: If the API request returns an HTTP error, or the
            response does not contain an audio URL.

    Other failures (for example ``urllib.error.URLError``, JSON decoding
    errors, or an HTTP error while fetching the audio) propagate unchanged.
    """
    payload = {
        "model": config.QWEN_TTS_MODEL,
        "input": {
            "text": " ".join(text.splitlines()).strip(),
            "voice": voice,
            "language_type": "Chinese",
        },
    }
    req = urllib.request.Request(
        qwen_tts_api_url(),
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {config.QWEN_TTS_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=90) as resp:
            body = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        # The error body usually carries the service's own explanation;
        # fall back to the exception text when the body is empty.
        detail = exc.read().decode("utf-8", errors="ignore")
        raise RuntimeError(f"Qwen TTS 请求失败: {detail or exc}") from exc

    audio_url = _extract_audio_url(body)
    if not audio_url:
        # Prefer the service-supplied message/code when the body has one.
        message = None
        if isinstance(body, dict):
            message = body.get("message") or body.get("code")
        raise RuntimeError(message or "Qwen TTS 未返回音频地址")

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with urllib.request.urlopen(audio_url, timeout=90) as audio_resp:
        # The payload is read completely before the file is written.
        out_file.write_bytes(audio_resp.read())
def main() -> None:
    """Command-line entry point: generate a reference voice clip and its prompt text.

    Options:
        --voice: Voice name; defaults to ``config.QWEN_TTS_VOICE`` or "Cherry".
        --text:  Text to synthesize; defaults to ``DEFAULT_TEXT``.
        --out:   Audio output path; a relative path is resolved against the
                 directory containing this script.

    The synthesized text is saved next to the audio file under the same name
    with a ``.txt`` suffix.

    Raises:
        SystemExit: If no API key is configured, or if ``--out`` itself ends
            in ``.txt`` (the prompt text file would overwrite the audio).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--voice", default=config.QWEN_TTS_VOICE or "Cherry")
    parser.add_argument("--text", default=DEFAULT_TEXT)
    parser.add_argument("--out", default="voice_reference_cherry.wav")
    args = parser.parse_args()

    if not config.QWEN_TTS_API_KEY:
        raise SystemExit("未配置 qwen_tts_api_key,无法生成参考音色")

    output_path = Path(args.out).expanduser()
    if not output_path.is_absolute():
        # Relative paths are anchored at the script's directory, not the CWD.
        output_path = (Path(__file__).resolve().parent / output_path).resolve()

    prompt_path = output_path.with_suffix(".txt")
    # Reject the collision before calling the API: with a .txt audio path the
    # sidecar would be the same file and clobber the downloaded audio. The
    # comparison is case-insensitive to cover case-insensitive filesystems.
    if output_path.suffix.lower() == ".txt":
        raise SystemExit(
            "--out must not end in .txt: the prompt text file would "
            f"overwrite the audio ({prompt_path})"
        )

    qwen_tts_download(args.text, args.voice, output_path)
    prompt_path.write_text(args.text, encoding="utf-8")
    print(f"saved voice reference: {output_path}")
    print(f"saved prompt text: {prompt_path}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()