diff --git a/docs/develop/sound-generation/index.zh.md b/docs/develop/sound-generation/index.zh.md new file mode 100644 index 0000000000..2a3306147b --- /dev/null +++ b/docs/develop/sound-generation/index.zh.md @@ -0,0 +1,22 @@ +# Sound 生成 + +本文档用于承接 XBuilder 中 Sound 生成功能的开发设计与实现拆解。 + +## 范围 + +Sound 生成当前规划包含: + +- 语音合成(Text-to-Speech, TTS) +- 音效(Sound Effect) +- 背景音乐(Background Music) + +## 当前实现建议 + +当前实现上建议 **TTS 优先**,并以统一的 Sound 生成任务模型向后兼容音效与背景音乐。 + +- MVP 优先落地:阿里云 CosyVoice TTS +- 后续扩展:同一套任务接口继续支持 Sound Effect / Background Music + +## 设计文档 + +- Sound 一期 TTS 接口设计与 `builder-backend` 开发任务拆解:[`./tts-phase-1.zh.md`](tts-phase-1.zh.md) diff --git a/docs/develop/sound-generation/tts-phase-1.zh.md b/docs/develop/sound-generation/tts-phase-1.zh.md new file mode 100644 index 0000000000..a98c24a5c4 --- /dev/null +++ b/docs/develop/sound-generation/tts-phase-1.zh.md @@ -0,0 +1,236 @@ +# Sound 一期 TTS 功能 & 接口设计 + +本文档用于对齐 `builder`(前端)与 `builder-backend`(服务端)的 Sound 生成功能设计。 + +当前方案基于阿里云 CosyVoice v3.5 的真实约束: + +> **前端表达“我要哪类声音、以什么语气说这段话”** +> **服务端负责“使用哪个 target model / voice_id / instruction 去实现”** + +尤其需要注意: + +- `cosyvoice-v3.5-flash` / `cosyvoice-v3.5-plus` **没有系统音色** +- 必须先通过**声音设计/复刻**生成 voice_id,后续语音合成时再把该 voice_id 作为 `voice` 参数使用 +- 一期前端只选择 `性别 + 年龄段`,服务端为每个桶位固定维护一个默认音色 +- 模型本身通常可以根据 `text` 推断基础情绪和表达方式,可以附带可选的 `instruction` 作为补充说明,而不是用固定枚举的 `emotion` / `useCase` 等 +- 暂不开放 `rate / pitch / volume` 等参数,后续根据需求再评估是否增加“基础参数调整”功能 + +这样可以最大程度贴合 CosyVoice v3.5 的真实能力边界,同时保持前端心智简洁。 + +--- + +## 1. 设计结论 + +### 1.1 前端暴露“易理解、可复用、不过度绑定供应商”的概念 + +Sound TTS 的前端公开协议只保留这些字段: + +- `name`:生成后的素材名 +- `description`:素材描述 +- `category`:MVP 版本只有 `voice`(前端不暴露 category 选择,运行时固定传 voice) +- `speechSettings` + - `text`:要说的话 + - `voiceGender`:声音性别(男 / 女) + - `voiceAgeGroup`:声音年龄段(儿童 / 青年 / 中年 / 老年) + - `instruction`:补充“希望怎么说”“更偏什么感觉”“面向谁说”等开放信息 + +### 1.2 前端不包含这些底层字段 + +以下字段都不应该成为前端协议的一部分: + +- `provider` +- `model` +- 供应商原始 `voice_id` +- `format` +- `sampleRate` + +原因: + +1. 
这些都是服务端接供应商时的实现细节。 +2. 它们会把前端交互变成“调底层模型参数”。 +3. 未来更换供应商或调整模型时,不应要求前端同步改协议。 + +### 1.3 服务端负责按分桶选择固定音色与参数映射 + +服务端根据前端提交的请求参数自行决定: + +- 使用哪个 CosyVoice target model 和 `voice_id` +- 如何组织最终调用参数 +- 输出什么格式和采样率 + +--- + +## 2. MVP 范围 + +### 2.1 本期必须实现 + +- 文本转语音(TTS) +- 手动配置基础音色维度与合成参数 +- 异步生成任务创建 / 查询 / SSE 订阅 / 取消 +- 返回音频 URL +- 前端将结果落为 XBuilder 的 `Sound` Asset + +### 2.2 本期明确不做 + +- 音效生成(Sound Effect) +- 背景音乐生成(Background Music) +- 将生成结果自动入库到公共素材库 + +--- + +## 3. 核心原则 + +1. **前端讲用户语言,后端讲供应商语言。** +2. **前端只选性别与年龄桶位,不直接接触 voice_id。** +3. **前端只暴露最小化的选项:性别、年龄段、开放 `instruction`。** +4. **生成结果沿用现有 `/aigc/tasks` 异步任务体系。** + +--- + +## 4. 数据模型 + +### 4.1 基础音色维度(前端可见) + +```ts +export type SoundVoiceGender = 'male' | 'female' + +export type SoundVoiceAgeGroup = 'child' | 'youth' | 'middle-aged' | 'senior' +``` + +说明: + +- 服务端内部按 `男 / 女 × 儿童 / 青年 / 中年 / 老年` 维护 8 个基础桶位 +- 每个桶位在一期只对应一个固定默认音色 +- 前端暴露的是桶位维度,不暴露供应商原始 `voice_id` +- 这些固定选项由前端直接内置,不再单独提供“获取可选项”的接口 +- 前端所有可控项都由用户手动选择,不再引入推荐值 + +### 4.2 生成请求(前端可见) + +```ts +export type SpeechSoundSettings = { + name: string + description: string + category: 'voice' + speechSettings: { + text: string + voiceGender: SoundVoiceGender + voiceAgeGroup: SoundVoiceAgeGroup + instruction?: string + } +} +``` + +说明: + +- `voiceGender / voiceAgeGroup` 用于选定基础音色桶位 +- `text` 暂定限制为最多 200 个字符,避免长文本导致生成用时过长 +- `instruction` 限制为最多 50 个字符,不超过 CosyVoice 可接受的长度范围 + +--- + +## 5. 接口设计 + +### 5.1 创建 Sound 生成任务 + +继续沿用统一任务接口: + +```http +POST /aigc/tasks +Content-Type: application/json +``` + +#### Request + +```json +{ + "type": "generateSound", + "parameters": { + "settings": { + "name": "hero-hello", + "description": "主角的一句开心问候", + "category": "voice", + "speechSettings": { + "text": "你好,我们出发吧!", + "voiceGender": "female", + "voiceAgeGroup": "youth", + "instruction": "像在提醒队友准备出发,语气轻快一点" + } + } + } +} +``` + +### 5.2 TaskResult 设计 + +```ts +export type TaskResultGenerateSound = { + audioUrl: string +} +``` + +--- + +## 6. 前端交互 + +UI 顺序: + +1. 输入素材名称 +2. 
输入要说的话 +3. 选择声音性别 / 年龄段 +4. 按需补充 `instruction` +5. 点击生成 +6. 试听生成结果 +7. 采用到项目 + +--- + +## 7. 服务端开发建议 + +### 任务 1:维护基础音色库存 + +服务端维护一份基础音色库存,例如: + +```text +builder-backend/internal/aigc/sound/base_voice_inventory.json +``` + +库存项可包含: + +- `target_model` +- `voice_gender` +- `voice_age_group` +- `voice_id` + +其中: + +- `voice_id` 来自阿里百炼账号下已设计/复刻的可用于 CosyVoice `target_model` 的音色 +- `voice_gender + voice_age_group` 共同决定一个基础音色桶位 + +建议: + +- 一期先按 `男 / 女 × 儿童 / 青年 / 中年 / 老年` 维护 8 个基础桶位 +- 每个桶位只维护一个默认基础音色,优先保证稳定、自然、通用 +- 如果后续某个桶位的默认音色无法覆盖主要场景,再考虑扩展该桶位的候选音色数量 + +### 任务 2:处理 Sound 生成任务 + +服务端收到请求后完成: + +1. 根据 `voiceGender + voiceAgeGroup` 选定基础音色桶位对应的固定 `voice_id` +2. 根据 `text / instruction` 等信息生成最终调用参数 +3. 调用供应商生成音频 +4. 上传对象存储并写回 task result + +调用参数默认值策略: + +- `instruction` 为空时,仅基于 `text` 做自然表达 +- 输出格式固定用 `mp3` +- 其它参数不指定,使用 CosyVoice 默认值 + +--- + +## 8. 相关文档 + +- [阿里百炼语音合成模型用户指南](https://help.aliyun.com/zh/model-studio/tts-model) +- [CosyVoice 语音合成 API 参考](https://help.aliyun.com/zh/model-studio/non-realtime-cosyvoice-api) diff --git a/docs/openapi.yaml b/docs/openapi.yaml index fe2929c17c..3610bc6948 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -2676,6 +2676,7 @@ paths: | `generateAnimationVideo` | Generate animation video | | `extractVideoFrames` | Extract frames from video | | `generateBackdrop` | Generate backdrop image | + | `generateSound` | Generate sound asset (TTS in current MVP) | Quota and rate limits: @@ -2831,6 +2832,24 @@ paths: default: 1 examples: - 4 + - title: generateSound + type: object + required: + - type + - parameters + properties: + type: + type: string + enum: + - generateSound + parameters: + description: Parameters for speech sound generation. + type: object + required: + - settings + properties: + settings: + $ref: "#/components/schemas/AIGCSpeechSoundSettings" responses: "202": description: Task accepted and queued for processing. 
@@ -4708,6 +4727,7 @@ components: - generateAnimationVideo - extractVideoFrames - generateBackdrop + - generateSound examples: - removeBackground status: @@ -4776,6 +4796,15 @@ components: items: type: string format: uri + - title: generateSound + type: object + required: + - audioUrl + properties: + audioUrl: + description: Universal URL of the generated audio file (e.g. kodo://bucket/key). + type: string + format: uri error: description: Error details. Only available when status is failed. type: object @@ -4943,6 +4972,106 @@ components: - ui - unspecified + SoundCategory: + description: Category of sound asset. + type: string + enum: + - voice + - effect + - music + - ambience + - other + examples: + - voice + + AIGCSoundVoiceGender: + description: Gender bucket of the generated voice. + type: string + enum: + - male + - female + examples: + - female + + AIGCSoundVoiceAgeGroup: + description: Age bucket of the generated voice. + type: string + enum: + - child + - youth + - middle-aged + - senior + examples: + - middle-aged + + AIGCSpeechSettings: + description: Speech-specific generation settings. + type: object + required: + - text + - voiceGender + - voiceAgeGroup + properties: + text: + description: Text to synthesize into speech. + type: string + minLength: 1 + maxLength: 200 + examples: + - 你好,我们出发吧! + voiceGender: + $ref: "#/components/schemas/AIGCSoundVoiceGender" + voiceAgeGroup: + $ref: "#/components/schemas/AIGCSoundVoiceAgeGroup" + instruction: + description: Optional free-form instruction describing how the speech should be delivered. + type: string + maxLength: 50 + examples: + - 像在提醒队友准备出发,语气轻快一点 + + AIGCSoundBaseSettings: + description: Base settings shared by all sound-generation requests. + type: object + required: + - name + - description + - category + properties: + name: + description: Name of the resulting Sound asset. + type: string + examples: + - hero-hello + description: + description: Description of the resulting Sound asset. 
+ type: string + examples: + - 主角的一句开心问候 + category: + $ref: "#/components/schemas/SoundCategory" + + AIGCSpeechSoundSettings: + description: | + Settings for generating a speech sound asset. + + This is the only supported sound-generation request type in the current MVP. + allOf: + - $ref: "#/components/schemas/AIGCSoundBaseSettings" + - type: object + required: + - speechSettings + properties: + category: + description: Category of the resulting Sound asset. Fixed to `voice` for speech generation. + type: string + enum: + - voice + examples: + - voice + speechSettings: + $ref: "#/components/schemas/AIGCSpeechSettings" + UpInfo: description: Upload credentials and configuration. type: object diff --git a/spx-gui/src/apis/aigc.ts b/spx-gui/src/apis/aigc.ts index aca88b1d52..2f7f5acfdc 100644 --- a/spx-gui/src/apis/aigc.ts +++ b/spx-gui/src/apis/aigc.ts @@ -9,6 +9,7 @@ import { client, type FileCollection, Perspective, + SoundCategory, SpriteCategory, type UniversalUrl } from './common' @@ -65,12 +66,38 @@ export type BackdropSettings = { perspective: Perspective } +export type SoundVoiceGender = 'male' | 'female' + +export type SoundVoiceAgeGroup = 'child' | 'youth' | 'middle-aged' | 'senior' + +export type SoundSettingsBase = { + name: string + description: string + category: SoundCategory +} + +export type SpeechSettings = { + text: string + voiceGender: SoundVoiceGender + voiceAgeGroup: SoundVoiceAgeGroup + instruction?: string +} + +export type SpeechSoundSettings = SoundSettingsBase & { + category: SoundCategory.Voice + speechSettings: SpeechSettings +} + +// TODO: support more sound types in the future +export type SoundSettings = SpeechSoundSettings + export const enum TaskType { RemoveBackground = 'removeBackground', GenerateCostume = 'generateCostume', GenerateAnimationVideo = 'generateAnimationVideo', ExtractVideoFrames = 'extractVideoFrames', - GenerateBackdrop = 'generateBackdrop' + GenerateBackdrop = 'generateBackdrop', + GenerateSound = 
'generateSound' } export const enum TaskStatus { @@ -120,12 +147,17 @@ export type TaskResultGenerateBackdrop = { imageUrls: UniversalUrl[] } +export type TaskResultGenerateSound = { + audioUrl: UniversalUrl +} + export type TaskResult = { [TaskType.RemoveBackground]: TaskResultRemoveBackground [TaskType.GenerateCostume]: TaskResultGenerateCostume [TaskType.GenerateAnimationVideo]: TaskResultGenerateAnimationVideo [TaskType.ExtractVideoFrames]: TaskResultExtractVideoFrames [TaskType.GenerateBackdrop]: TaskResultGenerateBackdrop + [TaskType.GenerateSound]: TaskResultGenerateSound }[T] export type Task = { @@ -174,12 +206,17 @@ export type TaskParamsGenerateBackdrop = { n: number } +export type TaskParamsGenerateSound = { + settings: SoundSettings +} + export type TaskParams = { [TaskType.RemoveBackground]: TaskParamsRemoveBackground [TaskType.GenerateCostume]: TaskParamsGenerateCostume [TaskType.GenerateAnimationVideo]: TaskParamsGenerateAnimationVideo [TaskType.ExtractVideoFrames]: TaskParamsExtractVideoFrames [TaskType.GenerateBackdrop]: TaskParamsGenerateBackdrop + [TaskType.GenerateSound]: TaskParamsGenerateSound }[T] export function createTask(type: T, params: TaskParams, signal?: AbortSignal): Promise> { diff --git a/spx-gui/src/apis/common/index.ts b/spx-gui/src/apis/common/index.ts index b94a728617..6add117101 100644 --- a/spx-gui/src/apis/common/index.ts +++ b/spx-gui/src/apis/common/index.ts @@ -88,6 +88,8 @@ export const enum BackdropCategory { } export const enum SoundCategory { + /** Voice sounds are spoken or narrated audio assets, such as TTS lines, narration, or dialogue clips. */ + Voice = 'voice', /** Sound effects are audio elements that enhance the gaming experience by providing auditory feedback for actions, events, or interactions within the game. */ Effect = 'effect', /** Music tracks are composed pieces that set the tone, mood, and atmosphere of the game, often playing in the background during gameplay or specific scenes. 
*/ diff --git a/spx-gui/src/components/asset/gen/modal.ts b/spx-gui/src/components/asset/gen/modal.ts index e38a705e59..57abaf4566 100644 --- a/spx-gui/src/components/asset/gen/modal.ts +++ b/spx-gui/src/components/asset/gen/modal.ts @@ -4,6 +4,7 @@ import { I18n } from '@/utils/i18n' import type { SpxProject } from '@/models/spx/project' import { SpriteGen } from '@/models/spx/gen/sprite-gen' import { BackdropGen } from '@/models/spx/gen/backdrop-gen' +import { SoundGen } from '@/models/spx/gen/sound-gen' import type { AssetGenModel } from '@/models/spx/common/asset' export interface GenHelpers { @@ -41,3 +42,13 @@ export function initBackdropGen(i18n: I18n, project: SpxProject, onCleanup: OnCl }) return g } + +/** Init a sound-gen instance for asset generation modals, and handle its lifecycle properly. */ +export function initSoundGen(onCleanup: OnCleanup) { + const g = new SoundGen() + onCleanup(() => { + g.cancel() + g.dispose() + }) + return g +} diff --git a/spx-gui/src/components/asset/gen/sound/SoundGenModal.vue b/spx-gui/src/components/asset/gen/sound/SoundGenModal.vue new file mode 100644 index 0000000000..e3dce49475 --- /dev/null +++ b/spx-gui/src/components/asset/gen/sound/SoundGenModal.vue @@ -0,0 +1,87 @@ + + + diff --git a/spx-gui/src/components/asset/gen/sound/TTSGen.vue b/spx-gui/src/components/asset/gen/sound/TTSGen.vue new file mode 100644 index 0000000000..352b00024a --- /dev/null +++ b/spx-gui/src/components/asset/gen/sound/TTSGen.vue @@ -0,0 +1,215 @@ + + + diff --git a/spx-gui/src/components/asset/index.ts b/spx-gui/src/components/asset/index.ts index 715914d908..a05bdfd145 100644 --- a/spx-gui/src/components/asset/index.ts +++ b/spx-gui/src/components/asset/index.ts @@ -34,6 +34,7 @@ import GroupCostumesModal from './animation/GroupCostumesModal.vue' import AssetLibraryManagementModal from './library/management/AssetLibraryManagementModal.vue' import SpriteGenModal from './gen/sprite/SpriteGenModal.vue' import BackdropGenModal from 
'./gen/backdrop/BackdropGenModal.vue' +import SoundGenModal from './gen/sound/SoundGenModal.vue' import type { GenHelpers } from './gen/modal' export function useSpriteGenModal() { @@ -51,6 +52,13 @@ export function useBackdropGenModal() { } } +export function useSoundGenModal() { + const invokeModal = useModal(SoundGenModal) + return function invokeSoundGenModal(project: SpxProject) { + return invokeModal({ project }) + } +} + export function useAddAssetFromLibrary() { const editorCtx = useEditorCtx() const genHelpers = useGenHelpers() diff --git a/spx-gui/src/components/editor/stage/sound/AddSoundMenu.vue b/spx-gui/src/components/editor/stage/sound/AddSoundMenu.vue index a4f968826a..1ddc979e0e 100644 --- a/spx-gui/src/components/editor/stage/sound/AddSoundMenu.vue +++ b/spx-gui/src/components/editor/stage/sound/AddSoundMenu.vue @@ -15,6 +15,9 @@ {{ $t({ en: 'Record', zh: '录音' }) }} + + {{ $t({ en: 'Generate with AI', zh: '使用 AI 生成' }) }} + @@ -22,7 +25,12 @@ import { UIMenu, UIMenuItem } from '@/components/ui' import { AssetType } from '@/apis/asset' import { useMessageHandle } from '@/utils/exception' -import { useAddAssetFromLibrary, useAddSoundFromLocalFile, useAddSoundByRecording } from '@/components/asset' +import { + useAddAssetFromLibrary, + useAddSoundFromLocalFile, + useAddSoundByRecording, + useSoundGenModal +} from '@/components/asset' import { useEditorCtx } from '../../EditorContextProvider.vue' import type { SoundsEditorState } from './sounds-editor-state' @@ -67,4 +75,19 @@ const handleRecord = useMessageHandle( zh: '录音失败' } ).fn + +const invokeSoundGenModal = useSoundGenModal() +const handleGenerate = useMessageHandle( + async () => { + const sound = await invokeSoundGenModal(editorCtx.project) + await editorCtx.state.history.doAction({ name: { en: 'Add sound', zh: '添加声音' } }, () => { + editorCtx.project.addSound(sound) + }) + props.state.select(sound.id) + }, + { + en: 'Failed to generate sound', + zh: '生成声音失败' + } +).fn diff --git 
a/spx-gui/src/models/spx/gen/aigc-mock.ts b/spx-gui/src/models/spx/gen/aigc-mock.ts index b068acf3ec..235d4c06b9 100644 --- a/spx-gui/src/models/spx/gen/aigc-mock.ts +++ b/spx-gui/src/models/spx/gen/aigc-mock.ts @@ -471,6 +471,13 @@ export class MockAigcApis { imageUrls: this.range(p.n).map((i) => this.url(`backdrop-${name}-${i + 1}.png`)) } as TaskResult } + case TaskType.GenerateSound: { + const p = params as TaskParams + const name = this.sanitize(p.settings.name) + return { + audioUrl: this.url(`sound-${name}.mp3`) + } as TaskResult + } default: throw new Error(`unsupported task type: ${type as string}`) } diff --git a/spx-gui/src/models/spx/gen/common.ts b/spx-gui/src/models/spx/gen/common.ts index 7d493b5054..ce72590af4 100644 --- a/spx-gui/src/models/spx/gen/common.ts +++ b/spx-gui/src/models/spx/gen/common.ts @@ -220,7 +220,8 @@ export const taskDurations: Record = { [TaskType.GenerateCostume]: 15, [TaskType.GenerateAnimationVideo]: 180, [TaskType.ExtractVideoFrames]: 12, - [TaskType.GenerateBackdrop]: 15 + [TaskType.GenerateBackdrop]: 15, + [TaskType.GenerateSound]: 5 } export type TaskApis = Pick diff --git a/spx-gui/src/models/spx/gen/sound-gen.ts b/spx-gui/src/models/spx/gen/sound-gen.ts new file mode 100644 index 0000000000..9f755a9775 --- /dev/null +++ b/spx-gui/src/models/spx/gen/sound-gen.ts @@ -0,0 +1,147 @@ +import { nanoid } from 'nanoid' +import { reactive } from 'vue' +import { SoundCategory } from '@/apis/common' +import { + adoptAsset, + TaskStatus, + TaskType, + type SpeechSoundSettings, + type SpeechSettings, + type TaskResultGenerateSound +} from '@/apis/aigc' +import { Disposable } from '@/utils/disposable' +import { createFileWithUniversalUrl } from '../../common/cloud' +import { ensureValidSoundName, validateSoundName, type SoundLikeParent } from '../common/asset-name' +import { sound2Asset } from '../common/asset' +import { Sound } from '../sound' +import { Phase, Task } from './common' + +export type SoundGenInits = { + id?: string + 
generateTask?: Task | null + generatePhase?: Phase +} + +type GenerateSpeechSettingsUpdates = Partial> & { + speechSettings?: Partial +} + +export class SoundGen extends Disposable { + id: string + settings: SpeechSoundSettings + private generateTask: Task | null + private generatePhase: Phase + + constructor(inits: SoundGenInits = {}) { + super() + this.id = inits.id ?? nanoid() + this.settings = { + name: '', + description: '', + category: SoundCategory.Voice, + speechSettings: { + text: '', + voiceGender: 'male', + voiceAgeGroup: 'youth', + instruction: '' + } + } + this.generateTask = inits.generateTask ?? null + this.generatePhase = inits.generatePhase ?? new Phase({ en: 'generate sound', zh: '生成声音' }) + return reactive(this) as this + } + + private parent: SoundLikeParent | null = null + setParent(parent: SoundLikeParent | null) { + this.parent = parent + } + + get name() { + return this.settings.name + } + setName(name: string) { + const err = validateSoundName(name, this.parent) + if (err != null) throw new Error(`invalid name ${name}: ${err.en}`) + this.settings.name = name + this.result?.setName(name) + } + + setSettings(updates: GenerateSpeechSettingsUpdates) { + if (updates.name != null && updates.name !== this.settings.name) { + updates = { ...updates, name: ensureValidSoundName(updates.name, this.parent) } + } + const { speechSettings, ...rest } = updates + Object.assign(this.settings, rest) + if (speechSettings != null) { + Object.assign(this.settings.speechSettings, speechSettings) + } + if (updates.name != null) this.result?.setName(updates.name) + } + + get generateState() { + return this.generatePhase.state + } + + get result() { + return this.generatePhase.state.status === 'finished' ? 
this.generatePhase.state.result : null + } + + reset() { + this.generateTask = null + this.generatePhase.reset() + } + + /** Run the real `generateSound` task with the current settings and build a `Sound` from its result. */ + async generate() { + return this.generatePhase.run(async (reporter) => { + // Try to cancel the previous task (if any) before starting a new one; `recordAdoption` and `cancel` read `generateTask`. + this.generateTask?.tryCancel() + this.generateTask = new Task(TaskType.GenerateSound) + await this.generateTask.start({ settings: this.settings }) + const taskResult = await this.generateTask.untilCompleted(reporter) + return this.createSound(taskResult) + }) + } + + private async createSound(taskResult: TaskResultGenerateSound) { + const file = createFileWithUniversalUrl(taskResult.audioUrl) + const sound = await Sound.create(this.settings.name, file) + sound.setAssetMetadata({ + description: this.settings.description, + extraSettings: { + category: this.settings.category + } + }) + sound.setExtraConfig({ + builder_soundGen: { + ...this.settings, + result: { + audioUrl: taskResult.audioUrl + } + } + }) + return sound + } + + async recordAdoption() { + const sound = this.result + if (sound == null) throw new Error('result sound expected') + const taskIds = this.generateTask?.data?.status === TaskStatus.Completed ? [this.generateTask.data.id] : [] + const assetData = await sound2Asset(sound) + return adoptAsset({ + taskIds, + asset: { + ...assetData, + displayName: this.settings.name, + description: this.settings.description, + extraSettings: { + category: this.settings.category + } + } + }) + } + + cancel() { + return this.generateTask?.tryCancel() + } +}