diff --git a/app/src/i18n/locales/en/translation.json b/app/src/i18n/locales/en/translation.json index a349c22f..83cea5be 100644 --- a/app/src/i18n/locales/en/translation.json +++ b/app/src/i18n/locales/en/translation.json @@ -369,7 +369,7 @@ "editTranscription": "Edit transcription", "deleteSample": "Delete sample", "addSample": "Add Sample", - "note": "Note: A single 30-second sample is the sweet spot. Quality may decrease with multiple samples. In a future update samples might be interchangeable and tagged for varying styles of the same voice.", + "note": "Add multiple samples to improve voice quality — more diverse recordings produce better results. Samples are automatically combined.", "deleteDialog": { "title": "Delete Sample", "description": "Are you sure you want to delete this audio sample? This action cannot be undone.", diff --git a/app/src/i18n/locales/ja/translation.json b/app/src/i18n/locales/ja/translation.json index a7f0fd24..94ebf4cf 100644 --- a/app/src/i18n/locales/ja/translation.json +++ b/app/src/i18n/locales/ja/translation.json @@ -369,7 +369,7 @@ "editTranscription": "文字起こしを編集", "deleteSample": "サンプルを削除", "addSample": "サンプルを追加", - "note": "メモ:30 秒のサンプル 1 本が最適です。サンプルを複数追加すると品質が低下することがあります。今後のアップデートで、同じボイスの異なるスタイル向けにサンプルを切り替え可能にし、タグ付けできるようにするかもしれません。", + "note": "複数のサンプルを追加すると音声品質が向上します — 多様な録音ほど良い結果が得られます。サンプルは自動的に結合されます。", "deleteDialog": { "title": "サンプルを削除", "description": "このオーディオサンプルを本当に削除しますか? 
この操作は元に戻せません。", diff --git a/app/src/i18n/locales/zh-CN/translation.json b/app/src/i18n/locales/zh-CN/translation.json index b0c78241..53aa133a 100644 --- a/app/src/i18n/locales/zh-CN/translation.json +++ b/app/src/i18n/locales/zh-CN/translation.json @@ -369,7 +369,7 @@ "editTranscription": "编辑转录", "deleteSample": "删除样本", "addSample": "添加样本", - "note": "注意:单个 30 秒的样本效果最佳。多个样本可能会降低质量。未来版本中样本可能会变得可互换,并为同一声音的不同风格打标签。", + "note": "添加多个样本可提升音质 — 录音越多样化,效果越好。样本将自动合并。", "deleteDialog": { "title": "删除样本", "description": "确定要删除此音频样本吗?此操作不可撤销。", diff --git a/app/src/i18n/locales/zh-TW/translation.json b/app/src/i18n/locales/zh-TW/translation.json index e946fc95..7e012b8d 100644 --- a/app/src/i18n/locales/zh-TW/translation.json +++ b/app/src/i18n/locales/zh-TW/translation.json @@ -369,7 +369,7 @@ "editTranscription": "編輯轉錄", "deleteSample": "刪除樣本", "addSample": "新增樣本", - "note": "注意:單一 30 秒的樣本效果最佳。多個樣本可能會降低品質。未來版本中樣本可能可互換,並為同一聲音的不同風格加上標籤。", + "note": "新增多個樣本可提升音質 — 錄音越多樣化,效果越好。樣本將自動合併。", "deleteDialog": { "title": "刪除樣本", "description": "確定要刪除此音訊樣本嗎?此操作無法復原。", diff --git a/app/src/lib/api/models/ProfileSampleResponse.ts b/app/src/lib/api/models/ProfileSampleResponse.ts index 4f95dc78..c681d05e 100644 --- a/app/src/lib/api/models/ProfileSampleResponse.ts +++ b/app/src/lib/api/models/ProfileSampleResponse.ts @@ -10,4 +10,5 @@ export type ProfileSampleResponse = { profile_id: string; audio_path: string; reference_text: string; + sort_order: number; }; diff --git a/app/src/lib/api/schemas/$ProfileSampleResponse.ts b/app/src/lib/api/schemas/$ProfileSampleResponse.ts index 5a15e989..fab1a1d3 100644 --- a/app/src/lib/api/schemas/$ProfileSampleResponse.ts +++ b/app/src/lib/api/schemas/$ProfileSampleResponse.ts @@ -21,5 +21,9 @@ export const $ProfileSampleResponse = { type: 'string', isRequired: true, }, + sort_order: { + type: 'number', + isRequired: true, + }, }, } as const; diff --git a/app/src/lib/hooks/useAudioRecording.ts b/app/src/lib/hooks/useAudioRecording.ts index 
6c253674..ddbed11d 100644 --- a/app/src/lib/hooks/useAudioRecording.ts +++ b/app/src/lib/hooks/useAudioRecording.ts @@ -21,12 +21,18 @@ export function useAudioRecording({ const timerRef = useRef(null); const startTimeRef = useRef(null); const cancelledRef = useRef(false); + // Monotonically-increasing session counter. Each call to startRecording + // increments it; the onstop closure captures it and bails out if it no + // longer matches — prevents a slow convertToWav from a previous session + // from calling onRecordingComplete after a new recording has already begun. + const sessionRef = useRef(0); const startRecording = useCallback(async () => { try { setError(null); chunksRef.current = []; cancelledRef.current = false; + sessionRef.current += 1; setDuration(0); // Check if getUserMedia is available @@ -88,6 +94,12 @@ export function useAudioRecording({ } }; + // Capture the session ID for this recording at the time the recorder + // is set up. If the user starts a new recording before the async + // onstop work finishes (e.g. convertToWav is slow), the new session + // will have a different ID and we skip the stale completion callback. + const thisSession = sessionRef.current; + mediaRecorder.onstop = async () => { // Snapshot the cancellation flag and recorded duration immediately — // cancelRecording() clears chunks and sets cancelledRef synchronously @@ -105,17 +117,32 @@ export function useAudioRecording({ }); streamRef.current = null; + // Clear the recorder ref now that it's done — prevents stopRecording + // from accidentally operating on an already-stopped MediaRecorder if + // the user clicks stop again before state catches up. 
+ if (mediaRecorderRef.current === mediaRecorder) { + mediaRecorderRef.current = null; + } + // Don't fire completion callback if the recording was cancelled if (wasCancelled) return; + // Don't fire completion callback if a newer session has started + if (sessionRef.current !== thisSession) return; + // Convert to WAV format to avoid needing ffmpeg on backend try { const wavBlob = await convertToWav(webmBlob); - onRecordingComplete?.(wavBlob, recordedDuration); + // Final guard: still belongs to this session? + if (sessionRef.current === thisSession) { + onRecordingComplete?.(wavBlob, recordedDuration); + } } catch (err) { console.error('Error converting audio to WAV:', err); // Fallback to original blob if conversion fails - onRecordingComplete?.(webmBlob, recordedDuration); + if (sessionRef.current === thisSession) { + onRecordingComplete?.(webmBlob, recordedDuration); + } } }; @@ -162,10 +189,12 @@ export function useAudioRecording({ setError(errorMessage); setIsRecording(false); } - }, [maxDurationSeconds, onRecordingComplete]); + }, [maxDurationSeconds, onRecordingComplete, platform.metadata.isTauri]); const stopRecording = useCallback(() => { - if (mediaRecorderRef.current && isRecording) { + // Check the ref rather than the `isRecording` state so this works even + // if React hasn't flushed the state update yet (e.g. rapid UI clicks). 
+ if (mediaRecorderRef.current && mediaRecorderRef.current.state !== 'inactive') { mediaRecorderRef.current.stop(); setIsRecording(false); @@ -174,13 +203,18 @@ export function useAudioRecording({ timerRef.current = null; } } - }, [isRecording]); + }, []); const cancelRecording = useCallback(() => { if (mediaRecorderRef.current) { cancelledRef.current = true; // Must be set before stop() triggers onstop chunksRef.current = []; - mediaRecorderRef.current.stop(); + if (mediaRecorderRef.current.state !== 'inactive') { + mediaRecorderRef.current.stop(); + } + // Clear immediately so onstop (if it fires synchronously on some + // browsers) doesn't see a stale ref. + mediaRecorderRef.current = null; setIsRecording(false); setDuration(0); } diff --git a/backend/database/migrations.py b/backend/database/migrations.py index d353b58c..4172e928 100644 --- a/backend/database/migrations.py +++ b/backend/database/migrations.py @@ -43,11 +43,14 @@ def run_migrations(engine) -> None: _migrate_generation_versions(engine, inspector, tables) _migrate_capture_settings(engine, inspector, tables) _migrate_mcp_bindings(engine, inspector, tables) + _migrate_profile_samples(engine, inspector, tables) _normalize_storage_paths(engine, tables) + _add_performance_indexes(engine, tables) # -- helpers --------------------------------------------------------------- + def _get_columns(inspector, table: str) -> set[str]: return {col["name"] for col in inspector.get_columns(table)} @@ -62,6 +65,7 @@ def _add_column(engine, table: str, column_sql: str, label: str) -> None: # -- per-table migrations -------------------------------------------------- + def _migrate_story_items(engine, inspector, tables: set[str]) -> None: if "story_items" not in tables: return @@ -73,15 +77,15 @@ def _migrate_story_items(engine, inspector, tables: set[str]) -> None: logger.info("Migrating story_items: removing position column, using start_time_ms") with engine.connect() as conn: if "start_time_ms" not in columns: - 
conn.execute(text( - "ALTER TABLE story_items ADD COLUMN start_time_ms INTEGER DEFAULT 0" - )) - result = conn.execute(text(""" + conn.execute(text("ALTER TABLE story_items ADD COLUMN start_time_ms INTEGER DEFAULT 0")) + result = conn.execute( + text(""" SELECT si.id, si.story_id, si.position, g.duration FROM story_items si JOIN generations g ON si.generation_id = g.id ORDER BY si.story_id, si.position - """)) + """) + ) current_story_id = None current_time_ms = 0 for item_id, story_id, _position, duration in result.fetchall(): @@ -96,7 +100,8 @@ def _migrate_story_items(engine, inspector, tables: set[str]) -> None: conn.commit() # Recreate table without the position column (SQLite lacks DROP COLUMN) - conn.execute(text(""" + conn.execute( + text(""" CREATE TABLE story_items_new ( id VARCHAR PRIMARY KEY, story_id VARCHAR NOT NULL, @@ -110,13 +115,16 @@ def _migrate_story_items(engine, inspector, tables: set[str]) -> None: FOREIGN KEY (story_id) REFERENCES stories(id), FOREIGN KEY (generation_id) REFERENCES generations(id) ) - """)) - conn.execute(text(""" + """) + ) + conn.execute( + text(""" INSERT INTO story_items_new (id, story_id, generation_id, start_time_ms, track, trim_start_ms, trim_end_ms, version_id, created_at) SELECT id, story_id, generation_id, start_time_ms, COALESCE(track, 0), COALESCE(trim_start_ms, 0), COALESCE(trim_end_ms, 0), version_id, created_at FROM story_items - """)) + """) + ) conn.execute(text("DROP TABLE story_items")) conn.execute(text("ALTER TABLE story_items_new RENAME TO story_items")) conn.commit() @@ -292,13 +300,55 @@ def _supports_drop_column(engine) -> bool: return tuple(int(p) for p in sqlite3.sqlite_version.split(".")[:3]) >= (3, 35, 0) +def _migrate_profile_samples(engine, inspector, tables: set[str]) -> None: + if "profile_samples" not in tables: + return + columns = _get_columns(inspector, "profile_samples") + if "sort_order" not in columns: + _add_column(engine, "profile_samples", "sort_order INTEGER NOT NULL DEFAULT 0", 
"sort_order") + with engine.connect() as conn: + conn.execute( + text("CREATE INDEX IF NOT EXISTS ix_profile_samples_sort_order ON profile_samples (sort_order)") + ) + conn.commit() + + +def _add_performance_indexes(engine, tables: set[str]) -> None: + """Add query-performance indexes that were missing from the initial schema. + + Each CREATE INDEX is wrapped in IF NOT EXISTS so this is safe to call on + every startup against both new and existing databases. + """ + indexes = [ + # History page: filter/sort by profile, status, created_at + ("generations", "ix_generations_profile_id", "profile_id"), + ("generations", "ix_generations_status", "status"), + ("generations", "ix_generations_created_at", "created_at"), + ("generations", "ix_generations_is_favorited", "is_favorited"), + # Version lookups per generation + ("generation_versions", "ix_generation_versions_generation_id", "generation_id"), + # Story item lookups per story + ("story_items", "ix_story_items_story_id", "story_id"), + # Capture list sorted by date + ("captures", "ix_captures_created_at", "created_at"), + # Sample lookups per profile + ("profile_samples", "ix_profile_samples_profile_id", "profile_id"), + ] + with engine.connect() as conn: + for table, index_name, column in indexes: + if table not in tables: + continue + conn.execute(text(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table} ({column})")) + conn.commit() + + def _normalize_storage_paths(engine, tables: set[str]) -> None: """Normalize stored file paths to be relative to the configured data dir.""" from pathlib import Path - from ..config import get_data_dir, to_storage_path, resolve_storage_path + from ..config import get_data_dir, resolve_storage_path, to_storage_path - data_dir = get_data_dir() + get_data_dir() path_columns = [ ("generations", "audio_path"), @@ -312,9 +362,7 @@ def _normalize_storage_paths(engine, tables: set[str]) -> None: for table, column in path_columns: if table not in tables: continue - rows = conn.execute( - 
text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL") - ).fetchall() + rows = conn.execute(text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL")).fetchall() for row_id, path_val in rows: if not path_val: continue diff --git a/backend/database/models.py b/backend/database/models.py index 6ef2213e..09e40d8c 100644 --- a/backend/database/models.py +++ b/backend/database/models.py @@ -1,9 +1,9 @@ """ORM model definitions for the voicebox SQLite database.""" -from datetime import datetime import uuid +from datetime import UTC, datetime -from sqlalchemy import Column, String, Integer, Float, DateTime, Text, ForeignKey, Boolean, JSON +from sqlalchemy import JSON, Boolean, Column, DateTime, Float, ForeignKey, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base from ..utils.capture_chords import ( @@ -34,18 +34,18 @@ class VoiceProfile(Base): # Voice type system — added v0.3.x voice_type = Column(String, default="cloned") # "cloned" | "preset" | "designed" - preset_engine = Column(String, nullable=True) # e.g. "kokoro" — only for preset + preset_engine = Column(String, nullable=True) # e.g. "kokoro" — only for preset preset_voice_id = Column(String, nullable=True) # e.g. "am_adam" — only for preset - design_prompt = Column(Text, nullable=True) # text description — only for designed - default_engine = Column(String, nullable=True) # auto-selected engine, locked for preset + design_prompt = Column(Text, nullable=True) # text description — only for designed + default_engine = Column(String, nullable=True) # auto-selected engine, locked for preset # Free-form character prompt used by the compose button and the # personality-rewrite path on /generate. Describes *what* this voice # says and how, orthogonal to how it sounds (handled by the preset / # cloning metadata above). 
personality = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) + updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class ProfileSample(Base): @@ -54,9 +54,10 @@ class ProfileSample(Base): __tablename__ = "profile_samples" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - profile_id = Column(String, ForeignKey("profiles.id"), nullable=False) + profile_id = Column(String, ForeignKey("profiles.id"), nullable=False, index=True) audio_path = Column(String, nullable=False) reference_text = Column(Text, nullable=False) + sort_order = Column(Integer, nullable=False, default=0, index=True) class Generation(Base): @@ -65,7 +66,7 @@ class Generation(Base): __tablename__ = "generations" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - profile_id = Column(String, ForeignKey("profiles.id"), nullable=False) + profile_id = Column(String, ForeignKey("profiles.id"), nullable=False, index=True) text = Column(Text, nullable=False) language = Column(String, default="en") audio_path = Column(String, nullable=True) @@ -74,15 +75,15 @@ class Generation(Base): instruct = Column(Text) engine = Column(String, default="qwen") model_size = Column(String, nullable=True) - status = Column(String, default="completed") + status = Column(String, default="completed", index=True) error = Column(Text, nullable=True) - is_favorited = Column(Boolean, default=False) + is_favorited = Column(Boolean, default=False, index=True) # Origin of this generation — "manual" for plain /generate calls, # "personality_speak" for rows whose text was rewritten through the # profile's personality LLM before TTS. Future sources (bulk import, # agent replies, etc.) can extend this. 
source = Column(String, nullable=False, default="manual") - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC), index=True) class Story(Base): @@ -93,8 +94,8 @@ class Story(Base): id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) description = Column(Text) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) + updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class StoryItem(Base): @@ -103,7 +104,7 @@ class StoryItem(Base): __tablename__ = "story_items" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - story_id = Column(String, ForeignKey("stories.id"), nullable=False) + story_id = Column(String, ForeignKey("stories.id"), nullable=False, index=True) generation_id = Column(String, ForeignKey("generations.id"), nullable=False) version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True) start_time_ms = Column(Integer, nullable=False, default=0) @@ -111,7 +112,7 @@ class StoryItem(Base): trim_start_ms = Column(Integer, nullable=False, default=0) trim_end_ms = Column(Integer, nullable=False, default=0) volume = Column(Float, nullable=False, default=1.0) - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) class Project(Base): @@ -122,8 +123,8 @@ class Project(Base): id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) data = Column(Text) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) + updated_at = 
Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class GenerationVersion(Base): @@ -132,13 +133,13 @@ class GenerationVersion(Base): __tablename__ = "generation_versions" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - generation_id = Column(String, ForeignKey("generations.id"), nullable=False) + generation_id = Column(String, ForeignKey("generations.id"), nullable=False, index=True) label = Column(String, nullable=False) audio_path = Column(String, nullable=False) effects_chain = Column(Text, nullable=True) source_version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True) is_default = Column(Boolean, default=False) - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) class EffectPreset(Base): @@ -152,7 +153,7 @@ class EffectPreset(Base): effects_chain = Column(Text, nullable=False) is_builtin = Column(Boolean, default=False) sort_order = Column(Integer, default=100) - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) class AudioChannel(Base): @@ -163,7 +164,7 @@ class AudioChannel(Base): id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) is_default = Column(Boolean, default=False) - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) class ChannelDeviceMapping(Base): @@ -212,13 +213,9 @@ class CaptureSettings(Base): hotkey_enabled = Column(Boolean, nullable=False, default=False) # Lists of keytap key names (e.g. "MetaRight", "ControlRight"). Right-hand # modifiers by default so they don't collide with left-hand shortcuts. 
- chord_push_to_talk_keys = Column( - JSON, nullable=False, default=default_push_to_talk_chord - ) - chord_toggle_to_talk_keys = Column( - JSON, nullable=False, default=default_toggle_to_talk_chord - ) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + chord_push_to_talk_keys = Column(JSON, nullable=False, default=default_push_to_talk_chord) + chord_toggle_to_talk_keys = Column(JSON, nullable=False, default=default_toggle_to_talk_chord) + updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class GenerationSettings(Base): @@ -231,7 +228,7 @@ class GenerationSettings(Base): crossfade_ms = Column(Integer, nullable=False, default=50) normalize_audio = Column(Boolean, nullable=False, default=True) autoplay_on_generate = Column(Boolean, nullable=False, default=True) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class MCPClientBinding(Base): @@ -254,8 +251,8 @@ class MCPClientBinding(Base): # (rewrite) before TTS by default. Callers can still override per call. 
default_personality = Column(Boolean, nullable=False, default=False) last_seen_at = Column(DateTime, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC)) + updated_at = Column(DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) class Capture(Base): @@ -278,4 +275,4 @@ class Capture(Base): stt_model = Column(String, nullable=True) llm_model = Column(String, nullable=True) refinement_flags = Column(Text, nullable=True) # JSON blob - created_at = Column(DateTime, default=datetime.utcnow) + created_at = Column(DateTime, default=lambda: datetime.now(UTC), index=True) diff --git a/backend/models.py b/backend/models.py index 06f321ac..1069a590 100644 --- a/backend/models.py +++ b/backend/models.py @@ -71,11 +71,18 @@ class ProfileSampleResponse(BaseModel): profile_id: str audio_path: str reference_text: str + sort_order: int = 0 class Config: from_attributes = True +class SampleReorderRequest(BaseModel): + """Request model for reordering profile samples.""" + + sample_ids: list[str] + + class GenerationRequest(BaseModel): """Request model for voice generation.""" diff --git a/backend/routes/profiles.py b/backend/routes/profiles.py index e0f7f7fd..125983c7 100644 --- a/backend/routes/profiles.py +++ b/backend/routes/profiles.py @@ -4,7 +4,7 @@ import json as _json import logging import tempfile -from datetime import datetime +from datetime import UTC, datetime from pathlib import Path from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile @@ -13,9 +13,10 @@ from .. 
import config, models from ..app import safe_content_disposition -from ..database import VoiceProfile as DBVoiceProfile, get_db +from ..database import ProfileSample as DBProfileSample, VoiceProfile as DBVoiceProfile, get_db from ..services import channels, export_import, personality, profiles from ..services.profiles import _profile_to_response +from ..utils.cache import clear_profile_cache logger = logging.getLogger(__name__) @@ -31,9 +32,9 @@ async def create_profile( try: return await profiles.create_profile(data, db) except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e except Exception as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e @router.get("/profiles", response_model=list[models.VoiceProfileResponse]) @@ -49,21 +50,29 @@ async def import_profile( ): """Import a voice profile from a ZIP archive.""" MAX_FILE_SIZE = 100 * 1024 * 1024 - - content = await file.read() - - if len(content) > MAX_FILE_SIZE: - raise HTTPException( - status_code=400, detail=f"File too large. Maximum size is {MAX_FILE_SIZE / (1024 * 1024)}MB" - ) + CHUNK_SIZE = 1024 * 1024 # 1 MB + + # Stream-read with an early size cap so oversized uploads are rejected + # before the entire payload is buffered into memory. + chunks: list[bytes] = [] + total = 0 + while chunk := await file.read(CHUNK_SIZE): + total += len(chunk) + if total > MAX_FILE_SIZE: + raise HTTPException( + status_code=413, + detail=f"File too large. 
Maximum size is {MAX_FILE_SIZE // (1024 * 1024)} MB.", + ) + chunks.append(chunk) + content = b"".join(chunks) try: profile = await export_import.import_profile_from_zip(content, db) return profile except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) from e # ── Preset Voice Endpoints ─────────────────────────────────────────── @@ -106,6 +115,7 @@ async def list_preset_voices(engine: str): } return {"engine": engine, "voices": []} + @router.get("/profiles/{profile_id}", response_model=models.VoiceProfileResponse) async def get_profile( profile_id: str, @@ -131,7 +141,7 @@ async def update_profile( raise HTTPException(status_code=404, detail="Profile not found") return profile except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e @router.delete("/profiles/{profile_id}") @@ -184,9 +194,9 @@ async def add_profile_sample( ) return sample except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to process audio file: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to process audio file: {e!s}") from e finally: Path(tmp_path).unlink(missing_ok=True) @@ -225,6 +235,26 @@ async def update_profile_sample( return sample +@router.patch("/profiles/{profile_id}/samples/reorder") +async def reorder_samples( + profile_id: str, + data: models.SampleReorderRequest, + db: Session = Depends(get_db), +): + """Reorder voice profile samples. 
Pass sample_ids in desired order.""" + for idx, sample_id in enumerate(data.sample_ids): + db.query(DBProfileSample).filter( + DBProfileSample.id == sample_id, + DBProfileSample.profile_id == profile_id, + ).update({"sort_order": idx}) + db.commit() + clear_profile_cache(profile_id) + return {"ok": True} + + +AVATAR_MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB — avatars are images; 10 MB is generous + + @router.post("/profiles/{profile_id}/avatar", response_model=models.VoiceProfileResponse) async def upload_profile_avatar( profile_id: str, @@ -232,8 +262,19 @@ async def upload_profile_avatar( db: Session = Depends(get_db), ): """Upload or update avatar image for a profile.""" - with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as tmp: - content = await file.read() + chunks: list[bytes] = [] + total = 0 + while chunk := await file.read(1024 * 1024): + total += len(chunk) + if total > AVATAR_MAX_FILE_SIZE: + raise HTTPException( + status_code=413, + detail=f"Avatar too large. 
Maximum size is {AVATAR_MAX_FILE_SIZE // (1024 * 1024)} MB.", + ) + chunks.append(chunk) + content = b"".join(chunks) + + with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename or "avatar.jpg").suffix) as tmp: tmp.write(content) tmp_path = tmp.name @@ -241,7 +282,7 @@ async def upload_profile_avatar( profile = await profiles.upload_avatar(profile_id, tmp_path, db) return profile except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e finally: Path(tmp_path).unlink(missing_ok=True) @@ -302,9 +343,9 @@ async def export_profile( headers={"Content-Disposition": safe_content_disposition("attachment", filename)}, ) except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) from e @router.get("/profiles/{profile_id}/channels") @@ -317,7 +358,7 @@ async def get_profile_channels( channel_ids = await channels.get_profile_channels(profile_id, db) return {"channel_ids": channel_ids} except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e @router.put("/profiles/{profile_id}/channels") @@ -331,7 +372,7 @@ async def set_profile_channels( await channels.set_profile_channels(profile_id, data, db) return {"message": "Profile channels updated successfully"} except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) + raise HTTPException(status_code=400, detail=str(e)) from e @router.put("/profiles/{profile_id}/effects", response_model=models.VoiceProfileResponse) @@ -356,7 +397,7 @@ async def update_profile_effects( else: profile.effects_chain = None - profile.updated_at = datetime.utcnow() + profile.updated_at = datetime.now(UTC) db.commit() db.refresh(profile) @@ -386,7 
+427,5 @@ async def compose_in_character( try: result = await personality.compose_as_profile(profile.personality) except ValueError as e: - raise HTTPException(status_code=400, detail=str(e)) - return models.PersonalityTextResponse( - text=result.text, model_size=result.model_size - ) + raise HTTPException(status_code=400, detail=str(e)) from e + return models.PersonalityTextResponse(text=result.text, model_size=result.model_size) diff --git a/backend/services/profiles.py b/backend/services/profiles.py index d7d32fa0..e1ffd0a2 100644 --- a/backend/services/profiles.py +++ b/backend/services/profiles.py @@ -4,8 +4,7 @@ import logging import shutil import uuid -from datetime import datetime -from pathlib import Path +from datetime import UTC, datetime from sqlalchemy import func from sqlalchemy.orm import Session @@ -120,9 +119,7 @@ def validate_profile_engine(profile, engine: str) -> None: if not preset_engine or not preset_voice_id: raise ValueError(f"Preset profile {profile.id} is missing preset engine metadata") if preset_engine != engine: - raise ValueError( - f"Preset profile {profile.id} only supports engine '{preset_engine}', not '{engine}'" - ) + raise ValueError(f"Preset profile {profile.id} only supports engine '{preset_engine}', not '{engine}'") return if voice_type == "designed": @@ -183,8 +180,8 @@ async def create_profile( design_prompt=data.design_prompt, default_engine=default_engine, personality=data.personality, - created_at=datetime.utcnow(), - updated_at=datetime.utcnow(), + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC), ) db.add(db_profile) @@ -222,9 +219,7 @@ async def add_profile_sample( raise ValueError(f"Profile {profile_id} not found") # Validate and load audio in a single pass, off the event loop - is_valid, error_msg, audio, sr = await asyncio.to_thread( - validate_and_load_reference_audio, audio_path - ) + is_valid, error_msg, audio, sr = await asyncio.to_thread(validate_and_load_reference_audio, audio_path) if not 
is_valid: raise ValueError(f"Invalid reference audio: {error_msg}") @@ -235,16 +230,25 @@ async def add_profile_sample( dest_path = profile_dir / f"{sample_id}.wav" await asyncio.to_thread(save_audio, audio, str(dest_path), sr) + # Assign sort_order = max(existing) + 1 so each new sample appends at + # the end rather than landing at 0 (which would make all samples tie for + # the top position and produce non-deterministic ordering). + from sqlalchemy import func as _func + + max_order = db.query(_func.max(DBProfileSample.sort_order)).filter_by(profile_id=profile_id).scalar() + next_order = (max_order or 0) + 1 + db_sample = DBProfileSample( id=sample_id, profile_id=profile_id, audio_path=config.to_storage_path(dest_path), reference_text=reference_text, + sort_order=next_order, ) db.add(db_sample) - profile.updated_at = datetime.utcnow() + profile.updated_at = datetime.now(UTC) db.commit() db.refresh(db_sample) @@ -291,11 +295,7 @@ def get_profile_orm_by_name_or_id( row = db.query(DBVoiceProfile).filter(DBVoiceProfile.id == name_or_id).first() if row is not None: return row - return ( - db.query(DBVoiceProfile) - .filter(func.lower(DBVoiceProfile.name) == name_or_id.lower()) - .first() - ) + return db.query(DBVoiceProfile).filter(func.lower(DBVoiceProfile.name) == name_or_id.lower()).first() async def get_profile_samples( @@ -385,7 +385,9 @@ async def update_profile( preset_engine = getattr(profile, "preset_engine", None) preset_voice_id = getattr(profile, "preset_voice_id", None) design_prompt = getattr(profile, "design_prompt", None) - default_engine = data.default_engine if data.default_engine is not None else getattr(profile, "default_engine", None) + default_engine = ( + data.default_engine if data.default_engine is not None else getattr(profile, "default_engine", None) + ) validation_error = _validate_profile_fields( voice_type=voice_type, @@ -403,7 +405,7 @@ async def update_profile( profile.personality = data.personality if data.default_engine is not None: 
profile.default_engine = data.default_engine or None # empty string → NULL - profile.updated_at = datetime.utcnow() + profile.updated_at = datetime.now(UTC) db.commit() db.refresh(profile) @@ -571,7 +573,7 @@ async def create_voice_prompt_for_profile( raise ValueError(f"Engine '{engine}' does not support cloned voice profiles") # ── Cloned profiles: create from audio samples ── - samples = db.query(DBProfileSample).filter_by(profile_id=profile_id).all() + samples = db.query(DBProfileSample).filter_by(profile_id=profile_id).order_by(DBProfileSample.sort_order).all() if not samples: raise ValueError(f"No samples found for profile {profile_id}") @@ -672,7 +674,7 @@ async def upload_avatar( process_avatar(image_path, str(output_path)) profile.avatar_path = config.to_storage_path(output_path) - profile.updated_at = datetime.utcnow() + profile.updated_at = datetime.now(UTC) db.commit() db.refresh(profile) @@ -703,7 +705,7 @@ async def delete_avatar( avatar_path.unlink() profile.avatar_path = None - profile.updated_at = datetime.utcnow() + profile.updated_at = datetime.now(UTC) db.commit()