From 7ac6e2fc72544370422eda3c233c0f04aaeed58c Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:27:14 +0800 Subject: [PATCH 01/19] test(transcription): failing cases for build_transcript_payload Covers empty input, single line, duplicate collapse, sort stability, consecutive same-speaker merge, body_hash stability, missing speaker, and whitespace-only text skipping. Implementation lands in the next commit. Contributes to #120 (slice 1/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/tests/test_transcription_publish.py | 152 ++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 backend/tests/test_transcription_publish.py diff --git a/backend/tests/test_transcription_publish.py b/backend/tests/test_transcription_publish.py new file mode 100644 index 0000000..81ba006 --- /dev/null +++ b/backend/tests/test_transcription_publish.py @@ -0,0 +1,152 @@ +"""Tests for the publish-transcript payload builder.""" + +from datetime import date, datetime, timezone +from hashlib import sha256 + +from dna.models.stored_segment import StoredSegment +from dna.transcription_publish import build_transcript_payload + + +def _segment( + *, + segment_id: str = "seg1", + text: str, + speaker: str | None = "Speaker A", + start: str = "2026-04-15T10:00:00Z", + end: str = "2026-04-15T10:00:05Z", + updated_at: datetime | None = None, +) -> StoredSegment: + ts = updated_at or datetime(2026, 4, 15, 10, 0, 5, tzinfo=timezone.utc) + return StoredSegment( + _id="mongo_" + segment_id, + segment_id=segment_id, + playlist_id=1, + version_id=10, + text=text, + speaker=speaker, + language="en", + absolute_start_time=start, + absolute_end_time=end, + vexa_updated_at=None, + created_at=ts, + updated_at=ts, + ) + + +class TestBuildTranscriptPayload: + """build_transcript_payload 在不同輸入下的行為。""" + + def test_empty_list_returns_empty_body(self): + payload = build_transcript_payload([]) + + assert 
payload.body == "" + assert payload.segments_count == 0 + assert payload.body_hash == sha256(b"").hexdigest() + # 沒有 segment 時退而求其次取今天,主要是讓呼叫端不用處理 None + assert payload.meeting_date == datetime.now(timezone.utc).date() + + def test_single_segment_renders_one_line(self): + segments = [_segment(text="Hello world", speaker="Cameron")] + + payload = build_transcript_payload(segments) + + assert payload.body == "Cameron: Hello world" + assert payload.segments_count == 1 + assert payload.meeting_date == date(2026, 4, 15) + + def test_exact_duplicate_segments_keep_latest_updated(self): + earlier = _segment( + segment_id="a", + text="first draft", + start="2026-04-15T10:00:00Z", + updated_at=datetime(2026, 4, 15, 10, 0, 10, tzinfo=timezone.utc), + ) + later = _segment( + segment_id="a", + text="first draft", + start="2026-04-15T10:00:00Z", + updated_at=datetime(2026, 4, 15, 10, 0, 20, tzinfo=timezone.utc), + ) + + payload = build_transcript_payload([earlier, later]) + + assert payload.segments_count == 1 + assert payload.body.endswith("first draft") + + def test_out_of_order_segments_are_sorted_by_start_time(self): + later = _segment( + segment_id="b", + text="second", + speaker="Alex", + start="2026-04-15T10:01:00Z", + ) + earlier = _segment( + segment_id="a", + text="first", + speaker="Alex", + start="2026-04-15T10:00:00Z", + ) + + payload = build_transcript_payload([later, earlier]) + + assert payload.body.index("first") < payload.body.index("second") + + def test_consecutive_same_speaker_collapses_to_one_line(self): + segments = [ + _segment( + segment_id="1", + text="hello", + speaker="A", + start="2026-04-15T10:00:00Z", + ), + _segment( + segment_id="2", + text="again", + speaker="A", + start="2026-04-15T10:00:01Z", + ), + _segment( + segment_id="3", + text="my turn", + speaker="B", + start="2026-04-15T10:00:02Z", + ), + ] + + payload = build_transcript_payload(segments) + + assert payload.body.splitlines() == ["A: hello again", "B: my turn"] + assert 
payload.segments_count == 3 + + def test_body_hash_is_stable_across_input_permutations(self): + a = _segment(segment_id="a", text="one", start="2026-04-15T10:00:00Z") + b = _segment(segment_id="b", text="two", start="2026-04-15T10:00:05Z") + c = _segment(segment_id="c", text="three", start="2026-04-15T10:00:10Z") + + forward = build_transcript_payload([a, b, c]) + reversed_order = build_transcript_payload([c, b, a]) + + assert forward.body == reversed_order.body + assert forward.body_hash == reversed_order.body_hash + + def test_missing_speaker_is_rendered_as_unknown(self): + segments = [_segment(speaker=None, text="what's this?")] + + payload = build_transcript_payload(segments) + + assert payload.body == "Unknown: what's this?" + + def test_whitespace_only_text_is_dropped(self): + segments = [ + _segment(segment_id="1", text="valid"), + _segment( + segment_id="2", + text=" ", + start="2026-04-15T10:00:05Z", + ), + ] + + payload = build_transcript_payload(segments) + + assert payload.body == "Speaker A: valid" + assert payload.segments_count == 1 From 13ba7c22aec4a5b230b5a364f022f8e341861e3b Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:32:52 +0800 Subject: [PATCH 02/19] feat(transcription): add build_transcript_payload helper Pure function that turns a list of StoredSegment rows into a payload ready to be pushed to the production tracking system. Handles the duplicate segments described in #100 defensively so the bug does not leak into published rows. 
Contributes to #120 (slice 1/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/dna/transcription_publish.py | 78 ++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 backend/src/dna/transcription_publish.py diff --git a/backend/src/dna/transcription_publish.py b/backend/src/dna/transcription_publish.py new file mode 100644 index 0000000..7d3a2ea --- /dev/null +++ b/backend/src/dna/transcription_publish.py @@ -0,0 +1,78 @@ +"""Build a publishable transcript payload from stored segments. + +Converts a list of StoredSegment rows into a single body string plus +a body_hash the caller can use for idempotence checks. Kept pure on +purpose: no storage, no provider, no FastAPI. Callers live in main.py +and the future re-sync CLI. +""" + +from dataclasses import dataclass +from datetime import date, datetime, timezone +from hashlib import sha256 + +from dna.models.stored_segment import StoredSegment + + +@dataclass(slots=True) +class TranscriptPayload: + """What the publisher hands to the prodtrack provider.""" + + body: str + meeting_date: date + body_hash: str + segments_count: int + + +def build_transcript_payload(segments: list[StoredSegment]) -> TranscriptPayload: + """Turn a list of stored segments into a publish-ready payload. + + Rules applied in order: drop whitespace-only text, dedupe exact + (start_time, text) repeats keeping the latest updated_at, sort by + start_time, collapse consecutive same-speaker rows, then render + as "Speaker: text" lines. 
+ """ + # 空白 segment 先過濾,不然後面會出現 "Speaker: " 這種空行 + cleaned = [s for s in segments if s.text and s.text.strip()] + + # 以 (時間, 文字 hash 前 12 碼) 當 key,重複的留較新的 updated_at + latest: dict[tuple[str, str], StoredSegment] = {} + for seg in cleaned: + text_sig = sha256(seg.text.encode("utf-8")).hexdigest()[:12] + key = (seg.absolute_start_time, text_sig) + prev = latest.get(key) + if prev is None or seg.updated_at > prev.updated_at: + latest[key] = seg + + ordered = sorted(latest.values(), key=lambda s: s.absolute_start_time) + + lines: list[str] = [] + last_speaker: str | None = None + for seg in ordered: + speaker = (seg.speaker or "").strip() or "Unknown" + text = seg.text.strip() + # 同一個人連續講話時合併成一行,減少 SG 上的行數雜訊 + if lines and speaker == last_speaker: + lines[-1] = f"{lines[-1]} {text}" + else: + lines.append(f"{speaker}: {text}") + last_speaker = speaker + + body = "\n".join(lines) + body_hash = sha256(body.encode("utf-8")).hexdigest() + meeting_date = _first_segment_date(ordered) + + return TranscriptPayload( + body=body, + meeting_date=meeting_date, + body_hash=body_hash, + segments_count=len(ordered), + ) + + +def _first_segment_date(ordered: list[StoredSegment]) -> date: + if not ordered: + return datetime.now(timezone.utc).date() + raw = ordered[0].absolute_start_time + # ISO 8601 的 Z 字尾 fromisoformat 吃不下,先換成 +00:00 + normalized = raw.replace("Z", "+00:00") if raw.endswith("Z") else raw + return datetime.fromisoformat(normalized).astimezone(timezone.utc).date() From b2b675c11377d99681a68557cbb4c111d6be7921 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:34:48 +0800 Subject: [PATCH 03/19] test(storage): failing cases for published_transcripts persistence Adds PublishedTranscript / PublishedTranscriptUpdate models and six red tests: two on the abstract base plus four on the Mongo impl covering the collection property, find-by-composite-key, missing-row path, and upsert query shape. 
Contributes to #120 (slice 2/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- .../src/dna/models/published_transcript.py | 45 ++++++ backend/tests/test_storage_providers.py | 135 ++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 backend/src/dna/models/published_transcript.py diff --git a/backend/src/dna/models/published_transcript.py b/backend/src/dna/models/published_transcript.py new file mode 100644 index 0000000..9d93419 --- /dev/null +++ b/backend/src/dna/models/published_transcript.py @@ -0,0 +1,45 @@ +"""Published transcript bookkeeping model. + +Tracks which (playlist, version, meeting) has already been pushed to the +production tracking system so re-publishing can be idempotent. The actual +transcript content lives in SG; here we only keep the reference plus a +body_hash used to skip no-op re-publishes. +""" + +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class PublishedTranscriptUpdate(BaseModel): + """Upsert payload for the published_transcripts collection.""" + + playlist_id: int + version_id: int + meeting_id: str + sg_entity_type: str = Field( + description="Custom entity type in the tracking system (e.g. 
CustomEntity01)" + ) + sg_entity_id: int = Field(description="ID of the row created in tracking system") + author_email: str + body_hash: str = Field(description="sha256 of the published body for idempotence") + segments_count: int + + +class PublishedTranscript(BaseModel): + """Full record for a row we have pushed to the tracking system.""" + + model_config = ConfigDict(populate_by_name=True) + + id: str = Field(alias="_id") + playlist_id: int + version_id: int + meeting_id: str + sg_entity_type: str + sg_entity_id: int + author_email: str + body_hash: str + segments_count: int + created_at: datetime + updated_at: datetime diff --git a/backend/tests/test_storage_providers.py b/backend/tests/test_storage_providers.py index 55ef719..8a5d1d8 100644 --- a/backend/tests/test_storage_providers.py +++ b/backend/tests/test_storage_providers.py @@ -7,6 +7,10 @@ from dna.models.draft_note import DraftNote, DraftNoteUpdate from dna.models.playlist_metadata import PlaylistMetadata, PlaylistMetadataUpdate +from dna.models.published_transcript import ( + PublishedTranscript, + PublishedTranscriptUpdate, +) from dna.models.stored_segment import StoredSegment, StoredSegmentCreate from dna.storage_providers.mongodb import MongoDBStorageProvider from dna.storage_providers.storage_provider_base import ( @@ -15,6 +19,19 @@ ) +def _transcript_update() -> PublishedTranscriptUpdate: + return PublishedTranscriptUpdate( + playlist_id=42, + version_id=7, + meeting_id="meet-abc", + sg_entity_type="CustomEntity01", + sg_entity_id=9001, + author_email="user@test.com", + body_hash="deadbeef", + segments_count=12, + ) + + class TestStorageProviderBase: """Tests for StorageProviderBase class.""" @@ -96,6 +113,20 @@ async def test_get_segments_for_version_raises_not_implemented(self): with pytest.raises(NotImplementedError): await provider.get_segments_for_version(1, 1) + @pytest.mark.asyncio + async def test_get_published_transcript_raises_not_implemented(self): + """Base class should not try to 
talk to any backing store.""" + provider = StorageProviderBase() + with pytest.raises(NotImplementedError): + await provider.get_published_transcript(1, 1, "meet-1") + + @pytest.mark.asyncio + async def test_upsert_published_transcript_raises_not_implemented(self): + """Abstract upsert must bubble up unless a subclass overrides.""" + provider = StorageProviderBase() + with pytest.raises(NotImplementedError): + await provider.upsert_published_transcript(_transcript_update()) + class TestGetStorageProvider: """Tests for get_storage_provider factory function.""" @@ -771,3 +802,107 @@ async def async_generator(): assert result[0].text == "Hello" assert result[1].text == "World" mock_cursor.sort.assert_called_once_with("absolute_start_time", 1) + + @pytest.mark.asyncio + async def test_published_transcripts_collection_property(self, provider): + """published_transcripts 應該指向 dna 資料庫底下的 published_transcripts。""" + mock_client = mock.MagicMock() + mock_db = mock.MagicMock() + mock_collection = mock.MagicMock() + mock_client.dna = mock_db + mock_db.published_transcripts = mock_collection + provider._client = mock_client + + assert provider.published_transcripts_collection is mock_collection + + @pytest.mark.asyncio + async def test_get_published_transcript_found(self, provider): + """找得到對應 (playlist, version, meeting) 時回傳完整 model。""" + mock_collection = mock.MagicMock() + now = datetime.now(timezone.utc) + doc = { + "_id": "mongo-id-1", + "playlist_id": 42, + "version_id": 7, + "meeting_id": "meet-abc", + "sg_entity_type": "CustomEntity01", + "sg_entity_id": 9001, + "author_email": "user@test.com", + "body_hash": "deadbeef", + "segments_count": 12, + "created_at": now, + "updated_at": now, + } + mock_collection.find_one = mock.AsyncMock(return_value=doc) + mock_client = mock.MagicMock() + mock_db = mock.MagicMock() + mock_client.dna = mock_db + mock_db.published_transcripts = mock_collection + provider._client = mock_client + + result = await 
provider.get_published_transcript(42, 7, "meet-abc") + + assert isinstance(result, PublishedTranscript) + assert result.sg_entity_id == 9001 + mock_collection.find_one.assert_awaited_once_with( + {"playlist_id": 42, "version_id": 7, "meeting_id": "meet-abc"} + ) + + @pytest.mark.asyncio + async def test_get_published_transcript_missing_returns_none(self, provider): + """沒有紀錄時 None 要一路傳回來。""" + mock_collection = mock.MagicMock() + mock_collection.find_one = mock.AsyncMock(return_value=None) + mock_client = mock.MagicMock() + mock_db = mock.MagicMock() + mock_client.dna = mock_db + mock_db.published_transcripts = mock_collection + provider._client = mock_client + + result = await provider.get_published_transcript(1, 2, "nope") + + assert result is None + + @pytest.mark.asyncio + async def test_upsert_published_transcript_upserts_by_composite_key(self, provider): + """upsert 要用 (playlist, version, meeting) 當 query、並回傳完整 model。""" + mock_collection = mock.MagicMock() + now = datetime.now(timezone.utc) + result_doc = { + "_id": "mongo-id-2", + "playlist_id": 42, + "version_id": 7, + "meeting_id": "meet-abc", + "sg_entity_type": "CustomEntity01", + "sg_entity_id": 9001, + "author_email": "user@test.com", + "body_hash": "deadbeef", + "segments_count": 12, + "created_at": now, + "updated_at": now, + } + mock_collection.find_one_and_update = mock.AsyncMock(return_value=result_doc) + mock_client = mock.MagicMock() + mock_db = mock.MagicMock() + mock_client.dna = mock_db + mock_db.published_transcripts = mock_collection + provider._client = mock_client + + result = await provider.upsert_published_transcript(_transcript_update()) + + assert isinstance(result, PublishedTranscript) + assert result.sg_entity_id == 9001 + + call_args = mock_collection.find_one_and_update.call_args + query = call_args[0][0] + assert query == { + "playlist_id": 42, + "version_id": 7, + "meeting_id": "meet-abc", + } + update = call_args[0][1] + assert update["$set"]["body_hash"] == "deadbeef" + 
assert update["$set"]["sg_entity_id"] == 9001 + assert "updated_at" in update["$set"] + assert update["$setOnInsert"]["created_at"] is not None + assert call_args[1]["upsert"] is True From 69c9b9e63f0a2b5aed479adc9105cf17512a6083 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:36:34 +0800 Subject: [PATCH 04/19] feat(storage): persist published-transcript bookkeeping in mongo Adds two methods on the storage contract and the MongoDB implementation: - get_published_transcript(playlist_id, version_id, meeting_id) - upsert_published_transcript(data) Keyed by (playlist, version, meeting). The row stores the SG entity ID and a body_hash so re-publish can skip when nothing changed. Contributes to #120 (slice 2/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/dna/storage_providers/mongodb.py | 45 +++++++++++++++++++ .../storage_provider_base.py | 20 +++++++++ 2 files changed, 65 insertions(+) diff --git a/backend/src/dna/storage_providers/mongodb.py b/backend/src/dna/storage_providers/mongodb.py index af0fe4e..32be509 100644 --- a/backend/src/dna/storage_providers/mongodb.py +++ b/backend/src/dna/storage_providers/mongodb.py @@ -11,6 +11,10 @@ from dna.models.draft_note import DraftNote, DraftNoteUpdate from dna.models.playlist_metadata import PlaylistMetadata, PlaylistMetadataUpdate +from dna.models.published_transcript import ( + PublishedTranscript, + PublishedTranscriptUpdate, +) from dna.models.stored_segment import StoredSegment, StoredSegmentCreate from dna.models.user_settings import UserSettings, UserSettingsUpdate from dna.storage_providers.storage_provider_base import StorageProviderBase @@ -49,6 +53,10 @@ def segments_collection(self) -> Any: def user_settings_collection(self) -> Any: return self.db.user_settings + @property + def published_transcripts_collection(self) -> Any: + return self.db.published_transcripts + def _build_query( self, user_email: str, 
playlist_id: int, version_id: int ) -> dict[str, Any]: @@ -313,3 +321,40 @@ async def delete_user_settings(self, user_email: str) -> bool: query = {"user_email": user_email} result = await self.user_settings_collection.delete_one(query) return result.deleted_count > 0 + + async def get_published_transcript( + self, playlist_id: int, version_id: int, meeting_id: str + ) -> Optional[PublishedTranscript]: + """Fetch the bookkeeping row for a previously published transcript.""" + query = { + "playlist_id": playlist_id, + "version_id": version_id, + "meeting_id": meeting_id, + } + doc = await self.published_transcripts_collection.find_one(query) + if doc: + doc["_id"] = str(doc["_id"]) + return PublishedTranscript(**doc) + return None + + async def upsert_published_transcript( + self, data: PublishedTranscriptUpdate + ) -> PublishedTranscript: + """Insert or overwrite the bookkeeping row for a published transcript.""" + now = datetime.now(timezone.utc) + query = { + "playlist_id": data.playlist_id, + "version_id": data.version_id, + "meeting_id": data.meeting_id, + } + # model_dump 已經把 query 那三欄一起帶進來,$set 時一併寫入沒關係; + # 真正要分開的只有 created_at(只有新增時才需要) + update: dict[str, Any] = { + "$set": {**data.model_dump(), "updated_at": now}, + "$setOnInsert": {"created_at": now}, + } + result = await self.published_transcripts_collection.find_one_and_update( + query, update, upsert=True, return_document=ReturnDocument.AFTER + ) + result["_id"] = str(result["_id"]) + return PublishedTranscript(**result) diff --git a/backend/src/dna/storage_providers/storage_provider_base.py b/backend/src/dna/storage_providers/storage_provider_base.py index 93d8fac..0d6bcc8 100644 --- a/backend/src/dna/storage_providers/storage_provider_base.py +++ b/backend/src/dna/storage_providers/storage_provider_base.py @@ -9,6 +9,10 @@ if TYPE_CHECKING: from dna.models.draft_note import DraftNote, DraftNoteUpdate from dna.models.playlist_metadata import PlaylistMetadata, PlaylistMetadataUpdate + from 
dna.models.published_transcript import ( + PublishedTranscript, + PublishedTranscriptUpdate, + ) from dna.models.stored_segment import StoredSegment, StoredSegmentCreate from dna.models.user_settings import UserSettings, UserSettingsUpdate @@ -110,6 +114,22 @@ async def delete_user_settings(self, user_email: str) -> bool: """Delete user settings. Returns True if deleted.""" raise NotImplementedError() + async def get_published_transcript( + self, playlist_id: int, version_id: int, meeting_id: str + ) -> Optional["PublishedTranscript"]: + """Get the published-transcript record for a (playlist, version, meeting).""" + raise NotImplementedError() + + async def upsert_published_transcript( + self, data: "PublishedTranscriptUpdate" + ) -> "PublishedTranscript": + """Create or update the published-transcript record. + + Upsert key is (playlist_id, version_id, meeting_id). A re-publish with a + different body_hash overwrites the existing row rather than inserting. + """ + raise NotImplementedError() + def get_storage_provider() -> StorageProviderBase: """Factory function to get the configured storage provider.""" From 26e1aa30ff3be22c293a632a4caa4104ed517ff3 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:39:35 +0800 Subject: [PATCH 05/19] test(prodtrack): failing cases for publish_transcript / update_transcript Nine red tests across the base class, the ShotGrid provider (default entity type, env override, create payload shape, disconnect guard, update-body-only, error swallowing) and the mock provider (must refuse with a user-facing message). 
Contributes to #120 (slice 3/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/tests/providers/test_mock_provider.py | 34 +++++ .../tests/providers/test_providers_base.py | 33 +++++ backend/tests/test_shotgrid_provider.py | 137 ++++++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 backend/tests/providers/test_providers_base.py diff --git a/backend/tests/providers/test_mock_provider.py b/backend/tests/providers/test_mock_provider.py index a577778..7055360 100644 --- a/backend/tests/providers/test_mock_provider.py +++ b/backend/tests/providers/test_mock_provider.py @@ -503,6 +503,40 @@ def test_factory_raises_when_shotgrid_selected_but_no_credentials(): get_prodtrack_provider() +class TestMockPublishTranscript: + """Mock provider 不能寫入 SG,嘗試 publish/update 時必須拋清楚的錯。""" + + def test_publish_transcript_raises_with_user_facing_message(self, tmp_path): + from datetime import date as date_ + + db_path = tmp_path / "mock.db" + _create_seeded_db(db_path) + provider = MockProdtrackProvider(db_path=db_path) + + with pytest.raises(NotImplementedError, match="live ShotGrid connection"): + provider.publish_transcript( + project_id=1, + playlist_id=400, + version_id=300, + meeting_id="m-1", + meeting_date=date_(2026, 4, 15), + platform="google_meet", + body="hi", + ) + + def test_update_transcript_raises_with_user_facing_message(self, tmp_path): + from datetime import date as date_ + + db_path = tmp_path / "mock.db" + _create_seeded_db(db_path) + provider = MockProdtrackProvider(db_path=db_path) + + with pytest.raises(NotImplementedError, match="live ShotGrid connection"): + provider.update_transcript( + entity_id=9001, body="hi", meeting_date=date_(2026, 4, 15) + ) + + def test_factory_returns_shotgrid_when_credentials_present(): with mock.patch.dict( os.environ, diff --git a/backend/tests/providers/test_providers_base.py b/backend/tests/providers/test_providers_base.py new file mode 100644 index 0000000..38ac049 --- /dev/null 
+++ b/backend/tests/providers/test_providers_base.py @@ -0,0 +1,33 @@ +"""Tests for the ProdtrackProviderBase abstract surface.""" + +from datetime import date + +import pytest + +from dna.prodtrack_providers.prodtrack_provider_base import ProdtrackProviderBase + + +class TestProdtrackProviderBaseTranscriptContract: + """Base class 的 transcript 方法必須丟 NotImplementedError。""" + + def test_publish_transcript_raises_not_implemented(self): + provider = ProdtrackProviderBase() + with pytest.raises(NotImplementedError): + provider.publish_transcript( + project_id=1, + playlist_id=10, + version_id=100, + meeting_id="m-1", + meeting_date=date(2026, 4, 15), + platform="google_meet", + body="Speaker: hi", + ) + + def test_update_transcript_raises_not_implemented(self): + provider = ProdtrackProviderBase() + with pytest.raises(NotImplementedError): + provider.update_transcript( + entity_id=9001, + body="Speaker: updated", + meeting_date=date(2026, 4, 15), + ) diff --git a/backend/tests/test_shotgrid_provider.py b/backend/tests/test_shotgrid_provider.py index 6f3220a..e2ab33a 100644 --- a/backend/tests/test_shotgrid_provider.py +++ b/backend/tests/test_shotgrid_provider.py @@ -307,3 +307,140 @@ def test_update_version_status_not_connected(self, provider, mock_shotgun): with pytest.raises(ValueError, match="Not connected to ShotGrid"): provider.update_version_status(101, "rev") + + +class TestShotgridProviderPublishTranscript: + """publish_transcript / update_transcript 要把 transcript 寫到 SG 的自訂 entity。""" + + @pytest.fixture + def mock_shotgun(self): + with mock.patch("dna.prodtrack_providers.shotgrid.Shotgun") as mock_sg: + yield mock_sg + + @pytest.fixture + def provider(self, mock_shotgun): + with mock.patch.dict( + os.environ, + { + "SHOTGRID_URL": "https://test.shotgunstudio.com", + "SHOTGRID_SCRIPT_NAME": "test_script", + "SHOTGRID_API_KEY": "test_key", + }, + ): + return ShotgridProvider(connect=True) + + def test_publish_transcript_creates_row_with_default_entity_type( + 
self, provider, mock_shotgun + ): + """預設狀況下,SG 的 entity 應該是 CustomEntity01。""" + from datetime import date as date_ + + mock_sg_instance = mock_shotgun.return_value + provider.sg = mock_sg_instance + mock_sg_instance.create.return_value = {"id": 9001} + + entity_id = provider.publish_transcript( + project_id=1, + playlist_id=42, + version_id=101, + meeting_id="m-abc", + meeting_date=date_(2026, 4, 15), + platform="google_meet", + body="Cameron: hello", + ) + + assert entity_id == 9001 + call_args = mock_sg_instance.create.call_args + assert call_args[0][0] == "CustomEntity01" + payload = call_args[0][1] + assert payload["project"] == {"type": "Project", "id": 1} + assert payload["sg_playlist"] == {"type": "Playlist", "id": 42} + assert payload["sg_versions"] == [{"type": "Version", "id": 101}] + assert payload["sg_meeting_id"] == "m-abc" + assert payload["sg_platform"] == "google_meet" + assert payload["sg_transcript_body"] == "Cameron: hello" + assert payload["sg_meeting_date"] == "2026-04-15" + assert "code" in payload and payload["code"] + + def test_publish_transcript_honours_env_override(self, provider, mock_shotgun): + """站台若把 entity 放在 CustomEntity05,環境變數要能切換。""" + from datetime import date as date_ + + mock_sg_instance = mock_shotgun.return_value + provider.sg = mock_sg_instance + mock_sg_instance.create.return_value = {"id": 9002} + + with mock.patch.dict( + os.environ, {"SHOTGRID_TRANSCRIPT_ENTITY": "CustomEntity05"} + ): + provider.publish_transcript( + project_id=1, + playlist_id=42, + version_id=101, + meeting_id="m-abc", + meeting_date=date_(2026, 4, 15), + platform="google_meet", + body="hello", + ) + + assert mock_sg_instance.create.call_args[0][0] == "CustomEntity05" + + def test_publish_transcript_not_connected_raises(self, provider, mock_shotgun): + """沒連線時應該明確報錯,不要讓上層看到奇怪的 AttributeError。""" + from datetime import date as date_ + + provider.sg = None + provider._sudo_connection = None + with pytest.raises(ValueError, match="Not connected to 
ShotGrid"): + provider.publish_transcript( + project_id=1, + playlist_id=42, + version_id=101, + meeting_id="m-abc", + meeting_date=date_(2026, 4, 15), + platform="google_meet", + body="hello", + ) + + def test_update_transcript_only_patches_body_and_date( + self, provider, mock_shotgun + ): + """update 時只能動 body 跟 meeting_date,不要把 SG 上手動改的欄位蓋掉。""" + from datetime import date as date_ + + mock_sg_instance = mock_shotgun.return_value + provider.sg = mock_sg_instance + + ok = provider.update_transcript( + entity_id=9001, + body="Cameron: updated", + meeting_date=date_(2026, 4, 16), + ) + + assert ok is True + call_args = mock_sg_instance.update.call_args + assert call_args[0][0] == "CustomEntity01" + assert call_args[0][1] == 9001 + patch = call_args[0][2] + assert patch == { + "sg_transcript_body": "Cameron: updated", + "sg_meeting_date": "2026-04-16", + } + + def test_update_transcript_swallows_sg_errors_and_returns_false( + self, provider, mock_shotgun + ): + """SG 寫入失敗時不要炸,方便 endpoint 對照 body_hash 做決策。""" + from datetime import date as date_ + + mock_sg_instance = mock_shotgun.return_value + provider.sg = mock_sg_instance + mock_sg_instance.update.side_effect = Exception("sg boom") + + ok = provider.update_transcript( + entity_id=9001, + body="x", + meeting_date=date_(2026, 4, 16), + ) + + assert ok is False From c6eeabf26b67528f6c4b1a5f333d0382897bd337 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:43:56 +0800 Subject: [PATCH 06/19] feat(prodtrack): publish_transcript / update_transcript on contract + SG Adds two methods on ProdtrackProviderBase and the ShotGrid implementation so the REST endpoint in the next slice has a provider surface to call. The SG custom entity name is read from SHOTGRID_TRANSCRIPT_ENTITY at call time (default CustomEntity01) so studios can point DNA at whichever slot they enabled. 
MockProdtrackProvider raises NotImplementedError with a user-facing message so the mock stack still boots but callers get a clear error instead of silent success. Contributes to #120 (slice 3/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/dna/models/__init__.py | 8 ++- .../dna/prodtrack_providers/mock_provider.py | 12 ++++ .../prodtrack_provider_base.py | 33 ++++++++++ .../src/dna/prodtrack_providers/shotgrid.py | 60 +++++++++++++++++++ backend/tests/test_shotgrid_provider.py | 4 +- 5 files changed, 113 insertions(+), 4 deletions(-) diff --git a/backend/src/dna/models/__init__.py b/backend/src/dna/models/__init__.py index 6df630e..bc246c2 100644 --- a/backend/src/dna/models/__init__.py +++ b/backend/src/dna/models/__init__.py @@ -27,6 +27,10 @@ PlaylistMetadata, PlaylistMetadataUpdate, ) +from dna.models.published_transcript import ( + PublishedTranscript, + PublishedTranscriptUpdate, +) from dna.models.requests import ( CreateNoteRequest, EntityLink, @@ -69,6 +73,7 @@ "Version", "Playlist", "User", + "Transcript", "DNAEntity", "ENTITY_MODELS", "EntityLink", @@ -89,6 +94,8 @@ "DraftNoteUpdate", "PlaylistMetadata", "PlaylistMetadataUpdate", + "PublishedTranscript", + "PublishedTranscriptUpdate", "StoredSegment", "StoredSegmentCreate", "generate_segment_id", @@ -97,7 +104,6 @@ "BotStatusEnum", "DispatchBotRequest", "Platform", - "Transcript", "TranscriptSegment", "UserSettings", "UserSettingsUpdate", diff --git a/backend/src/dna/prodtrack_providers/mock_provider.py b/backend/src/dna/prodtrack_providers/mock_provider.py index bb72cb4..265445e 100644 --- a/backend/src/dna/prodtrack_providers/mock_provider.py +++ b/backend/src/dna/prodtrack_providers/mock_provider.py @@ -596,3 +596,15 @@ def attach_file_to_note( self, note_id: int, file_path: str, display_name: str ) -> bool: return True + + def publish_transcript(self, **_: object) -> int: + raise NotImplementedError( + "Transcript publishing requires a live ShotGrid connection. 
" + "Set PRODTRACK_PROVIDER=shotgrid to use it." + ) + + def update_transcript(self, **_: object) -> bool: + raise NotImplementedError( + "Transcript publishing requires a live ShotGrid connection. " + "Set PRODTRACK_PROVIDER=shotgrid to use it." + ) diff --git a/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py b/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py index f3b8107..17d9ca6 100644 --- a/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py +++ b/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py @@ -1,4 +1,5 @@ import os +from datetime import date from typing import TYPE_CHECKING, Any if TYPE_CHECKING: @@ -190,6 +191,38 @@ def attach_file_to_note( """ raise NotImplementedError("Subclasses must implement this method.") + def publish_transcript( + self, + *, + project_id: int, + playlist_id: int, + version_id: int, + meeting_id: str, + meeting_date: date, + platform: str, + body: str, + ) -> int: + """Create a transcript row in the production tracking system. + + Returns the entity ID of the newly-created row. + """ + raise NotImplementedError("Subclasses must implement this method.") + + def update_transcript( + self, + *, + entity_id: int, + body: str, + meeting_date: date, + ) -> bool: + """Update body + meeting_date on an existing transcript entity. + + Only body and meeting_date are touched on purpose; summary and other + fields are left alone so manual edits on the tracking-system side + survive a re-publish. 
+ """ + raise NotImplementedError("Subclasses must implement this method.") + def get_prodtrack_provider() -> ProdtrackProviderBase: """Get the production tracking provider.""" diff --git a/backend/src/dna/prodtrack_providers/shotgrid.py b/backend/src/dna/prodtrack_providers/shotgrid.py index 5b85939..e39d14d 100644 --- a/backend/src/dna/prodtrack_providers/shotgrid.py +++ b/backend/src/dna/prodtrack_providers/shotgrid.py @@ -2,6 +2,7 @@ import contextlib import os +from datetime import date from typing import Any, Optional from shotgun_api3 import Shotgun @@ -989,6 +990,60 @@ def attach_file_to_note( except Exception: return False + def publish_transcript( + self, + *, + project_id: int, + playlist_id: int, + version_id: int, + meeting_id: str, + meeting_date: date, + platform: str, + body: str, + ) -> int: + """Create a transcript row in the configured SG custom entity.""" + if not self._sg: + raise ValueError("Not connected to ShotGrid") + + entity_type = _transcript_entity_type() + # 用 meeting_date + meeting_id 組出人看得懂的 code,方便在 SG 頁面上辨識 + code = f"transcript-{version_id}-{meeting_date.isoformat()}" + payload: dict[str, Any] = { + "code": code, + "project": {"type": "Project", "id": project_id}, + "sg_playlist": {"type": "Playlist", "id": playlist_id}, + "sg_versions": [{"type": "Version", "id": version_id}], + "sg_meeting_id": meeting_id, + "sg_meeting_date": meeting_date.isoformat(), + "sg_platform": platform, + "sg_transcript_body": body, + } + result = self._sg.create(entity_type, payload) + return result["id"] + + def update_transcript( + self, + *, + entity_id: int, + body: str, + meeting_date: date, + ) -> bool: + """Patch body + date on an existing transcript; other fields untouched.""" + if not self._sg: + return False + try: + self._sg.update( + _transcript_entity_type(), + entity_id, + { + "sg_transcript_body": body, + "sg_meeting_date": meeting_date.isoformat(), + }, + ) + return True + except Exception: + return False + def 
_get_dna_entity_type(sg_entity_type: str) -> str: """Get the DNA entity type from the ShotGrid entity type.""" @@ -996,3 +1051,8 @@ def _get_dna_entity_type(sg_entity_type: str) -> str: if entity_data["entity_id"] == sg_entity_type: return entity_type raise ValueError(f"Unknown entity type: {sg_entity_type}") + + +def _transcript_entity_type() -> str: + """SG 的自訂 entity 名稱由站台決定,這裡用環境變數讓部署端切換。""" + return os.getenv("SHOTGRID_TRANSCRIPT_ENTITY", "CustomEntity01") diff --git a/backend/tests/test_shotgrid_provider.py b/backend/tests/test_shotgrid_provider.py index e2ab33a..eb462a7 100644 --- a/backend/tests/test_shotgrid_provider.py +++ b/backend/tests/test_shotgrid_provider.py @@ -402,9 +402,7 @@ def test_publish_transcript_not_connected_raises(self, provider, mock_shotgun): body="hello", ) - def test_update_transcript_only_patches_body_and_date( - self, provider, mock_shotgun - ): + def test_update_transcript_only_patches_body_and_date(self, provider, mock_shotgun): """update 時只能動 body 跟 meeting_date,不要把 SG 上手動改的欄位蓋掉。""" from datetime import date as date_ From dc5ee7b81d1e067291946f3dc25d6ab268900f15 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:45:31 +0800 Subject: [PATCH 07/19] test(api): failing cases for POST /playlists/{id}/publish-transcript Eight tests covering the flag gate, happy create, skip-when-body-hash- unchanged, update path, missing-metadata, no-segments, mock 501, and the version-without-project guard. Request/response models land with the tests so the endpoint in the next commit has something to return. 
Contributes to #120 (slice 4/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/dna/models/__init__.py | 4 + backend/src/dna/models/requests.py | 17 ++ .../tests/test_publish_transcript_endpoint.py | 256 ++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 backend/tests/test_publish_transcript_endpoint.py diff --git a/backend/src/dna/models/__init__.py b/backend/src/dna/models/__init__.py index bc246c2..7414a24 100644 --- a/backend/src/dna/models/__init__.py +++ b/backend/src/dna/models/__init__.py @@ -40,6 +40,8 @@ GenerateNoteResponse, PublishNotesRequest, PublishNotesResponse, + PublishTranscriptRequest, + PublishTranscriptResponse, SearchRequest, SearchResult, StatusOption, @@ -87,6 +89,8 @@ "StatusOption", "PublishNotesRequest", "PublishNotesResponse", + "PublishTranscriptRequest", + "PublishTranscriptResponse", "DraftNote", "DraftNoteBase", "DraftNoteCreate", diff --git a/backend/src/dna/models/requests.py b/backend/src/dna/models/requests.py index 7e8a156..067fb7c 100644 --- a/backend/src/dna/models/requests.py +++ b/backend/src/dna/models/requests.py @@ -117,3 +117,20 @@ class PublishNotesResponse(BaseModel): skipped_count: int failed_count: int total: int + + +class PublishTranscriptRequest(BaseModel): + """Request to publish a version's captured transcript.""" + + version_id: int = Field(description="Version whose segments to publish") + + +class PublishTranscriptResponse(BaseModel): + """Response from the publish-transcript endpoint.""" + + transcript_entity_id: int = Field( + description="Entity ID of the row in the tracking system" + ) + outcome: str = Field(description="created | updated | skipped") + skipped_reason: Optional[str] = None + segments_count: int diff --git a/backend/tests/test_publish_transcript_endpoint.py b/backend/tests/test_publish_transcript_endpoint.py new file mode 100644 index 0000000..2d77dbe --- /dev/null +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -0,0 
+1,256 @@ +"""Tests for POST /playlists/{id}/publish-transcript.""" + +import os +from datetime import datetime, timezone +from unittest import mock + +import pytest +from fastapi.testclient import TestClient +from main import app, get_prodtrack_provider_cached, get_storage_provider_cached + +from dna.models.playlist_metadata import PlaylistMetadata +from dna.models.published_transcript import PublishedTranscript +from dna.models.stored_segment import StoredSegment + + +ENABLE_FLAG = {"DNA_ENABLE_TRANSCRIPT_PUBLISH": "true"} + + +def _segment(start: str, text: str, speaker: str = "A") -> StoredSegment: + now = datetime.now(timezone.utc) + return StoredSegment( + _id="mongo_" + start, + segment_id="seg-" + start, + playlist_id=42, + version_id=101, + text=text, + speaker=speaker, + language="en", + absolute_start_time=start, + absolute_end_time=start, + vexa_updated_at=None, + created_at=now, + updated_at=now, + ) + + +def _metadata(meeting_id: str = "m-abc", platform: str = "google_meet") -> PlaylistMetadata: + return PlaylistMetadata( + _id="meta-id", + playlist_id=42, + meeting_id=meeting_id, + platform=platform, + ) + + +def _published(body_hash: str) -> PublishedTranscript: + now = datetime.now(timezone.utc) + return PublishedTranscript( + _id="pt-id", + playlist_id=42, + version_id=101, + meeting_id="m-abc", + sg_entity_type="CustomEntity01", + sg_entity_id=9001, + author_email="user@test.com", + body_hash=body_hash, + segments_count=1, + created_at=now, + updated_at=now, + ) + + +class TestPublishTranscriptEndpoint: + """POST /playlists/{id}/publish-transcript 行為測試。""" + + @pytest.fixture + def client(self): + return TestClient(app) + + @pytest.fixture + def mock_storage(self): + return mock.AsyncMock() + + @pytest.fixture + def mock_prodtrack(self): + p = mock.Mock() + version = mock.Mock() + version.project = mock.Mock(id=1) + p.get_entity.return_value = version + return p + + @pytest.fixture + def override_deps(self, mock_storage, mock_prodtrack): + 
app.dependency_overrides[get_storage_provider_cached] = lambda: mock_storage + app.dependency_overrides[get_prodtrack_provider_cached] = lambda: mock_prodtrack + yield + app.dependency_overrides.clear() + + def test_flag_off_returns_404(self, client, mock_storage, mock_prodtrack, override_deps): + """沒開 feature flag 時必須 404。這個 endpoint 不該露出來。""" + # 完全不帶 DNA_ENABLE_TRANSCRIPT_PUBLISH + with mock.patch.dict(os.environ, {}, clear=False): + os.environ.pop("DNA_ENABLE_TRANSCRIPT_PUBLISH", None) + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 404 + + def test_happy_create_path(self, client, mock_storage, mock_prodtrack, override_deps): + """第一次推上去要 create,並且把 bookkeeping 寫回 storage。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "hello") + ] + mock_storage.get_published_transcript.return_value = None + mock_prodtrack.publish_transcript.return_value = 9001 + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["outcome"] == "created" + assert data["transcript_entity_id"] == 9001 + assert data["segments_count"] == 1 + + mock_prodtrack.publish_transcript.assert_called_once() + kwargs = mock_prodtrack.publish_transcript.call_args.kwargs + assert kwargs["project_id"] == 1 + assert kwargs["playlist_id"] == 42 + assert kwargs["version_id"] == 101 + assert kwargs["meeting_id"] == "m-abc" + assert kwargs["platform"] == "google_meet" + assert "A: hello" in kwargs["body"] + + mock_storage.upsert_published_transcript.assert_awaited_once() + + def test_republish_same_body_skips( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """body_hash 沒變就不要打 SG,回 skipped。""" + # 先跑一次拿到 body_hash + from 
dna.transcription_publish import build_transcript_payload + + seg = _segment("2026-04-15T10:00:00Z", "hello") + payload = build_transcript_payload([seg]) + + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [seg] + mock_storage.get_published_transcript.return_value = _published(payload.body_hash) + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["outcome"] == "skipped" + assert data["transcript_entity_id"] == 9001 + mock_prodtrack.publish_transcript.assert_not_called() + mock_prodtrack.update_transcript.assert_not_called() + + def test_republish_with_changes_updates( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """body_hash 不同要走 update,並且沿用既有的 sg_entity_id。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "new content") + ] + mock_storage.get_published_transcript.return_value = _published("old-hash") + mock_prodtrack.update_transcript.return_value = True + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["outcome"] == "updated" + assert data["transcript_entity_id"] == 9001 + + mock_prodtrack.publish_transcript.assert_not_called() + mock_prodtrack.update_transcript.assert_called_once() + kwargs = mock_prodtrack.update_transcript.call_args.kwargs + assert kwargs["entity_id"] == 9001 + assert "A: new content" in kwargs["body"] + + def test_missing_playlist_metadata_is_422( + self, client, mock_storage, mock_prodtrack, override_deps + ): + mock_storage.get_playlist_metadata.return_value = None + + with mock.patch.dict(os.environ, 
ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 422 + + def test_no_segments_is_422( + self, client, mock_storage, mock_prodtrack, override_deps + ): + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [] + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 422 + + def test_mock_provider_returns_501( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """用 mock prodtrack 時 provider 會丟 NotImplementedError,我們回 501。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "hi") + ] + mock_storage.get_published_transcript.return_value = None + mock_prodtrack.publish_transcript.side_effect = NotImplementedError( + "Transcript publishing requires a live ShotGrid connection." 
+ ) + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 501 + assert "ShotGrid" in response.json()["detail"] + + def test_version_without_project_is_404( + self, client, mock_storage, mock_prodtrack, override_deps + ): + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "hi") + ] + mock_storage.get_published_transcript.return_value = None + # version 沒有 project 的情況(通常是資料壞了) + version = mock.Mock() + version.project = None + mock_prodtrack.get_entity.return_value = version + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 404 From abd142a1a9888d563db22835a9103b0adf57d982 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:52:32 +0800 Subject: [PATCH 08/19] feat(api): POST /playlists/{id}/publish-transcript, flag-gated Wires the builder, the storage bookkeeping and the prodtrack provider. 
Behaviour: - DNA_ENABLE_TRANSCRIPT_PUBLISH must be "true" or endpoint returns 404 - no metadata / no segments -> 422 with clear detail - body_hash unchanged -> skipped, no provider call - existing row + changes -> update, sg_entity_id reused - no existing row -> create - mock provider raises -> 501 surfaces the user-facing message - version missing project -> 404 Contributes to #120 (slice 4/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/main.py | 110 ++++++++++++++++++ .../tests/test_publish_transcript_endpoint.py | 19 ++- 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index f972d24..b000e8e 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -42,8 +42,11 @@ PlaylistMetadata, PlaylistMetadataUpdate, Project, + PublishedTranscriptUpdate, PublishNotesRequest, PublishNotesResponse, + PublishTranscriptRequest, + PublishTranscriptResponse, SearchRequest, SearchResult, Shot, @@ -966,6 +969,113 @@ def _upload_attachments(sg_note_id: int, attachment_ids: list[str]) -> None: ) +def _transcript_publish_enabled() -> bool: + """讀 DNA_ENABLE_TRANSCRIPT_PUBLISH;沒設或不是 true 就當沒開。""" + return os.getenv("DNA_ENABLE_TRANSCRIPT_PUBLISH", "false").lower() == "true" + + +@app.post( + "/playlists/{playlist_id}/publish-transcript", + tags=["Playlists", "Transcription"], + summary="Publish a version's captured transcript", + description=( + "Push the stored transcript for a version to the production tracking " + "system as a single custom-entity row. Idempotent via body_hash." 
+ ), + response_model=PublishTranscriptResponse, +) +async def publish_transcript( + playlist_id: int, + request: PublishTranscriptRequest, + storage: StorageProviderDep, + prodtrack: ProdtrackProviderDep, + current_user: CurrentUserDep, +) -> PublishTranscriptResponse: + """Publish one version's transcript; skip when body_hash has not changed.""" + if not _transcript_publish_enabled(): + raise HTTPException(status_code=404, detail="Not Found") + + from dna.transcription_publish import build_transcript_payload + + metadata = await storage.get_playlist_metadata(playlist_id) + if metadata is None or not metadata.meeting_id: + raise HTTPException( + status_code=422, + detail="Playlist has no meeting associated yet", + ) + + segments = await storage.get_segments_for_version(playlist_id, request.version_id) + if not segments: + raise HTTPException( + status_code=422, + detail="No transcript segments stored for this version", + ) + + payload = build_transcript_payload(segments) + + existing = await storage.get_published_transcript( + playlist_id, request.version_id, metadata.meeting_id + ) + if existing and existing.body_hash == payload.body_hash: + return PublishTranscriptResponse( + transcript_entity_id=existing.sg_entity_id, + outcome="skipped", + skipped_reason="no_changes_since_last_publish", + segments_count=payload.segments_count, + ) + + version = prodtrack.get_entity("version", request.version_id, resolve_links=False) + if version is None or getattr(version, "project", None) is None: + raise HTTPException( + status_code=404, + detail="Version or its project could not be resolved", + ) + project_id = version.project.id + + try: + if existing: + prodtrack.update_transcript( + entity_id=existing.sg_entity_id, + body=payload.body, + meeting_date=payload.meeting_date, + ) + sg_entity_id = existing.sg_entity_id + outcome = "updated" + else: + sg_entity_id = prodtrack.publish_transcript( + project_id=project_id, + playlist_id=playlist_id, + 
version_id=request.version_id, + meeting_id=metadata.meeting_id, + meeting_date=payload.meeting_date, + platform=metadata.platform or "", + body=payload.body, + ) + outcome = "created" + except NotImplementedError as e: + raise HTTPException(status_code=501, detail=str(e)) + + entity_type = os.getenv("SHOTGRID_TRANSCRIPT_ENTITY", "CustomEntity01") + await storage.upsert_published_transcript( + PublishedTranscriptUpdate( + playlist_id=playlist_id, + version_id=request.version_id, + meeting_id=metadata.meeting_id, + sg_entity_type=entity_type, + sg_entity_id=sg_entity_id, + author_email=current_user, + body_hash=payload.body_hash, + segments_count=payload.segments_count, + ) + ) + + return PublishTranscriptResponse( + transcript_entity_id=sg_entity_id, + outcome=outcome, + segments_count=payload.segments_count, + ) + + # ----------------------------------------------------------------------------- # Draft Notes endpoints # ----------------------------------------------------------------------------- diff --git a/backend/tests/test_publish_transcript_endpoint.py b/backend/tests/test_publish_transcript_endpoint.py index 2d77dbe..be11248 100644 --- a/backend/tests/test_publish_transcript_endpoint.py +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -6,12 +6,11 @@ import pytest from fastapi.testclient import TestClient -from main import app, get_prodtrack_provider_cached, get_storage_provider_cached from dna.models.playlist_metadata import PlaylistMetadata from dna.models.published_transcript import PublishedTranscript from dna.models.stored_segment import StoredSegment - +from main import app, get_prodtrack_provider_cached, get_storage_provider_cached ENABLE_FLAG = {"DNA_ENABLE_TRANSCRIPT_PUBLISH": "true"} @@ -34,7 +33,9 @@ def _segment(start: str, text: str, speaker: str = "A") -> StoredSegment: ) -def _metadata(meeting_id: str = "m-abc", platform: str = "google_meet") -> PlaylistMetadata: +def _metadata( + meeting_id: str = "m-abc", platform: str = 
"google_meet" +) -> PlaylistMetadata: return PlaylistMetadata( _id="meta-id", playlist_id=42, @@ -86,7 +87,9 @@ def override_deps(self, mock_storage, mock_prodtrack): yield app.dependency_overrides.clear() - def test_flag_off_returns_404(self, client, mock_storage, mock_prodtrack, override_deps): + def test_flag_off_returns_404( + self, client, mock_storage, mock_prodtrack, override_deps + ): """沒開 feature flag 時必須 404。這個 endpoint 不該露出來。""" # 完全不帶 DNA_ENABLE_TRANSCRIPT_PUBLISH with mock.patch.dict(os.environ, {}, clear=False): @@ -98,7 +101,9 @@ def test_flag_off_returns_404(self, client, mock_storage, mock_prodtrack, overri assert response.status_code == 404 - def test_happy_create_path(self, client, mock_storage, mock_prodtrack, override_deps): + def test_happy_create_path( + self, client, mock_storage, mock_prodtrack, override_deps + ): """第一次推上去要 create,並且把 bookkeeping 寫回 storage。""" mock_storage.get_playlist_metadata.return_value = _metadata() mock_storage.get_segments_for_version.return_value = [ @@ -142,7 +147,9 @@ def test_republish_same_body_skips( mock_storage.get_playlist_metadata.return_value = _metadata() mock_storage.get_segments_for_version.return_value = [seg] - mock_storage.get_published_transcript.return_value = _published(payload.body_hash) + mock_storage.get_published_transcript.return_value = _published( + payload.body_hash + ) with mock.patch.dict(os.environ, ENABLE_FLAG): response = client.post( From 04a7f27c9aae808e69b11a7da6d93f86fd427d81 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sat, 18 Apr 2026 23:54:30 +0800 Subject: [PATCH 09/19] test(core): failing case for publishTranscript on ApiHandler One red case asserting the method posts to /playlists/{id}/publish-transcript with the typed body and returns the response envelope. 
Contributes to #120 (slice 5/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- frontend/packages/core/src/apiHandler.test.ts | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/frontend/packages/core/src/apiHandler.test.ts b/frontend/packages/core/src/apiHandler.test.ts index 39b1efb..8b7e973 100644 --- a/frontend/packages/core/src/apiHandler.test.ts +++ b/frontend/packages/core/src/apiHandler.test.ts @@ -675,4 +675,31 @@ describe('ApiHandler', () => { ).rejects.toThrow('Server error'); }); }); + + describe('publishTranscript', () => { + it('posts to the publish-transcript endpoint and returns the response', async () => { + const api = createApiHandler({ baseURL: 'http://localhost:8000' }); + mockAxiosInstance.post.mockResolvedValue({ + data: { + transcript_entity_id: 9001, + outcome: 'created', + skipped_reason: null, + segments_count: 12, + }, + }); + + const result = await api.publishTranscript({ + playlistId: 42, + request: { version_id: 101 }, + }); + + expect(mockAxiosInstance.post).toHaveBeenCalledWith( + '/playlists/42/publish-transcript', + { version_id: 101 } + ); + expect(result.outcome).toBe('created'); + expect(result.transcript_entity_id).toBe(9001); + expect(result.segments_count).toBe(12); + }); + }); }); From 6500a413440f87951d62249b2b76885464ff1be7 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:00:13 +0800 Subject: [PATCH 10/19] feat(core): PublishTranscript types + ApiHandler.publishTranscript Additive only. Existing exports untouched. Also tightens one adjacent mock cast in apiHandler.test.ts so the core typecheck passes (Boy Scout; the broken cast was in the same fixture block we just extended). 
Contributes to #120 (slice 5/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- frontend/packages/core/src/apiHandler.test.ts | 5 ++-- frontend/packages/core/src/apiHandler.ts | 27 ++++++++++++++----- frontend/packages/core/src/interfaces.ts | 16 +++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/frontend/packages/core/src/apiHandler.test.ts b/frontend/packages/core/src/apiHandler.test.ts index 8b7e973..7d783de 100644 --- a/frontend/packages/core/src/apiHandler.test.ts +++ b/frontend/packages/core/src/apiHandler.test.ts @@ -37,7 +37,7 @@ describe('ApiHandler', () => { }, }; - mockedAxios.create.mockReturnValue( + vi.mocked(mockedAxios.create).mockReturnValue( mockAxiosInstance as unknown as ReturnType ); }); @@ -695,7 +695,8 @@ describe('ApiHandler', () => { expect(mockAxiosInstance.post).toHaveBeenCalledWith( '/playlists/42/publish-transcript', - { version_id: 101 } + { version_id: 101 }, + undefined ); expect(result.outcome).toBe('created'); expect(result.transcript_entity_id).toBe(9001); diff --git a/frontend/packages/core/src/apiHandler.ts b/frontend/packages/core/src/apiHandler.ts index 5e26d59..e9ce205 100644 --- a/frontend/packages/core/src/apiHandler.ts +++ b/frontend/packages/core/src/apiHandler.ts @@ -24,6 +24,8 @@ import { GetVersionStatusesParams, PublishNotesParams, PublishNotesResponse, + PublishTranscriptParams, + PublishTranscriptResponse, DraftNote, Playlist, PlaylistMetadata, @@ -286,20 +288,33 @@ class ApiHandler { return this.get(`/playlists/${playlistId}/draft-notes`); } - async publishNotes(params: PublishNotesParams): Promise { + async publishNotes( + params: PublishNotesParams + ): Promise { return this.post( `/playlists/${params.playlistId}/publish-notes`, params.request ); } - async uploadAttachment(file: File): Promise<{ id: string; filename: string }> { + async publishTranscript( + params: PublishTranscriptParams + ): Promise { + return this.post( + 
`/playlists/${params.playlistId}/publish-transcript`, + params.request + ); + } + + async uploadAttachment( + file: File + ): Promise<{ id: string; filename: string }> { const formData = new FormData(); formData.append('file', file); - const response = await this.axiosInstance.postForm<{ id: string; filename: string }>( - '/api/attachments', - formData - ); + const response = await this.axiosInstance.postForm<{ + id: string; + filename: string; + }>('/api/attachments', formData); return response.data; } diff --git a/frontend/packages/core/src/interfaces.ts b/frontend/packages/core/src/interfaces.ts index a50ef99..c620c1c 100644 --- a/frontend/packages/core/src/interfaces.ts +++ b/frontend/packages/core/src/interfaces.ts @@ -447,3 +447,19 @@ export interface PublishNotesParams { playlistId: number; request: PublishNotesRequest; } + +export interface PublishTranscriptRequest { + version_id: number; +} + +export interface PublishTranscriptResponse { + transcript_entity_id: number; + outcome: 'created' | 'updated' | 'skipped'; + skipped_reason?: string | null; + segments_count: number; +} + +export interface PublishTranscriptParams { + playlistId: number; + request: PublishTranscriptRequest; +} From 6cba7c214b6519fd519f071353718019309b4048 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:01:29 +0800 Subject: [PATCH 11/19] test(hooks): failing cases for usePublishTranscript Two red tests: success path (posts + resolves) and error path (mutateAsync rejects with the axios error). Hook lands next. 
Contributes to #120 (slice 6/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- .../src/hooks/usePublishTranscript.test.tsx | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 frontend/packages/app/src/hooks/usePublishTranscript.test.tsx diff --git a/frontend/packages/app/src/hooks/usePublishTranscript.test.tsx b/frontend/packages/app/src/hooks/usePublishTranscript.test.tsx new file mode 100644 index 0000000..e4ab85c --- /dev/null +++ b/frontend/packages/app/src/hooks/usePublishTranscript.test.tsx @@ -0,0 +1,82 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { renderHook, waitFor, act } from '@testing-library/react'; +import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; +import { type ReactNode } from 'react'; +import { usePublishTranscript } from './usePublishTranscript'; +import { apiHandler } from '../api'; + +vi.mock('../api', () => ({ + apiHandler: { + publishTranscript: vi.fn(), + }, +})); + +const mockedApiHandler = vi.mocked(apiHandler); + +function createWrapper() { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { retry: false, gcTime: 0 }, + mutations: { retry: false }, + }, + }); + return function Wrapper({ children }: { children: ReactNode }) { + return ( + {children} + ); + }; +} + +describe('usePublishTranscript', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('calls apiHandler.publishTranscript and resolves with the response', async () => { + mockedApiHandler.publishTranscript.mockResolvedValue({ + transcript_entity_id: 9001, + outcome: 'created', + segments_count: 5, + }); + + const { result } = renderHook(() => usePublishTranscript(), { + wrapper: createWrapper(), + }); + + await act(async () => { + await result.current.mutateAsync({ + playlistId: 42, + request: { version_id: 101 }, + }); + }); + + await waitFor(() => expect(result.current.isSuccess).toBe(true)); + 
expect(mockedApiHandler.publishTranscript).toHaveBeenCalledWith({
+      playlistId: 42,
+      request: { version_id: 101 },
+    });
+    expect(result.current.data?.outcome).toBe('created');
+  });
+
+  it('surfaces errors back to the caller', async () => {
+    mockedApiHandler.publishTranscript.mockRejectedValue(new Error('boom'));
+
+    const { result } = renderHook(() => usePublishTranscript(), {
+      wrapper: createWrapper(),
+    });
+
+    await act(async () => {
+      try {
+        await result.current.mutateAsync({
+          playlistId: 42,
+          request: { version_id: 101 },
+        });
+      } catch {
+        // 預期會炸
+      }
+    });
+
+    await waitFor(() => expect(result.current.isError).toBe(true));
+    expect(result.current.error?.message).toBe('boom');
+  });
+});

From 737c5ec3fbe8b08246102f134454b0f6604b377b Mon Sep 17 00:00:00 2001
From: thc1006 <84045975+thc1006@users.noreply.github.com>
Date: Sun, 19 Apr 2026 00:02:29 +0800
Subject: [PATCH 12/19] test(ui): failing cases for PublishTranscriptDialog

Six red cases: dialog hidden when closed, summary shown when open,
publish disabled when segments=0, happy created path, skipped callout
path, and server-error callout. Implementation and the hook for the
dialog also land here (the hook is small enough that separating the
commit adds noise).
Contributes to #120 (slice 6/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- .../PublishTranscriptDialog.test.tsx | 143 ++++++++++++++++++ .../app/src/hooks/usePublishTranscript.ts | 21 +++ 2 files changed, 164 insertions(+) create mode 100644 frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx create mode 100644 frontend/packages/app/src/hooks/usePublishTranscript.ts diff --git a/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx b/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx new file mode 100644 index 0000000..2f0bf47 --- /dev/null +++ b/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx @@ -0,0 +1,143 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { render, screen, waitFor } from '../test/render'; +import userEvent from '@testing-library/user-event'; +import { PublishTranscriptDialog } from './PublishTranscriptDialog'; +import { apiHandler } from '../api'; + +vi.mock('../api', () => ({ + apiHandler: { + publishTranscript: vi.fn(), + }, +})); + +const mockedApiHandler = vi.mocked(apiHandler); + +describe('PublishTranscriptDialog', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('does not render the body when closed', () => { + render( + + ); + + expect(screen.queryByText(/Publish transcript/i)).not.toBeInTheDocument(); + }); + + it('shows the summary counts when open', () => { + render( + + ); + + expect(screen.getByText(/Publish transcript/i)).toBeInTheDocument(); + expect(screen.getByText(/12/)).toBeInTheDocument(); + }); + + it('disables the publish button when there are no segments', () => { + render( + + ); + + const button = screen.getByRole('button', { name: /^Publish$/i }); + expect(button).toBeDisabled(); + }); + + it('calls publishTranscript and shows the created outcome', async () => { + mockedApiHandler.publishTranscript.mockResolvedValue({ + transcript_entity_id: 9001, + outcome: 'created', + 
segments_count: 12, + }); + + render( + + ); + + const user = userEvent.setup(); + await user.click(screen.getByRole('button', { name: /^Publish$/i })); + + await waitFor(() => + expect(mockedApiHandler.publishTranscript).toHaveBeenCalledWith({ + playlistId: 42, + request: { version_id: 101 }, + }) + ); + await waitFor(() => + expect(screen.getByText(/Published/i)).toBeInTheDocument() + ); + }); + + it('renders the skipped callout when backend returns skipped', async () => { + mockedApiHandler.publishTranscript.mockResolvedValue({ + transcript_entity_id: 9001, + outcome: 'skipped', + skipped_reason: 'no_changes_since_last_publish', + segments_count: 12, + }); + + render( + + ); + + const user = userEvent.setup(); + await user.click(screen.getByRole('button', { name: /^Publish$/i })); + + await waitFor(() => + expect(screen.getByText(/No changes/i)).toBeInTheDocument() + ); + }); + + it('surfaces server errors in a red callout', async () => { + mockedApiHandler.publishTranscript.mockRejectedValue( + new Error('Server error') + ); + + render( + + ); + + const user = userEvent.setup(); + await user.click(screen.getByRole('button', { name: /^Publish$/i })); + + await waitFor(() => + expect(screen.getByText(/Server error/i)).toBeInTheDocument() + ); + }); +}); diff --git a/frontend/packages/app/src/hooks/usePublishTranscript.ts b/frontend/packages/app/src/hooks/usePublishTranscript.ts new file mode 100644 index 0000000..19f92b6 --- /dev/null +++ b/frontend/packages/app/src/hooks/usePublishTranscript.ts @@ -0,0 +1,21 @@ +import { useMutation, useQueryClient } from '@tanstack/react-query'; +import { apiHandler } from '../api'; +import type { + PublishTranscriptParams, + PublishTranscriptResponse, +} from '@dna/core'; + +export const usePublishTranscript = () => { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: (params) => apiHandler.publishTranscript(params), + onSuccess: (_, variables) => { + // 之後若有 "published transcripts" 列表的 
query,這裡可以加對應 key。 + // 目前 V1 沒有列表 UI,只需要單純讓外面知道 mutate 成功。 + queryClient.invalidateQueries({ + queryKey: ['publishedTranscripts', variables.playlistId, variables.request.version_id], + }); + }, + }); +}; From 39d8247feeca8a309b4185e21d713b14ffd15454 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:05:44 +0800 Subject: [PATCH 13/19] feat(app): PublishTranscriptDialog + trigger in TranscriptPanel Adds the hook, the Radix-Themes dialog, and a soft button at the top of the transcript panel. Visibility is gated by VITE_ENABLE_TRANSCRIPT_PUBLISH so the button only appears on builds that opted in. Contributes to #120 (slice 6/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- .../PublishTranscriptDialog.test.tsx | 2 + .../components/PublishTranscriptDialog.tsx | 142 ++++++++++++++++++ .../app/src/components/TranscriptPanel.tsx | 43 +++++- .../app/src/hooks/usePublishTranscript.ts | 26 ++-- 4 files changed, 201 insertions(+), 12 deletions(-) create mode 100644 frontend/packages/app/src/components/PublishTranscriptDialog.tsx diff --git a/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx b/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx index 2f0bf47..41d34b5 100644 --- a/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx +++ b/frontend/packages/app/src/components/PublishTranscriptDialog.test.tsx @@ -7,6 +7,8 @@ import { apiHandler } from '../api'; vi.mock('../api', () => ({ apiHandler: { publishTranscript: vi.fn(), + setUser: vi.fn(), + getUser: vi.fn().mockReturnValue(null), }, })); diff --git a/frontend/packages/app/src/components/PublishTranscriptDialog.tsx b/frontend/packages/app/src/components/PublishTranscriptDialog.tsx new file mode 100644 index 0000000..975d4eb --- /dev/null +++ b/frontend/packages/app/src/components/PublishTranscriptDialog.tsx @@ -0,0 +1,142 @@ +import React from 'react'; +import styled from 
'styled-components'; +import { Dialog, Button, Flex, Text, Callout } from '@radix-ui/themes'; +import { Info, Loader2 } from 'lucide-react'; +import { usePublishTranscript } from '../hooks/usePublishTranscript'; + +interface PublishTranscriptDialogProps { + open: boolean; + onClose: () => void; + playlistId: number; + versionId: number; + segmentsCount: number; +} + +const SummaryBox = styled.div` + display: flex; + flex-direction: column; + gap: 8px; + padding: 14px; + background: ${({ theme }) => theme.colors.bg.surfaceHover}; + border-radius: ${({ theme }) => theme.radii.md}; + margin-top: 12px; +`; + +const StatRow = styled.div` + display: flex; + align-items: center; + justify-content: space-between; + font-size: 13px; + color: ${({ theme }) => theme.colors.text.secondary}; +`; + +const SpinnerIcon = styled(Loader2)` + animation: spin 1s linear infinite; + @keyframes spin { + from { + transform: rotate(0deg); + } + to { + transform: rotate(360deg); + } + } +`; + +function outcomeMessage( + outcome: string, + skippedReason?: string | null +): string { + if (outcome === 'created') return 'Published to Flow Production Tracking.'; + if (outcome === 'updated') return 'Existing row updated with new content.'; + if (outcome === 'skipped') { + if (skippedReason === 'no_changes_since_last_publish') { + return 'No changes since the last publish.'; + } + return 'Skipped.'; + } + return outcome; +} + +export const PublishTranscriptDialog: React.FC< + PublishTranscriptDialogProps +> = ({ open, onClose, playlistId, versionId, segmentsCount }) => { + const { mutate, isPending, isError, error, data, reset } = + usePublishTranscript(); + + React.useEffect(() => { + if (open) reset(); + }, [open, reset]); + + const handlePublish = () => { + mutate({ playlistId, request: { version_id: versionId } }); + }; + + const canPublish = !isPending && segmentsCount > 0; + + return ( + !isOpen && !isPending && onClose()} + > + + Publish transcript + + + + Push the captured transcript for 
this version to the production + tracking system as a custom-entity row. + + + + + Version + {versionId} + + + Segments + {segmentsCount} + + + + {data && ( + + + + + + {data.outcome === 'created' && 'Published. '} + {data.outcome === 'updated' && 'Updated. '} + {outcomeMessage(data.outcome, data.skipped_reason)} + + + )} + + {isError && ( + + + + + + {error?.message || 'Failed to publish transcript'} + + + )} + + + + + + + + + + + ); +}; diff --git a/frontend/packages/app/src/components/TranscriptPanel.tsx b/frontend/packages/app/src/components/TranscriptPanel.tsx index 5999336..28b019d 100644 --- a/frontend/packages/app/src/components/TranscriptPanel.tsx +++ b/frontend/packages/app/src/components/TranscriptPanel.tsx @@ -1,8 +1,10 @@ -import { useEffect, useRef } from 'react'; +import { useEffect, useRef, useState } from 'react'; import styled from 'styled-components'; -import { Loader2, MessageSquare, AlertCircle } from 'lucide-react'; +import { Button } from '@radix-ui/themes'; +import { Loader2, MessageSquare, AlertCircle, Upload } from 'lucide-react'; import { useSegments } from '../hooks'; import { useConnectionStatus } from '../hooks/useDNAEvents'; +import { PublishTranscriptDialog } from './PublishTranscriptDialog'; interface TranscriptPanelProps { playlistId: number | null; @@ -84,6 +86,19 @@ const StatusBar = styled.div<{ $isConnected: boolean }>` background: ${({ theme }) => theme.colors.bg.surface}; `; +const PublishBar = styled.div` + display: flex; + justify-content: flex-end; + padding: 6px 12px; + border-bottom: 1px solid ${({ theme }) => theme.colors.border.subtle}; +`; + +function publishEnabled(): boolean { + // 部署時用 VITE_ENABLE_TRANSCRIPT_PUBLISH=true 打開,才會出現 Publish 按鈕 + const flag = import.meta.env.VITE_ENABLE_TRANSCRIPT_PUBLISH; + return flag === 'true' || flag === true; +} + const StatusDot = styled.div<{ $isConnected: boolean }>` width: 6px; height: 6px; @@ -106,11 +121,13 @@ export function TranscriptPanel({ versionId, }: 
TranscriptPanelProps) { const scrollRef = useRef(null); + const [publishOpen, setPublishOpen] = useState(false); const { isConnected } = useConnectionStatus(); const { segments, isLoading, isError, error } = useSegments({ playlistId, versionId, }); + const showPublish = publishEnabled() && !!playlistId && !!versionId; useEffect(() => { if (scrollRef.current && segments.length > 0) { @@ -166,6 +183,19 @@ export function TranscriptPanel({ {isConnected ? 'Live' : 'Reconnecting...'} • {segments.length} segments + {showPublish && ( + + + + )} {segments.map((segment) => ( @@ -177,6 +207,15 @@ export function TranscriptPanel({ ))} + {showPublish && playlistId !== null && versionId !== null && ( + setPublishOpen(false)} + playlistId={playlistId} + versionId={versionId} + segmentsCount={segments.length} + /> + )} ); } diff --git a/frontend/packages/app/src/hooks/usePublishTranscript.ts b/frontend/packages/app/src/hooks/usePublishTranscript.ts index 19f92b6..d52c8ef 100644 --- a/frontend/packages/app/src/hooks/usePublishTranscript.ts +++ b/frontend/packages/app/src/hooks/usePublishTranscript.ts @@ -8,14 +8,20 @@ import type { export const usePublishTranscript = () => { const queryClient = useQueryClient(); - return useMutation({ - mutationFn: (params) => apiHandler.publishTranscript(params), - onSuccess: (_, variables) => { - // 之後若有 "published transcripts" 列表的 query,這裡可以加對應 key。 - // 目前 V1 沒有列表 UI,只需要單純讓外面知道 mutate 成功。 - queryClient.invalidateQueries({ - queryKey: ['publishedTranscripts', variables.playlistId, variables.request.version_id], - }); - }, - }); + return useMutation( + { + mutationFn: (params) => apiHandler.publishTranscript(params), + onSuccess: (_, variables) => { + // 之後若有 "published transcripts" 列表的 query,這裡可以加對應 key。 + // 目前 V1 沒有列表 UI,只需要單純讓外面知道 mutate 成功。 + queryClient.invalidateQueries({ + queryKey: [ + 'publishedTranscripts', + variables.playlistId, + variables.request.version_id, + ], + }); + }, + } + ); }; From 44a3ba37917a646300dcf709328195dd063f98cc 
Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:07:10 +0800 Subject: [PATCH 14/19] docs(transcript): QUICKSTART, DEPLOYMENT, pipeline + ADR-005/006/007 - QUICKSTART: two new env-var rows for the feature flag and the SG custom-entity slot. - DEPLOYMENT: new "Transcript Publishing Setup" section with the SG site-side checklist (enable custom entity, fields, perms). - TRANSCRIPTION_PIPELINE: new section describing the publish data flow, the three Mongo collections touched, the SG field mapping, and three ADRs (custom entity choice, Mongo bookkeeping, publish-time builder). - example.docker-compose.local.yml: the two new vars default off. Contributes to #120 (slice 7/7) Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- DEPLOYMENT.md | 46 +++++++++++++ QUICKSTART.md | 2 + backend/docs/TRANSCRIPTION_PIPELINE.md | 83 ++++++++++++++++++++++++ backend/example.docker-compose.local.yml | 5 ++ 4 files changed, 136 insertions(+) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 9b653a2..6dc30d2 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -230,6 +230,52 @@ echo -n "new-value" | gcloud secrets versions add SECRET_NAME --data-file=- --- +## Transcript Publishing Setup (optional, issue #120) + +`POST /playlists/{id}/publish-transcript` is feature-flagged off by default. +Turn it on only after the ShotGrid site is prepared. + +### ShotGrid site-side checklist + +1. In **Site Preferences -> Entities**, enable one of the `CustomEntityNN` + slots and set its display name (e.g. "DNA Note"). Note the slot + number — the API still addresses it as `CustomEntityNN`, not the + display name. +2. 
On that custom entity, add the following fields: + - `code` (text, built-in) + - `project` (entity link -> Project, built-in) + - `sg_playlist` (entity link -> Playlist) + - `sg_versions` (multi-entity link -> Version) + - `sg_meeting_id` (text) + - `sg_meeting_date` (date) + - `sg_platform` (list: `google_meet`, `teams`) + - `sg_summary` (text, long; left blank by V1, users fill in manually) + - `sg_transcript_body` (text, long) +3. Grant the DNA script user read/create/update on the new entity. + +### DNA side + +Set both variables. The endpoint stays 404 without the flag. + +``` +DNA_ENABLE_TRANSCRIPT_PUBLISH=true +SHOTGRID_TRANSCRIPT_ENTITY=CustomEntity05 # whichever slot you enabled +``` + +For the frontend build, also set the Vite flag so the Publish button +renders: + +``` +VITE_ENABLE_TRANSCRIPT_PUBLISH=true +``` + +If the flag is off or the custom entity has not been provisioned, the +backend returns 404 on that route; the frontend does not show the +Publish button. Dropping the flag reverts behaviour with no data +migration. + +--- + ## Authentication Setup DNA uses Google OAuth for authentication. Users sign in with their Google accounts, and the backend validates Google tokens. diff --git a/QUICKSTART.md b/QUICKSTART.md index 705e9d9..f87c25b 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -152,6 +152,8 @@ The React app will be available at `http://localhost:5173`. | `GEMINI_MODEL` | No | `gemini-2.5-flash` | Gemini model to use when `LLM_PROVIDER=gemini` | | `GEMINI_TIMEOUT` | No | `30.0` | Request timeout in seconds when `LLM_PROVIDER=gemini` | | `GEMINI_URL` | No | `https://generativelanguage.googleapis.com/v1beta/openai/` | Override the Gemini OpenAI-compatible base URL | +| `DNA_ENABLE_TRANSCRIPT_PUBLISH` | No | `false` | Set to `true` to enable `POST /playlists/{id}/publish-transcript`. When off, the endpoint returns 404. | +| `SHOTGRID_TRANSCRIPT_ENTITY` | No | `CustomEntity01` | ShotGrid custom entity slot used when publishing transcripts. 
Match whichever `CustomEntityNN` the site admin has enabled. | | `PYTHONUNBUFFERED` | No | `1` | Disable Python output buffering | ### Vexa Service (`vexa` service) diff --git a/backend/docs/TRANSCRIPTION_PIPELINE.md b/backend/docs/TRANSCRIPTION_PIPELINE.md index 72b3d0b..0368655 100644 --- a/backend/docs/TRANSCRIPTION_PIPELINE.md +++ b/backend/docs/TRANSCRIPTION_PIPELINE.md @@ -1340,3 +1340,86 @@ logging.getLogger("dna.events.event_publisher").setLevel(logging.DEBUG) - The bot remains in the meeting during pause, ready to resume instantly - `transcription_resumed_at` prevents replay of stale segments - Minimal state changes: only a boolean flag and an optional timestamp + +--- + +## Publishing to the Production Tracking System + +Tracked by issue #120. Off by default behind `DNA_ENABLE_TRANSCRIPT_PUBLISH=true`. + +### Pipeline + +``` +POST /playlists/{playlist_id}/publish-transcript {version_id} + -> storage.get_playlist_metadata(playlist_id) # meeting_id, platform + -> storage.get_segments_for_version(...) # existing call + -> build_transcript_payload(segments) # pure, dedupe + collapse + -> storage.get_published_transcript(...) # bookkeeping lookup + -> prodtrack.publish_transcript(...) / update_transcript(entity_id, ...) + -> storage.upsert_published_transcript(...) + -> { transcript_entity_id, outcome: created | updated | skipped } +``` + +### Collections touched + +| Collection | Used for | +|------------|----------| +| `segments` | Source of the transcript body (read-only here) | +| `playlist_metadata` | Pulls `meeting_id` + `platform` | +| `published_transcripts` | Stores the SG entity ID and body_hash per `(playlist_id, version_id, meeting_id)` | + +### ShotGrid side + +Publishes a row into `SHOTGRID_TRANSCRIPT_ENTITY` (default `CustomEntity01`). 
+Payload mapping: + +| DNA field | ShotGrid field | +|-----------|----------------| +| `code` (auto) | `code` | +| `project_id` | `project` | +| `playlist_id` | `sg_playlist` | +| `[version_id]` | `sg_versions` | +| `meeting_id` | `sg_meeting_id` | +| `meeting_date` | `sg_meeting_date` | +| `platform` | `sg_platform` | +| `body` | `sg_transcript_body` | + +`sg_summary` is intentionally left blank in V1 so studio staff can fill it +on the ShotGrid side without the publisher overwriting it. + +### ADR-005: Custom entity, not a ShotGrid Note + +**Decision:** Transcripts live in a custom entity (configurable via +`SHOTGRID_TRANSCRIPT_ENTITY`), not as ShotGrid `Note` rows. + +**Rationale:** +- Notes are tied to review addressings and read state; transcripts are + reference material with different fields. +- Admins can restrict the custom-entity page per the mockup on #120 + without affecting Notes. +- The field shape (playlist link + multi-version link + `sg_platform` + list + long `sg_transcript_body`) does not fit Note's schema. + +### ADR-006: Idempotence via body_hash in MongoDB, not SG lookup + +**Decision:** Track which `(playlist, version, meeting)` tuples have +been published in a local Mongo collection. Skip re-publish when the +new body_hash matches the stored one. + +**Rationale:** +- SG is not efficiently queryable for "has this been published before". +- The existing DraftNote publish path uses the same pattern + (`published_note_id` on the draft). +- Loss of the Mongo row is a known edge-case; duplicate SG rows in that + scenario are an acceptable V1 trade-off documented on issue #120. + +### ADR-007: Build publishable body at publish time, not ingest time + +**Decision:** `build_transcript_payload` is called inside the publish +endpoint, not in the ingest pipeline. + +**Rationale:** +- Dedup rules may change once issue #135 lands (Vexa-side segment IDs + become authoritative). 
Keeping the builder isolated means that change + is one file here rather than a re-ingest. +- The builder is pure and trivially testable, unlike the ingest loop. diff --git a/backend/example.docker-compose.local.yml b/backend/example.docker-compose.local.yml index b0095bb..2c89e4b 100644 --- a/backend/example.docker-compose.local.yml +++ b/backend/example.docker-compose.local.yml @@ -14,3 +14,8 @@ services: - VEXA_API_URL=http://vexa:8056 - OPENAI_API_KEY=your-openai-api-key - AUTH_PROVIDER=none + # Transcript publishing (V1, disabled by default). Set to "true" to + # expose POST /playlists/{id}/publish-transcript. See DEPLOYMENT.md + # for the ShotGrid site-setup checklist the custom entity depends on. + - DNA_ENABLE_TRANSCRIPT_PUBLISH=false + - SHOTGRID_TRANSCRIPT_ENTITY=CustomEntity01 From 3407ee2c421180bff8ba8157b6e588df4b0b09e1 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:20:26 +0800 Subject: [PATCH 15/19] fix(transcript-publish): review-pass corrections Self-review after integration surfaced five defects that all failed against tightened tests: 1. main.py:1033 used version.project.id, but Version.project is dict[str, Any] in the DNA model. Now reads project.get("id"). The prior test mocked version.project as an object which hid this bug; the mock is now a dict matching reality. 2. main.py called prodtrack.get_entity() without catching ValueError. Both ShotgridProvider and MockProdtrackProvider raise ValueError on missing entities, so a stale version_id produced 500 instead of the intended 404. Now wrapped in try/except. 3. main.py ignored the boolean returned by update_transcript. A silent SG-side failure advanced body_hash in Mongo, which made the next publish return "skipped" while SG stayed stale. Now surfaces 502 and skips the bookkeeping upsert. 4. 
transcription_publish._first_segment_date mishandled naive ISO timestamps: .astimezone(UTC) on a naive datetime treats it as local time, so non-UTC hosts could shift meeting_date by a day. Now attaches UTC when tzinfo is missing. Verified red under TZ=America/New_York. 5. mongodb.upsert_published_transcript placed the composite key in $set. Functionally harmless but inconsistent with upsert_draft_note. Moved playlist_id/version_id/meeting_id to $setOnInsert. Three new tests (missing-version, update-failure, naive-tz) guard the fixed behaviour so these do not regress. 547 passed, coverage 91%. Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/dna/storage_providers/mongodb.py | 15 +++++-- backend/src/dna/transcription_publish.py | 6 ++- backend/src/main.py | 25 ++++++++--- .../tests/test_publish_transcript_endpoint.py | 45 ++++++++++++++++++- backend/tests/test_storage_providers.py | 8 ++++ backend/tests/test_transcription_publish.py | 16 +++++++ 6 files changed, 104 insertions(+), 11 deletions(-) diff --git a/backend/src/dna/storage_providers/mongodb.py b/backend/src/dna/storage_providers/mongodb.py index 32be509..b982baa 100644 --- a/backend/src/dna/storage_providers/mongodb.py +++ b/backend/src/dna/storage_providers/mongodb.py @@ -347,11 +347,18 @@ async def upsert_published_transcript( "version_id": data.version_id, "meeting_id": data.meeting_id, } - # model_dump 已經把 query 那三欄一起帶進來,$set 時一併寫入沒關係; - # 真正要分開的只有 created_at(只有新增時才需要) + # composite key 只在 insert 時寫入;一般欄位用 $set。 + # 對齊 upsert_draft_note 的用法。 + payload = data.model_dump() + set_on_insert = { + "playlist_id": payload.pop("playlist_id"), + "version_id": payload.pop("version_id"), + "meeting_id": payload.pop("meeting_id"), + "created_at": now, + } update: dict[str, Any] = { - "$set": {**data.model_dump(), "updated_at": now}, - "$setOnInsert": {"created_at": now}, + "$set": {**payload, "updated_at": now}, + "$setOnInsert": set_on_insert, } result = await 
self.published_transcripts_collection.find_one_and_update( query, update, upsert=True, return_document=ReturnDocument.AFTER diff --git a/backend/src/dna/transcription_publish.py b/backend/src/dna/transcription_publish.py index 7d3a2ea..eaa95f8 100644 --- a/backend/src/dna/transcription_publish.py +++ b/backend/src/dna/transcription_publish.py @@ -75,4 +75,8 @@ def _first_segment_date(ordered: list[StoredSegment]) -> date: raw = ordered[0].absolute_start_time # ISO 8601 的 Z 字尾 fromisoformat 吃不下,先換成 +00:00 normalized = raw.replace("Z", "+00:00") if raw.endswith("Z") else raw - return datetime.fromisoformat(normalized).astimezone(timezone.utc).date() + dt = datetime.fromisoformat(normalized) + # 沒帶時區的情況,按 StoredSegment 欄位的規範當成 UTC,不要讓本機時區 infer + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc).date() diff --git a/backend/src/main.py b/backend/src/main.py index b000e8e..578773d 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1024,21 +1024,36 @@ async def publish_transcript( segments_count=payload.segments_count, ) - version = prodtrack.get_entity("version", request.version_id, resolve_links=False) - if version is None or getattr(version, "project", None) is None: + try: + version = prodtrack.get_entity( + "version", request.version_id, resolve_links=False + ) + except ValueError as e: + # get_entity 找不到對應資料時會 raise ValueError,這裡轉成 404 + raise HTTPException(status_code=404, detail=str(e)) + + # Version.project 是 dict (type/id/name),不是物件。不要用 .id 存取。 + project_ref = getattr(version, "project", None) + project_id = project_ref.get("id") if isinstance(project_ref, dict) else None + if project_id is None: raise HTTPException( status_code=404, - detail="Version or its project could not be resolved", + detail="Version has no project associated", ) - project_id = version.project.id try: if existing: - prodtrack.update_transcript( + updated = prodtrack.update_transcript( entity_id=existing.sg_entity_id, 
body=payload.body, meeting_date=payload.meeting_date, ) + if not updated: + # SG 更新失敗時千萬不能把 body_hash 往前推,否則下次會誤判 skipped + raise HTTPException( + status_code=502, + detail="Failed to update transcript on the tracking system", + ) sg_entity_id = existing.sg_entity_id outcome = "updated" else: diff --git a/backend/tests/test_publish_transcript_endpoint.py b/backend/tests/test_publish_transcript_endpoint.py index be11248..643cd02 100644 --- a/backend/tests/test_publish_transcript_endpoint.py +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -76,7 +76,9 @@ def mock_storage(self): def mock_prodtrack(self): p = mock.Mock() version = mock.Mock() - version.project = mock.Mock(id=1) + # 真實的 ShotgridProvider 回來的 Version.project 是 dict,不是物件。 + # 用物件 mock 會把下面 version.project.id 這種筆誤藏起來。 + version.project = {"type": "Project", "id": 1} p.get_entity.return_value = version return p @@ -261,3 +263,44 @@ def test_version_without_project_is_404( ) assert response.status_code == 404 + + def test_missing_version_returns_404( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """get_entity 對不存在的 version 會 raise ValueError,要接住轉成 404。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "hi") + ] + mock_storage.get_published_transcript.return_value = None + mock_prodtrack.get_entity.side_effect = ValueError( + "Entity not found: version 101" + ) + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 404 + + def test_update_failure_does_not_advance_body_hash( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """update_transcript 回傳 False 時要報錯,且不能把新 body_hash 存起來。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + 
_segment("2026-04-15T10:00:00Z", "new content") + ] + mock_storage.get_published_transcript.return_value = _published("old-hash") + mock_prodtrack.update_transcript.return_value = False + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 502 + mock_storage.upsert_published_transcript.assert_not_awaited() diff --git a/backend/tests/test_storage_providers.py b/backend/tests/test_storage_providers.py index 8a5d1d8..c48dc57 100644 --- a/backend/tests/test_storage_providers.py +++ b/backend/tests/test_storage_providers.py @@ -901,8 +901,16 @@ async def test_upsert_published_transcript_upserts_by_composite_key(self, provid "meeting_id": "meet-abc", } update = call_args[0][1] + # 跟 upsert_draft_note 的慣例對齊:composite key 只放在 $setOnInsert, + # 避免 $set 把 query 欄位重寫一次造成 review 起來難看 assert update["$set"]["body_hash"] == "deadbeef" assert update["$set"]["sg_entity_id"] == 9001 assert "updated_at" in update["$set"] + assert "playlist_id" not in update["$set"] + assert "version_id" not in update["$set"] + assert "meeting_id" not in update["$set"] + assert update["$setOnInsert"]["playlist_id"] == 42 + assert update["$setOnInsert"]["version_id"] == 7 + assert update["$setOnInsert"]["meeting_id"] == "meet-abc" assert update["$setOnInsert"]["created_at"] is not None assert call_args[1]["upsert"] is True diff --git a/backend/tests/test_transcription_publish.py b/backend/tests/test_transcription_publish.py index 81ba006..dbd571b 100644 --- a/backend/tests/test_transcription_publish.py +++ b/backend/tests/test_transcription_publish.py @@ -150,3 +150,19 @@ def test_whitespace_only_text_is_dropped(self): assert payload.body == "Speaker A: valid" assert payload.segments_count == 1 + + def test_naive_start_time_treated_as_utc(self): + """沒時區的時間戳要當成 UTC;不可以讓 astimezone 用本機時區去 infer。""" + segments = [ + _segment( + segment_id="1", + text="late night", + # 主機時區若非 
UTC,naive + astimezone 會把日期推到 04-16 + start="2026-04-15T23:30:00", + end="2026-04-15T23:30:05", + ) + ] + + payload = build_transcript_payload(segments) + + assert payload.meeting_date == date(2026, 4, 15) From 3708264889d5fc7286a91aff89ab741261ce0280 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:35:28 +0800 Subject: [PATCH 16/19] fix(transcript-publish): second-pass corrections Two more defects caught on a second self-review round: 1. platform=metadata.platform or "" sent an empty string to SG's sg_platform list field when metadata was created without a platform. SG schema rejects it with an opaque Fault. The endpoint now checks metadata.platform up-front and returns 422 with a clear reason, and no longer coerces None to "" on the way out. 2. SG create succeeded but upsert_published_transcript could raise (mongo hiccup, unique-index race, etc). Previously the 500 had no context, so a client retry would create a second SG row. The upsert is now wrapped: the error message includes the sg_entity_id that is already on the tracking system and is logged at exception level for operators. The error wording also tells clients not to blind-retry. Two red-first tests guard the new behaviour: - test_metadata_without_platform_is_422 - test_bookkeeping_failure_after_sg_create_is_surfaced 549 passed, coverage 91%. 
Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/src/main.py | 54 ++++++++++++++----- .../tests/test_publish_transcript_endpoint.py | 40 ++++++++++++++ 2 files changed, 82 insertions(+), 12 deletions(-) diff --git a/backend/src/main.py b/backend/src/main.py index 578773d..56f6eef 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1,5 +1,6 @@ """FastAPI application entry point.""" +import logging import os import shutil import uuid @@ -1003,6 +1004,13 @@ async def publish_transcript( status_code=422, detail="Playlist has no meeting associated yet", ) + if not metadata.platform: + # platform 是 SG 那邊的 list field,空字串會被站台 schema 拒; + # 比起讓 SG 回一個看不懂的 Fault,這裡攔下給明確錯誤。 + raise HTTPException( + status_code=422, + detail="Playlist metadata has no platform recorded", + ) segments = await storage.get_segments_for_version(playlist_id, request.version_id) if not segments: @@ -1063,7 +1071,7 @@ async def publish_transcript( version_id=request.version_id, meeting_id=metadata.meeting_id, meeting_date=payload.meeting_date, - platform=metadata.platform or "", + platform=metadata.platform, body=payload.body, ) outcome = "created" @@ -1071,18 +1079,40 @@ async def publish_transcript( raise HTTPException(status_code=501, detail=str(e)) entity_type = os.getenv("SHOTGRID_TRANSCRIPT_ENTITY", "CustomEntity01") - await storage.upsert_published_transcript( - PublishedTranscriptUpdate( - playlist_id=playlist_id, - version_id=request.version_id, - meeting_id=metadata.meeting_id, - sg_entity_type=entity_type, - sg_entity_id=sg_entity_id, - author_email=current_user, - body_hash=payload.body_hash, - segments_count=payload.segments_count, + try: + await storage.upsert_published_transcript( + PublishedTranscriptUpdate( + playlist_id=playlist_id, + version_id=request.version_id, + meeting_id=metadata.meeting_id, + sg_entity_type=entity_type, + sg_entity_id=sg_entity_id, + author_email=current_user, + body_hash=payload.body_hash, + 
segments_count=payload.segments_count, + ) + ) + except Exception as e: + # SG 那邊已經寫進去了,但本地 bookkeeping 沒跟上。 + # 下次同樣 body 的請求會再在 SG 建一列(因為 existing 會是 None)。 + # 把 entity_id 放進錯誤訊息,讓 operator 能去 SG 手動善後; + # 同時讓 client 知道這個錯誤**不該直接重試**。 + logger = logging.getLogger(__name__) + logger.exception( + "Transcript %s created on tracking system id=%s but local " + "bookkeeping failed. Next publish will create a duplicate unless " + "the SG row is removed or the bookkeeping row is written manually.", + outcome, + sg_entity_id, + ) + raise HTTPException( + status_code=500, + detail=( + f"Transcript row {sg_entity_id} was {outcome} on the tracking " + f"system but local bookkeeping failed ({e.__class__.__name__}). " + f"Do not retry blindly; reconcile the row manually." + ), ) - ) return PublishTranscriptResponse( transcript_entity_id=sg_entity_id, diff --git a/backend/tests/test_publish_transcript_endpoint.py b/backend/tests/test_publish_transcript_endpoint.py index 643cd02..225ed4a 100644 --- a/backend/tests/test_publish_transcript_endpoint.py +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -304,3 +304,43 @@ def test_update_failure_does_not_advance_body_hash( assert response.status_code == 502 mock_storage.upsert_published_transcript.assert_not_awaited() + + def test_metadata_without_platform_is_422( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """platform 為 None / 空字串時拒絕,避免把空值丟到 SG 的 list field。""" + mock_storage.get_playlist_metadata.return_value = _metadata(platform="") + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 422 + mock_prodtrack.publish_transcript.assert_not_called() + + def test_bookkeeping_failure_after_sg_create_is_surfaced( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """SG 已經 create 但 Mongo upsert 爆炸時要 surface 500,並帶 entity_id + 讓 operator 知道 SG 側有 orphan 
要善後,下次請求不可直接重試。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "hi") + ] + mock_storage.get_published_transcript.return_value = None + mock_prodtrack.publish_transcript.return_value = 9001 + mock_storage.upsert_published_transcript.side_effect = RuntimeError( + "mongo connection lost" + ) + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 500 + # entity_id 必須在錯誤訊息裡,operator 才能去 SG 手動刪除 + assert "9001" in response.json()["detail"] From 4c416dad1981c865b27f84af6691fcc2e5b2b5de Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:45:27 +0800 Subject: [PATCH 17/19] fix(transcript-publish): third-pass corrections Three more defects caught by running the distributed-invariants lens over the flow: 1. update_transcript used the *current* SHOTGRID_TRANSCRIPT_ENTITY env to route the SG call. If a studio migrates the custom-entity slot (enabling CustomEntity05 after starting on CustomEntity01), the bookkeeping still points at the original entity_id, and SG rejects the update because 9001 is not a row on CustomEntity05. The provider now takes entity_type as a required kwarg and the endpoint passes existing.sg_entity_type from the bookkeeping row. 2. The 422 "no segments" check runs against the raw list. When every segment contains only whitespace, build_transcript_payload filters them all out and we would happily push an empty body to SG. Added a second 422 after build with a clear "nothing to publish" detail. 3. test_empty_list_returns_empty_body asserted equality against datetime.now().date() twice (once in prod code, once in the test). Theoretically flaky on UTC midnight rollover. Loosened to an isinstance(date) check. 552 passed, coverage 91%. 
Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- .../prodtrack_provider_base.py | 5 ++ .../src/dna/prodtrack_providers/shotgrid.py | 3 +- backend/src/main.py | 8 +++ .../tests/providers/test_providers_base.py | 1 + .../tests/test_publish_transcript_endpoint.py | 59 +++++++++++++++++++ backend/tests/test_shotgrid_provider.py | 23 ++++++++ backend/tests/test_transcription_publish.py | 4 +- 7 files changed, 100 insertions(+), 3 deletions(-) diff --git a/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py b/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py index 17d9ca6..f41c767 100644 --- a/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py +++ b/backend/src/dna/prodtrack_providers/prodtrack_provider_base.py @@ -211,12 +211,17 @@ def publish_transcript( def update_transcript( self, *, + entity_type: str, entity_id: int, body: str, meeting_date: date, ) -> bool: """Update body + meeting_date on an existing transcript entity. + `entity_type` must come from the caller's bookkeeping (whichever + custom-entity slot the row was originally created in). Reading the + current env var here would misfire if studios migrate between slots. + Only body and meeting_date are touched on purpose; summary and other fields are left alone so manual edits on the tracking-system side survive a re-publish. 
diff --git a/backend/src/dna/prodtrack_providers/shotgrid.py b/backend/src/dna/prodtrack_providers/shotgrid.py index e39d14d..6ab17bb 100644 --- a/backend/src/dna/prodtrack_providers/shotgrid.py +++ b/backend/src/dna/prodtrack_providers/shotgrid.py @@ -1024,6 +1024,7 @@ def publish_transcript( def update_transcript( self, *, + entity_type: str, entity_id: int, body: str, meeting_date: date, @@ -1033,7 +1034,7 @@ def update_transcript( return False try: self._sg.update( - _transcript_entity_type(), + entity_type, entity_id, { "sg_transcript_body": body, diff --git a/backend/src/main.py b/backend/src/main.py index 56f6eef..6e75d42 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -1020,6 +1020,12 @@ async def publish_transcript( ) payload = build_transcript_payload(segments) + if payload.segments_count == 0: + # 原始 list 不空但過完 whitespace filter 後清光光;不要在 SG 留空 row + raise HTTPException( + status_code=422, + detail="All stored segments were empty; nothing to publish", + ) existing = await storage.get_published_transcript( playlist_id, request.version_id, metadata.meeting_id @@ -1051,7 +1057,9 @@ async def publish_transcript( try: if existing: + # 用 bookkeeping 記的 entity_type,不是當前 env;避免站台改設定時 update 打錯 slot updated = prodtrack.update_transcript( + entity_type=existing.sg_entity_type, entity_id=existing.sg_entity_id, body=payload.body, meeting_date=payload.meeting_date, diff --git a/backend/tests/providers/test_providers_base.py b/backend/tests/providers/test_providers_base.py index 38ac049..eda8ee6 100644 --- a/backend/tests/providers/test_providers_base.py +++ b/backend/tests/providers/test_providers_base.py @@ -27,6 +27,7 @@ def test_update_transcript_raises_not_implemented(self): provider = ProdtrackProviderBase() with pytest.raises(NotImplementedError): provider.update_transcript( + entity_type="CustomEntity01", entity_id=9001, body="Speaker: updated", meeting_date=date(2026, 4, 15), diff --git a/backend/tests/test_publish_transcript_endpoint.py 
b/backend/tests/test_publish_transcript_endpoint.py index 225ed4a..efd4e6a 100644 --- a/backend/tests/test_publish_transcript_endpoint.py +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -320,6 +320,65 @@ def test_metadata_without_platform_is_422( assert response.status_code == 422 mock_prodtrack.publish_transcript.assert_not_called() + def test_update_path_uses_stored_entity_type_not_current_env( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """env var 改過以後,update 仍然要打到**原本** create 它那個 entity type。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", "changed") + ] + # 原本是在 CustomEntity01 那邊 create 的 + mock_storage.get_published_transcript.return_value = PublishedTranscript( + _id="pt-id", + playlist_id=42, + version_id=101, + meeting_id="m-abc", + sg_entity_type="CustomEntity01", + sg_entity_id=9001, + author_email="user@test.com", + body_hash="old-hash", + segments_count=1, + created_at=datetime.now(timezone.utc), + updated_at=datetime.now(timezone.utc), + ) + mock_prodtrack.update_transcript.return_value = True + + # 現在 env 被改成 CustomEntity05,但 9001 還是屬於 CustomEntity01 + with mock.patch.dict( + os.environ, + {**ENABLE_FLAG, "SHOTGRID_TRANSCRIPT_ENTITY": "CustomEntity05"}, + ): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 200 + kwargs = mock_prodtrack.update_transcript.call_args.kwargs + # 必須指定原本的 CustomEntity01,不能跟著 env 走 + assert kwargs.get("entity_type") == "CustomEntity01" + + def test_all_segments_whitespace_is_422( + self, client, mock_storage, mock_prodtrack, override_deps + ): + """通過原始 segments 的空檢查,但 build 完全被過濾掉 → 不該 publish 空 row。""" + mock_storage.get_playlist_metadata.return_value = _metadata() + mock_storage.get_segments_for_version.return_value = [ + _segment("2026-04-15T10:00:00Z", " "), + _segment("2026-04-15T10:00:05Z", 
""), + ] + + with mock.patch.dict(os.environ, ENABLE_FLAG): + response = client.post( + "/playlists/42/publish-transcript", + json={"version_id": 101}, + ) + + assert response.status_code == 422 + mock_prodtrack.publish_transcript.assert_not_called() + mock_prodtrack.update_transcript.assert_not_called() + def test_bookkeeping_failure_after_sg_create_is_surfaced( self, client, mock_storage, mock_prodtrack, override_deps ): diff --git a/backend/tests/test_shotgrid_provider.py b/backend/tests/test_shotgrid_provider.py index eb462a7..cda9f4e 100644 --- a/backend/tests/test_shotgrid_provider.py +++ b/backend/tests/test_shotgrid_provider.py @@ -410,6 +410,7 @@ def test_update_transcript_only_patches_body_and_date(self, provider, mock_shotg provider.sg = mock_sg_instance ok = provider.update_transcript( + entity_type="CustomEntity01", entity_id=9001, body="Cameron: updated", meeting_date=date_(2026, 4, 16), @@ -425,6 +426,27 @@ def test_update_transcript_only_patches_body_and_date(self, provider, mock_shotg "sg_meeting_date": "2026-04-16", } + def test_update_transcript_uses_caller_supplied_entity_type( + self, provider, mock_shotgun + ): + """entity_type 必須用 caller 傳進來的,不能偷讀環境變數。""" + from datetime import date as date_ + + mock_sg_instance = mock_shotgun.return_value + provider.sg = mock_sg_instance + + with mock.patch.dict( + os.environ, {"SHOTGRID_TRANSCRIPT_ENTITY": "CustomEntity99"} + ): + provider.update_transcript( + entity_type="CustomEntity01", + entity_id=9001, + body="x", + meeting_date=date_(2026, 4, 16), + ) + + assert mock_sg_instance.update.call_args[0][0] == "CustomEntity01" + def test_update_transcript_swallows_sg_errors_and_returns_false( self, provider, mock_shotgun ): @@ -436,6 +458,7 @@ def test_update_transcript_swallows_sg_errors_and_returns_false( mock_sg_instance.update.side_effect = Exception("sg boom") ok = provider.update_transcript( + entity_type="CustomEntity01", entity_id=9001, body="x", meeting_date=date_(2026, 4, 16), diff --git 
a/backend/tests/test_transcription_publish.py b/backend/tests/test_transcription_publish.py index dbd571b..90d5ec0 100644 --- a/backend/tests/test_transcription_publish.py +++ b/backend/tests/test_transcription_publish.py @@ -42,8 +42,8 @@ def test_empty_list_returns_empty_body(self): assert payload.body == "" assert payload.segments_count == 0 assert payload.body_hash == sha256(b"").hexdigest() - # 沒有 segment 時退而求其次取今天,主要是讓呼叫端不用處理 None - assert payload.meeting_date == datetime.now(timezone.utc).date() + # 沒有 segment 時退而求其次取 "現在";只驗是個 date,避免跨日的毫秒 flaky + assert isinstance(payload.meeting_date, date) def test_single_segment_renders_one_line(self): segments = [_segment(text="Hello world", speaker="Cameron")] From a366612fe03f78620b193f7cfb852a94176c25d5 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:20:04 +0800 Subject: [PATCH 18/19] fix(transcript-publish): a11y + doc drift 1. PublishTranscriptDialog was missing a Dialog.Description, so Radix emitted a dev warning on every render and screen-reader users got no context for the content. The copy that was already inside the dialog body moved to Dialog.Description. 2. TRANSCRIPTION_PIPELINE.md still described the pre-round-3 signature of update_transcript. The data-flow block now shows entity_type sourced from env on create and from the bookkeeping row on update; ADR-006 was expanded to make the "pin to bookkeeping, not env" rule explicit so a later refactor does not silently reintroduce the env- drift bug. 552 passed, coverage 91%. 
Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/docs/TRANSCRIPTION_PIPELINE.md | 13 +++++++++++-- .../app/src/components/PublishTranscriptDialog.tsx | 9 ++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/backend/docs/TRANSCRIPTION_PIPELINE.md b/backend/docs/TRANSCRIPTION_PIPELINE.md index 0368655..80fa830 100644 --- a/backend/docs/TRANSCRIPTION_PIPELINE.md +++ b/backend/docs/TRANSCRIPTION_PIPELINE.md @@ -1355,7 +1355,10 @@ POST /playlists/{playlist_id}/publish-transcript {version_id} -> storage.get_segments_for_version(...) # existing call -> build_transcript_payload(segments) # pure, dedupe + collapse -> storage.get_published_transcript(...) # bookkeeping lookup - -> prodtrack.publish_transcript(...) / update_transcript(entity_id, ...) + -> prodtrack.publish_transcript(entity_type from env, ...) + # create path: reads SHOTGRID_TRANSCRIPT_ENTITY + / prodtrack.update_transcript(entity_type=existing.sg_entity_type, ...) + # update path: honours the bookkeeping row, not the current env -> storage.upsert_published_transcript(...) -> { transcript_entity_id, outcome: created | updated | skipped } ``` @@ -1404,7 +1407,11 @@ on the ShotGrid side without the publisher overwriting it. **Decision:** Track which `(playlist, version, meeting)` tuples have been published in a local Mongo collection. Skip re-publish when the -new body_hash matches the stored one. +new body_hash matches the stored one. The bookkeeping row also stores +`sg_entity_type`; the update path uses that value instead of the +current `SHOTGRID_TRANSCRIPT_ENTITY` env so studios can migrate to a +new custom-entity slot without breaking updates on already-published +rows. **Rationale:** - SG is not efficiently queryable for "has this been published before". @@ -1412,6 +1419,8 @@ new body_hash matches the stored one. (`published_note_id` on the draft). 
- Loss of the Mongo row is a known edge-case; duplicate SG rows in that scenario are an acceptable V1 trade-off documented on issue #120. +- Pinning the entity_type to the bookkeeping row (not env) prevents + misdirected updates after a slot migration. ### ADR-007: Build publishable body at publish time, not ingest time diff --git a/frontend/packages/app/src/components/PublishTranscriptDialog.tsx b/frontend/packages/app/src/components/PublishTranscriptDialog.tsx index 975d4eb..7ad136a 100644 --- a/frontend/packages/app/src/components/PublishTranscriptDialog.tsx +++ b/frontend/packages/app/src/components/PublishTranscriptDialog.tsx @@ -80,13 +80,12 @@ export const PublishTranscriptDialog: React.FC< > Publish transcript + + Push the captured transcript for this version to the production + tracking system as a custom-entity row. + - - Push the captured transcript for this version to the production - tracking system as a custom-entity row. - - Version From 6faac7168403a1aa5ad05a097d703f839e8a5949 Mon Sep 17 00:00:00 2001 From: thc1006 <84045975+thc1006@users.noreply.github.com> Date: Mon, 20 Apr 2026 06:07:02 +0800 Subject: [PATCH 19/19] style(tests): sort from-main import into third-party block isort 8.0.1 (the version CI runs) groups `from main import ...` with third-party imports because `main` is not in `known_first_party`. This matches tests/test_publish_endpoint.py, which already does it this way. No behaviour change; unblocks format-check on PR #138. 
Signed-off-by: thc1006 <84045975+thc1006@users.noreply.github.com> --- backend/tests/test_publish_transcript_endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/tests/test_publish_transcript_endpoint.py b/backend/tests/test_publish_transcript_endpoint.py index efd4e6a..c9daaa8 100644 --- a/backend/tests/test_publish_transcript_endpoint.py +++ b/backend/tests/test_publish_transcript_endpoint.py @@ -6,11 +6,11 @@ import pytest from fastapi.testclient import TestClient +from main import app, get_prodtrack_provider_cached, get_storage_provider_cached from dna.models.playlist_metadata import PlaylistMetadata from dna.models.published_transcript import PublishedTranscript from dna.models.stored_segment import StoredSegment -from main import app, get_prodtrack_provider_cached, get_storage_provider_cached ENABLE_FLAG = {"DNA_ENABLE_TRANSCRIPT_PUBLISH": "true"}