From 9ad7baaaaf08abdd27d01d586e0e14c2a4fd6bb3 Mon Sep 17 00:00:00 2001 From: baiqing Date: Thu, 2 Jul 2026 17:35:48 +0800 Subject: [PATCH] =?UTF-8?q?feat(agent):=20MediaBridge=20=E2=80=94=20un-stu?= =?UTF-8?q?b=20inspect=5Ftimeline=20+=20import=5Fmedia=20MCP=20tools?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give the agent eyes and hands: a new injected MediaBridge trait carries the two capabilities the agent crate deliberately cannot link (wgpu compositing lives in opentake-render; the user-facing import path lives behind src-tauri MediaState), keeping CoreHandle the narrow document surface and dispatch unit-testable with a fake bridge (default trait methods return "not available", so non-Tauri builds run bridge-less). - inspect_timeline (upstream ToolExecutor+InspectTimeline.swift): renders the composited frame at startFrame, or maxFrames (default 6, cap 12) evenly sampled across [startFrame,endFrame) with upstream's exact center-of-bucket formula; aspect-preserving downscale to 512 longest edge (never upscales); JPEG q70 per frame + a meta block {fps,width,height,totalFrames,frameNumbers}. All range/validation errors mirror upstream strings. TauriMediaBridge composites via a self-contained RenderDevice per call (export.rs's isolation discipline, not the preview's cached RenderState). - import_media (upstream ToolExecutor+Import.swift): exactly-one-of url/path/ bytes with upstream's validation set (folderId existence, bytes<=15MB base64 + required mimeType with the 1:1 MIME table, path missing/unsupported-ext messages); path reuses the SAME import_one/mirror_dir path as the media panel (posters/manifest/events consistent), directories mirror recursively; bytes writes imported-. into bundle media/. url returns a structured not-yet-supported error (no HTTP client dep chosen yet; flagged for follow-up — upstream caps downloads at 1GB HTTPS-only). Gates: fmt/clippy -D warnings clean; cargo test --workspace 1408 (agent 221); pnpm build clean; pnpm test 330. --- Cargo.lock | 1 + crates/opentake-agent/Cargo.toml | 2 + crates/opentake-agent/src/mcp/dispatch.rs | 539 ++++++++++- crates/opentake-agent/src/mcp/media_bridge.rs | 200 ++++ crates/opentake-agent/src/mcp/mod.rs | 1 + crates/opentake-agent/src/mcp/server.rs | 55 +- src-tauri/Cargo.toml | 5 +- src-tauri/src/lib.rs | 14 +- src-tauri/src/mcp.rs | 860 +++++++++++++++++- src-tauri/src/media.rs | 34 +- 10 files changed, 1683 insertions(+), 28 deletions(-) create mode 100644 crates/opentake-agent/src/mcp/media_bridge.rs diff --git a/Cargo.lock b/Cargo.lock index 556815d..33d4044 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3287,6 +3287,7 @@ dependencies = [ "anyhow", "async-trait", "axum", + "base64 0.22.1", "futures", "http", "keyring", diff --git a/crates/opentake-agent/Cargo.toml b/crates/opentake-agent/Cargo.toml index 9dca585..ceba764 100644 --- a/crates/opentake-agent/Cargo.toml +++ b/crates/opentake-agent/Cargo.toml @@ -21,6 +21,8 @@ tracing = "0.1" regex = "1" async-trait = "0.1" futures = "0.3" +# Base64-encode composited `inspect_timeline` frame bytes into MCP image content. +base64 = "0.22" tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros", "sync", "net", "time"] } # MCP server transport (Streamable HTTP over axum/hyper) + in-app chat HTTP. diff --git a/crates/opentake-agent/src/mcp/dispatch.rs b/crates/opentake-agent/src/mcp/dispatch.rs index 94b5343..be67b09 100644 --- a/crates/opentake-agent/src/mcp/dispatch.rs +++ b/crates/opentake-agent/src/mcp/dispatch.rs @@ -36,6 +36,7 @@ use serde_json::Value; use crate::mcp::core_handle::CoreHandle; use crate::mcp::gen_catalog; +use crate::mcp::media_bridge::{frame_to_block, ImportSource, InspectResult, MediaBridge}; use crate::plugin::registry::PluginRegistry; use crate::signal::engine; use crate::signal::rules::OpContext; @@ -43,26 +44,52 @@ use crate::tools::args::{self, *}; use crate::tools::encode_timeline::encode_timeline; use crate::tools::errors::{decode_tool_args, ToolError}; use crate::tools::names::ToolName; -use crate::tools::result::ToolResult; +use crate::tools::result::{Block, ToolResult}; use crate::tools::short_id; +/// `inspect_timeline` frame-sampling + downscale constants, 1:1 with upstream +/// `ToolExecutor+InspectTimeline`: default 6 sampled frames, hard cap 12, longest +/// render edge 512px (the JPEG quality lives with the encoder in the bridge). +const INSPECT_TIMELINE_DEFAULT_FRAMES: i32 = 6; +const INSPECT_TIMELINE_MAX_FRAMES: i32 = 12; +const INSPECT_TIMELINE_MAX_DIMENSION: u32 = 512; + /// The in-process tool dispatcher. Holds the [`CoreHandle`] boundary, the plugin /// registry (read-locked for the active plugin), and a per-dispatcher agent-undo /// stack so `undo` only reverts edits this session made. pub struct Dispatcher { handle: Arc, registry: Arc>, + /// The render + import side-door (`inspect_timeline` / `import_media`), or + /// `None` in a non-Tauri build / tests. See [`MediaBridge`]. Kept separate from + /// [`CoreHandle`] because those two capabilities reach into crates the agent + /// layer does not link (`opentake-render`, the src-tauri import path). + bridge: Option>, /// Action names of agent edits applied through this dispatcher, newest last. /// Guards `undo`: we only revert when this session has pushed an edit. agent_undo: Mutex>, } impl Dispatcher { - /// New dispatcher over a core handle + plugin registry. + /// New dispatcher over a core handle + plugin registry, with no media bridge + /// (the two render/import tools then report "not available"). Used by tests and + /// any non-Tauri host. pub fn new(handle: Arc, registry: Arc>) -> Self { + Self::with_bridge(handle, registry, None) + } + + /// New dispatcher with an optional [`MediaBridge`] wired in. The Tauri shell + /// (`src-tauri/src/mcp.rs`) passes `Some(bridge)` so `inspect_timeline` / + /// `import_media` reach the real GPU + import paths. + pub fn with_bridge( + handle: Arc, + registry: Arc>, + bridge: Option>, + ) -> Self { Dispatcher { handle, registry, + bridge, agent_undo: Mutex::new(Vec::new()), } } @@ -179,21 +206,22 @@ impl Dispatcher { ToolName::SmartReframe => self.smart_reframe(args), ToolName::TightenSilences => self.tighten_silences(args, before), + // --- Render + import (wired to the injected MediaBridge) --- + ToolName::InspectTimeline => self.inspect_timeline(args, before), + ToolName::ImportMedia => self.import_media(args, manifest), + // --- Not yet implementable in this phase (honest stubs) --- - // Media reads (inspect/transcript/search) + import need the media - // backend via a widened CoreHandle; generation/upscale need the async - // GenClient + BYOK auth; inspect_timeline needs the render+text path. + // Media reads (inspect/transcript/search) still need the analysis + // backend; generation/upscale need the async GenClient + BYOK auth. // Motion graphics (#34) now routes through the planned Motion Canvas // plugin: render mp4 -> import media -> place clip. ToolName::InspectMedia | ToolName::GetTranscript - | ToolName::InspectTimeline | ToolName::SearchMedia | ToolName::GenerateVideo | ToolName::GenerateImage | ToolName::GenerateAudio | ToolName::UpscaleMedia - | ToolName::ImportMedia | ToolName::AddCaptions | ToolName::AddMotionGraphic | ToolName::EditMotionGraphic => Ok(ToolResult::error(format!( @@ -217,6 +245,164 @@ impl Dispatcher { Ok(ToolResult::ok(payload.to_string())) } + // MARK: - Render + import tool bodies (backed by the MediaBridge) + + /// `inspect_timeline`: composite one project frame, or `maxFrames` frames + /// evenly sampled across `[startFrame, endFrame)`, downscaled for tokens. + /// 1:1 port of upstream `ToolExecutor+InspectTimeline.inspectTimeline` + /// (frame-range validation + even sampling here; the GPU composite + JPEG + /// encode behind the [`MediaBridge`]). Returns MCP image content per frame plus + /// a trailing meta text block (`fps`/`width`/`height`/`totalFrames`/ + /// `frameNumbers`). + fn inspect_timeline(&self, args: &Value, before: &Timeline) -> Result { + let a: InspectTimelineArgs = decode_tool_args(args, "")?; + + let total_frames = before.total_frames(); + if total_frames <= 0 { + return Ok(ToolResult::error("Timeline is empty — nothing to render.")); + } + + let start_frame = a.start_frame.unwrap_or(0); + if start_frame < 0 || start_frame >= total_frames { + return Ok(ToolResult::error(format!( + "startFrame {start_frame} out of range [0, {total_frames})." + ))); + } + + // Single frame, or evenly-sampled frames across [startFrame, endFrame). + // Mirrors upstream exactly: count = clamp(maxFrames|default, ≤max, ≤span), + // frame_i = startFrame + floor(span * (i + 0.5) / count). + let sampled: Vec = if let Some(raw_end) = a.end_frame { + let end_frame = raw_end.min(total_frames); + if end_frame <= start_frame { + return Ok(ToolResult::error(format!( + "endFrame must be greater than startFrame ({start_frame})." + ))); + } + let span = end_frame - start_frame; + let count = a + .max_frames + .unwrap_or(INSPECT_TIMELINE_DEFAULT_FRAMES) + .min(INSPECT_TIMELINE_MAX_FRAMES) + .min(span) + .max(1); + (0..count) + .map(|i| { + let offset = (span as f64 * (i as f64 + 0.5) / count as f64).floor() as i32; + start_frame + offset + }) + .collect() + } else { + vec![start_frame] + }; + + let Some(bridge) = self.bridge.as_ref() else { + return Ok(ToolResult::error( + "inspect_timeline: rendering is not available in this build", + )); + }; + + let InspectResult { + frames, + width, + height, + } = bridge + .inspect_timeline(&sampled, INSPECT_TIMELINE_MAX_DIMENSION) + .map_err(|e| ToolError::new(e.message))?; + + if frames.is_empty() { + return Ok(ToolResult::error("Failed to render timeline frames.")); + } + + // Image blocks first, then the meta text block — upstream's + // `imageBlocks + [metaJSON]` order. + let mut blocks: Vec = frames.iter().map(frame_to_block).collect(); + let rendered_frames: Vec = frames.iter().map(|f| f.frame).collect(); + let meta = serde_json::json!({ + "fps": before.fps, + "width": width, + "height": height, + "totalFrames": total_frames, + "frameNumbers": rendered_frames, + }); + blocks.push(Block::text(meta.to_string())); + Ok(ToolResult::blocks(blocks)) + } + + /// `import_media`: import external media (url / path / bytes) through the SAME + /// path as the user-facing import, via the [`MediaBridge`]. 1:1 port of + /// upstream `ToolExecutor+Import.importMedia` — exactly-one-of-source + /// validation + folderId existence check here; the IO (download / recursive + /// path import / bytes write + poster/manifest/event) behind the bridge. + fn import_media( + &self, + args: &Value, + manifest: &MediaManifest, + ) -> Result { + let a: ImportMediaArgs = decode_tool_args(args, "")?; + // Validate the nested `source` object's own keys (upstream + // `validateUnknownKeys(source, path: "source")`). The top-level decode + // sees `source` as an opaque object, so an unknown key inside it would be + // silently dropped without this explicit second decode. + let source = match args.get("source") { + Some(raw) => decode_tool_args::(raw, "source")?, + None => { + return Ok(ToolResult::error("Missing required 'source' object")); + } + }; + + // Exactly one of url / path / bytes. + let set_count = [&source.url, &source.path, &source.bytes] + .iter() + .filter(|v| v.is_some()) + .count(); + if set_count != 1 { + return Ok(ToolResult::error(format!( + "source must set exactly one of 'url', 'path', or 'bytes' (got {set_count})" + ))); + } + + // folderId, when provided, must name an existing folder (upstream + // `resolveFolderId`). There is no reference fallback for a tool call. + if let Some(folder_id) = a.folder_id.as_deref() { + if !manifest.folders.iter().any(|f| f.id == folder_id) { + return Ok(ToolResult::error(format!( + "folderId not found: {folder_id}" + ))); + } + } + + let import_source = if let Some(path) = source.path.clone() { + ImportSource::Path(path) + } else if let Some(base64) = source.bytes.clone() { + let Some(mime_type) = source.mime_type.clone() else { + return Ok(ToolResult::error( + "source.mimeType is required when source.bytes is set", + )); + }; + ImportSource::Bytes { base64, mime_type } + } else if let Some(url) = source.url.clone() { + ImportSource::Url { + url, + mime_type: source.mime_type.clone(), + } + } else { + // Unreachable: set_count == 1 guaranteed one branch above. + return Ok(ToolResult::error("import_media: no source set")); + }; + + let Some(bridge) = self.bridge.as_ref() else { + return Ok(ToolResult::error( + "import_media: importing is not available in this build", + )); + }; + + let outcome = bridge + .import_media(import_source, a.name.clone(), a.folder_id.clone()) + .map_err(|e| ToolError::new(e.message))?; + Ok(ToolResult::ok(outcome.message)) + } + // MARK: - Editing tool bodies fn add_clips( @@ -2650,4 +2836,343 @@ mod tests { assert!(!r.is_error, "{}", r.text_joined()); assert!(r.text_joined().contains("Deactivated")); } + + // MARK: - MediaBridge tools (inspect_timeline / import_media) + + use crate::mcp::media_bridge::{ + BridgeError, ImportOutcome, ImportSource, InspectResult, InspectedFrame, MediaBridge, + }; + use crate::tools::result::Block; + + /// One recorded `import_media` forward: a `kind:detail` tag plus the name / + /// folder the dispatcher passed through. + struct ImportCall { + tag: String, + name: Option, + folder_id: Option, + } + + /// A recording fake bridge: captures the last inspect/import call so tests can + /// assert the dispatcher forwarded validated args, and returns canned output. + #[derive(Default)] + struct FakeBridge { + inspect_calls: Mutex, u32)>>, + import_calls: Mutex>, + } + + impl MediaBridge for FakeBridge { + fn inspect_timeline( + &self, + frames: &[i32], + max_longest_edge: u32, + ) -> Result { + self.inspect_calls + .lock() + .unwrap() + .push((frames.to_vec(), max_longest_edge)); + Ok(InspectResult { + frames: frames + .iter() + .map(|&frame| InspectedFrame { + frame, + bytes: vec![0xff, 0xd8, 0xff, 0xe0], // JPEG SOI/APP0 stub + media_type: "image/jpeg".into(), + }) + .collect(), + width: 512, + height: 288, + }) + } + + fn import_media( + &self, + source: ImportSource, + name: Option, + folder_id: Option, + ) -> Result { + let tag = match &source { + ImportSource::Path(p) => format!("path:{p}"), + ImportSource::Bytes { mime_type, .. } => format!("bytes:{mime_type}"), + ImportSource::Url { url, .. } => format!("url:{url}"), + }; + self.import_calls.lock().unwrap().push(ImportCall { + tag: tag.clone(), + name, + folder_id, + }); + Ok(ImportOutcome { + message: format!("Imported via {tag}."), + }) + } + } + + /// A dispatcher whose timeline has a single 60-frame clip and a `FakeBridge` + /// wired in. Returns both so tests can inspect the recorded bridge calls. + fn dispatcher_with_fake_bridge() -> (Dispatcher, Arc) { + let mut tl = Timeline::new(); + tl.fps = 30; + let mut track = opentake_domain::Track::new("track-1", ClipType::Video); + track.clips.push(Clip::new("clip-1", "asset-1", 0, 60)); + tl.tracks.push(track); + let mut m = MediaManifest::new(); + m.entries.push(entry("asset-1", "Hero")); + let handle = Arc::new(StateHandle::new(tl, m)); + let bridge = Arc::new(FakeBridge::default()); + let d = Dispatcher::with_bridge( + handle, + Arc::new(RwLock::new(PluginRegistry::new())), + Some(bridge.clone() as Arc), + ); + (d, bridge) + } + + #[test] + fn inspect_timeline_without_bridge_reports_unavailable() { + // The seeded TestHandle timeline is empty, so first assert the empty guard, + // then a non-empty timeline with no bridge reports "not available". + let d = dispatcher_with(seeded_handle()); + let r = d.dispatch("inspect_timeline", serde_json::json!({ "startFrame": 0 })); + assert!(r.is_error); + assert!( + r.text_joined().contains("not available"), + "{}", + r.text_joined() + ); + } + + #[test] + fn inspect_timeline_empty_timeline_errors() { + let (d, _b) = { + // A bridge is present but the timeline is empty → the empty guard fires + // before the bridge is ever consulted. + let handle = Arc::new(StateHandle::new(Timeline::new(), MediaManifest::new())); + let bridge = Arc::new(FakeBridge::default()); + let d = Dispatcher::with_bridge( + handle, + Arc::new(RwLock::new(PluginRegistry::new())), + Some(bridge.clone() as Arc), + ); + (d, bridge) + }; + let r = d.dispatch("inspect_timeline", serde_json::json!({})); + assert!(r.is_error); + assert!( + r.text_joined().contains("Timeline is empty"), + "{}", + r.text_joined() + ); + } + + #[test] + fn inspect_timeline_start_frame_out_of_range_errors() { + let (d, _b) = dispatcher_with_fake_bridge(); + // total_frames is 60; startFrame 60 is out of range [0, 60). + let r = d.dispatch("inspect_timeline", serde_json::json!({ "startFrame": 60 })); + assert!(r.is_error); + assert!( + r.text_joined().contains("out of range"), + "{}", + r.text_joined() + ); + } + + #[test] + fn inspect_timeline_end_before_start_errors() { + let (d, _b) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "inspect_timeline", + serde_json::json!({ "startFrame": 30, "endFrame": 20 }), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("greater than startFrame"), + "{}", + r.text_joined() + ); + } + + #[test] + fn inspect_timeline_single_frame_returns_one_image_and_meta() { + let (d, bridge) = dispatcher_with_fake_bridge(); + let r = d.dispatch("inspect_timeline", serde_json::json!({ "startFrame": 5 })); + assert!(!r.is_error, "{}", r.text_joined()); + + // The bridge was asked for exactly [5] at the 512px cap. + let calls = bridge.inspect_calls.lock().unwrap(); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0], (vec![5], 512)); + + // One image block, then a meta text block (upstream order). + let images = r + .content + .iter() + .filter(|b| matches!(b, Block::Image { .. })) + .count(); + assert_eq!(images, 1, "one composited frame"); + // The last text block is the meta JSON with the sampled frame numbers. + let meta_text = match &r.content[1] { + Block::Text { text } => text.clone(), + _ => panic!("expected meta text block after the image"), + }; + let meta: Value = serde_json::from_str(&meta_text).unwrap(); + assert_eq!(meta["frameNumbers"], serde_json::json!([5])); + assert_eq!(meta["totalFrames"], serde_json::json!(60)); + assert_eq!(meta["width"], serde_json::json!(512)); + assert_eq!(meta["fps"], serde_json::json!(30)); + } + + #[test] + fn inspect_timeline_range_samples_frames_evenly_capped() { + let (d, bridge) = dispatcher_with_fake_bridge(); + // [0, 60) with default 6 frames → floor(60*(i+0.5)/6): 5,15,25,35,45,55. + let r = d.dispatch( + "inspect_timeline", + serde_json::json!({ "startFrame": 0, "endFrame": 60 }), + ); + assert!(!r.is_error, "{}", r.text_joined()); + let calls = bridge.inspect_calls.lock().unwrap(); + assert_eq!(calls[0].0, vec![5, 15, 25, 35, 45, 55]); + } + + #[test] + fn inspect_timeline_max_frames_is_capped_at_12() { + let (d, bridge) = dispatcher_with_fake_bridge(); + // maxFrames 100 is clamped to 12 (and to the span, which is 60 here). + let r = d.dispatch( + "inspect_timeline", + serde_json::json!({ "startFrame": 0, "endFrame": 60, "maxFrames": 100 }), + ); + assert!(!r.is_error, "{}", r.text_joined()); + assert_eq!(bridge.inspect_calls.lock().unwrap()[0].0.len(), 12); + } + + #[test] + fn inspect_timeline_rejects_unknown_arg() { + let (d, _b) = dispatcher_with_fake_bridge(); + let r = d.dispatch("inspect_timeline", serde_json::json!({ "bogus": 1 })); + assert!(r.is_error); + assert!( + r.text_joined().contains("unknown field"), + "{}", + r.text_joined() + ); + } + + #[test] + fn import_media_without_bridge_reports_unavailable() { + let d = dispatcher_with(seeded_handle()); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "path": "/x.mp4" } }), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("not available"), + "{}", + r.text_joined() + ); + } + + #[test] + fn import_media_requires_exactly_one_source() { + let (d, _b) = dispatcher_with_fake_bridge(); + // Zero of url/path/bytes. + let none = d.dispatch("import_media", serde_json::json!({ "source": {} })); + assert!(none.is_error); + assert!( + none.text_joined().contains("exactly one"), + "{}", + none.text_joined() + ); + // Two of them. + let two = d.dispatch( + "import_media", + serde_json::json!({ "source": { "path": "/a.mp4", "url": "https://x/a.mp4" } }), + ); + assert!(two.is_error); + assert!( + two.text_joined().contains("exactly one"), + "{}", + two.text_joined() + ); + } + + #[test] + fn import_media_bytes_requires_mime_type() { + let (d, _b) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "bytes": "AAAA" } }), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("mimeType is required"), + "{}", + r.text_joined() + ); + } + + #[test] + fn import_media_unknown_folder_id_errors() { + let (d, _b) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "path": "/a.mp4" }, "folderId": "ghost" }), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("folderId not found"), + "{}", + r.text_joined() + ); + } + + #[test] + fn import_media_rejects_unknown_nested_source_key() { + let (d, _b) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "url": "https://x/a.mp4", "bogus": 1 } }), + ); + assert!(r.is_error); + assert!( + r.text_joined().contains("source: unknown field"), + "{}", + r.text_joined() + ); + } + + #[test] + fn import_media_path_forwards_to_bridge_and_returns_message() { + let (d, bridge) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "path": "/clip.mp4" }, "name": "Clip" }), + ); + assert!(!r.is_error, "{}", r.text_joined()); + assert!( + r.text_joined().contains("Imported via path:/clip.mp4"), + "{}", + r.text_joined() + ); + let calls = bridge.import_calls.lock().unwrap(); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].tag, "path:/clip.mp4"); + assert_eq!(calls[0].name.as_deref(), Some("Clip")); + assert_eq!(calls[0].folder_id, None); + } + + #[test] + fn import_media_bytes_forwards_mime_to_bridge() { + let (d, bridge) = dispatcher_with_fake_bridge(); + let r = d.dispatch( + "import_media", + serde_json::json!({ "source": { "bytes": "AAAA", "mimeType": "image/png" } }), + ); + assert!(!r.is_error, "{}", r.text_joined()); + assert_eq!( + bridge.import_calls.lock().unwrap()[0].tag, + "bytes:image/png" + ); + } } diff --git a/crates/opentake-agent/src/mcp/media_bridge.rs b/crates/opentake-agent/src/mcp/media_bridge.rs new file mode 100644 index 0000000..a4467ce --- /dev/null +++ b/crates/opentake-agent/src/mcp/media_bridge.rs @@ -0,0 +1,200 @@ +//! `MediaBridge` — the injected side-door to the two capabilities that live +//! outside `opentake-core`: GPU compositing (in `opentake-render`, driven from +//! `src-tauri`) and the user-facing media-import machinery (in +//! `src-tauri/src/media.rs`, behind `MediaState`/`MediaEngine`). +//! +//! Why a *separate* trait instead of widening [`CoreHandle`](super::core_handle): +//! [`CoreHandle`] is deliberately the narrow *document* surface (read timeline, +//! read media, apply one command, project dir, decode analysis PCM). Its +//! production impl wraps only [`opentake_core::AppCore`]. The two capabilities the +//! media tools need reach into crates the agent layer does **not** — and by design +//! should not — link: `opentake-render` (wgpu) for compositing, and the +//! `src-tauri` import path (posters / manifest / events) for `import_media`. +//! Folding them into `CoreHandle` would either drag a GPU dependency into this +//! crate or hide logic behind default methods that can't reach the real paths. +//! +//! So the bridge is injected exactly like the plugin registry is — an optional +//! collaborator the [`Dispatcher`](super::dispatch::Dispatcher) holds. In a plain +//! `vite dev` / non-Tauri build (and in unit tests) there is no bridge and the two +//! tools report an honest "not available" instead of failing to compile. The real +//! implementation is constructed and injected in `src-tauri/src/mcp.rs`, where the +//! render + import code already lives. +//! +//! Both methods default to `Err("unsupported")` so a hand-rolled bridge (or the +//! absence of one) never breaks the build. + +use crate::tools::result::Block; + +/// One composited timeline frame produced by [`MediaBridge::inspect_timeline`], +/// ready to become MCP image content. `bytes` are already-encoded image data +/// (JPEG in the production path) — the agent crate never links an image encoder; +/// the bridge (which does) hands back finished bytes plus their media type. +#[derive(Debug, Clone)] +pub struct InspectedFrame { + /// The project frame this image was rendered at. + pub frame: i32, + /// Encoded image bytes (e.g. JPEG). + pub bytes: Vec, + /// MIME type of `bytes` (e.g. `"image/jpeg"`). + pub media_type: String, +} + +/// The rendered result of an `inspect_timeline` call: the sampled frames plus the +/// downscaled render dimensions, mirroring upstream `inspectTimeline`'s +/// `imageBlocks + [metaJSON]` result. `total_frames` is echoed back in the meta. +#[derive(Debug, Clone)] +pub struct InspectResult { + /// Composited frames, in sample order. + pub frames: Vec, + /// Downscaled render width (px) — the `width` field of the meta block. + pub width: u32, + /// Downscaled render height (px). + pub height: u32, +} + +/// The outcome of an `import_media` call, mirroring upstream's `.ok("…")` string +/// results. The dispatcher wraps `message` in a [`crate::tools::result::ToolResult`]. +#[derive(Debug, Clone)] +pub struct ImportOutcome { + /// Human/LLM-facing confirmation line (same shape as upstream `.ok(...)`). + pub message: String, +} + +/// One decoded `source` object for [`MediaBridge::import_media`]. The dispatcher +/// has already enforced *exactly one* of `url` / `path` / `bytes` is set and that +/// `mime_type` is present when `bytes` is; the bridge does the IO. +#[derive(Debug, Clone)] +pub enum ImportSource { + /// Absolute local file or directory path, imported in place (directories are + /// mirrored recursively). + Path(String), + /// Base64-encoded inline bytes written into the project bundle's `media/`. + Bytes { + /// Raw base64 (already length-checked by the dispatcher). + base64: String, + /// Required MIME type (drives the written file's extension). + mime_type: String, + }, + /// HTTPS URL downloaded into the project bundle's `media/`. + Url { + /// The HTTPS URL (scheme already validated by the dispatcher). + url: String, + /// Optional MIME override for extension inference (signed URLs). + mime_type: Option, + }, +} + +/// A user-visible import error the bridge raises. Carries an LLM-facing message +/// the dispatcher surfaces verbatim (upstream `ToolError` messages). +#[derive(Debug, Clone)] +pub struct BridgeError { + /// The message shown to the model. + pub message: String, +} + +impl BridgeError { + /// Wrap a message. + pub fn new(message: impl Into) -> Self { + BridgeError { + message: message.into(), + } + } +} + +impl std::fmt::Display for BridgeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.message) + } +} + +impl std::error::Error for BridgeError {} + +/// The injected capability boundary for the render + import tools. `Send + Sync` +/// so the [`Dispatcher`](super::dispatch::Dispatcher) can hold `Arc` across threads (matching [`CoreHandle`](super::core_handle)). +pub trait MediaBridge: Send + Sync { + /// Composite the timeline at each `frames` value and return them as encoded + /// image bytes, downscaled so the longest edge is at most `max_longest_edge`. + /// Frame numbers are validated by the dispatcher; the bridge composites and + /// encodes. Frames that fail to render are dropped (upstream `continue`s past a + /// failed `generator.image(at:)`), so the returned `frames` may be shorter than + /// the request; an all-empty render is an `Err`. + fn inspect_timeline( + &self, + _frames: &[i32], + _max_longest_edge: u32, + ) -> Result { + Err(BridgeError::new( + "inspect_timeline: rendering is not available in this build", + )) + } + + /// Import media through the SAME path as the user-facing import (posters, + /// manifest entry, `MediaChanged` event). `folder_id`, when set, has already + /// been checked to exist by the dispatcher. Returns the confirmation message. + fn import_media( + &self, + _source: ImportSource, + _name: Option, + _folder_id: Option, + ) -> Result { + Err(BridgeError::new( + "import_media: importing is not available in this build", + )) + } +} + +/// Turn one [`InspectedFrame`] into an MCP image [`Block`], base64-encoding the +/// bytes (rmcp image content is base64). Kept here so the dispatcher stays free of +/// encoding concerns. +pub fn frame_to_block(frame: &InspectedFrame) -> Block { + use base64::Engine as _; + let b64 = base64::engine::general_purpose::STANDARD.encode(&frame.bytes); + Block::image(b64, frame.media_type.clone()) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// The default trait methods report "unsupported" without a real bridge — the + /// non-Tauri build path. + struct NoopBridge; + impl MediaBridge for NoopBridge {} + + #[test] + fn default_inspect_timeline_is_unsupported() { + let b = NoopBridge; + let err = b.inspect_timeline(&[0], 512).unwrap_err(); + assert!(err.message.contains("not available"), "{}", err.message); + } + + #[test] + fn default_import_media_is_unsupported() { + let b = NoopBridge; + let err = b + .import_media(ImportSource::Path("/x.mp4".into()), None, None) + .unwrap_err(); + assert!(err.message.contains("not available"), "{}", err.message); + } + + #[test] + fn frame_to_block_base64_encodes_image_content() { + let f = InspectedFrame { + frame: 3, + bytes: vec![0xff, 0xd8, 0xff, 0xe0], + media_type: "image/jpeg".into(), + }; + match frame_to_block(&f) { + Block::Image { base64, media_type } => { + use base64::Engine as _; + assert_eq!(media_type, "image/jpeg"); + let decoded = base64::engine::general_purpose::STANDARD + .decode(base64) + .unwrap(); + assert_eq!(decoded, vec![0xff, 0xd8, 0xff, 0xe0]); + } + _ => panic!("expected image block"), + } + } +} diff --git a/crates/opentake-agent/src/mcp/mod.rs b/crates/opentake-agent/src/mcp/mod.rs index 4d7c10b..2336234 100644 --- a/crates/opentake-agent/src/mcp/mod.rs +++ b/crates/opentake-agent/src/mcp/mod.rs @@ -13,4 +13,5 @@ pub mod convert; pub mod core_handle; pub mod dispatch; pub mod gen_catalog; +pub mod media_bridge; pub mod server; diff --git a/crates/opentake-agent/src/mcp/server.rs b/crates/opentake-agent/src/mcp/server.rs index bc1cc1e..c2b2e9b 100644 --- a/crates/opentake-agent/src/mcp/server.rs +++ b/crates/opentake-agent/src/mcp/server.rs @@ -27,6 +27,7 @@ use serde_json::{Map, Value}; use crate::mcp::convert::to_call_tool_result; use crate::mcp::core_handle::CoreHandle; use crate::mcp::dispatch::Dispatcher; +use crate::mcp::media_bridge::MediaBridge; use crate::plugin::registry::PluginRegistry; use crate::prompt::assemble::assemble_system_prompt; use crate::tools::descriptions::{description, input_schema}; @@ -43,14 +44,25 @@ pub struct McpServer { } impl McpServer { - /// Build a session server over the shared document handle + plugin registry. + /// Build a session server over the shared document handle + plugin registry, + /// with no media bridge (render/import tools then report "not available"). pub fn new(handle: Arc, registry: Arc>) -> Self { + Self::with_bridge(handle, registry, None) + } + + /// Build a session server with an optional [`MediaBridge`] injected, so + /// `inspect_timeline` / `import_media` reach the real GPU + import paths. + pub fn with_bridge( + handle: Arc, + registry: Arc>, + bridge: Option>, + ) -> Self { let instructions = registry .read() .map(|r| assemble_system_prompt(&r, "default")) .unwrap_or_default(); McpServer { - dispatcher: Arc::new(Dispatcher::new(handle, registry)), + dispatcher: Arc::new(Dispatcher::with_bridge(handle, registry, bridge)), instructions, } } @@ -183,11 +195,22 @@ async fn oauth_protected_resource() -> axum::Json { })) } -/// Build the axum router: `StreamableHttpService` at `/mcp`, the OAuth -/// well-known endpoint, and the loopback guard layered over everything. +/// Build the axum router with no media bridge (render/import tools report "not +/// available"). See [`build_router_with_bridge`]. pub fn build_router( handle: Arc, registry: Arc>, +) -> axum::Router { + build_router_with_bridge(handle, registry, None) +} + +/// Build the axum router: `StreamableHttpService` at `/mcp`, the OAuth +/// well-known endpoint, and the loopback guard layered over everything. The +/// optional [`MediaBridge`] is cloned into each per-session [`McpServer`]. +pub fn build_router_with_bridge( + handle: Arc, + registry: Arc>, + bridge: Option>, ) -> axum::Router { use rmcp::transport::streamable_http_server::session::local::LocalSessionManager; use rmcp::transport::streamable_http_server::{ @@ -195,7 +218,13 @@ pub fn build_router( }; let service = StreamableHttpService::new( - move || Ok(McpServer::new(handle.clone(), registry.clone())), + move || { + Ok(McpServer::with_bridge( + handle.clone(), + registry.clone(), + bridge.clone(), + )) + }, Arc::new(LocalSessionManager::default()), StreamableHttpServerConfig::default(), ); @@ -209,13 +238,25 @@ pub fn build_router( .layer(axum::middleware::from_fn(localhost_guard)) } -/// Bind `addr` (loopback) and serve the MCP router until the process exits. +/// Bind `addr` (loopback) and serve the MCP router with no media bridge. See +/// [`serve_with_bridge`]. pub async fn serve( addr: SocketAddr, handle: Arc, registry: Arc>, ) -> std::io::Result<()> { - let router = build_router(handle, registry); + serve_with_bridge(addr, handle, registry, None).await +} + +/// Bind `addr` (loopback) and serve the MCP router until the process exits, with +/// an optional [`MediaBridge`] injected (the Tauri shell passes `Some`). +pub async fn serve_with_bridge( + addr: SocketAddr, + handle: Arc, + registry: Arc>, + bridge: Option>, +) -> std::io::Result<()> { + let router = build_router_with_bridge(handle, registry, bridge); let listener = tokio::net::TcpListener::bind(addr).await?; tracing::info!("MCP server listening on http://{addr}/mcp"); axum::serve(listener, router).await diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index bb3dbd9..3f923a2 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -34,8 +34,9 @@ opentake-media = { workspace = true } opentake-render = { workspace = true } opentake-gen = { workspace = true } opentake-agent = { workspace = true } -# Preview composite frames are returned to the WebView as a base64 PNG data URL. -image = { version = "0.25", default-features = false, features = ["png"] } +# Preview composite frames are returned to the WebView as a base64 PNG data URL; +# the agent's `inspect_timeline` bridge encodes composited frames as JPEG. +image = { version = "0.25", default-features = false, features = ["png", "jpeg"] } base64 = "0.22" # Streaming playback transport (#53 / #64), behind the `playback-engine` feature. diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 2753ddd..d1dcc3f 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -96,17 +96,25 @@ pub fn run() { .app_data_dir() .unwrap_or_else(|_| std::env::temp_dir()) .join("models"); - let engine = MediaEngine::new(cache_root, models_dir); + let engine = MediaEngine::new(cache_root.clone(), models_dir.clone()); // Bring up the loopback MCP server (#36) over a session-sharing clone // of the core, before the core is moved into managed state. Bundled + - // user workflow plugins live under /workflows. + // user workflow plugins live under /workflows. The + // media bridge (agent inspect_timeline / import_media) is built from + // the SAME cache/models dirs as the UI's engine, so imports share the + // same poster/manifest caches. let workflows_dir = app .path() .app_data_dir() .unwrap_or_else(|_| std::env::temp_dir()) .join("workflows"); - mcp::spawn(core.clone(), workflows_dir); + mcp::spawn( + core.clone(), + workflows_dir, + cache_root.clone(), + models_dir.clone(), + ); // Global asset library (#37/#54): a cross-project copy-on-favorite // store under /OpenTake/Library, falling back to the OS diff --git a/src-tauri/src/mcp.rs b/src-tauri/src/mcp.rs index d3f07cd..bc283b0 100644 --- a/src-tauri/src/mcp.rs +++ b/src-tauri/src/mcp.rs @@ -1,4 +1,5 @@ -//! Spawns the loopback MCP server (#36) on the Tauri async runtime. +//! Spawns the loopback MCP server (#36) on the Tauri async runtime, and wires the +//! agent's render + import side-door ([`MediaBridge`]). //! //! The server exposes the in-process tool dispatcher over Streamable-HTTP at //! `http://127.0.0.1:19789/mcp` so external agents (`claude mcp add --transport @@ -6,14 +7,45 @@ //! same [`AppCore`] the UI edits. The plugin registry seeds the bundled //! workflows (e.g. the default audio-first Skill) plus any user-authored plugins //! under `/workflows`. +//! +//! `inspect_timeline` and `import_media` need capabilities that live outside +//! `opentake-core` — GPU compositing (`opentake-render`) and the user-facing +//! import path (`crate::media`). The agent crate can't (by design shouldn't) link +//! those, so it takes them through the injected [`MediaBridge`]. This module is +//! where that boundary is implemented ([`TauriMediaBridge`]) and handed to the +//! dispatcher: it owns a session-sharing [`AppCore`] clone plus a [`MediaEngine`] +//! built from the same cache/models dirs the UI uses, so imports produce the exact +//! same posters / manifest entries / `MediaChanged` events as the media panel. +use std::collections::HashMap; use std::path::{Path, PathBuf}; +use std::rc::Rc; use std::sync::{Arc, RwLock}; +use base64::Engine as _; + use opentake_agent::mcp::core_handle::{AppCoreHandle, CoreHandle}; +use opentake_agent::mcp::media_bridge::{ + BridgeError, ImportOutcome, ImportSource, InspectResult, InspectedFrame, MediaBridge, +}; use opentake_agent::mcp::server; use opentake_agent::plugin::registry::PluginRegistry; -use opentake_core::AppCore; +use opentake_core::{importable_clip_type, AppCore}; +use opentake_domain::{ClipType, MediaSource, TextStyle}; +use opentake_media::{decode_frame_at, FrameRequest, MediaEngine}; +use opentake_render::gpu::texture::upload_rgba; +use opentake_render::{ + build_render_plan, even, Compositor, CosmicTextRasterizer, DecodedFrame, GpuTexture, + RenderDevice, RenderSize, SourceMetrics, TextRasterRequest, TextRasterizer, TextureCache, + TextureResolver, TextureSource, +}; + +/// JPEG quality `inspect_timeline` encodes composited frames at (upstream +/// `inspectTimelineJPEGQuality = 0.7`). `image` takes a 0–100 byte. +const INSPECT_JPEG_QUALITY: u8 = 70; + +/// Per-frame texture cache size — bounds VRAM during a multi-frame inspect. +const TEXTURE_CACHE_CAP: usize = 64; /// Built-in workflows + any user-authored plugins under `workflows_dir` /// (user plugins override a built-in with the same id, since `register` replaces @@ -33,10 +65,14 @@ fn build_registry(workflows_dir: &Path) -> PluginRegistry { } /// Spawn the MCP server. `core` is a clone that shares the live session; -/// `workflows_dir` is `/workflows`. A bind failure (port in use) is -/// logged, not fatal — the app keeps running without the agent network face. -pub fn spawn(core: AppCore, workflows_dir: PathBuf) { - let handle: Arc = Arc::new(AppCoreHandle::new(core)); +/// `workflows_dir` is `/workflows`; `cache_root` / `models_dir` are +/// the same paths the UI's [`MediaEngine`] uses, so the bridge's imports land in +/// the same caches. A bind failure (port in use) is logged, not fatal — the app +/// keeps running without the agent network face. +pub fn spawn(core: AppCore, workflows_dir: PathBuf, cache_root: PathBuf, models_dir: PathBuf) { + let handle: Arc = Arc::new(AppCoreHandle::new(core.clone())); + let bridge: Arc = + Arc::new(TauriMediaBridge::new(core, cache_root, models_dir)); let registry = Arc::new(RwLock::new(build_registry(&workflows_dir))); tauri::async_runtime::spawn(async move { let addr = match server::DEFAULT_ADDR.parse() { @@ -46,8 +82,818 @@ pub fn spawn(core: AppCore, workflows_dir: PathBuf) { return; } }; - if let Err(e) = server::serve(addr, handle, registry).await { + if let Err(e) = server::serve_with_bridge(addr, handle, registry, Some(bridge)).await { eprintln!("[mcp] server stopped: {e}"); } }); } + +/// The production [`MediaBridge`]: composites timeline frames on the GPU and +/// imports media through the same path as the media panel. +struct TauriMediaBridge { + /// A session-sharing clone of the authoritative core (import + snapshot). + core: AppCore, + /// Media engine over the UI's cache/models dirs — probing + poster warming on + /// import go through this, so imported assets are cached exactly like the + /// panel's. Built here (the engine is not `Clone`) from the same paths. + engine: MediaEngine, +} + +impl TauriMediaBridge { + fn new(core: AppCore, cache_root: PathBuf, models_dir: PathBuf) -> Self { + TauriMediaBridge { + core, + engine: MediaEngine::new(cache_root, models_dir), + } + } +} + +impl MediaBridge for TauriMediaBridge { + fn inspect_timeline( + &self, + frames: &[i32], + max_longest_edge: u32, + ) -> Result { + // Snapshot the live session, then composite off the session lock (the + // preview path's discipline; a local GPU context per call keeps this off + // the preview's cached `RenderState` mutex, matching export.rs). + let timeline = self.core.get_timeline().timeline; + let manifest = self.core.media(); + let project_dir = self.core.project_dir(); + composite_frames_jpeg(&timeline, &manifest, &project_dir, frames, max_longest_edge) + } + + fn import_media( + &self, + source: ImportSource, + name: Option, + folder_id: Option, + ) -> Result { + match source { + ImportSource::Path(path) => { + self.import_from_path(&path, name.as_deref(), folder_id.as_deref()) + } + ImportSource::Bytes { base64, mime_type } => { + self.import_from_bytes(&base64, &mime_type, name.as_deref(), folder_id.as_deref()) + } + // HTTPS download is not wired in this build: doing a byte-capped 1 GB + // streaming download from this synchronous bridge would require adding a + // full async HTTP stack (reqwest + a tokio runtime) to `src-tauri` — the + // one client in the workspace (`opentake-gen`) is a typed provider client, + // not a general file downloader, and is async. Path + bytes are fully + // functional; url returns a clear, structured error (mirrors upstream's + // validation-error shape) so the agent knows to fall back to path/bytes. + ImportSource::Url { .. } => Err(BridgeError::new( + "source.url import is not yet supported in this build — download the file and pass source.path (a local path), or inline it as source.bytes with source.mimeType.", + )), + } + } +} + +impl TauriMediaBridge { + /// `path` import: in place, mirroring directories recursively — the exact + /// `crate::media` path the media panel uses (`import_one` / `mirror_dir`), so + /// posters/manifest/events stay consistent. 1:1 with upstream + /// `ToolExecutor+Import.importFromPath`. + fn import_from_path( + &self, + path: &str, + name: Option<&str>, + folder_id: Option<&str>, + ) -> Result { + let file_url = PathBuf::from(path); + let meta = std::fs::metadata(&file_url) + .map_err(|_| BridgeError::new(format!("File not found: {path}")))?; + + if meta.is_dir() { + // Recursive directory import (剪注-style folder mirroring). Reuse the + // media panel's `mirror_dir`; count what actually landed. + let before_entries = self.core.media().entries.len(); + let before_folders = self.core.media().folders.len(); + let mut skipped = Vec::new(); + let parent = folder_id.map(|s| s.to_string()); + crate::media::mirror_dir(&self.core, &self.engine, &file_url, parent, &mut skipped); + let after = self.core.media(); + let asset_count = after.entries.len().saturating_sub(before_entries); + let folder_count = after.folders.len().saturating_sub(before_folders); + if asset_count == 0 { + return Err(BridgeError::new(format!( + "No supported media found in folder: {path}" + ))); + } + let dir_name = file_url + .file_name() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or_default(); + return Ok(ImportOutcome { + message: format!( + "Imported {asset_count} file(s) into {folder_count} folder(s) from '{dir_name}', mirroring its structure. Available now in get_media / list_folders." + ), + }); + } + + // Single file. Validate the extension up front for upstream's precise + // error (`import_one` would just skip an unsupported file). + let ext = file_url + .extension() + .and_then(|s| s.to_str()) + .map(|s| s.to_ascii_lowercase()) + .unwrap_or_default(); + if importable_clip_type(&file_url).is_none() { + return Err(BridgeError::new(format!( + "Unsupported file extension '.{ext}'. Supported: mov/mp4/m4v, mp3/wav/aac/m4a, png/jpg/jpeg/tiff/heic." + ))); + } + let entry = crate::media::import_one(&self.core, &self.engine, &file_url) + .ok_or_else(|| BridgeError::new(format!("Failed to import file: {path}")))?; + let entry = self.apply_import_metadata(entry, name, folder_id); + Ok(ImportOutcome { + message: format!( + "Imported '{}' (id: {}, type: {}) from path. Available now in get_media.", + entry.name, + entry.id, + clip_type_name(entry.kind) + ), + }) + } + + /// `bytes` import: write the base64 payload into the project bundle's `media/`, + /// then register it through the same import path. 1:1 with upstream + /// `ToolExecutor+Import.importFromBytes`. + fn import_from_bytes( + &self, + base64: &str, + mime_type: &str, + name: Option<&str>, + folder_id: Option<&str>, + ) -> Result { + let Some(file_ext) = crate::media::file_extension_for_mime(mime_type) else { + return Err(BridgeError::new(format!( + "Unsupported mimeType '{mime_type}'. {}", + crate::media::IMPORT_ACCEPTED_MIMES + ))); + }; + let data = base64::engine::general_purpose::STANDARD + .decode(base64.trim()) + .ok() + .filter(|d| !d.is_empty()) + .ok_or_else(|| BridgeError::new("source.bytes is not valid non-empty base64"))?; + + let project_dir = self + .core + .project_dir() + .ok_or_else(|| BridgeError::new("No project is open; cannot import bytes"))?; + let media_dir = project_dir.join("media"); + std::fs::create_dir_all(&media_dir) + .map_err(|e| BridgeError::new(format!("Failed to prepare media directory: {e}")))?; + + let filename = format!("imported-{}.{file_ext}", short_uuid()); + let dest = media_dir.join(filename); + std::fs::write(&dest, &data) + .map_err(|e| BridgeError::new(format!("Failed to write bytes to disk: {e}")))?; + + let Some(entry) = crate::media::import_one(&self.core, &self.engine, &dest) else { + let _ = std::fs::remove_file(&dest); + return Err(BridgeError::new("Failed to register imported asset")); + }; + let entry = self.apply_import_metadata(entry, name, folder_id); + Ok(ImportOutcome { + message: format!( + "Imported '{}' (id: {}, type: {}, {} bytes). Available now in get_media.", + entry.name, + entry.id, + clip_type_name(entry.kind), + data.len() + ), + }) + } + + /// Apply the optional display name + folder placement to a freshly imported + /// asset (upstream `applyImportMetadata`): rename via `RenameMedia`, place via + /// `MoveToFolder`. Returns the (possibly renamed) entry for the confirmation. + fn apply_import_metadata( + &self, + mut entry: opentake_domain::MediaManifestEntry, + name: Option<&str>, + folder_id: Option<&str>, + ) -> opentake_domain::MediaManifestEntry { + if let Some(name) = name { + if self + .core + .apply(opentake_core::EditCommand::RenameMedia { + entries: vec![opentake_ops::RenameEntry { + id: entry.id.clone(), + name: name.to_string(), + }], + }) + .is_ok() + { + entry.name = name.to_string(); + } + } + if let Some(folder_id) = folder_id { + let _ = self.core.apply(opentake_core::EditCommand::MoveToFolder { + asset_ids: vec![entry.id.clone()], + folder_id: Some(folder_id.to_string()), + }); + } + entry + } +} + +/// Lowercase `ClipType` name for the import confirmation (`video`/`audio`/…), +/// matching upstream `asset.type.rawValue`. +fn clip_type_name(kind: ClipType) -> &'static str { + match kind { + ClipType::Video => "video", + ClipType::Audio => "audio", + ClipType::Image => "image", + ClipType::Text => "text", + ClipType::Lottie => "lottie", + } +} + +/// An 8-hex-char pseudo-unique token for a written-bytes filename (upstream uses +/// `UUID().uuidString.prefix(8)`). Derived from the system clock — a filename +/// disambiguator only, never a security or collision-critical id. +fn short_uuid() -> String { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + format!("{:08x}", (nanos as u64) & 0xffff_ffff) +} + +// MARK: - Timeline compositing for inspect_timeline + +/// Aspect-preserving downscale so the longest edge is at most `longest_edge` +/// (never upscales). 1:1 with upstream `inspectTimeline`'s `fit(_:longestEdge:)`, +/// then even-ized for the encoder. `longest_edge == 0` means no cap. +fn fit_render_size(canvas_w: i32, canvas_h: i32, longest_edge: u32) -> RenderSize { + let cw = canvas_w.max(2) as f64; + let ch = canvas_h.max(2) as f64; + if longest_edge == 0 { + return RenderSize::new(even(cw), even(ch)); + } + let long = cw.max(ch); + let scale = if long > longest_edge as f64 { + longest_edge as f64 / long + } else { + 1.0 + }; + RenderSize::new(even(cw * scale), even(ch * scale)) +} + +/// Composite each frame in `frames` at the downscaled render size and JPEG-encode +/// it. A local GPU context is acquired for the batch (export.rs discipline). +/// Frames that fail to render are dropped (upstream `continue`s past a failed +/// `generator.image(at:)`); an all-empty render is an `Err`. +fn composite_frames_jpeg( + timeline: &opentake_domain::Timeline, + manifest: &opentake_domain::MediaManifest, + project_dir: &Option, + frames: &[i32], + max_longest_edge: u32, +) -> Result { + let render_size = fit_render_size(timeline.width, timeline.height, max_longest_edge); + + let text = project_text(timeline); + let (sizes, media) = project_media(manifest, project_dir); + let metrics = ManifestMetrics { sizes }; + let plan = build_render_plan(timeline, render_size, &metrics); + + let dev = + RenderDevice::try_new().map_err(|e| BridgeError::new(format!("no GPU device: {e}")))?; + let compositor = Compositor::new(&dev.device); + let text_rasterizer = CosmicTextRasterizer::new(); + + let mut out_frames: Vec = Vec::with_capacity(frames.len()); + for &f in frames { + let frame_plan = plan.frame(timeline, f); + let mut resolver = InspectResolver { + device: &dev.device, + queue: &dev.queue, + cache: TextureCache::new(TEXTURE_CACHE_CAP), + media: &media, + timeline_fps: plan.fps, + text: &text, + text_rasterizer: &text_rasterizer, + render_box: (render_size.width, render_size.height), + }; + let composite = match compositor.render_to_rgba( + &dev.device, + &dev.queue, + render_size, + &frame_plan, + &mut resolver, + ) { + Ok(c) => c, + Err(_) => continue, // skip an unrenderable frame (upstream parity) + }; + let Some(bytes) = encode_jpeg(&composite) else { + continue; + }; + out_frames.push(InspectedFrame { + frame: f, + bytes, + media_type: "image/jpeg".into(), + }); + } + + if out_frames.is_empty() { + return Err(BridgeError::new("Failed to render timeline frames.")); + } + Ok(InspectResult { + frames: out_frames, + width: render_size.width, + height: render_size.height, + }) +} + +/// JPEG-encode an RGBA composite at [`INSPECT_JPEG_QUALITY`]. `None` on an encode +/// failure so the caller drops the frame (upstream skips a failed encode). +/// +/// JPEG carries no alpha channel and `image`'s `JpegEncoder` only accepts `L8` / +/// `Rgb8`, so the RGBA composite is flattened to RGB first. The compositor clears +/// to opaque black and produces a fully-composited frame, so dropping the (opaque) +/// alpha is lossless for the visible pixels — matching upstream, which composites +/// onto an opaque canvas before `encodeJPEG`. +fn encode_jpeg(frame: &DecodedFrame) -> Option> { + let rgb = rgba_to_rgb(&frame.rgba); + let mut bytes: Vec = Vec::new(); + let mut encoder = + image::codecs::jpeg::JpegEncoder::new_with_quality(&mut bytes, INSPECT_JPEG_QUALITY); + encoder + .encode( + &rgb, + frame.width, + frame.height, + image::ExtendedColorType::Rgb8, + ) + .ok()?; + Some(bytes) +} + +/// Drop the alpha channel from a tightly-packed RGBA buffer, yielding RGB. Used +/// to feed the alpha-less JPEG encoder. +fn rgba_to_rgb(rgba: &[u8]) -> Vec { + let mut rgb = Vec::with_capacity(rgba.len() / 4 * 3); + for px in rgba.chunks_exact(4) { + rgb.extend_from_slice(&px[..3]); + } + rgb +} + +/// Resolvable info for one media asset, projected from the manifest. +struct MediaInfo { + path: PathBuf, +} + +/// A text clip projected from the timeline, keyed by clip id. +struct TextInfo { + content: String, + style: TextStyle, + box_norm: (f64, f64, f64, f64), +} + +/// `SourceMetrics` backed by the media manifest (intrinsic size only). +struct ManifestMetrics { + sizes: HashMap, +} + +impl SourceMetrics for ManifestMetrics { + fn natural_size(&self, media_ref: &str) -> Option<(u32, u32)> { + self.sizes.get(media_ref).copied() + } +} + +/// `TextureResolver` that decodes a layer's pixels on demand via ffmpeg and +/// uploads them to the GPU. Mirrors the preview / export resolvers; the decode box +/// is the downscaled inspect render size. Lottie is skipped (returns `None`). +struct InspectResolver<'d> { + device: &'d opentake_render::wgpu::Device, + queue: &'d opentake_render::wgpu::Queue, + cache: TextureCache, + media: &'d HashMap, + timeline_fps: i32, + text: &'d HashMap, + text_rasterizer: &'d CosmicTextRasterizer, + render_box: (u32, u32), +} + +impl InspectResolver<'_> { + fn resolve_text(&mut self, clip_id: &str) -> Option> { + let key = format!("t:{clip_id}"); + if let Some(tex) = self.cache.get(&key) { + return Some(tex); + } + let info = self.text.get(clip_id)?; + let req = TextRasterRequest { + clip_id, + content: &info.content, + style: &info.style, + box_norm: info.box_norm, + canvas: self.render_box, + }; + let frame = self.text_rasterizer.rasterize(&req)?; + let tex = upload_rgba(self.device, self.queue, &frame, false, Some("inspect-text")); + Some(self.cache.insert(key, tex)) + } +} + +impl TextureResolver for InspectResolver<'_> { + fn resolve(&mut self, source: &TextureSource, source_frame: i64) -> Option> { + let (media_ref, key, is_image) = match source { + TextureSource::Decoded { media_ref } => { + (media_ref, format!("v:{media_ref}:{source_frame}"), false) + } + TextureSource::Image { media_ref } => (media_ref, format!("i:{media_ref}"), true), + TextureSource::Text { clip_id } => return self.resolve_text(clip_id), + TextureSource::Lottie { .. } => return None, + }; + + if let Some(tex) = self.cache.get(&key) { + return Some(tex); + } + + let info = self.media.get(media_ref)?; + let time_secs = if is_image { + 0.0 + } else { + project_frame_time_secs(source_frame, self.timeline_fps) + }; + + let req = FrameRequest { + time_secs, + max_size: self.render_box, + // Tight tolerance keeps each inspected frame on the exact target time + // (quality over the scrub-oriented wide tolerance the preview uses). + tolerance_secs: 0.0, + apply_rotation: true, + }; + let (_actual, frame) = decode_frame_at(&info.path, &req).ok()?; + let decoded = DecodedFrame::new(frame.width, frame.height, frame.rgba, false); + let tex = upload_rgba( + self.device, + self.queue, + &decoded, + false, + Some("inspect-src"), + ); + Some(self.cache.insert(key, tex)) + } +} + +/// Project the timeline's text clips (content + style + box) into the per-clip +/// lookup the resolver rasterizes from. Keyed by clip id. +fn project_text(timeline: &opentake_domain::Timeline) -> HashMap { + let mut text: HashMap = HashMap::new(); + for track in &timeline.tracks { + for clip in &track.clips { + if clip.media_type != ClipType::Text { + continue; + } + let (Some(content), Some(style)) = (&clip.text_content, &clip.text_style) else { + continue; + }; + let tl = clip.transform.top_left(); + text.insert( + clip.id.clone(), + TextInfo { + content: content.clone(), + style: style.clone(), + box_norm: (tl.x, tl.y, clip.transform.width, clip.transform.height), + }, + ); + } + } + text +} + +/// Project the media manifest into the render-side `(sizes, media)` lookups, +/// resolving project-relative paths against `project_dir`. +fn project_media( + manifest: &opentake_domain::MediaManifest, + project_dir: &Option, +) -> (HashMap, HashMap) { + let mut sizes: HashMap = HashMap::new(); + let mut media: HashMap = HashMap::new(); + for entry in &manifest.entries { + let path = match &entry.source { + MediaSource::External { absolute_path } => PathBuf::from(absolute_path), + MediaSource::Project { relative_path } => match project_dir { + Some(base) => base.join(relative_path), + None => continue, + }, + }; + if let (Some(w), Some(h)) = (entry.source_width, entry.source_height) { + if w > 0 && h > 0 { + sizes.insert(entry.id.clone(), (w as u32, h as u32)); + } + } + media.insert(entry.id.clone(), MediaInfo { path }); + } + (sizes, media) +} + +fn project_frame_time_secs(source_frame: i64, timeline_fps: i32) -> f64 { + let fps = if timeline_fps > 0 { + timeline_fps as f64 + } else { + 30.0 + }; + (source_frame.max(0) as f64) / fps +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fit_render_size_downscales_to_longest_edge_keeping_aspect() { + // 1920x1080, cap 512 → scale 512/1920 → 512x288 (even-ized). + let rs = fit_render_size(1920, 1080, 512); + assert_eq!(rs, RenderSize::new(512, 288)); + } + + #[test] + fn fit_render_size_never_upscales_under_cap() { + let rs = fit_render_size(320, 240, 512); + assert_eq!(rs, RenderSize::new(320, 240)); + } + + #[test] + fn fit_render_size_no_cap_just_evenizes() { + let rs = fit_render_size(1921, 1081, 0); + assert_eq!(rs, RenderSize::new(1920, 1080)); + } + + #[test] + fn clip_type_name_is_lowercase_raw_value() { + assert_eq!(clip_type_name(ClipType::Video), "video"); + assert_eq!(clip_type_name(ClipType::Audio), "audio"); + assert_eq!(clip_type_name(ClipType::Image), "image"); + } + + #[test] + fn short_uuid_is_eight_hex_chars() { + let s = short_uuid(); + assert_eq!(s.len(), 8); + assert!(s.chars().all(|c| c.is_ascii_hexdigit())); + } + + #[test] + fn rgba_to_rgb_drops_alpha_channel() { + // Two pixels: (1,2,3,255), (4,5,6,128) → RGB only. + let rgba = vec![1, 2, 3, 255, 4, 5, 6, 128]; + assert_eq!(rgba_to_rgb(&rgba), vec![1, 2, 3, 4, 5, 6]); + } + + #[test] + fn encode_jpeg_produces_jpeg_soi_marker() { + // 16x16 opaque RGBA composite → a valid JPEG (alpha flattened to RGB). + let frame = DecodedFrame::new(16, 16, vec![255u8; 16 * 16 * 4], false); + let bytes = encode_jpeg(&frame).expect("jpeg encodes"); + // JPEG files start with the SOI marker 0xFFD8. + assert_eq!(&bytes[..2], &[0xff, 0xd8]); + } + + #[test] + fn url_import_returns_structured_not_supported_error() { + // The bridge fully wires path + bytes; url reports a clear fallback message + // (path/bytes) rather than silently failing. Constructed without a GPU/core + // so this stays a pure unit assertion on the url arm. + let bridge = TauriMediaBridge::new( + AppCore::new(), + std::env::temp_dir().join("inspect-cache"), + std::env::temp_dir().join("inspect-models"), + ); + let err = bridge + .import_media( + ImportSource::Url { + url: "https://example.com/a.mp4".into(), + mime_type: None, + }, + None, + None, + ) + .unwrap_err(); + assert!(err.message.contains("not yet supported"), "{}", err.message); + assert!(err.message.contains("source.path"), "{}", err.message); + } + + #[test] + fn bytes_import_without_open_project_errors_after_valid_decode() { + // A fresh AppCore has no project dir; a valid base64 png payload with a + // known mime still can't be written (no bundle) — matches upstream's + // "No project is open" guard, and proves the mime + base64 checks passed. + let bridge = TauriMediaBridge::new( + AppCore::new(), + std::env::temp_dir().join("inspect-cache"), + std::env::temp_dir().join("inspect-models"), + ); + let b64 = base64::engine::general_purpose::STANDARD.encode([1u8, 2, 3, 4]); + let err = bridge + .import_from_bytes(&b64, "image/png", None, None) + .unwrap_err(); + assert!( + err.message.contains("No project is open"), + "{}", + err.message + ); + } + + #[test] + fn bytes_import_rejects_unknown_mime() { + let bridge = TauriMediaBridge::new( + AppCore::new(), + std::env::temp_dir().join("inspect-cache"), + std::env::temp_dir().join("inspect-models"), + ); + let err = bridge + .import_from_bytes("AAAA", "application/zip", None, None) + .unwrap_err(); + assert!( + err.message.contains("Unsupported mimeType"), + "{}", + err.message + ); + } + + // MARK: - ffmpeg + GPU gated end-to-end (mirrors the export integration skip + // discipline: auto-skip when ffmpeg is off PATH or no GPU adapter is present). + + use std::process::Command; + + /// True when ffmpeg is on PATH (fixture generation). + fn ffmpeg_ready() -> bool { + Command::new("ffmpeg") + .arg("-version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) + } + + /// Generate an `frames`-frame test video at `path`. Returns false (→ skip). + fn make_video(path: &Path, w: u32, h: u32, fps: u32, frames: u32) -> bool { + let dur = frames as f64 / fps as f64; + Command::new("ffmpeg") + .args([ + "-v", + "error", + "-f", + "lavfi", + "-i", + &format!("testsrc=duration={dur}:size={w}x{h}:rate={fps}"), + "-c:v", + "libx264", + "-pix_fmt", + "yuv420p", + "-y", + ]) + .arg(path) + .status() + .map(|s| s.success()) + .unwrap_or(false) + } + + fn external_entry( + id: &str, + path: &Path, + w: i32, + h: i32, + ) -> opentake_domain::MediaManifestEntry { + opentake_domain::MediaManifestEntry { + id: id.into(), + name: id.into(), + kind: ClipType::Video, + source: MediaSource::External { + absolute_path: path.to_string_lossy().into_owned(), + }, + duration: 2.0, + generation_input: None, + source_width: Some(w), + source_height: Some(h), + source_fps: Some(30.0), + has_audio: Some(false), + folder_id: None, + cached_remote_url: None, + cached_remote_url_expires_at: None, + } + } + + #[test] + fn inspect_timeline_composites_real_frames_when_gpu_available() { + if !ffmpeg_ready() { + eprintln!("skip: ffmpeg not available"); + return; + } + // A GPU adapter may be unavailable in CI/headless — skip, don't fail + // (same policy as the export integration test). + if opentake_render::RenderDevice::try_new().is_err() { + eprintln!("skip: no GPU adapter available"); + return; + } + + let tmp = tempfile::tempdir().unwrap(); + let video = tmp.path().join("clip.mp4"); + if !make_video(&video, 320, 240, 30, 30) { + eprintln!("skip: could not generate fixture media"); + return; + } + + // A 30-frame timeline over the fixture clip. + let mut timeline = opentake_domain::Timeline::new(); + timeline.width = 320; + timeline.height = 240; + timeline.fps = 30; + let mut track = opentake_domain::Track::new("track-1", ClipType::Video); + track + .clips + .push(opentake_domain::Clip::new("clip-1", "asset-1", 0, 30)); + timeline.tracks.push(track); + let mut manifest = opentake_domain::MediaManifest::new(); + manifest + .entries + .push(external_entry("asset-1", &video, 320, 240)); + + // Sample 3 frames across [0, 30) at the 512px cap. + let res = composite_frames_jpeg(&timeline, &manifest, &None, &[0, 10, 20], 512) + .expect("composite should succeed with a GPU + fixture"); + assert_eq!(res.frames.len(), 3); + // 320x240 is already under 512 → unscaled. + assert_eq!((res.width, res.height), (320, 240)); + for f in &res.frames { + assert_eq!(f.media_type, "image/jpeg"); + assert_eq!(&f.bytes[..2], &[0xff, 0xd8], "each frame is a JPEG"); + } + assert_eq!( + res.frames.iter().map(|f| f.frame).collect::>(), + vec![0, 10, 20] + ); + } + + #[test] + fn import_from_path_single_file_registers_asset_end_to_end() { + if !ffmpeg_ready() { + eprintln!("skip: ffmpeg not available"); + return; + } + let tmp = tempfile::tempdir().unwrap(); + let video = tmp.path().join("My Clip.mp4"); + if !make_video(&video, 160, 120, 30, 15) { + eprintln!("skip: could not generate fixture media"); + return; + } + // A single-file path import references the file in place (no saved bundle + // needed), through the same `import_one` the media panel uses. + let bridge = TauriMediaBridge::new( + AppCore::new(), + tmp.path().join("cache"), + tmp.path().join("models"), + ); + let out = bridge + .import_from_path(&video.to_string_lossy(), None, None) + .expect("single-file path import"); + assert!(out.message.contains("from path"), "{}", out.message); + assert!(out.message.contains("type: video"), "{}", out.message); + // The asset is now in the shared core's manifest, named by its stem. + let manifest = bridge.core.media(); + assert_eq!(manifest.entries.len(), 1); + assert_eq!(manifest.entries[0].name, "My Clip"); + assert_eq!(manifest.entries[0].kind, ClipType::Video); + } + + #[test] + fn import_from_path_missing_file_errors() { + let bridge = TauriMediaBridge::new( + AppCore::new(), + std::env::temp_dir().join("cache"), + std::env::temp_dir().join("models"), + ); + let err = bridge + .import_from_path("/no/such/file.mp4", None, None) + .unwrap_err(); + assert!(err.message.contains("File not found"), "{}", err.message); + } + + #[test] + fn import_from_path_unsupported_extension_errors() { + let tmp = tempfile::tempdir().unwrap(); + let doc = tmp.path().join("notes.txt"); + std::fs::write(&doc, b"x").unwrap(); + let bridge = TauriMediaBridge::new( + AppCore::new(), + tmp.path().join("cache"), + tmp.path().join("models"), + ); + let err = bridge + .import_from_path(&doc.to_string_lossy(), None, None) + .unwrap_err(); + assert!( + err.message.contains("Unsupported file extension"), + "{}", + err.message + ); + } +} diff --git a/src-tauri/src/media.rs b/src-tauri/src/media.rs index e0ed53a..1b587ed 100644 --- a/src-tauri/src/media.rs +++ b/src-tauri/src/media.rs @@ -571,6 +571,32 @@ fn display_file_name(path: &Path) -> String { .unwrap_or_default() } +/// Map a MIME type to the file extension the imported asset is written with. +/// 1:1 port of upstream `ToolExecutor+Import.fileExtension(forMime:)` — the +/// accepted set the agent's `import_media` (bytes / url override) validates +/// against. `json`/Lottie is intentionally excluded from the import white-list +/// downstream, but the mapping is kept for parity with upstream's table. +pub(crate) fn file_extension_for_mime(mime: &str) -> Option<&'static str> { + match mime.to_ascii_lowercase().as_str() { + "video/mp4" | "video/mpeg4" => Some("mp4"), + "video/quicktime" => Some("mov"), + "audio/mpeg" | "audio/mp3" => Some("mp3"), + "audio/wav" | "audio/x-wav" | "audio/wave" => Some("wav"), + "audio/aac" => Some("aac"), + "audio/mp4" | "audio/m4a" | "audio/x-m4a" => Some("m4a"), + "image/png" => Some("png"), + "image/jpeg" | "image/jpg" => Some("jpg"), + "image/tiff" => Some("tiff"), + "image/heic" | "image/heif" => Some("heic"), + _ => None, + } +} + +/// The accepted-MIME error line upstream raises for an unsupported `mimeType` +/// (`ToolExecutor+Import`). Centralized so bytes / url imports share the wording. +pub(crate) const IMPORT_ACCEPTED_MIMES: &str = + "Accepted: video/mp4, video/quicktime, audio/mpeg, audio/wav, audio/aac, audio/mp4, image/png, image/jpeg, image/tiff, image/heic."; + /// Import one file into the core, probing it first. Returns the created entry, or /// `None` when the extension is not importable (the file is skipped, not an /// error — matches upstream's per-file tolerance during folder/batch import). @@ -583,7 +609,11 @@ fn display_file_name(path: &Path) -> String { /// [`MediaItemDto`] already carries a `thumbnail` path. A decode failure is /// swallowed (the card falls back to a type placeholder, exactly as today) and /// never turns an import into an error. -fn import_one(core: &AppCore, engine: &MediaEngine, path: &Path) -> Option { +pub(crate) fn import_one( + core: &AppCore, + engine: &MediaEngine, + path: &Path, +) -> Option { importable_clip_type(path)?; let probe = probe_media(engine, path); // `import_media_file` re-validates the extension; the type check above only @@ -658,7 +688,7 @@ pub fn import_folder( /// recurse into subdirectories. Hidden entries (dot-prefixed) are skipped. Names /// of non-importable visible files are appended to `skipped` so the caller can /// toast them. -fn mirror_dir( +pub(crate) fn mirror_dir( core: &AppCore, engine: &MediaEngine, dir: &Path,