diff --git a/agent_core/__init__.py b/agent_core/__init__.py index 1d907f95..256dfd4b 100644 --- a/agent_core/__init__.py +++ b/agent_core/__init__.py @@ -31,6 +31,7 @@ ) from agent_core.core.embedding_interface import EmbeddingInterface from agent_core.core.vlm_interface import VLMInterface +from agent_core.core.image_gen_interface import ImageGenInterface from agent_core.core.database_interface import DatabaseInterface from agent_core.core.trigger import Trigger from agent_core.core.task import Task, TodoItem, TodoStatus @@ -272,6 +273,7 @@ # Interfaces "EmbeddingInterface", "VLMInterface", + "ImageGenInterface", "DatabaseInterface", "GeminiClient", "GeminiAPIError", diff --git a/agent_core/core/credentials/embedded_credentials.py b/agent_core/core/credentials/embedded_credentials.py index e6718dfc..a8309d41 100644 --- a/agent_core/core/credentials/embedded_credentials.py +++ b/agent_core/core/credentials/embedded_credentials.py @@ -55,6 +55,16 @@ "api_id": ["MzQyNDc4MTc="], "api_hash": ["N2Q5ZjkzN2ZkNzAzYTI0NTkyMDQzNGM2YjU5MDE4OGE="], }, + "hubspot": { + "client_id": [ + "YmM5ZGM4MzctMGJjMC00Y2Y3L", + "WFmY2EtYWVjYTY4YWNiMjhm", + ], + "client_secret": [ + "MzI5MzM4ZDktNzVkYi00YTg4L", + "Tg2ZmQtMDZhNzI2N2FjNmRk", + ], + }, } diff --git a/agent_core/core/image_gen_interface.py b/agent_core/core/image_gen_interface.py new file mode 100644 index 00000000..ea9a4fff --- /dev/null +++ b/agent_core/core/image_gen_interface.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +""" +Image generation interface. + +Re-exports ImageGenInterface from the impl module for backward compatibility. +Use the runtime-specific wrapper (app/image_gen_interface.py) when running +inside CraftBot — it injects the appropriate state and usage hooks. 
+""" + +from agent_core.core.impl.image_gen import ImageGenInterface + +__all__ = ["ImageGenInterface"] diff --git a/agent_core/core/impl/action/router.py b/agent_core/core/impl/action/router.py index 437b19a1..6961a217 100644 --- a/agent_core/core/impl/action/router.py +++ b/agent_core/core/impl/action/router.py @@ -1238,10 +1238,45 @@ def _validate_parallel_actions( # Cap at 10 actions actions = actions[:10] + dropped_actions = [] + + # A message that waits for a user reply keeps the task parked until the + # user responds — so ending the task in the same batch is contradictory. + # task_end tears down the session, which means the user's reply can never + # be routed back to the waiting task (it gets orphaned into a new session). + # Resolve the conflict in favour of waiting: drop task_end, keep the task + # alive. The agent should end the task only AFTER the user replies. + def _wants_reply(action_dict: Dict[str, Any]) -> bool: + v = (action_dict.get("parameters") or {}).get("wait_for_user_reply") + if isinstance(v, str): + return v.strip().lower() == "true" + return bool(v) + + waits_for_reply = any(_wants_reply(a) for a in actions) + if waits_for_reply and any(a.get("action_name") == "task_end" for a in actions): + kept = [] + for action_dict in actions: + if action_dict.get("action_name") == "task_end": + dropped_action = action_dict.copy() + dropped_action["_error"] = ( + "Action dropped: cannot end the task in the same step as a " + "message with wait_for_user_reply=true. The task must stay " + "active to receive the user's reply — call task_end only " + "after the user has responded." + ) + dropped_actions.append(dropped_action) + logger.warning( + "[PARALLEL] Dropping task_end paired with " + "wait_for_user_reply=true — keeping task parked so the " + "user's reply can be routed back to it." 
+ ) + else: + kept.append(action_dict) + actions = kept + # Check for non-parallelizable actions by looking up each action's parallelizable attribute # If found, we need to keep the non-parallelizable action (not just the first action) non_parallel_action = None - dropped_actions = [] for action_dict in actions: action_name = action_dict.get("action_name", "") if action_name: diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py index a0dac5f6..46962c55 100644 --- a/agent_core/core/impl/context/engine.py +++ b/agent_core/core/impl/context/engine.py @@ -184,6 +184,7 @@ def create_system_policy(self) -> str: def create_system_environmental_context(self) -> str: """Create a system message block with environmental context.""" import platform + from datetime import datetime try: from app.config import AGENT_WORKSPACE_ROOT @@ -191,7 +192,10 @@ def create_system_environmental_context(self) -> str: AGENT_WORKSPACE_ROOT = "." local_timezone = get_localzone() + now = datetime.now(local_timezone) + current_datetime = now.strftime("%Y-%m-%d %H:%M:%S") + f" ({local_timezone})" return ENVIRONMENTAL_CONTEXT_PROMPT.format( + current_datetime=current_datetime, user_location=local_timezone, working_directory=AGENT_WORKSPACE_ROOT, operating_system=platform.system(), diff --git a/agent_core/core/impl/image_gen/__init__.py b/agent_core/core/impl/image_gen/__init__.py new file mode 100644 index 00000000..0cd06833 --- /dev/null +++ b/agent_core/core/impl/image_gen/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +"""Image generation interface package.""" + +from agent_core.core.impl.image_gen.interface import ImageGenInterface + +__all__ = ["ImageGenInterface"] diff --git a/agent_core/core/impl/image_gen/interface.py b/agent_core/core/impl/image_gen/interface.py new file mode 100644 index 00000000..9db38b6f --- /dev/null +++ b/agent_core/core/impl/image_gen/interface.py @@ -0,0 +1,555 @@ +# -*- coding: utf-8 -*- +""" +Image generation interface 
for agent_core. + +Supports OpenAI (gpt-image-2) and Gemini (gemini-*-image-preview) providers. +Provider logic lives here; the action (generate_image.py) just delegates. + +Mirrors VLMInterface structure — constructor, hooks, dispatch. Unlike VLM there +is intentionally no in-place reinitialize(): provider switches build a fresh +instance via agent_base.reinitialize_image_gen() and swap it in only on success, +so an in-flight generate_image() keeps using its old instance/client until done. +""" + +from __future__ import annotations + +import asyncio +import base64 +import io +import os +import tempfile +import urllib.request as _urllib_request +from datetime import datetime +from typing import Any, Dict, List, Optional + +from agent_core.core.hooks import ( + GetTokenCountHook, + ReportUsageHook, + SetTokenCountHook, + UsageEventData, +) +from agent_core.utils.logger import logger + +try: + from PIL import Image as _PilImage # type: ignore[import] +except ImportError: + _PilImage = None # type: ignore[assignment] + +# OpenAI supports only three canvas sizes. Document the closest-fit mapping so +# callers understand the constraint at a glance. +# +# True 16:9 (1920×1080) and 9:16 are not available. The values below use the +# widest/tallest canvases OpenAI exposes; warn at runtime when the mismatch +# matters (resolution ≥ 2K or aspect ratio 16:9/9:16). 
+_OPENAI_ASPECT_MAP: Dict[str, str] = { + "1:1": "1024x1024", + "3:4": "1024x1536", + "4:3": "1536x1024", + "9:16": "1024x1536", # nearest fit — true 9:16 not supported + "16:9": "1536x1024", # nearest fit — true 16:9 not supported +} +_OPENAI_INEXACT_RATIOS = {"16:9", "9:16"} + +_OPENAI_QUALITY_MAP: Dict[str, str] = { + "1K": "medium", + "2K": "high", + "4K": "high", # API tops out at 1536px; warn caller +} + +# ── Error message catalog (provider-keyed, English) ────────────────────────── +# Used to build human-readable RuntimeErrors that flow back through the +# action-selection loop, matching VLMInterface's raise-don't-return pattern. +_ERR: Dict[str, Dict[str, str]] = { + "openai": { + "quota": "OpenAI API rate limit or quota exceeded", + "invalid_key": "Invalid OpenAI API key — verify your key in settings.", + "content_policy": "Request blocked by OpenAI content policy — modify your prompt.", + "model_not_found": ( + "OpenAI model not available — ensure your account has access to gpt-image-2." + ), + "generic": "OpenAI image generation failed", + }, + "gemini": { + "quota": "Gemini API rate limit or quota exceeded", + "invalid_key": "Invalid Gemini API key — verify your Google API key in settings.", + "content_policy": "Request blocked by Gemini safety filters — modify your prompt.", + "model_not_found": ( + "Gemini model not available — ensure your account has access to the " + "image generation preview model." 
+ ), + "generic": "Gemini image generation failed", + }, +} + + +def _classify_error(provider: str, exc: Exception) -> str: + """Map a raw exception message to a catalog entry for the given provider.""" + msg = str(exc).lower() + catalog = _ERR.get(provider, _ERR["openai"]) + if ( + "quota" in msg + or "rate" in msg + or "billing" in msg + or "insufficient_quota" in msg + ): + return catalog["quota"] + if "invalid" in msg and "key" in msg: + return catalog["invalid_key"] + if "content_policy" in msg or "safety" in msg or "blocked" in msg: + return catalog["content_policy"] + if "not found" in msg or "404" in msg or "not available" in msg: + return catalog["model_not_found"] + # Do NOT include the raw exception — SDK error messages can contain API key fragments. + return catalog["generic"] + + +# ── File-path helpers ───────────────────────────────────────────────────────── + + +def _build_save_path( + output_path: str, + timestamp: str, + index: int, + total: int, +) -> str: + if output_path: + if total > 1: + base, ext = os.path.splitext(output_path) + ext = ext or ".png" + return f"{base}_{index + 1}{ext}" + save_path = output_path + if not os.path.splitext(save_path)[1]: + save_path += ".png" + return save_path + return os.path.join( + tempfile.gettempdir(), f"generated_image_{timestamp}_{index + 1}.png" + ) + + +def _save_pil_image(pil_image: Any, save_path: str) -> None: + parent = os.path.dirname(os.path.abspath(save_path)) + if parent: + os.makedirs(parent, exist_ok=True) + pil_image.save(save_path, "PNG") + + +def _to_pil_image(img_data: Any) -> Any: + """Convert raw image data (bytes or base64 str) to a PIL Image.""" + if isinstance(img_data, str): + return _PilImage.open(io.BytesIO(base64.b64decode(img_data))) + if isinstance(img_data, bytes): + return _PilImage.open(io.BytesIO(img_data)) + return img_data + + +# ── Main interface ──────────────────────────────────────────────────────────── + + +class ImageGenInterface: + """Image generation interface 
class ImageGenInterface:
    """Image generation interface with multi-provider support.

    Supports OpenAI (gpt-image-2) and Gemini image generation models.
    Uses hooks for state access and usage reporting, mirroring VLMInterface.

    Args:
        provider: Provider name ("openai", "gemini").
        model: Model name override (None = use registry default).
        api_key: API key (required for openai/gemini).
        base_url: Base URL override (unused by image providers currently).
        deferred: If True, allow deferred initialization without raising.
        get_token_count: Hook to read current token count from state.
        set_token_count: Hook to write token count to state.
        report_usage: Optional hook to report usage for cost tracking.
    """

    def __init__(
        self,
        *,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        deferred: bool = False,
        get_token_count: Optional[GetTokenCountHook] = None,
        set_token_count: Optional[SetTokenCountHook] = None,
        report_usage: Optional[ReportUsageHook] = None,
    ) -> None:
        self.provider = provider
        self._initialized = False
        self._deferred = deferred
        self._init_api_key = api_key
        self._init_base_url = base_url

        self._get_token_count = get_token_count or (lambda: 0)
        self._set_token_count = set_token_count or (lambda x: None)
        self._report_usage = report_usage

        # Defer import to avoid circular dependency (same pattern as VLMInterface)
        from app.models.factory import ModelFactory
        from app.models.types import InterfaceType

        ctx = ModelFactory.create(
            provider=provider,
            interface=InterfaceType.IMAGE_GEN,
            model_override=model,
            api_key=api_key,
            base_url=base_url,
            deferred=deferred,
        )
        self.provider = ctx["provider"]
        self.model = ctx["model"]
        self.client = ctx["client"]  # OpenAI client or None
        self._gemini_client = ctx["gemini_client"]
        self._initialized = ctx.get("initialized", False)
        # Remember the loop current at construction time so usage reports made
        # later from worker threads can be scheduled back onto it.
        try:
            self._main_loop: Optional[asyncio.AbstractEventLoop] = (
                asyncio.get_event_loop()
            )
        except RuntimeError:
            self._main_loop = None

    @property
    def is_initialized(self) -> bool:
        """Whether the model factory reported a successful initialization."""
        return self._initialized

    def _report_usage_async(
        self,
        *,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cached_tokens: int = 0,
    ) -> None:
        """Report image-generation usage if the hook is set (mirrors VLM).

        Best-effort: when no event loop was captured at construction the
        report is dropped with a debug log, and any scheduling failure is
        logged as a warning rather than breaking generation.
        """
        if not self._report_usage:
            return
        try:
            event = UsageEventData(
                service_type="image_gen",
                provider=provider,
                model=model,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cached_tokens=cached_tokens,
            )
            if self._main_loop is not None:
                self._main_loop.call_soon_threadsafe(
                    lambda: asyncio.create_task(self._report_usage(event))
                )
            else:
                # Previously this case was completely silent.
                logger.debug(
                    "[IMAGE_GEN] No event loop captured; usage report dropped."
                )
        except Exception as e:
            logger.warning(f"[IMAGE_GEN] Failed to report usage: {e}")

    # ─────────────────────────── Public API ──────────────────────────────────

    def generate_image(
        self,
        prompt: str,
        resolution: str = "1K",
        aspect_ratio: str = "1:1",
        number_of_images: int = 1,
        output_path: str = "",
        negative_prompt: str = "",
        reference_images: Optional[List[str]] = None,
        safety_filter_level: str = "block_medium_and_above",
    ) -> List[str]:
        """Generate image(s) from a text prompt.

        Args:
            prompt: Text description of the image to generate.
            resolution: "1K", "2K", or "4K".
            aspect_ratio: "1:1", "3:4", "4:3", "9:16", or "16:9".
            number_of_images: Number of images to generate (1–4).
            output_path: Absolute path for the output file. Timestamped temp
                path used when empty. For multiple images the index is appended
                before the extension.
            negative_prompt: Elements to avoid (Gemini native; appended to
                prompt for OpenAI which has no dedicated parameter).
            reference_images: List of absolute paths to reference images.
                **Provider semantics differ**: Gemini treats these as style
                guidance; OpenAI's images.edit treats them as compositional /
                mask inputs. Warn users accordingly.
            safety_filter_level: Gemini safety threshold
                ("block_none", "block_only_high", "block_medium_and_above",
                "block_low_and_above"). Ignored by OpenAI.

        Returns:
            List of absolute file paths to the generated PNG files.

        Raises:
            ValueError: If ``prompt`` is empty.
            RuntimeError: If Pillow is missing, the provider is unsupported,
                or generation fails.
        """
        if _PilImage is None:
            raise RuntimeError(
                "Pillow is required for image generation. "
                "Install with: pip install Pillow"
            )

        if not prompt:
            raise ValueError("prompt is required")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        ref = reference_images or []

        if self.provider == "openai":
            paths = self._openai_generate(
                prompt=prompt,
                resolution=resolution,
                aspect_ratio=aspect_ratio,
                number_of_images=number_of_images,
                output_path=output_path,
                negative_prompt=negative_prompt,
                reference_images=ref,
                timestamp=timestamp,
            )
        elif self.provider == "gemini":
            paths = self._gemini_generate(
                prompt=prompt,
                resolution=resolution,
                aspect_ratio=aspect_ratio,
                number_of_images=number_of_images,
                output_path=output_path,
                negative_prompt=negative_prompt,
                reference_images=ref,
                safety_filter_level=safety_filter_level,
                timestamp=timestamp,
            )
        else:
            raise RuntimeError(
                f"Provider '{self.provider}' does not support image generation. "
                "Set image_gen_provider to 'openai' or 'gemini' in settings.json."
            )

        logger.info(
            f"[IMAGE_GEN] Generated {len(paths)} image(s) via {self.provider} "
            f"(model={self.model})"
        )
        return paths

    async def generate_image_async(self, **kwargs) -> List[str]:
        """Async wrapper — runs generate_image in a thread pool."""
        # Inside a coroutine the running loop is the correct one to use;
        # asyncio.get_event_loop() is deprecated for this purpose.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: self.generate_image(**kwargs))

    # ──────────────────────── OpenAI provider ────────────────────────────────

    def _openai_generate(
        self,
        *,
        prompt: str,
        resolution: str,
        aspect_ratio: str,
        number_of_images: int,
        output_path: str,
        negative_prompt: str,
        reference_images: List[str],
        timestamp: str,
    ) -> List[str]:
        """Generate images through the OpenAI client and save them as PNG.

        Uses ``images.edit`` when at least one reference image exists on
        disk, otherwise ``images.generate``.

        Returns:
            Paths of the saved files (at most ``number_of_images``).

        Raises:
            RuntimeError: If the API call fails, the returned payload cannot
                be downloaded/decoded, or no image data is returned.
        """
        size = _OPENAI_ASPECT_MAP.get(aspect_ratio, "1024x1024")
        if aspect_ratio in _OPENAI_INEXACT_RATIOS:
            logger.warning(
                f"[IMAGE_GEN] OpenAI does not support true {aspect_ratio}. "
                f"Using closest available canvas {size} (~3:2 / ~2:3)."
            )

        quality = _OPENAI_QUALITY_MAP.get(resolution, "medium")
        if resolution == "4K":
            logger.warning(
                "[IMAGE_GEN] OpenAI image generation tops out at 1536px; "
                "'4K' is mapped to quality='high' (no true 4K output)."
            )

        full_prompt = prompt
        if negative_prompt:
            full_prompt += f"\n\nAvoid: {negative_prompt}"

        if reference_images:
            logger.warning(
                "[IMAGE_GEN] OpenAI reference_images uses images.edit(), which treats "
                "inputs as compositional/mask data — not style guidance as in Gemini. "
                "Output may differ significantly from the Gemini provider."
            )

        try:
            valid_refs = [p for p in reference_images if os.path.isfile(p)]
            if valid_refs:
                # Open progressively into a list so a mid-loop failure still
                # closes the handles we already opened.
                image_files: List[Any] = []
                try:
                    for p in valid_refs:
                        image_files.append(open(p, "rb"))
                    response = self.client.images.edit(
                        model=self.model,
                        image=image_files,
                        prompt=full_prompt,
                        n=number_of_images,
                        size=size,
                        quality=quality,
                    )
                finally:
                    for f in image_files:
                        f.close()
            else:
                response = self.client.images.generate(
                    model=self.model,
                    prompt=full_prompt,
                    n=number_of_images,
                    size=size,
                    quality=quality,
                )
        except Exception as exc:
            raise RuntimeError(_classify_error("openai", exc)) from exc

        usage = getattr(response, "usage", None)
        if usage is not None:
            self._report_usage_async(
                provider="openai",
                model=self.model,
                input_tokens=getattr(usage, "input_tokens", 0) or 0,
                output_tokens=getattr(usage, "output_tokens", 0) or 0,
            )

        # Decoding / downloading can fail too (bad base64, network error).
        # Keep the documented contract by surfacing those as RuntimeError, and
        # keep the raw exception text out of the message.
        images_bytes: List[bytes] = []
        try:
            for item in response.data or []:
                if item.b64_json:
                    images_bytes.append(base64.b64decode(item.b64_json))
                elif item.url:
                    with _urllib_request.urlopen(item.url, timeout=30) as r:
                        images_bytes.append(r.read())
        except Exception as exc:
            raise RuntimeError(
                "OpenAI returned image data that could not be downloaded or decoded."
            ) from exc

        if not images_bytes:
            raise RuntimeError(
                "OpenAI returned no image data — try rephrasing your prompt."
            )

        # Name files by how many are actually saved, so a single requested
        # image lands exactly on output_path even if the API returned extras.
        selected = images_bytes[:number_of_images]
        paths: List[str] = []
        for i, raw in enumerate(selected):
            save_path = _build_save_path(output_path, timestamp, i, len(selected))
            pil_image = _PilImage.open(io.BytesIO(raw))
            _save_pil_image(pil_image, save_path)
            paths.append(save_path)

        return paths

    # ──────────────────────── Gemini provider ────────────────────────────────

    def _gemini_generate(
        self,
        *,
        prompt: str,
        resolution: str,
        aspect_ratio: str,
        number_of_images: int,
        output_path: str,
        negative_prompt: str,
        reference_images: List[str],
        safety_filter_level: str,
        timestamp: str,
    ) -> List[str]:
        """Generate images through the shared Gemini client and save them as PNG.

        Returns:
            Paths of the saved files (at most ``number_of_images``).

        Raises:
            RuntimeError: If no Gemini client is configured, the API call
                fails, the request is blocked, or no image data is returned.
        """
        if not self._gemini_client:
            raise RuntimeError(
                "Gemini API key is not configured. "
                "Set 'image_gen_provider' to 'openai' or add a Google API key in settings."
            )

        mime_by_ext = {
            ".png": "image/png",
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".gif": "image/gif",
            ".webp": "image/webp",
        }

        # Reference images as (bytes, mime) inline parts (style guidance)
        ref_parts: List[Any] = []
        for ref_path in reference_images:
            if not os.path.isfile(ref_path):
                continue
            try:
                with open(ref_path, "rb") as f:
                    data = f.read()
                ext = os.path.splitext(ref_path)[1].lower()
                mime = mime_by_ext.get(ext, "image/png")
                ref_parts.append((data, mime))
            except Exception as exc:
                logger.warning(
                    f"[IMAGE_GEN] Skipping unreadable reference image '{ref_path}': {exc}"
                )

        gen_prompt = (
            f"Generate an image based on the following description:\n\n{prompt}"
        )
        gen_prompt += f"\n\nImage specifications:\n- Resolution: {resolution}\n- Aspect ratio: {aspect_ratio}\n- Number of variations: {number_of_images}"
        if negative_prompt:
            gen_prompt += f"\n- Avoid: {negative_prompt}"

        _threshold_map = {
            "block_only_high": "BLOCK_ONLY_HIGH",
            "block_medium_and_above": "BLOCK_MEDIUM_AND_ABOVE",
            "block_low_and_above": "BLOCK_LOW_AND_ABOVE",
        }
        # "block_none" sends no safety settings at all; unknown levels fall
        # back to BLOCK_MEDIUM_AND_ABOVE.
        safety_settings = None
        if safety_filter_level != "block_none":
            threshold = _threshold_map.get(
                safety_filter_level, "BLOCK_MEDIUM_AND_ABOVE"
            )
            safety_settings = [
                {"category": cat, "threshold": threshold}
                for cat in (
                    "HARM_CATEGORY_HARASSMENT",
                    "HARM_CATEGORY_HATE_SPEECH",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "HARM_CATEGORY_DANGEROUS_CONTENT",
                )
            ]

        try:
            # One client for the whole Gemini surface: the shared REST
            # GeminiClient handles image generation too (no google-genai SDK —
            # see GeminiClient's module docstring for why it avoids the SDK).
            result = self._gemini_client.generate_image(
                self.model,
                prompt=gen_prompt,
                reference_images=ref_parts,
                image_size=resolution,
                safety_settings=safety_settings,
            )
        except Exception as exc:
            raise RuntimeError(_classify_error("gemini", exc)) from exc

        usage_md = result.get("usage_metadata") or {}
        if usage_md:
            self._report_usage_async(
                provider="gemini",
                model=self.model,
                input_tokens=usage_md.get("promptTokenCount", 0) or 0,
                output_tokens=usage_md.get("candidatesTokenCount", 0) or 0,
                cached_tokens=usage_md.get("cachedContentTokenCount", 0) or 0,
            )

        images_data = result.get("images") or []

        if not images_data:
            block_reason = result.get("block_reason")
            if block_reason:
                raise RuntimeError(
                    f"Gemini blocked the request (safety filter: {block_reason}). "
                    "Try modifying your prompt or adjusting safety_filter_level."
                )
            raise RuntimeError(
                "Gemini returned no image data — try rephrasing your prompt or "
                "check that your API key has access to image generation."
            )

        # Name files by how many are actually saved (see _openai_generate).
        selected = images_data[:number_of_images]
        paths: List[str] = []
        for i, img_data in enumerate(selected):
            save_path = _build_save_path(output_path, timestamp, i, len(selected))
            pil_image = _to_pil_image(img_data)
            _save_pil_image(pil_image, save_path)
            paths.append(save_path)

        return paths
for resume) based on terminal status # Chatserver hooks (WCA only) OnTaskCreatedChatserverHook = Callable[[Task], None] @@ -703,13 +705,16 @@ async def _end_task( if self._current_session_id == task.id: self._current_session_id = None - # Remove persisted session data (task + event stream) + # Hand the persisted session data to the consumer-specific hook. + # The hook receives the full task so it can decide between truly + # removing (e.g. WCA cleanup) and preserving (e.g. CraftBot's resume + # window, which writes the final event stream + keeps the rows). if self._on_task_remove_persist: try: - self._on_task_remove_persist(task.id) + self._on_task_remove_persist(task) except Exception as e: logger.warning( - f"[TaskManager] Failed to remove persisted task {task.id}: {e}" + f"[TaskManager] Task persistence finalize failed for {task.id}: {e}" ) # Clean up session-specific state (multi-task isolation) diff --git a/agent_core/core/impl/video_gen/__init__.py b/agent_core/core/impl/video_gen/__init__.py new file mode 100644 index 00000000..3fca283e --- /dev/null +++ b/agent_core/core/impl/video_gen/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +"""Video generation interface package.""" + +from agent_core.core.impl.video_gen.interface import VideoGenInterface + +__all__ = ["VideoGenInterface"] diff --git a/agent_core/core/impl/video_gen/interface.py b/agent_core/core/impl/video_gen/interface.py new file mode 100644 index 00000000..a049eb37 --- /dev/null +++ b/agent_core/core/impl/video_gen/interface.py @@ -0,0 +1,1198 @@ +# -*- coding: utf-8 -*- +""" +Video generation interface for agent_core. + +Supports OpenAI (Sora 2/Pro), Gemini (Veo 3.x), and BytePlus (Seedance). +All three providers expose async/long-running generation, so the public +``generate_video()`` blocks while internally submitting + polling + downloading. + +Mirrors VLMInterface / ImageGenInterface structure — constructor, hooks, +dispatch. 
Like image gen, there is intentionally no in-place reinitialize(); +provider switches build a fresh instance via agent_base.reinitialize_video_gen() +and swap it in only on success, so in-flight generate_video() calls keep their +old instance/client until done. + +The interface caps internal polling with ``poll_timeout_seconds`` (default +1500s = 25min, covering realistic generation times) but the outer +DEFAULT_ACTION_TIMEOUT (6000s) is the real ceiling. +""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import os +import tempfile +import time +import urllib.request as _urllib_request +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from agent_core.core.hooks import ( + GetTokenCountHook, + ReportUsageHook, + SetTokenCountHook, + UsageEventData, +) +from agent_core.utils.logger import logger + +# Default polling cadence: start tight to catch fast Seedance jobs, back off to +# avoid hammering for slow Veo jobs. +_DEFAULT_POLL_TIMEOUT = 1500 # 25 min — well under DEFAULT_ACTION_TIMEOUT (100 min) +_DEFAULT_POLL_INITIAL = 5 +_DEFAULT_POLL_MAX = 15 + +# OpenAI Sora 2 size mapping. Sora 2 accepts portrait/landscape 720p; Sora 2 Pro +# adds 1024p and 1080p. We pick the nearest-fit canvas for the requested +# aspect_ratio × resolution combination and warn when downgrading. +_SORA_SIZES: Dict[str, Dict[str, str]] = { + "sora-2": { + "16:9": "1280x720", + "9:16": "720x1280", + }, + "sora-2-pro": { + "16:9": "1792x1024", + "9:16": "1024x1792", + }, +} +_SORA_VALID_SECONDS = { + "sora-2": {4, 8, 12}, + "sora-2-pro": {10, 15, 25}, +} + +# Gemini Veo accepts integer durations. 1080p/4k and reference-images require 8. +_VEO_DURATION_VALID = {4, 6, 8} +_VEO_RESOLUTION_VALID = {"720p", "1080p", "4k"} +_VEO_ASPECT_VALID = {"16:9", "9:16"} +_VEO_PERSON_GEN_VALID = {"allow_all", "allow_adult", "dont_allow"} + +# BytePlus Seedance accepts duration as a string flag in the content text. 
+# Resolution / aspect_ratio / camera_fixed / watermark also go via the +# inline text flags per the public Seedance API. +_SEEDANCE_RESOLUTION_VALID = {"480p", "720p", "1080p"} +_SEEDANCE_ASPECT_VALID = {"16:9", "9:16", "1:1", "4:3", "3:4", "21:9"} + +_AUDIO_CAPABLE_PROVIDERS = {"gemini", "openai", "byteplus"} # all three honor it + + +# ── Error message catalog ──────────────────────────────────────────────────── +_ERR: Dict[str, Dict[str, str]] = { + "openai": { + "quota": "OpenAI API rate limit or quota exceeded", + "invalid_key": "Invalid OpenAI API key — verify your key in settings.", + "content_policy": "Request blocked by OpenAI content policy — modify your prompt.", + "model_not_found": ( + "OpenAI model not available — ensure your account has access to Sora." + ), + "timeout": "OpenAI Sora generation timed out while polling for completion.", + "generic": "OpenAI video generation failed", + }, + "gemini": { + "quota": "Gemini API rate limit or quota exceeded", + "invalid_key": "Invalid Gemini API key — verify your Google API key in settings.", + "content_policy": "Request blocked by Gemini safety filters — modify your prompt.", + "model_not_found": ( + "Gemini model not available — ensure your account has access to a Veo " + "video generation model." + ), + "timeout": "Gemini Veo generation timed out while polling for completion.", + "generic": "Gemini video generation failed", + }, + "byteplus": { + "quota": "BytePlus API rate limit or quota exceeded", + "invalid_key": "Invalid BytePlus API key — verify your key in settings.", + "content_policy": "Request blocked by BytePlus content policy — modify your prompt.", + "model_not_found": ( + "BytePlus model not available — ensure your account has access to a " + "Seedance video model on the configured region." 
+ ), + "timeout": "BytePlus Seedance generation timed out while polling for completion.", + "generic": "BytePlus video generation failed", + }, +} + + +def _extract_api_error(exc: Exception) -> Tuple[Optional[int], str]: + """Pull the HTTP status and the API's actual error message off an exception. + + Returns ``(status_code, api_message)``. Either may be ``None``/``""`` if + the exception isn't a ``requests.HTTPError`` or doesn't carry a JSON body. + + Why this matters: the bare ``str(exc)`` for an HTTPError is + ``"400 Client Error: Bad Request for url: https://...veo-3.1-generate-preview..."``. + Loose substring matching on that URL false-positives on everything from + ``rate`` (inside ``generate``) to ``content`` (inside any ``...content...`` + endpoint). Extracting the structured fields gives correct signal. + """ + status_code: Optional[int] = None + api_message = "" + resp = getattr(exc, "response", None) + if resp is not None: + try: + status_code = int(getattr(resp, "status_code", 0)) or None + except Exception: + status_code = None + try: + body = resp.json() + if isinstance(body, dict): + err = body.get("error") + if isinstance(err, dict): + api_message = str(err.get("message", "")).strip() + elif isinstance(err, str): + api_message = err.strip() + else: + api_message = str(body.get("message", "")).strip() + except Exception: + api_message = "" + return status_code, api_message + + +def _classify_error(provider: str, exc: Exception) -> str: + """Map a raw exception to a catalog entry for the given provider. + + Prefers the structured HTTP status + API error body over heuristic + substring matching on the exception's stringified form (which falsely + matched ``rate`` inside ``generate`` in the previous implementation). + The generic fallback surfaces the API's actual message so future bugs + aren't silently hidden behind a generic placeholder. 
+ """ + catalog = _ERR.get(provider, _ERR["openai"]) + status_code, api_message = _extract_api_error(exc) + + # Prefer the API's own error message; fall back to the exception string. + raw = api_message or str(exc) + msg = raw.lower() + + is_quota = ( + status_code == 429 + or "rate limit" in msg + or "ratelimit" in msg + or "rate_limit" in msg + or "quota" in msg + or "billing" in msg + or "insufficient_quota" in msg + ) + if is_quota: + return catalog["quota"] + + is_auth = ( + status_code in (401, 403) + or "api key" in msg + or "api_key" in msg + or "invalid_api_key" in msg + or "authentication" in msg + or "unauthorized" in msg + ) + if is_auth: + return catalog["invalid_key"] + + is_policy = ( + "content policy" in msg + or "content_policy" in msg + or "safety" in msg + or "blocked" in msg + ) + if is_policy: + return catalog["content_policy"] + + is_not_found = ( + status_code == 404 + or "not found" in msg + or "not available" in msg + or "does not exist" in msg + ) + if is_not_found: + return catalog["model_not_found"] + + if "timeout" in msg or "timed out" in msg: + return catalog["timeout"] + + # Generic fallback — include the API's actual message so misclassified + # 400s like the durationSeconds / numberOfVideos errors surface clearly + # instead of getting swallowed as "generation failed". The API message + # is server-emitted text (no header / URL leakage of key fragments). 
def _build_save_path(
    output_path: str,
    timestamp: str,
    index: int,
    total: int,
    suffix: str = ".mp4",
) -> str:
    """Return the destination path for video number ``index`` (0-based) of ``total``.

    * No ``output_path``: a timestamped file in the system temp directory.
    * ``total > 1``: ``_<index + 1>`` is inserted before the extension.
    * Otherwise ``output_path`` is used as given.

    In the last two cases ``suffix`` is appended when ``output_path`` has no
    extension of its own.
    """
    ordinal = index + 1
    if not output_path:
        filename = f"generated_video_{timestamp}_{ordinal}{suffix}"
        return os.path.join(tempfile.gettempdir(), filename)

    stem, extension = os.path.splitext(output_path)
    if total > 1:
        return f"{stem}_{ordinal}{extension or suffix}"
    return output_path if extension else output_path + suffix


def _write_bytes(path: str, data: bytes) -> None:
    """Write ``data`` to ``path``, creating missing parent directories first."""
    target_dir = os.path.dirname(os.path.abspath(path))
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(path, "wb") as sink:
        sink.write(data)


def _read_image_for_upload(image_path: str) -> Tuple[bytes, str]:
    """Load an image file and return ``(raw_bytes, mime_type)``.

    The MIME type is derived from the (case-insensitive) file extension;
    unrecognised extensions fall back to ``image/png``.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Reference image not found: {image_path}")

    mime_by_extension = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }
    _, extension = os.path.splitext(image_path)
    mime_type = mime_by_extension.get(extension.lower(), "image/png")

    with open(image_path, "rb") as handle:
        payload = handle.read()
    return payload, mime_type
+ + Args: + provider: Provider name ("openai", "gemini", "byteplus"). + model: Model name override (None = use registry default). + api_key: API key (required for all supported providers). + base_url: Base URL override (used for byteplus REST endpoint). + deferred: If True, allow deferred initialization without raising. + get_token_count: Hook to read current token count from state. + set_token_count: Hook to write token count to state. + report_usage: Optional hook to report usage for cost tracking. + """ + + def __init__( + self, + *, + provider: Optional[str] = None, + model: Optional[str] = None, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + deferred: bool = False, + get_token_count: Optional[GetTokenCountHook] = None, + set_token_count: Optional[SetTokenCountHook] = None, + report_usage: Optional[ReportUsageHook] = None, + ) -> None: + self.provider = provider + self._initialized = False + self._deferred = deferred + self._init_api_key = api_key + self._init_base_url = base_url + + self._get_token_count = get_token_count or (lambda: 0) + self._set_token_count = set_token_count or (lambda x: None) + self._report_usage = report_usage + + # Defer import to avoid circular dependency (same pattern as VLM/ImageGen) + from app.models.factory import ModelFactory + from app.models.types import InterfaceType + + ctx = ModelFactory.create( + provider=provider, + interface=InterfaceType.VIDEO_GEN, + model_override=model, + api_key=api_key, + base_url=base_url, + deferred=deferred, + ) + self.provider = ctx["provider"] + self.model = ctx["model"] + self.client = ctx["client"] # OpenAI client (Sora) or None + self._gemini_client = ctx["gemini_client"] # GeminiClient (Veo) or None + self._byteplus = ctx["byteplus"] # {"api_key", "base_url"} or None + self._initialized = ctx.get("initialized", False) + try: + self._main_loop: Optional[asyncio.AbstractEventLoop] = ( + asyncio.get_event_loop() + ) + except RuntimeError: + self._main_loop = None + + 
@property + def is_initialized(self) -> bool: + return self._initialized + + def _report_usage_async( + self, + *, + provider: str, + model: str, + input_tokens: int, + output_tokens: int, + cached_tokens: int = 0, + ) -> None: + """Report video-generation usage if the hook is set (mirrors VLM / ImageGen).""" + if not self._report_usage: + return + try: + event = UsageEventData( + service_type="video_gen", + provider=provider, + model=model, + input_tokens=input_tokens, + output_tokens=output_tokens, + cached_tokens=cached_tokens, + ) + if self._main_loop is not None: + self._main_loop.call_soon_threadsafe( + lambda: asyncio.create_task(self._report_usage(event)) + ) + except Exception as e: + logger.warning(f"[VIDEO_GEN] Failed to report usage: {e}") + + # ─────────────────────────── Public API ────────────────────────────────── + + def generate_video( + self, + prompt: str, + duration_seconds: int = 5, + aspect_ratio: str = "16:9", + resolution: str = "720p", + number_of_videos: int = 1, + output_path: str = "", + negative_prompt: str = "", + reference_image: Optional[str] = None, + last_frame: Optional[str] = None, + reference_images: Optional[List[str]] = None, + seed: Optional[int] = None, + with_audio: bool = True, + # Gemini / Veo specific + person_generation: str = "allow_adult", + # BytePlus / Seedance specific + camera_fixed: bool = False, + watermark: bool = False, + callback_url: str = "", + # Polling + poll_timeout_seconds: int = _DEFAULT_POLL_TIMEOUT, + ) -> List[str]: + """Generate video(s) from a text prompt (blocking). + + Submits the generation job, polls until completion, downloads the + result(s) to disk, and returns local file paths. + + Args: + prompt: Text description of the video to generate. + duration_seconds: Target duration in seconds. Each provider clamps + to its supported set (Sora 2: 4/8/12; Veo: 4/6/8; Seedance: 2–12). + aspect_ratio: "16:9", "9:16", "1:1", "4:3", "3:4", "21:9". Sora + and Veo only support 16:9 and 9:16. 
+ resolution: "480p" / "720p" / "1080p" / "4k". Sora 2 standard tops + out at 720p; Veo Lite cannot do 4k. + number_of_videos: Number of videos (1–N, provider-capped). + output_path: Absolute path for the output file. Timestamped temp + path used when empty. For multiple videos the index is + appended before the extension. + negative_prompt: Elements to avoid (native on Veo and Seedance; + appended to prompt for Sora which has no dedicated param). + reference_image: Optional absolute path to a single start-frame + image for image-to-video. Mapped to ``input_reference`` (Sora), + ``image`` (Veo), or ``image_urls`` (Seedance). + last_frame: Optional absolute path to an end-frame image for + frame interpolation. Veo 3.1+ only; silently dropped elsewhere. + reference_images: Optional list of absolute paths to additional + style-reference images. Veo 3.1+ accepts up to 3; silently + dropped on Sora and Seedance. + seed: Optional deterministic seed. + with_audio: Whether to generate audio. Honored where the + provider exposes a request-time toggle: Seedance's + ``generate_audio`` (2.0+ models). Veo 3.x produces native + synchronized audio by default and has no toggle — the flag + is ignored there (Veo 2 is silent). Sora 2 produces audio + by default and the flag is also a no-op. + person_generation: Veo only — "allow_all", "allow_adult", or + "dont_allow". ``allow_all`` is geo-restricted (EU/UK/CH/MENA). + camera_fixed: BytePlus Seedance only — lock camera position. + watermark: BytePlus Seedance only — apply watermark to output. + callback_url: BytePlus Seedance only — webhook for completion. + poll_timeout_seconds: Internal polling cap (default 1500s). + Always remains below DEFAULT_ACTION_TIMEOUT (6000s). + + Returns: + List of absolute file paths to the generated MP4 files. + + Raises: + RuntimeError: If the provider is unsupported, the job is blocked, + generation fails, or polling times out. 
+ """ + if not prompt: + raise ValueError("prompt is required") + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + refs = reference_images or [] + + if self.provider == "openai": + paths = self._openai_generate( + prompt=prompt, + duration_seconds=duration_seconds, + aspect_ratio=aspect_ratio, + resolution=resolution, + number_of_videos=number_of_videos, + output_path=output_path, + negative_prompt=negative_prompt, + reference_image=reference_image, + seed=seed, + with_audio=with_audio, + timestamp=timestamp, + poll_timeout_seconds=poll_timeout_seconds, + ) + elif self.provider == "gemini": + paths = self._gemini_generate( + prompt=prompt, + duration_seconds=duration_seconds, + aspect_ratio=aspect_ratio, + resolution=resolution, + number_of_videos=number_of_videos, + output_path=output_path, + negative_prompt=negative_prompt, + reference_image=reference_image, + last_frame=last_frame, + reference_images=refs, + seed=seed, + with_audio=with_audio, + person_generation=person_generation, + timestamp=timestamp, + poll_timeout_seconds=poll_timeout_seconds, + ) + elif self.provider == "byteplus": + paths = self._byteplus_generate( + prompt=prompt, + duration_seconds=duration_seconds, + aspect_ratio=aspect_ratio, + resolution=resolution, + number_of_videos=number_of_videos, + output_path=output_path, + negative_prompt=negative_prompt, + reference_image=reference_image, + seed=seed, + with_audio=with_audio, + camera_fixed=camera_fixed, + watermark=watermark, + callback_url=callback_url, + timestamp=timestamp, + poll_timeout_seconds=poll_timeout_seconds, + ) + else: + raise RuntimeError( + f"Provider '{self.provider}' does not support video generation. " + "Set video_gen_provider to 'gemini', 'openai', or 'byteplus' in settings.json." 
+ ) + + logger.info( + f"[VIDEO_GEN] Generated {len(paths)} video(s) via {self.provider} " + f"(model={self.model})" + ) + return paths + + async def generate_video_async(self, **kwargs) -> List[str]: + """Async wrapper — runs generate_video in a thread pool.""" + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, lambda: self.generate_video(**kwargs)) + + # ──────────────────────── OpenAI / Sora 2 ──────────────────────────────── + + def _openai_generate( + self, + *, + prompt: str, + duration_seconds: int, + aspect_ratio: str, + resolution: str, + number_of_videos: int, + output_path: str, + negative_prompt: str, + reference_image: Optional[str], + seed: Optional[int], + with_audio: bool, + timestamp: str, + poll_timeout_seconds: int, + ) -> List[str]: + # Sora 2 audio is on by default and not configurable per-request; the + # `with_audio=False` flag is silently honored as a no-op signal. + if not with_audio: + logger.info( + "[VIDEO_GEN] OpenAI Sora produces audio by default; " + "with_audio=False is a no-op." + ) + + # Map aspect_ratio + resolution to the closest Sora canvas. If the + # caller passes an unsupported aspect_ratio (Sora has only 16:9 / 9:16), + # default to 16:9 and warn. + size_map = _SORA_SIZES.get(self.model, _SORA_SIZES["sora-2"]) + if aspect_ratio not in size_map: + logger.warning( + f"[VIDEO_GEN] Sora does not support aspect_ratio={aspect_ratio}. " + "Defaulting to 16:9." + ) + aspect_ratio = "16:9" + size = size_map[aspect_ratio] + if resolution not in {"720p", "1024p", "1080p"}: + logger.warning( + f"[VIDEO_GEN] Sora ignores resolution={resolution}; using canvas {size} " + "from aspect_ratio + model tier." + ) + + # Clamp seconds to the model's supported set, picking the closest larger value. 
+ valid_seconds = _SORA_VALID_SECONDS.get(self.model, {4, 8, 12}) + if duration_seconds not in valid_seconds: + chosen = min( + (s for s in valid_seconds if s >= duration_seconds), default=None + ) + if chosen is None: + chosen = max(valid_seconds) + logger.warning( + f"[VIDEO_GEN] Sora ({self.model}) does not support {duration_seconds}s; " + f"using {chosen}s." + ) + duration_seconds = chosen + + # Sora has no dedicated negative_prompt; append to the prompt body. + full_prompt = prompt + if negative_prompt: + full_prompt += f"\n\nAvoid: {negative_prompt}" + + # Validate the reference image up-front; the SDK expects the file + # to be passed inline (bytes / IOBase / PathLike), NOT a pre-uploaded + # file id. A separate files.create() round-trip is not part of the + # Sora image-to-video contract. + if reference_image and not os.path.isfile(reference_image): + raise RuntimeError(f"Sora reference_image not found: {reference_image}") + + # Sora 2's `n` parameter for multiple videos is not yet exposed in the + # public API; we issue `number_of_videos` independent jobs and stitch + # them into the returned list. Each job is independent so errors on + # one don't kill the rest — we collect partial results and report. + per_job_timeout = poll_timeout_seconds // max(1, number_of_videos) + paths: List[str] = [] + first_error: Optional[Exception] = None + for i in range(max(1, int(number_of_videos))): + ref_handle = None + try: + create_kwargs: Dict[str, Any] = { + "model": self.model, + "prompt": full_prompt, + "size": size, + "seconds": duration_seconds, + } + if seed is not None: + create_kwargs["seed"] = seed + if reference_image: + # Open a fresh handle per job — the SDK consumes the + # stream during multipart upload. Closed in finally. + ref_handle = open(reference_image, "rb") + create_kwargs["input_reference"] = ref_handle + + # The OpenAI SDK exposes Sora under client.videos (added in the + # 2026 SDK release). 
Older SDKs may need a fallback to the raw + # HTTP endpoint via client.post. + video_obj = self.client.videos.create(**create_kwargs) + video_id = video_obj.id + + # Poll for completion. + final_obj = self._poll_openai_video(video_id, per_job_timeout) + mp4_bytes = self._download_openai_video(video_id) + + save_path = _build_save_path( + output_path, timestamp, len(paths), number_of_videos + ) + _write_bytes(save_path, mp4_bytes) + paths.append(save_path) + + # Best-effort usage reporting. + usage = getattr(final_obj, "usage", None) + if usage is not None: + self._report_usage_async( + provider="openai", + model=self.model, + input_tokens=getattr(usage, "input_tokens", 0) or 0, + output_tokens=getattr(usage, "output_tokens", 0) or 0, + ) + except Exception as exc: + if first_error is None: + first_error = exc + logger.warning( + f"[VIDEO_GEN] Sora job {i + 1}/{number_of_videos} failed: {exc}" + ) + finally: + if ref_handle is not None: + try: + ref_handle.close() + except Exception: + pass + + if not paths: + raise RuntimeError( + _classify_error("openai", first_error or RuntimeError("no result")) + ) + return paths + + def _poll_openai_video(self, video_id: str, poll_timeout_seconds: int) -> Any: + """Poll a Sora video job to completion. 
Returns the final video object.""" + deadline = time.monotonic() + poll_timeout_seconds + delay = _DEFAULT_POLL_INITIAL + while True: + try: + obj = self.client.videos.retrieve(video_id) + except Exception as exc: + raise RuntimeError(_classify_error("openai", exc)) from exc + + status = getattr(obj, "status", None) + if status == "completed": + return obj + if status in ("failed", "cancelled", "error"): + error_msg = getattr(obj, "error", None) or status + raise RuntimeError( + f"OpenAI Sora job ended with status={status}: {error_msg}" + ) + if status not in (None, "pending", "queued", "processing", "running"): + raise RuntimeError( + f"OpenAI Sora job {video_id} returned unexpected status: {status!r}" + ) + + logger.debug( + f"[VIDEO_GEN] Sora job {video_id} status={status!r}; retrying in {delay:.1f}s" + ) + if time.monotonic() >= deadline: + raise RuntimeError( + f"OpenAI Sora job {video_id} did not complete within " + f"{poll_timeout_seconds}s (last status: {status})." + ) + + time.sleep(delay) + delay = min(delay * 1.5, _DEFAULT_POLL_MAX) + + def _download_openai_video(self, video_id: str) -> bytes: + """Download the MP4 bytes for a completed Sora video.""" + try: + content = self.client.videos.download_content(video_id) + except Exception as exc: + raise RuntimeError(_classify_error("openai", exc)) from exc + + # The SDK may return bytes directly or an HTTPResponse-like object. 
+ if isinstance(content, bytes): + return content + if hasattr(content, "read"): + return content.read() + if hasattr(content, "content"): + return content.content + raise RuntimeError("OpenAI Sora download returned an unexpected payload type.") + + # ──────────────────────── Gemini / Veo ─────────────────────────────────── + + def _gemini_generate( + self, + *, + prompt: str, + duration_seconds: int, + aspect_ratio: str, + resolution: str, + number_of_videos: int, + output_path: str, + negative_prompt: str, + reference_image: Optional[str], + last_frame: Optional[str], + reference_images: List[str], + seed: Optional[int], + with_audio: bool, + person_generation: str, + timestamp: str, + poll_timeout_seconds: int, + ) -> List[str]: + if not self._gemini_client: + raise RuntimeError( + "Gemini API key is not configured. " + "Set 'video_gen_provider' to 'openai' or 'byteplus' or add " + "a Google API key in settings." + ) + + # Normalize aspect_ratio to Veo's allowed set. + if aspect_ratio not in _VEO_ASPECT_VALID: + logger.warning( + f"[VIDEO_GEN] Veo does not support aspect_ratio={aspect_ratio}. " + "Defaulting to 16:9." + ) + aspect_ratio = "16:9" + + if resolution not in _VEO_RESOLUTION_VALID: + logger.warning( + f"[VIDEO_GEN] Veo does not support resolution={resolution}. " + "Defaulting to 720p." + ) + resolution = "720p" + + # Veo durations are integers; 1080p/4k or any reference inputs force 8. + # Snap to the closest legal value. + forces_eight = ( + resolution in {"1080p", "4k"} + or reference_image + or last_frame + or reference_images + ) + if forces_eight: + duration_int = 8 + else: + duration_int = int(duration_seconds) + if duration_int not in _VEO_DURATION_VALID: + chosen = min( + (s for s in _VEO_DURATION_VALID if s >= duration_int), default=8 + ) + logger.warning( + f"[VIDEO_GEN] Veo does not support durationSeconds={duration_seconds}; " + f"using {chosen}s." 
+ ) + duration_int = chosen + + _is_image_to_video = bool(reference_image or last_frame or reference_images) + _valid_person_gen = ( + {"allow_adult", "dont_allow"} + if _is_image_to_video + else {"allow_all", "dont_allow"} + ) + if person_generation not in _valid_person_gen: + logger.warning( + f"[VIDEO_GEN] Veo " + f"{'image-to-video' if _is_image_to_video else 'text-to-video'} " + f"does not support person_generation={person_generation}; " + "omitting it so the model uses its default." + ) + person_generation = None + + # Audio on Veo 3.x is model-controlled, not a request-time toggle: + # `veo-3.1-generate-preview` (and likely siblings) rejects the + # `generateAudio` field outright with 400 INVALID_ARGUMENT, while + # the model produces synchronized audio by default anyway. Veo 2 + # is silent and also has no toggle. So we never forward the flag + # to the Gemini client — letting the model use its native default. + # When the caller asked to disable audio, log so the dropped intent + # is visible. + if not with_audio: + logger.info( + "[VIDEO_GEN] Veo audio is model-controlled and cannot be " + "disabled per-request; with_audio=False is ignored." + ) + + # Read the start frame / last frame / reference image bytes off disk. 
+ start_pair = None + last_pair = None + ref_pairs: List[Tuple[bytes, str]] = [] + if reference_image: + try: + start_pair = _read_image_for_upload(reference_image) + except Exception as exc: + logger.warning( + f"[VIDEO_GEN] Skipping unreadable start frame '{reference_image}': {exc}" + ) + if last_frame: + try: + last_pair = _read_image_for_upload(last_frame) + except Exception as exc: + logger.warning( + f"[VIDEO_GEN] Skipping unreadable last frame '{last_frame}': {exc}" + ) + for ref_path in reference_images[:3]: + try: + ref_pairs.append(_read_image_for_upload(ref_path)) + except Exception as exc: + logger.warning( + f"[VIDEO_GEN] Skipping unreadable reference frame '{ref_path}': {exc}" + ) + + try: + op = self._gemini_client.generate_video( + self.model, + prompt=prompt, + negative_prompt=negative_prompt or None, + image=start_pair, + last_frame=last_pair, + reference_images=ref_pairs or None, + aspect_ratio=aspect_ratio, + duration_seconds=duration_int, + resolution=resolution, + person_generation=person_generation, + number_of_videos=number_of_videos, + seed=seed, + # generate_audio intentionally omitted — see comment above. + ) + except Exception as exc: + raise RuntimeError(_classify_error("gemini", exc)) from exc + + operation_name = op.get("name") + if not operation_name: + raise RuntimeError( + "Gemini Veo did not return an operation name — cannot poll for result." + ) + + final = self._poll_gemini_operation(operation_name, poll_timeout_seconds) + + # Veo's response shape: response.generateVideoResponse.generatedSamples[].video.uri + response_body = final.get("response") or {} + gv = response_body.get("generateVideoResponse") or {} + samples = gv.get("generatedSamples") or [] + + if not samples: + block_reason = ( + gv.get("raiFilteredReason") + or response_body.get("blockReason") + or final.get("error", {}).get("message") + ) + if block_reason: + raise RuntimeError( + f"Gemini Veo blocked or returned no samples ({block_reason}). 
" + "Try modifying your prompt or adjusting person_generation." + ) + raise RuntimeError( + "Gemini Veo returned no video samples — try rephrasing your prompt " + "or check that your API key has access to a Veo model." + ) + + # Usage reporting (input/output token counts). + usage_md = final.get("metadata", {}).get("usageMetadata") or {} + if usage_md: + self._report_usage_async( + provider="gemini", + model=self.model, + input_tokens=usage_md.get("promptTokenCount", 0) or 0, + output_tokens=usage_md.get("candidatesTokenCount", 0) or 0, + cached_tokens=usage_md.get("cachedContentTokenCount", 0) or 0, + ) + + paths: List[str] = [] + for i, sample in enumerate(samples[:number_of_videos]): + video = sample.get("video") or {} + uri = video.get("uri") + inline = video.get("bytesBase64Encoded") + if uri: + try: + data = self._gemini_client.download_video(uri, timeout=180) + except Exception as exc: + raise RuntimeError(_classify_error("gemini", exc)) from exc + elif inline: + data = base64.b64decode(inline) + else: + logger.warning( + f"[VIDEO_GEN] Veo sample {i} had no uri or inline bytes — skipping." + ) + continue + + save_path = _build_save_path(output_path, timestamp, i, len(samples)) + _write_bytes(save_path, data) + paths.append(save_path) + + if not paths: + raise RuntimeError( + "Gemini Veo completed but produced no downloadable samples." + ) + return paths + + def _poll_gemini_operation( + self, operation_name: str, poll_timeout_seconds: int + ) -> Dict[str, Any]: + """Poll a Veo long-running operation until done. 
Returns final op JSON.""" + deadline = time.monotonic() + poll_timeout_seconds + delay = _DEFAULT_POLL_INITIAL + while True: + try: + op = self._gemini_client.poll_video_operation(operation_name) + except Exception as exc: + raise RuntimeError(_classify_error("gemini", exc)) from exc + + if op.get("done"): + err = op.get("error") + if err: + raise RuntimeError( + f"Gemini Veo operation failed: {err.get('message') or err}" + ) + return op + + if time.monotonic() >= deadline: + raise RuntimeError( + f"Gemini Veo operation {operation_name} did not complete within " + f"{poll_timeout_seconds}s." + ) + + time.sleep(delay) + delay = min(delay * 1.5, _DEFAULT_POLL_MAX) + + # ──────────────────────── BytePlus / Seedance ──────────────────────────── + + def _byteplus_generate( + self, + *, + prompt: str, + duration_seconds: int, + aspect_ratio: str, + resolution: str, + number_of_videos: int, + output_path: str, + negative_prompt: str, + reference_image: Optional[str], + seed: Optional[int], + with_audio: bool, + camera_fixed: bool, + watermark: bool, + callback_url: str, + timestamp: str, + poll_timeout_seconds: int, + ) -> List[str]: + if not self._byteplus: + raise RuntimeError( + "BytePlus API key is not configured. " + "Add a BytePlus key in settings or pick a different video provider." + ) + + api_key = self._byteplus["api_key"] + base_url = self._byteplus["base_url"].rstrip("/") + + # Validate Seedance enums; downshift if needed. + if aspect_ratio not in _SEEDANCE_ASPECT_VALID: + logger.warning( + f"[VIDEO_GEN] Seedance does not support aspect_ratio={aspect_ratio}. " + "Defaulting to 16:9." + ) + aspect_ratio = "16:9" + if resolution not in _SEEDANCE_RESOLUTION_VALID: + logger.warning( + f"[VIDEO_GEN] Seedance does not support resolution={resolution}. " + "Defaulting to 720p." + ) + resolution = "720p" + # Seedance accepts 2–12 seconds; clamp. 
+ duration_int = max(2, min(12, int(duration_seconds))) + if duration_int != duration_seconds: + logger.warning( + f"[VIDEO_GEN] Seedance clamped duration_seconds={duration_seconds} → {duration_int}." + ) + + # BytePlus ModelArk takes generation params as TOP-LEVEL fields next to + # `content[]`, not as inline `--flag` directives in the prompt text. + # (The `--flag` style is the legacy Volcengine 2024 form and is + # rejected by the international `ark.ap-southeast.bytepluses.com` + # endpoint.) Verified against the official Dreamina Seedance 2.0 + # tutorial sample. + text_block = prompt + if negative_prompt: + text_block += f"\n\nAvoid: {negative_prompt}" + + content_parts: List[Dict[str, Any]] = [{"type": "text", "text": text_block}] + if reference_image: + try: + if not os.path.isfile(reference_image): + raise FileNotFoundError(reference_image) + with open(reference_image, "rb") as f: + img_bytes = f.read() + ext = os.path.splitext(reference_image)[1].lower() + mime = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".webp": "image/webp", + }.get(ext, "image/jpeg") + data_url = ( + f"data:{mime};base64,{base64.b64encode(img_bytes).decode('utf-8')}" + ) + content_parts.append( + {"type": "image_url", "image_url": {"url": data_url}} + ) + except Exception as exc: + logger.warning( + f"[VIDEO_GEN] Skipping unreadable Seedance reference image " + f"'{reference_image}': {exc}" + ) + + body: Dict[str, Any] = { + "model": self.model, + "content": content_parts, + "ratio": aspect_ratio, + "resolution": resolution, + "duration": duration_int, + "generate_audio": bool(with_audio), + } + if seed is not None: + body["seed"] = int(seed) + if camera_fixed: + body["camera_fixed"] = True + if watermark: + body["watermark"] = True + if callback_url: + body["callback_url"] = callback_url + + # Number of videos: Seedance generates 1 per task; loop for N>1 so the + # user can request multiple variations from a single call. 
+ per_job_timeout = poll_timeout_seconds // max(1, number_of_videos) + paths: List[str] = [] + first_error: Optional[Exception] = None + for i in range(max(1, int(number_of_videos))): + try: + task_id = self._byteplus_submit(api_key, base_url, body) + video_url = self._byteplus_poll( + api_key, base_url, task_id, per_job_timeout + ) + mp4_bytes = _download_video_url(video_url) + save_path = _build_save_path( + output_path, timestamp, len(paths), number_of_videos + ) + _write_bytes(save_path, mp4_bytes) + paths.append(save_path) + except Exception as exc: + if first_error is None: + first_error = exc + logger.warning( + f"[VIDEO_GEN] Seedance job {i + 1}/{number_of_videos} failed: {exc}" + ) + + if not paths: + raise RuntimeError( + _classify_error("byteplus", first_error or RuntimeError("no result")) + ) + return paths + + def _byteplus_submit( + self, api_key: str, base_url: str, body: Dict[str, Any] + ) -> str: + """Submit a Seedance task and return its task id.""" + url = f"{base_url}/contents/generations/tasks" + try: + r = requests.post( + url, + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json=body, + timeout=60, + ) + except Exception as exc: + raise RuntimeError(_classify_error("byteplus", exc)) from exc + + if not r.ok: + try: + err_body = r.json() + logger.warning( + f"[VIDEO_GEN] BytePlus submit failed: status={r.status_code} body={err_body}" + ) + except Exception: + logger.warning( + f"[VIDEO_GEN] BytePlus submit failed: status={r.status_code} text={r.text[:1000]}" + ) + r.raise_for_status() + + data = r.json() + task_id = data.get("id") or data.get("task_id") + if not task_id: + raise RuntimeError( + "BytePlus Seedance did not return a task id. 
" + f"Response: {json.dumps(data)[:500]}" + ) + return str(task_id) + + def _byteplus_poll( + self, + api_key: str, + base_url: str, + task_id: str, + poll_timeout_seconds: int, + ) -> str: + """Poll a Seedance task until succeeded; return the video URL.""" + deadline = time.monotonic() + poll_timeout_seconds + delay = _DEFAULT_POLL_INITIAL + poll_url = f"{base_url}/contents/generations/tasks/{task_id}" + last_status = "unknown" + while True: + try: + r = requests.get( + poll_url, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) + r.raise_for_status() + except Exception as exc: + raise RuntimeError(_classify_error("byteplus", exc)) from exc + + data = r.json() + status = (data.get("status") or "").lower() + last_status = status or last_status + + if status == "succeeded": + content = data.get("content") or {} + # Seedance returns either content.video_url (current) or content.url (legacy). + video_url = content.get("video_url") or content.get("url") + if ( + not video_url + and isinstance(data.get("videos"), list) + and data["videos"] + ): + video_url = data["videos"][0].get("video_url") or data["videos"][ + 0 + ].get("url") + if not video_url: + raise RuntimeError( + "BytePlus Seedance reported succeeded but did not include a video_url." + ) + # Best-effort usage reporting. + usage = data.get("usage") or {} + if usage: + self._report_usage_async( + provider="byteplus", + model=self.model, + input_tokens=usage.get("prompt_tokens", 0) or 0, + output_tokens=usage.get("completion_tokens", 0) + or usage.get("output_tokens", 0) + or 0, + ) + return str(video_url) + + if status in ("failed", "cancelled"): + reason = data.get("error") or data.get("failure_reason") or status + raise RuntimeError( + f"BytePlus Seedance task {task_id} ended with status={status}: {reason}" + ) + + if time.monotonic() >= deadline: + raise RuntimeError( + f"BytePlus Seedance task {task_id} did not complete within " + f"{poll_timeout_seconds}s (last status: {last_status})." 
def _download_video_url(url: str, *, timeout: int = 180) -> bytes:
    """Fetch a video URL into memory with an explicit timeout.

    The URL comes from a provider response rather than from this codebase, so
    only ``http`` and ``https`` are accepted. ``urlopen`` would otherwise also
    honour schemes such as ``file:``, ``ftp:`` and ``data:``.

    Args:
        url: Absolute http(s) URL of the video.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The complete response body.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
        OSError: Propagated from ``urlopen`` on network/HTTP failures
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    scheme = urlsplit(url).scheme  # urlsplit lower-cases the scheme
    if scheme not in ("http", "https"):
        # The URL itself is left out of the message: download links may
        # embed signed credentials.
        raise ValueError(
            f"Refusing to download video from a non-HTTP(S) URL (scheme={scheme!r})."
        )
    with _urllib_request.urlopen(url, timeout=timeout) as r:
        return r.read()
+ + Returns: + Dict with: + - images: List[bytes] of decoded image data (may be empty) + - usage_metadata: Dict from the response's ``usageMetadata`` + - block_reason: Optional[str] when blocked by a safety/finish reason + """ + parts: List[Dict[str, Any]] = [] + for data, mime in reference_images or []: + parts.append( + { + "inlineData": { + "mimeType": mime or "image/png", + "data": base64.b64encode(data).decode("utf-8"), + } + } + ) + parts.append({"text": prompt}) + + generation_config: Dict[str, Any] = { + "responseModalities": ["TEXT", "IMAGE"], + "candidateCount": 1, + } + if image_size: + generation_config["imageConfig"] = {"imageSize": image_size} + + payload: Dict[str, Any] = { + "contents": [{"role": "user", "parts": parts}], + "generationConfig": generation_config, + } + if safety_settings: + payload["safetySettings"] = safety_settings + + response = self._post_json( + f"{_normalise_model_name(model)}:generateContent", payload + ) + + images: List[bytes] = [] + for candidate in response.get("candidates", []) or []: + content = candidate.get("content") or {} + for part in content.get("parts", []) or []: + inline = part.get("inlineData") if isinstance(part, dict) else None + if inline and str(inline.get("mimeType", "")).startswith("image/"): + try: + images.append(base64.b64decode(inline["data"])) + except Exception: + pass + + block_reason: Optional[str] = None + if not images: + feedback = response.get("promptFeedback") + if isinstance(feedback, dict) and feedback.get("blockReason"): + block_reason = str(feedback["blockReason"]) + else: + for candidate in response.get("candidates", []) or []: + fr = candidate.get("finishReason") + if fr and "SAFETY" in str(fr).upper(): + block_reason = str(fr) + break + + return { + "images": images, + "usage_metadata": response.get("usageMetadata", {}) or {}, + "block_reason": block_reason, + } + + # ------------------------------------------------------------------ + # Veo video generation (long-running operation) + 
# ------------------------------------------------------------------ + def generate_video( + self, + model: str, + *, + prompt: str, + negative_prompt: Optional[str] = None, + image: Optional[tuple] = None, + last_frame: Optional[tuple] = None, + reference_images: Optional[List[tuple]] = None, + aspect_ratio: Optional[str] = None, + duration_seconds: Optional[str] = None, + resolution: Optional[str] = None, + person_generation: Optional[str] = None, + number_of_videos: Optional[int] = None, + seed: Optional[int] = None, + generate_audio: Optional[bool] = None, + ) -> Dict[str, Any]: + """Kick off a Veo video generation as a long-running operation. + + Args: + model: Veo model identifier (e.g. ``veo-3.1-generate-preview``). + prompt: Text description of the video to generate. + negative_prompt: Optional text describing what to avoid. + image: Optional ``(bytes, mime_type)`` start-frame for image-to-video. + last_frame: Optional ``(bytes, mime_type)`` end-frame for frame + interpolation (Veo 3.1+). + reference_images: Optional list of ``(bytes, mime_type)`` reference + frames for style guidance (up to 3, Veo 3.1+). + aspect_ratio: ``"16:9"`` or ``"9:16"``. + duration_seconds: Duration as an INTEGER. Veo 3 accepts ``4`` / + ``6`` / ``8``; 1080p+/refs require ``8``. (Earlier docs showed + a string, but the live ``veo-3.1-generate-preview`` model + rejects strings with 400 INVALID_ARGUMENT.) + resolution: ``"720p"`` / ``"1080p"`` / ``"4k"`` (4k unavailable on Lite). + person_generation: ``"allow_all"`` / ``"allow_adult"`` / ``"dont_allow"``. + ``allow_all`` is geo-restricted (EU/UK/CH/MENA). + number_of_videos: Number of videos to generate (typically 1–4). + seed: Optional deterministic seed. + generate_audio: Whether to generate synchronized native audio. + Default behavior is provider-decided; set explicitly to control. + + Returns: + Dict with the long-running operation ``{"name": "operations/..."}``. 
+ Pass the returned name to :meth:`poll_video_operation` to wait for + completion. + """ + instance: Dict[str, Any] = {"prompt": prompt} + if image is not None: + data, mime = image + instance["image"] = { + "bytesBase64Encoded": base64.b64encode(data).decode("utf-8"), + "mimeType": mime or "image/png", + } + if last_frame is not None: + data, mime = last_frame + instance["lastFrame"] = { + "bytesBase64Encoded": base64.b64encode(data).decode("utf-8"), + "mimeType": mime or "image/png", + } + if reference_images: + instance["referenceImages"] = [ + { + "image": { + "bytesBase64Encoded": base64.b64encode(data).decode("utf-8"), + "mimeType": mime or "image/png", + } + } + for data, mime in reference_images + ] + + parameters: Dict[str, Any] = {} + if aspect_ratio is not None: + parameters["aspectRatio"] = aspect_ratio + if duration_seconds is not None: + # Veo expects an integer; passing a string yields + # 400 INVALID_ARGUMENT ("durationSeconds needs to be a number"). + parameters["durationSeconds"] = int(duration_seconds) + if resolution is not None: + parameters["resolution"] = resolution + if person_generation is not None: + parameters["personGeneration"] = person_generation + # numberOfVideos is rejected outright by some Veo variants (e.g. + # veo-3.1-generate-preview). Only include it when the caller asked + # for more than one — single-video requests omit the field. 
+ if number_of_videos is not None and number_of_videos > 1: + parameters["numberOfVideos"] = number_of_videos + if seed is not None: + parameters["seed"] = seed + if negative_prompt: + parameters["negativePrompt"] = negative_prompt + if generate_audio is not None: + parameters["generateAudio"] = generate_audio + + payload: Dict[str, Any] = {"instances": [instance]} + if parameters: + payload["parameters"] = parameters + + return self._post_json( + f"{_normalise_model_name(model)}:predictLongRunning", payload + ) + + def poll_video_operation(self, operation_name: str) -> Dict[str, Any]: + """Poll a Veo long-running operation. Returns the raw operation JSON. + + The caller should check ``response["done"]`` and, when true, extract + ``response["response"]["generateVideoResponse"]["generatedSamples"]`` + for the resulting video URIs. + """ + path = operation_name.lstrip("/") + if path.startswith(f"{self._api_version}/"): + path = path[len(f"{self._api_version}/") :] + url = self._endpoint(path) + response = requests.get( + url, + headers={"x-goog-api-key": self._api_key}, + timeout=self._timeout, + ) + if not response.ok: + try: + logger.warning( + f"[GEMINI ERROR] Veo poll status={response.status_code} body={response.json()}" + ) + except Exception: + logger.warning( + f"[GEMINI ERROR] Veo poll status={response.status_code} text={response.text[:1000]}" + ) + response.raise_for_status() + return response.json() + + def download_video(self, video_uri: str, *, timeout: int = 120) -> bytes: + """Download a Veo signed video URI as raw MP4 bytes. + + Veo returns a URI on the ``generativelanguage.googleapis.com`` host + that requires the API key. Sent via the ``x-goog-api-key`` header so + the key never appears in the URL — otherwise it would leak through + ``HTTPError`` messages (which include the request URL) and any + request/response logging. 
+ """ + response = requests.get( + video_uri, + headers={"x-goog-api-key": self._api_key}, + timeout=timeout, + stream=False, + ) + response.raise_for_status() + return response.content + # ------------------------------------------------------------------ # Internal helpers # ------------------------------------------------------------------ @@ -553,7 +791,7 @@ def _post_json(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]: """Send POST request and return JSON response.""" response = requests.post( self._endpoint(path), - params={"key": self._api_key}, + headers={"x-goog-api-key": self._api_key}, json=payload, timeout=self._timeout, ) diff --git a/agent_core/core/models/factory.py b/agent_core/core/models/factory.py index 531d60d9..a2476e18 100644 --- a/agent_core/core/models/factory.py +++ b/agent_core/core/models/factory.py @@ -127,7 +127,27 @@ def create( raise ValueError(f"Unsupported provider: {provider}") cfg = PROVIDER_CONFIG[provider] - model = model_override or MODEL_REGISTRY[provider][interface] + model = model_override or MODEL_REGISTRY[provider].get(interface) + if model is None: + if deferred: + return { + "provider": provider, + "model": None, + "client": None, + "gemini_client": None, + "remote_url": None, + "byteplus": None, + "anthropic_client": None, + "bedrock_client": None, + "initialized": False, + } + supported = ", ".join( + p for p, caps in MODEL_REGISTRY.items() if caps.get(interface) + ) + raise ValueError( + f"Provider '{provider}' does not support {interface.value}. 
" + f"Supported providers: {supported}" + ) # Use provided base_url or fall back to default resolved_base_url = base_url or cfg.default_base_url diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index 52bb37e9..1fbd7ac0 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -8,46 +8,67 @@ InterfaceType.LLM: "gpt-5.2-2025-12-11", InterfaceType.VLM: "gpt-5.2-2025-12-11", InterfaceType.EMBEDDING: "text-embedding-3-small", + InterfaceType.IMAGE_GEN: "gpt-image-2", + InterfaceType.VIDEO_GEN: "sora-2", }, "gemini": { InterfaceType.LLM: "gemini-2.5-pro", InterfaceType.VLM: "gemini-2.5-pro", InterfaceType.EMBEDDING: "text-embedding-004", + InterfaceType.IMAGE_GEN: "gemini-3-pro-image", + InterfaceType.VIDEO_GEN: "veo-3.1-generate-preview", }, "anthropic": { InterfaceType.LLM: "claude-sonnet-4-5-20250929", InterfaceType.VLM: "claude-sonnet-4-5-20250929", InterfaceType.EMBEDDING: None, # Anthropic does not provide native embedding models + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "byteplus": { InterfaceType.LLM: "seed-2-0-pro-260328", InterfaceType.VLM: "seed-2-0-pro-260328", InterfaceType.EMBEDDING: "skylark-embedding-vision-250615", + InterfaceType.IMAGE_GEN: None, + # BytePlus international (ap-southeast.bytepluses.com) model IDs use + # dated build suffixes, no dots, no `doubao-` prefix (`doubao-*` is + # the Volcengine China naming). Verified from BytePlus ModelArk docs. 
+ InterfaceType.VIDEO_GEN: "seedance-1-0-pro-fast-251015", }, "remote": { InterfaceType.LLM: "llama3.2:3b", InterfaceType.VLM: "llava:7b", InterfaceType.EMBEDDING: "nomic-embed-text", + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "minimax": { InterfaceType.LLM: "MiniMax-Text-01", InterfaceType.VLM: "MiniMax-VL-01", InterfaceType.EMBEDDING: None, + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "deepseek": { InterfaceType.LLM: "deepseek-chat", InterfaceType.VLM: None, InterfaceType.EMBEDDING: None, + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "moonshot": { InterfaceType.LLM: "kimi-k2.5", InterfaceType.VLM: "moonshot-v1-8k-vision-preview", InterfaceType.EMBEDDING: None, + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "grok": { InterfaceType.LLM: "grok-3", InterfaceType.VLM: "grok-4-0709", InterfaceType.EMBEDDING: None, + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "openrouter": { # OpenRouter slugs follow `/` format. 
Default to a Claude @@ -55,6 +76,8 @@ InterfaceType.LLM: "anthropic/claude-sonnet-4.5", InterfaceType.VLM: "anthropic/claude-sonnet-4.5", InterfaceType.EMBEDDING: None, + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, "bedrock": { # Default to Claude Haiku 4.5 — best price/performance on Bedrock with @@ -71,5 +94,7 @@ InterfaceType.LLM: "us.anthropic.claude-haiku-4-5-20251001-v1:0", InterfaceType.VLM: "us.anthropic.claude-haiku-4-5-20251001-v1:0", InterfaceType.EMBEDDING: "amazon.titan-embed-text-v2:0", + InterfaceType.IMAGE_GEN: None, + InterfaceType.VIDEO_GEN: None, }, } diff --git a/agent_core/core/models/types.py b/agent_core/core/models/types.py index 9f282e33..02ecf409 100644 --- a/agent_core/core/models/types.py +++ b/agent_core/core/models/types.py @@ -16,8 +16,12 @@ class InterfaceType(str, Enum): - LLM: Language model for text generation - VLM: Vision-language model for image understanding - EMBEDDING: Embedding model for vector representations + - IMAGE_GEN: Text-to-image generation + - VIDEO_GEN: Text-to-video / image-to-video generation """ LLM = "llm" VLM = "vlm" EMBEDDING = "embedding" + IMAGE_GEN = "image_gen" + VIDEO_GEN = "video_gen" diff --git a/agent_core/core/prompts/context.py b/agent_core/core/prompts/context.py index d9e1c1df..2d24e18d 100644 --- a/agent_core/core/prompts/context.py +++ b/agent_core/core/prompts/context.py @@ -193,6 +193,7 @@ ENVIRONMENTAL_CONTEXT_PROMPT = """ +- Current Date/Time: {current_datetime} - User Location: {user_location} - Current Working Directory: {working_directory} - Operating System: {operating_system} {os_version} ({os_platform}) diff --git a/app/agent_base.py b/app/agent_base.py index 4272c97b..0fb6bada 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -48,6 +48,8 @@ LINKEDIN_CLIENT_SECRET, NOTION_SHARED_CLIENT_ID, NOTION_SHARED_CLIENT_SECRET, + HUBSPOT_SHARED_CLIENT_ID, + HUBSPOT_SHARED_CLIENT_SECRET, SLACK_SHARED_CLIENT_ID, SLACK_SHARED_CLIENT_SECRET, 
TELEGRAM_SHARED_BOT_TOKEN, @@ -70,6 +72,8 @@ LLMConsecutiveFailureError, ) from app.vlm_interface import VLMInterface +from app.image_gen_interface import ImageGenInterface +from app.video_gen_interface import VideoGenInterface from app.database_interface import DatabaseInterface from app.logger import logger from agent_core import ( @@ -163,6 +167,8 @@ def __init__( llm_model: str | None = None, vlm_provider: str | None = None, vlm_model: str | None = None, + image_gen_provider: str | None = None, + image_gen_model: str | None = None, deferred_init: bool = False, ) -> None: """ @@ -179,6 +185,8 @@ def __init__( llm_model: Model name override (None = use registry default). vlm_provider: Provider name for VLM (defaults to llm_provider if None). vlm_model: VLM model name override (None = use registry default). + image_gen_provider: Provider name for image generation (openai or gemini). + image_gen_model: Image gen model override (None = use registry default). deferred_init: If True, allow LLM/VLM initialization to be deferred until API key is configured (useful for first-time setup). """ @@ -212,6 +220,35 @@ def __init__( deferred=deferred_init, ) + # Image generation uses its own provider/model settings + from app.config import get_image_gen_provider as _get_img_prov + + _img_provider = image_gen_provider or _get_img_prov() + _img_api_key = get_api_key(_img_provider) + self.image_gen = ImageGenInterface( + provider=_img_provider, + model=image_gen_model, + api_key=_img_api_key, + deferred=True, # always deferred — many users won't have an image-gen key + ) + + # Video generation uses its own provider/model settings (defaults to + # Gemini Veo since it's the strongest free-tier option). Always + # deferred — most users won't have a video-gen key configured. 
+ from app.config import ( + get_video_gen_provider as _get_vid_prov, + get_video_gen_model as _get_vid_model, + ) + + _vid_provider = _get_vid_prov() + _vid_api_key = get_api_key(_vid_provider) + self.video_gen = VideoGenInterface( + provider=_vid_provider, + model=_get_vid_model(), + api_key=_vid_api_key, + deferred=True, + ) + self.event_stream_manager = EventStreamManager( self.llm, agent_file_system_path=AGENT_FILE_SYSTEM_PATH, @@ -308,6 +345,8 @@ def __init__( self.task_manager, self.state_manager, vlm_interface=self.vlm, + image_gen_interface=self.image_gen, + video_gen_interface=self.video_gen, memory_manager=self.memory_manager, context_engine=self.context_engine, ) @@ -411,6 +450,20 @@ async def react(self, trigger: Trigger) -> None: try: logger.debug("[REACT] starting...") + # ----- WORKFLOW 0: Consolidated restart notice (issue #280) ----- + # Recorded here, inside the running agent loop, so it reaches the UI + # (a boot-time record would be marked "seen" before the UI watcher + # starts). No LLM involved — just emit the prebuilt message. + if self._is_restart_notice_trigger(trigger): + message = trigger.payload.get("message", "") + if message: + self.state_manager.record_agent_message(message) + # Drop the sentinel session from active tracking since we return + # before the normal session cleanup runs. 
+ if trigger.session_id: + self.triggers.mark_session_inactive(trigger.session_id) + return + # ----- WORKFLOW 1A: Memory Processing ----- if self._is_memory_trigger(trigger): task_created = await self._handle_memory_workflow(trigger) @@ -769,6 +822,10 @@ def _is_proactive_trigger(self, trigger: Trigger) -> bool: trigger_type = trigger.payload.get("type", "") return trigger_type in ("proactive_heartbeat", "proactive_planner") + def _is_restart_notice_trigger(self, trigger: Trigger) -> bool: + """Check if trigger is the consolidated post-restart notice (issue #280).""" + return trigger.payload.get("type") == "restart_notice" + def _is_gui_task_mode(self, session_id: str | None = None) -> bool: """Check if in GUI task execution mode.""" return ( @@ -1356,6 +1413,9 @@ async def _finalize_action_execution( task.waiting_for_user_reply = wait_for_reply if wait_for_reply: logger.info(f"[TASK] Task {task_id} is now waiting for user reply") + # Persist immediately so a restart can't restore a stale flag and + # resume a waiting task in the background (issue #281). + self._persist_task_state(task) # Check if parallel actions created multiple tasks parallel_results = action_output.get("parallel_results") @@ -1641,6 +1701,8 @@ async def _pause_task_for_limit_choice(self, session_id: str) -> None: task = self.task_manager.tasks.get(session_id) if self.task_manager else None if task: task.waiting_for_user_reply = True + # Persist immediately (issue #281) so a restart keeps this paused. + self._persist_task_state(task) # Update UI task status to "paused" - directly await to ensure # the WebSocket broadcast completes before the react loop cleans up. 
@@ -1703,6 +1765,7 @@ async def handle_limit_continue(self, session_id: str) -> None: # Clear waiting flag task.waiting_for_user_reply = False + self._persist_task_state(task) # Log to event stream as system message task_label = f' for task "{task.name}"' if task.name else "" @@ -2230,6 +2293,19 @@ async def _fire_session( logger.info( f"[TASK] Task {session_id} no longer waiting for user reply" ) + # Persist the cleared flag (issue #281) so a restart resumes + # this now-active task instead of leaving it stuck waiting. + self._persist_task_state(task) + # Dismiss any mirrored question on the Living UI creation + # screen now that the reply has landed — whether it was + # answered in the on-screen box or in chat (no-op unless this + # is a Living UI creation task). + try: + from app.living_ui import broadcast_living_ui_question + + await broadcast_living_ui_question(session_id, "") + except Exception: + pass if platform and task.source_platform != platform: logger.info( f"[TASK] Task {session_id} source_platform switched " @@ -2998,6 +3074,78 @@ def reinitialize_llm(self, provider: str | None = None) -> bool: ) return llm_ok and vlm_ok + def reinitialize_image_gen(self, provider: str | None = None) -> bool: + """Reinitialize the image generation interface with updated configuration. + + Creates a fresh ImageGenInterface instance rather than mutating the + existing one, so any in-flight action that holds a reference to the + old instance completes cleanly against the old provider/client. + + Args: + provider: Optional provider to switch to. If None, reads from settings. + + Returns: + True if reinitialization was successful. 
+ """ + from app.config import get_image_gen_provider, get_api_key, get_image_gen_model + from app.image_gen_interface import ImageGenInterface + from app.internal_action_interface import InternalActionInterface + + target_provider = provider or get_image_gen_provider() + api_key = get_api_key(target_provider) + model = get_image_gen_model() + + new_interface = ImageGenInterface( + provider=target_provider, + model=model, + api_key=api_key, + deferred=False, + ) + ok = new_interface.is_initialized + if ok: + self.image_gen = new_interface + InternalActionInterface.image_gen_interface = new_interface + logger.info( + f"[AGENT] Image gen reinitialized: provider={target_provider}, success={ok}" + ) + return ok + + def reinitialize_video_gen(self, provider: str | None = None) -> bool: + """Reinitialize the video generation interface with updated configuration. + + Creates a fresh VideoGenInterface instance rather than mutating the + existing one, so any in-flight action that holds a reference to the + old instance completes cleanly against the old provider/client. + + Args: + provider: Optional provider to switch to. If None, reads from settings. + + Returns: + True if reinitialization was successful. 
+ """ + from app.config import get_video_gen_provider, get_api_key, get_video_gen_model + from app.video_gen_interface import VideoGenInterface + from app.internal_action_interface import InternalActionInterface + + target_provider = provider or get_video_gen_provider() + api_key = get_api_key(target_provider) + model = get_video_gen_model() + + new_interface = VideoGenInterface( + provider=target_provider, + model=model, + api_key=api_key, + deferred=False, + ) + ok = new_interface.is_initialized + if ok: + self.video_gen = new_interface + InternalActionInterface.video_gen_interface = new_interface + logger.info( + f"[AGENT] Video gen reinitialized: provider={target_provider}, success={ok}" + ) + return ok + @property def is_llm_initialized(self) -> bool: """Check if the LLM interface is properly initialized.""" @@ -3253,6 +3401,29 @@ def _persist_all_sessions(self) -> None: except Exception as e: logger.warning(f"[PERSIST] Session persistence failed: {e}") + def _persist_task_state(self, task) -> None: + """Persist a single task's state to SessionStorage immediately. + + Called whenever a task's ``waiting_for_user_reply`` flag changes. The + flag otherwise only reaches disk via the next task-manager persist hook + or the graceful-shutdown pass — so a waiting task that goes idle (no + further task events) keeps a stale ``False`` on disk. If the app is then + force-quit before graceful shutdown, a restart restores the task as + not-waiting and resumes it in the background. Persisting on every flag + change keeps the on-disk state authoritative. See issue #281. + """ + if not task: + return + try: + from app.usage.session_storage import get_session_storage + + get_session_storage().persist_task(task) + except Exception as e: + logger.warning( + f"[PERSIST] Failed to persist waiting state for task " + f"{getattr(task, 'id', '?')}: {e}" + ) + async def _schedule_restored_task_triggers(self) -> None: """ Schedule triggers for tasks restored from the previous session. 
@@ -3263,6 +3434,59 @@ async def _schedule_restored_task_triggers(self) -> None: if not hasattr(self, "_restored_task_ids") or not self._restored_task_ids: return + # Consolidated restart notice (issue #280): previously every resumed + # task fired its own react cycle and the LLM sent a per-task + # "I'm resuming X" acknowledgement — 10 tasks meant 10 messages. Send + # ONE message, not tied to any task, summarising what's being restored. + # The per-task resume triggers below are told to continue *silently* so + # they don't each re-acknowledge. + restored_running = [ + task + for tid in self._restored_task_ids + if (task := self.task_manager.tasks.get(tid)) and task.status == "running" + ] + if restored_running: + resuming = [t for t in restored_running if not t.waiting_for_user_reply] + waiting = [t for t in restored_running if t.waiting_for_user_reply] + lines = ["I've restarted and am restoring your in-progress tasks."] + if resuming: + lines.append("") + lines.append(f"Resuming ({len(resuming)}):") + lines.extend(f" • {t.name}" for t in resuming) + if waiting: + lines.append("") + lines.append(f"Waiting for your reply ({len(waiting)}):") + lines.extend(f" • {t.name}" for t in waiting) + # Enqueue the notice as a high-priority trigger rather than + # recording it directly here. This method runs inside boot(), before + # the UI's event watcher starts — anything recorded now is marked + # "seen" during the watcher's startup pass and never reaches the UI. + # Routing it through a trigger means react() records it inside the + # running agent loop, after the watcher is live, so it surfaces in + # the interface just like the resumed tasks' own messages. + try: + await self.triggers.put( + Trigger( + fire_at=time.time(), + priority=1, # ahead of resumed tasks (priority 5/7) + next_action_description="Restart notice", + # Sentinel id so the heap never merges this with another + # session-less trigger (e.g. memory-at-startup) and + # clobbers the payload. 
+ session_id="__restart_notice__", + payload={ + "type": "restart_notice", + "message": "\n".join(lines), + "gui_mode": STATE.gui_mode, + }, + ), + skip_merge=True, + ) + except Exception as e: + logger.warning( + f"[RESTORE] Failed to enqueue consolidated restart notice: {e}" + ) + for task_id in self._restored_task_ids: task = self.task_manager.tasks.get(task_id) if not task or task.status != "running": @@ -3295,7 +3519,15 @@ async def _schedule_restored_task_triggers(self) -> None: Trigger( fire_at=time.time(), priority=restore_priority, - next_action_description=("Resume task after agent restart"), + next_action_description=( + "Resume this task after an app restart. A " + "consolidated restart notice has already been " + "sent to the user, so do NOT send any " + "'resuming', acknowledgement, or greeting " + "message. Silently continue the task from where " + "it left off based on its todos and recent " + "event-stream activity." + ), session_id=task_id, payload={"gui_mode": STATE.gui_mode}, ), @@ -3484,6 +3716,9 @@ async def _initialize_external_libraries(self) -> None: # Notion (only used by the `invite` OAuth path; raw-token login needs nothing) "NOTION_SHARED_CLIENT_ID": NOTION_SHARED_CLIENT_ID, "NOTION_SHARED_CLIENT_SECRET": NOTION_SHARED_CLIENT_SECRET, + # HubSpot (only used by the `invite` OAuth path; Private App token login needs nothing) + "HUBSPOT_SHARED_CLIENT_ID": HUBSPOT_SHARED_CLIENT_ID, + "HUBSPOT_SHARED_CLIENT_SECRET": HUBSPOT_SHARED_CLIENT_SECRET, # Slack (only used by the `invite` OAuth path) "SLACK_SHARED_CLIENT_ID": SLACK_SHARED_CLIENT_ID, "SLACK_SHARED_CLIENT_SECRET": SLACK_SHARED_CLIENT_SECRET, diff --git a/app/config.py b/app/config.py index 3128bce6..2f954952 100644 --- a/app/config.py +++ b/app/config.py @@ -91,8 +91,12 @@ def _get_default_settings() -> Dict[str, Any]: "model": { "llm_provider": "anthropic", "vlm_provider": "anthropic", + "image_gen_provider": "openai", + "video_gen_provider": "gemini", "llm_model": None, "vlm_model": 
None, + "image_gen_model": None, + "video_gen_model": None, "slow_mode": False, "slow_mode_tpm_limit": 30000, }, @@ -215,6 +219,38 @@ def get_vlm_model() -> Optional[str]: return settings.get("model", {}).get("vlm_model") +def get_image_gen_provider() -> str: + """Get configured image generation provider.""" + settings = get_settings() + model = settings.get("model", {}) + return model.get("image_gen_provider") or model.get("vlm_provider", "openai") + + +def get_image_gen_model() -> Optional[str]: + """Get configured image generation model override (or None for default).""" + settings = get_settings() + return settings.get("model", {}).get("image_gen_model") + + +def get_video_gen_provider() -> str: + """Get configured video generation provider. + + Falls back to the image-gen provider, then to a sensible default + ('gemini' since Veo is the strongest free-tier video model). + """ + settings = get_settings() + model = settings.get("model", {}) + return ( + model.get("video_gen_provider") or model.get("image_gen_provider") or "gemini" + ) + + +def get_video_gen_model() -> Optional[str]: + """Get configured video generation model override (or None for default).""" + settings = get_settings() + return settings.get("model", {}).get("video_gen_model") + + def get_api_key(provider: str) -> str: """Get API key for a provider. 
@@ -467,3 +503,11 @@ def detect_and_save_os_language() -> str: NOTION_SHARED_CLIENT_SECRET: str = get_credential( "notion", "client_secret", "NOTION_SHARED_CLIENT_SECRET" ) + +# HubSpot (requires both client_id and client_secret - no PKCE support) +HUBSPOT_SHARED_CLIENT_ID: str = get_credential( + "hubspot", "client_id", "HUBSPOT_SHARED_CLIENT_ID" +) +HUBSPOT_SHARED_CLIENT_SECRET: str = get_credential( + "hubspot", "client_secret", "HUBSPOT_SHARED_CLIENT_SECRET" +) diff --git a/app/config/settings.json b/app/config/settings.json index 7f409a5e..00dc42fb 100644 --- a/app/config/settings.json +++ b/app/config/settings.json @@ -1,5 +1,5 @@ { - "version": "1.3.2", + "version": "1.3.3", "general": { "agent_name": "CraftBot", "os_language": "en" diff --git a/app/data/action/create_pdf.py b/app/data/action/create_pdf.py index 1045c2b2..04eba416 100644 --- a/app/data/action/create_pdf.py +++ b/app/data/action/create_pdf.py @@ -8,9 +8,8 @@ "Supports headings (# to #####), paragraphs, bullet and numbered lists, " "bold, italic, inline code, fenced code blocks, tables, strikethrough, " "blockquotes, and horizontal rules. " - "The first # heading is rendered as a gradient banner header. " - "Available themes: default (indigo), corporate (blue), minimal (grey), " - "warm (amber), forest (green). " + "The first # heading is rendered as a banner header. " + "Colours, typography, and margins are read from FORMAT.md at render time. " "Use absolute paths only." ), mode="CLI", @@ -39,19 +38,6 @@ "and ~~strikethrough~~." ), }, - "theme": { - "type": "string", - "example": "default", - "description": ( - "Visual colour theme. One of: " - "default (indigo) — general use; " - "corporate (blue) — business, finance, formal reports; " - "minimal (grey) — academic, technical, low-decoration; " - "warm (amber) — creative, personal, informal; " - "forest (green) — sustainability, nature, environmental. " - "Defaults to 'default'." 
- ), - }, "subtitle": { "type": "string", "example": "Confidential - Internal Use Only", @@ -89,10 +75,11 @@ }, "theme_used": { "type": "string", - "example": "corporate", + "example": "format_md", "description": ( - "The theme that was applied. Useful for downstream actions " - "(e.g. edit_pdf) that need to match colours to the document style." + "Always 'format_md'. Styling is derived from FORMAT.md " + "(accent=#FF4F18, base=#141517, muted=#6B6E76). " + "Useful for downstream actions (e.g. edit_pdf) that need to match colours." ), }, "message": { @@ -116,7 +103,6 @@ def create_pdf_file(input_data: dict) -> dict: simulated_mode = bool(input_data.get("simulated_mode", False)) file_path = str(input_data.get("file_path", "")).strip() content = str(input_data.get("content", "")).strip() - theme = str(input_data.get("theme", "default")).strip().lower() subtitle = str(input_data.get("subtitle", "")).strip() page_numbers = bool(input_data.get("page_numbers", True)) @@ -141,7 +127,7 @@ def create_pdf_file(input_data: dict) -> dict: } if simulated_mode: - return {"status": "success", "path": file_path} + return {"status": "success", "path": file_path, "theme_used": "format_md"} # ── Imports (executor pre-installs via requirement=, this is a fallback) ── import os @@ -168,70 +154,13 @@ def _ensure(pkg, import_as=None): from fpdf import FPDF from fpdf.fonts import TextStyle, FontFace from fpdf.pattern import LinearGradient + from app.config import AGENT_FILE_SYSTEM_PATH + from app.utils.pdf_format import load_style, build_theme as _build_theme - # ── Themes ──────────────────────────────────────────────────────────── - # Keys: hbg=gradient stop colours, accent=link/highlight colour, - # h2/h3=heading colours, body=body text, cbg/cc=code bg/fg, - # rule=accent rule below banner, htxt=banner text - _THEMES = { - "default": { - "hbg": [(30, 58, 138), (79, 70, 229)], - "accent": (79, 70, 229), - "h2": (30, 58, 138), - "h3": (55, 65, 81), - "body": (31, 41, 55), - "cbg": (243, 
244, 246), - "cc": (17, 24, 39), - "rule": (199, 210, 254), - "htxt": (255, 255, 255), - }, - "corporate": { - "hbg": [(0, 72, 148), (0, 120, 212)], - "accent": (0, 120, 212), - "h2": (0, 72, 148), - "h3": (60, 60, 100), - "body": (31, 41, 55), - "cbg": (240, 247, 255), - "cc": (0, 72, 148), - "rule": (173, 216, 230), - "htxt": (255, 255, 255), - }, - "minimal": { - "hbg": [(50, 50, 50), (90, 90, 90)], - "accent": (80, 80, 80), - "h2": (40, 40, 40), - "h3": (80, 80, 80), - "body": (40, 40, 40), - "cbg": (245, 245, 245), - "cc": (30, 30, 30), - "rule": (200, 200, 200), - "htxt": (255, 255, 255), - }, - "warm": { - "hbg": [(120, 53, 15), (217, 119, 6)], - "accent": (180, 83, 9), - "h2": (120, 53, 15), - "h3": (92, 72, 44), - "body": (41, 37, 36), - "cbg": (255, 247, 237), - "cc": (120, 53, 15), - "rule": (253, 186, 116), - "htxt": (255, 255, 255), - }, - "forest": { - "hbg": [(20, 83, 45), (34, 197, 94)], - "accent": (22, 163, 74), - "h2": (20, 83, 45), - "h3": (55, 65, 55), - "body": (31, 41, 31), - "cbg": (240, 253, 244), - "cc": (20, 83, 45), - "rule": (134, 239, 172), - "htxt": (255, 255, 255), - }, - } - t = _THEMES.get(theme, _THEMES["default"]) - theme = theme if theme in _THEMES else "default" # resolve fallback for theme_used + # ── Style resolved from FORMAT.md (falls back to CraftBot brand defaults) ── + _fmt = load_style(AGENT_FILE_SYSTEM_PATH / "FORMAT.md") + t = _build_theme(_fmt) + _MARGIN_MM = _fmt["margin_in"] * 25.4 # ── Unicode sanitizer ───────────────────────────────────────────────── # fpdf2's built-in fonts (Helvetica, Courier, Times) only cover latin-1 @@ -317,8 +246,8 @@ def _sanitize(text): # FPDF setup pdf = FPDF() - pdf.set_auto_page_break(auto=True, margin=22) - pdf.set_margins(left=20, top=15, right=20) + pdf.set_auto_page_break(auto=True, margin=_MARGIN_MM) + pdf.set_margins(left=_MARGIN_MM, top=_MARGIN_MM, right=_MARGIN_MM) if doc_title: pdf.set_title(doc_title) pdf.set_creator("CraftBot") @@ -327,7 +256,11 @@ def _sanitize(text): pw = 
pdf.w - pdf.l_margin - pdf.r_margin # usable page width lm = pdf.l_margin y0 = 8 # banner top y-position - HH = 50 if subtitle else 40 # banner height + # Banner height: scale with FORMAT.md header_height_in but floor at 30mm + # so the title text always fits. FORMAT.md's 0.4" is a nav-bar spec; the + # PDF banner is a title block that needs proportionally more space. + _BASE_H = max(round(_fmt["header_height_in"] * 25.4 * 2.5), 30) + HH = _BASE_H + (10 if subtitle else 0) # ── Gradient banner ─────────────────────────────────────────────── grad = LinearGradient(lm, y0, lm + pw, y0, colors=t["hbg"]) @@ -335,15 +268,15 @@ def _sanitize(text): pdf.rect(lm, y0, pw, HH, style="F") if doc_title: - pdf.set_font("Helvetica", "B", 20) + pdf.set_font("Helvetica", "B", _fmt["h1_pt"]) pdf.set_text_color(*t["htxt"]) - title_y = y0 + (HH - 20) / 2 - (5 if subtitle else 0) + title_y = y0 + (HH - 12) / 2 - (5 if subtitle else 0) pdf.set_xy(lm + 8, title_y) pdf.cell(pw - 16, 12, doc_title[:72], align="L") if subtitle: pdf.set_font("Helvetica", "I", 9) - pdf.set_text_color(200, 210, 240) + pdf.set_text_color(*t["subtitle"]) pdf.set_xy(lm + 8, y0 + HH - 14) pdf.cell(pw - 16, 8, _sanitize(subtitle)[:100], align="L") @@ -358,7 +291,7 @@ def _sanitize(text): "h1": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=20, + font_size_pt=_fmt["h1_pt"], color=t["h2"], t_margin=10, b_margin=3, @@ -366,7 +299,7 @@ def _sanitize(text): "h2": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=16, + font_size_pt=_fmt["h2_pt"], color=t["h2"], t_margin=8, b_margin=2, @@ -374,7 +307,7 @@ def _sanitize(text): "h3": TextStyle( font_family="Helvetica", font_style="B", - font_size_pt=13, + font_size_pt=_fmt["h3_pt"], color=t["h3"], t_margin=6, b_margin=2, @@ -382,7 +315,7 @@ def _sanitize(text): "h4": TextStyle( font_family="Helvetica", font_style="BI", - font_size_pt=11, + font_size_pt=_fmt["body_pt"], color=t["h3"], t_margin=4, b_margin=1, @@ -390,20 +323,20 @@ def 
_sanitize(text): "h5": TextStyle( font_family="Helvetica", font_style="I", - font_size_pt=10, + font_size_pt=_fmt["small_pt"], color=t["h3"], t_margin=3, b_margin=1, ), "code": TextStyle( font_family="Courier", - font_size_pt=9, + font_size_pt=_fmt["code_pt"], color=t["cc"], fill_color=t["cbg"], ), "pre": TextStyle( font_family="Courier", - font_size_pt=9, + font_size_pt=_fmt["code_pt"], color=t["cc"], fill_color=t["cbg"], ), @@ -411,7 +344,7 @@ def _sanitize(text): } pdf.set_text_color(*t["body"]) - pdf.set_font("Helvetica", size=11) + pdf.set_font("Helvetica", size=_fmt["body_pt"]) pdf.write_html( html_body, font_family="Helvetica", @@ -426,8 +359,8 @@ def _sanitize(text): for pg in range(1, n_pages + 1): pdf.page = pg pdf.set_y(-12) - pdf.set_font("Helvetica", "I", 8) - pdf.set_text_color(150, 150, 150) + pdf.set_font("Helvetica", "I", _fmt["small_pt"]) + pdf.set_text_color(*_fmt["muted"]) pdf.cell(0, 5, f"Page {pg} of {n_pages}", align="C") # ── Write to disk ───────────────────────────────────────────────── @@ -442,7 +375,7 @@ def _sanitize(text): "path": abs_path, "pages": n_pages, "size_bytes": os.path.getsize(abs_path), - "theme_used": theme, + "theme_used": "format_md", } except PermissionError as exc: diff --git a/app/data/action/edit_pdf.py b/app/data/action/edit_pdf.py index cd3232d1..e9e0f973 100644 --- a/app/data/action/edit_pdf.py +++ b/app/data/action/edit_pdf.py @@ -14,9 +14,8 @@ "For tasks that require text reflow (rephrasing paragraphs, inserting new sections, " "reformatting layout): use create_pdf to rebuild the document with changes applied — " "the user receives the same output path with a clean result. " - "When editing a PDF created by create_pdf, use the theme_used value from that call " - "to pick matching accent colours: default=#4f46e5, corporate=#0078d4, " - "minimal=#505050, warm=#b45309, forest=#16a34a. 
" + "When editing a PDF created by create_pdf, match the accent colour to " + "FORMAT.md's highlight value (default #FF4F18) to align with the document style. " "Use absolute paths only." ), mode="CLI", @@ -586,10 +585,11 @@ def _get_span_at_rect(page, target_rect): # ── fill_field (AcroForm via pypdf) ─────────────────────── elif op_type == "fill_field": - # Defer all fill_field ops to after PyMuPDF saves - # (pypdf needs to open the saved file) - # We flag these for post-processing below - pass # handled in post-processing step + # Validate shape up-front so missing field_name is caught + # immediately, even if post-processing later fails wholesale. + if not str(op.get("field_name", "")).strip(): + warnings.append(f"{op_tag}: 'field_name' is required.") + # Actual fill is deferred — see post-processing block below. else: warnings.append(f"{op_tag}: unknown operation type '{op_type}'.") @@ -610,36 +610,56 @@ def _get_span_at_rect(page, target_rect): # ── Post-process: AcroForm fill_field via pypdf ─────────────────── acroform_ops = [ - op for op in operations if str(op.get("type", "")).lower() == "fill_field" + (j, op) + for j, op in enumerate(operations) + if str(op.get("type", "")).lower() == "fill_field" ] if acroform_ops: + # Step 1: open the saved file — failure here means all fill_field + # ops failed for the same upstream reason, warn per-op. try: reader = pypdf.PdfReader(abs_output) writer = pypdf.PdfWriter() writer.append(reader) existing_fields = reader.get_fields() or {} - for op in acroform_ops: - op_tag = "op[fill_field]" - field_name = str(op.get("field_name", "")) + except Exception as e: + for j, op in acroform_ops: + op_tag = f"op[{j}] 'fill_field'" + warnings.append( + f"{op_tag}: could not open PDF for AcroForm processing: " + f"{type(e).__name__}: {e}." + ) + else: + # Step 2: apply each fill_field op individually so failures + # are isolated — one bad field does not block the others. 
+ for j, op in acroform_ops: + op_tag = f"op[{j}] 'fill_field'" + field_name = str(op.get("field_name", "")).strip() value = str(op.get("value", "")) if not field_name: - warnings.append(f"{op_tag}: 'field_name' is required.") - continue + continue # already warned in main loop validation if field_name not in existing_fields: warnings.append( f"{op_tag}: field '{field_name}' not found in AcroForm. " - f"Available fields: {list(existing_fields.keys())[:10]}." + f"Available: {list(existing_fields.keys())[:10]}." ) continue - for page_obj in writer.pages: - writer.update_page_form_field_values( - page_obj, {field_name: value} - ) - ops_done += 1 - with open(abs_output, "wb") as f: - writer.write(f) - except Exception as e: - warnings.append(f"AcroForm fill failed: {type(e).__name__}: {e}.") + try: + for page_obj in writer.pages: + writer.update_page_form_field_values( + page_obj, {field_name: value} + ) + ops_done += 1 + except Exception as e: + warnings.append(f"{op_tag}: {type(e).__name__}: {e}.") + + # Step 3: write result — isolated so a disk failure does not + # hide which fields were successfully processed. + try: + with open(abs_output, "wb") as f: + writer.write(f) + except Exception as e: + warnings.append(f"AcroForm write failed: {type(e).__name__}: {e}.") return _json( "success", diff --git a/app/data/action/generate_image.py b/app/data/action/generate_image.py index c51e7d6e..da3d9f63 100644 --- a/app/data/action/generate_image.py +++ b/app/data/action/generate_image.py @@ -1,15 +1,14 @@ from agent_core import action +from agent_core.utils.logger import logger @action( name="generate_image", - description="""Generates an image using either OpenAI's Images 2.0 (gpt-image-2) or Google's Nano Banana 2 (gemini-3.1-flash-image-preview) model. 
-- Automatically selects the provider based on which API key(s) are configured -- If only one API key is set, that provider is used automatically -- If both keys are configured, asks the user which provider to use and remembers the choice -- If no API keys are configured, returns an error with setup instructions -- Supports 1K, 2K, or 4K resolution and multiple aspect ratios -- TIP: When generating multiple images for the same project or related work, use 'reference_images' parameter with previously generated images to maintain consistent style across all outputs""", + description="""Generates an image from a text prompt using the configured image generation provider (OpenAI gpt-image-2 or Google Gemini). +- The provider is determined by 'image_gen_provider' in settings.json (default: openai). Supported: openai, gemini. +- Saves the result as a PNG file to output_path, or a timestamped temp file if omitted. +- TIP: When generating multiple related images, pass previously generated paths via 'reference_images' to maintain style consistency. +- NOTE: reference_images semantics differ by provider — Gemini uses them as style guidance; OpenAI's images.edit treats them as compositional/mask inputs.""", default=True, mode="CLI", action_sets=["content_creation", "image", "document_processing"], @@ -17,51 +16,43 @@ "prompt": { "type": "string", "example": "A serene mountain landscape at sunset with a lake reflection", - "description": "The text prompt describing the image to generate.", + "description": "Text description of the image to generate.", "required": True, }, "output_path": { "type": "string", "example": "C:/Users/user/Pictures/generated_image.png", - "description": "Absolute path where the generated image will be saved (e.g., C:/Users/user/image.png or /home/user/image.png). If not provided, saves to temp directory.", + "description": "Absolute path where the generated image will be saved. 
If omitted, saved to temp directory with a timestamped name.", }, "resolution": { "type": "string", "example": "2K", - "description": "Output resolution. Options: '1K' (1080p), '2K', '4K'. Default: '1K'. Higher resolution costs more.", + "description": "Output resolution: '1K' (default), '2K', or '4K'. Higher resolution costs more. OpenAI tops out at ~1536px regardless of this setting.", }, "aspect_ratio": { "type": "string", "example": "16:9", - "description": "Aspect ratio of the generated image. Options: '1:1', '3:4', '4:3', '9:16', '16:9'. Default: '1:1'.", + "description": "Aspect ratio: '1:1' (default), '3:4', '4:3', '9:16', '16:9'. OpenAI maps to the nearest available canvas size — true 16:9 and 9:16 are not supported natively.", }, "number_of_images": { "type": "integer", "example": 1, - "description": "Number of images to generate (1-4). Default: 1.", + "description": "Number of images to generate (1–4). Default: 1.", }, "negative_prompt": { "type": "string", "example": "blurry, low quality, distorted", - "description": "Text describing what to avoid in the generated image.", + "description": "Elements to avoid. Native on Gemini; appended to prompt for OpenAI.", }, "reference_images": { "type": "array", - "example": [ - "C:/Users/user/Pictures/reference1.png", - "C:/Users/user/Pictures/reference2.png", - ], - "description": "Optional list of reference image absolute paths to guide generation (up to 14 images). Use full absolute paths.", + "example": ["C:/Users/user/Pictures/ref.png"], + "description": "Optional reference image paths (up to 14). Gemini: style guidance. OpenAI: compositional/mask inputs (different behaviour — results may vary).", }, "safety_filter_level": { "type": "string", "example": "block_medium_and_above", - "description": "Safety filter level (Gemini only). Options: 'block_none', 'block_only_high', 'block_medium_and_above', 'block_low_and_above'. Default: 'block_medium_and_above'. 
Ignored when using OpenAI.", - }, - "provider_preference": { - "type": "string", - "example": "openai", - "description": "Which provider to use: 'openai' (Images 2.0 / gpt-image-2) or 'gemini' (Nano Banana 2 / gemini-3.1-flash-image-preview). Only needed when both API keys are configured and no saved preference exists. Providing this saves it as the default for future calls.", + "description": "Gemini safety filter: 'block_none', 'block_only_high', 'block_medium_and_above' (default), 'block_low_and_above'. Ignored by OpenAI.", }, }, output_schema={ @@ -72,22 +63,14 @@ }, "image_paths": { "type": "array", - "description": "List of paths to the generated image files.", - }, - "prompt_used": { - "type": "string", - "description": "The prompt that was used for generation.", - }, - "resolution": { - "type": "string", - "description": "The resolution of the generated image.", + "description": "Absolute paths to the generated PNG files.", }, "message": { "type": "string", - "description": "Status message or error message.", + "description": "Status message or error details.", }, }, - requirement=["google-genai", "openai", "Pillow"], + requirement=["openai", "Pillow"], test_payload={ "prompt": "A cute cartoon cat sitting on a rainbow", "resolution": "1K", @@ -97,491 +80,135 @@ }, ) def generate_image(input_data: dict) -> dict: - """ - Generates an image using OpenAI's Images 2.0 (gpt-image-2) or Google's Nano Banana 2 (gemini-3.1-flash-image-preview). 
- """ - import os - import sys - import subprocess - import importlib - import tempfile - from datetime import datetime - simulated_mode = input_data.get("simulated_mode", False) - if simulated_mode: return { "status": "success", "image_paths": ["/tmp/simulated_image_001.png"], - "prompt_used": input_data.get("prompt", "Simulated prompt"), - "resolution": input_data.get("resolution", "1K"), "message": "Image generated successfully (simulated mode).", } - # Determine which provider to use based on available API keys and user preference - from app.config import get_api_key, get_settings, save_settings - - openai_key = get_api_key("openai") - gemini_key = get_api_key("gemini") - - if not openai_key and not gemini_key: - return { - "status": "error", - "image_paths": [], - "prompt_used": "", - "resolution": "", - "message": ( - "No image generation API key is configured. " - "Tell the user they need either an OpenAI API key (for Images 2.0 / gpt-image-2) " - "or a Google Gemini API key (for Nano Banana 2 / gemini-3.1-flash-image-preview), " - "and ask if they need help setting one up." 
- ), - } - - provider_preference = input_data.get("provider_preference", "").strip().lower() - _cfg = get_settings() - saved_provider = _cfg.get("image_generation", {}).get("preferred_provider", "") - - if openai_key and not gemini_key: - provider = "openai" - elif gemini_key and not openai_key: - provider = "gemini" - else: - # Both keys present - if provider_preference in ("openai", "gemini"): - provider = provider_preference - _cfg.setdefault("image_generation", {})["preferred_provider"] = provider - save_settings(_cfg) - elif saved_provider in ("openai", "gemini"): - provider = saved_provider - else: - provider = "gemini" - - api_key = openai_key if provider == "openai" else gemini_key + import app.internal_action_interface as iai + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_api_key, get_image_gen_provider - # Validate required input - prompt = input_data.get("prompt", "").strip() + prompt = str(input_data.get("prompt", "")).strip() if not prompt: return { "status": "error", "image_paths": [], - "prompt_used": "", - "resolution": "", - "message": "A prompt is required to generate an image.", + "message": "prompt is required.", } - # Get optional parameters - output_path = input_data.get("output_path", "") - resolution = input_data.get("resolution", "1K").upper() - aspect_ratio = input_data.get("aspect_ratio", "1:1") - number_of_images = min(max(int(input_data.get("number_of_images", 1)), 1), 4) - negative_prompt = input_data.get("negative_prompt", "") - reference_images = input_data.get("reference_images", []) - safety_filter_level = input_data.get( - "safety_filter_level", "block_medium_and_above" - ) + # Fallback priority when the configured provider can't generate images: + # prefer Gemini, then OpenAI, then any other image-gen-capable provider. 
+ _IMAGE_GEN_PRIORITY = ["gemini", "openai"] - # Validate resolution with user feedback - valid_resolutions = ["1K", "2K", "4K"] - warnings = [] - if resolution not in valid_resolutions: - warnings.append( - f"Invalid resolution '{resolution}'. Defaulting to '1K'. Valid options: {', '.join(valid_resolutions)}." - ) - resolution = "1K" - - # Validate aspect ratio with user feedback - valid_ratios = ["1:1", "3:4", "4:3", "9:16", "16:9"] - if aspect_ratio not in valid_ratios: - warnings.append( - f"Invalid aspect ratio '{aspect_ratio}'. Defaulting to '1:1'. Valid options: {', '.join(valid_ratios)}." - ) - aspect_ratio = "1:1" - - # Validate safety filter level with user feedback - valid_safety_levels = [ - "block_none", - "block_only_high", - "block_medium_and_above", - "block_low_and_above", - ] - if safety_filter_level not in valid_safety_levels: - warnings.append( - f"Invalid safety filter level '{safety_filter_level}'. Defaulting to 'block_medium_and_above'. Valid options: {', '.join(valid_safety_levels)}." - ) - safety_filter_level = "block_medium_and_above" - - # Validate number_of_images with user feedback - raw_num = int(input_data.get("number_of_images", 1)) - if raw_num < 1 or raw_num > 4: - warnings.append( - f"number_of_images '{raw_num}' out of range. Clamped to {number_of_images}. Valid range: 1-4." - ) - - # Limit reference images to 14 - if len(reference_images) > 14: - warnings.append( - f"Too many reference images ({len(reference_images)}). Only the first 14 will be used." 
- ) - reference_images = reference_images[:14] - - # Helper: extract images from Gemini response - def _extract_images_from_response(response): - images = [] - # Primary path: candidates[].content.parts[].inline_data - if hasattr(response, "candidates") and response.candidates: - for candidate in response.candidates: - if not ( - hasattr(candidate, "content") - and hasattr(candidate.content, "parts") - ): - continue - for part in candidate.content.parts: - if hasattr(part, "inline_data") and part.inline_data: - if hasattr( - part.inline_data, "mime_type" - ) and part.inline_data.mime_type.startswith("image/"): - images.append(part.inline_data.data) - # Fallback: response.images (older SDK versions) - if not images and hasattr(response, "images"): - for img in response.images: - if hasattr(img, "data"): - images.append(img.data) - elif hasattr(img, "_pil_image"): - images.append(img) - return images + def _supports(p): + return bool(MODEL_REGISTRY.get(p, {}).get(InterfaceType.IMAGE_GEN)) - # Helper: check if response was blocked by safety filters - def _get_block_reason(response): - if hasattr(response, "prompt_feedback"): - feedback = response.prompt_feedback - if hasattr(feedback, "block_reason") and feedback.block_reason: - return str(feedback.block_reason) - if hasattr(response, "candidates") and response.candidates: - for candidate in response.candidates: - if hasattr(candidate, "finish_reason") and candidate.finish_reason: - reason = str(candidate.finish_reason) - if "SAFETY" in reason.upper(): - return reason + def _has_key(p): + try: + return bool(get_api_key(p)) + except Exception: + return False + + def _resolve_image_gen_provider(configured): + """Configured provider if it can image-gen AND has a key, else highest- + priority capable provider with a key, else None.""" + if configured and _supports(configured) and _has_key(configured): + return configured + candidates = list(_IMAGE_GEN_PRIORITY) + for p, caps in MODEL_REGISTRY.items(): + if 
caps.get(InterfaceType.IMAGE_GEN) and p not in candidates: + candidates.append(p) + for p in candidates: + if _supports(p) and _has_key(p): + return p return None - # Helper: build the save path for a generated image - def _build_save_path(output_path, timestamp, index, number_of_images, total_found): - if output_path: - if number_of_images > 1 or total_found > 1: - base, ext = os.path.splitext(output_path) - if not ext: - ext = ".png" - return f"{base}_{index + 1}{ext}" - else: - save_path = output_path - if not os.path.splitext(save_path)[1]: - save_path += ".png" - return save_path - else: - temp_dir = tempfile.gettempdir() - return os.path.join( - temp_dir, f"generated_image_{timestamp}_{index + 1}.png" - ) - - # Helper: convert image data to PIL Image - def _to_pil_image(img_data, Image, io, base64): - if isinstance(img_data, str): - image_bytes = base64.b64decode(img_data) - return Image.open(io.BytesIO(image_bytes)) - elif isinstance(img_data, bytes): - return Image.open(io.BytesIO(img_data)) - elif hasattr(img_data, "_pil_image"): - return img_data._pil_image - else: - return img_data + configured_provider = get_image_gen_provider() + effective_provider = _resolve_image_gen_provider(configured_provider) - # Ensure required packages are installed - def _ensure_package(pkg_name): - try: - importlib.import_module(pkg_name.replace("-", "_").split("[")[0]) - except ImportError: - subprocess.check_call( - [sys.executable, "-m", "pip", "install", pkg_name, "--quiet"] - ) - - try: - _ensure_package("google-genai") - _ensure_package("openai") - _ensure_package("Pillow") - except Exception as e: + if effective_provider is None: return { "status": "error", "image_paths": [], - "prompt_used": prompt, - "resolution": resolution, - "message": f"Failed to install required packages: {str(e)}", + "message": ( + "No image-generation provider is available. Image generation requires " + "OpenAI or Google (Gemini) with a configured API key. 
Set " + "'image_gen_provider' and add the matching key under 'api_keys' in settings.json." + ), } - try: - from PIL import Image - import io - import base64 - - image_paths = [] - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - if provider == "gemini": - from google import genai - from google.genai import types - - client = genai.Client(api_key=api_key) - - # Prepare reference images if provided - image_parts = [] - for ref_path in reference_images: - if os.path.exists(ref_path): - try: - with open(ref_path, "rb") as f: - image_data = f.read() - ext = os.path.splitext(ref_path)[1].lower() - mime_map = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".gif": "image/gif", - ".webp": "image/webp", - } - mime_type = mime_map.get(ext, "image/png") - image_parts.append( - types.Part.from_bytes(data=image_data, mime_type=mime_type) - ) - except Exception: - pass # Skip invalid reference images - - # Build the prompt with generation instructions - generation_prompt = f"""Generate an image based on the following description: - -{prompt} - -Image specifications: -- Resolution: {resolution} -- Aspect ratio: {aspect_ratio} -- Number of variations: {number_of_images}""" - - if negative_prompt: - generation_prompt += f"\n- Avoid: {negative_prompt}" - - content_parts = list(image_parts) - content_parts.append(generation_prompt) - - # Safety settings - safety_settings = None - if safety_filter_level != "block_none": - harm_block_threshold = { - "block_only_high": "BLOCK_ONLY_HIGH", - "block_medium_and_above": "BLOCK_MEDIUM_AND_ABOVE", - "block_low_and_above": "BLOCK_LOW_AND_ABOVE", - }.get(safety_filter_level, "BLOCK_MEDIUM_AND_ABOVE") - - safety_settings = [ - types.SafetySetting( - category=category, threshold=harm_block_threshold - ) - for category in ( - "HARM_CATEGORY_HARASSMENT", - "HARM_CATEGORY_HATE_SPEECH", - "HARM_CATEGORY_SEXUALLY_EXPLICIT", - "HARM_CATEGORY_DANGEROUS_CONTENT", - ) - ] - - generate_config = 
types.GenerateContentConfig( - candidate_count=1, - response_modalities=["TEXT", "IMAGE"], - image_config=types.ImageConfig(image_size=resolution), - safety_settings=safety_settings, + # Use the live interface when it already serves the effective provider; + # otherwise build a transient interface for the fallback provider (the + # configured provider can't generate images, but another configured key can). + image_gen = iai.InternalActionInterface.image_gen_interface + if ( + image_gen is None + or not getattr(image_gen, "is_initialized", False) + or image_gen.provider != effective_provider + ): + from app.image_gen_interface import ImageGenInterface + from app.config import get_image_gen_model + + if effective_provider != configured_provider: + logger.info( + f"[IMAGE_GEN] Configured provider '{configured_provider}' can't generate " + f"images; falling back to '{effective_provider}' (has a configured key)." ) - - response = client.models.generate_content( - model="gemini-3.1-flash-image-preview", - contents=content_parts, - config=generate_config, + try: + image_gen = ImageGenInterface( + provider=effective_provider, + # Honor the configured model override only when it matches the + # configured provider; a fallback provider uses its own default. + model=( + get_image_gen_model() + if effective_provider == configured_provider + else None + ), + api_key=get_api_key(effective_provider), + deferred=False, ) - - images_found = _extract_images_from_response(response) - - if not images_found: - block_reason = _get_block_reason(response) - if block_reason: - return { - "status": "error", - "image_paths": [], - "prompt_used": prompt, - "resolution": resolution, - "message": f"Image generation was blocked by safety filters: {block_reason}. Try modifying your prompt or adjusting safety_filter_level.", - } - return { - "status": "error", - "image_paths": [], - "prompt_used": prompt, - "resolution": resolution, - "message": "No images were generated. 
The model did not produce image output for this prompt. Try rephrasing your prompt or check if your API key has access to image generation.", - } - - for i, img_data in enumerate(images_found[:number_of_images]): - save_path = _build_save_path( - output_path, timestamp, i, number_of_images, len(images_found) - ) - parent_dir = os.path.dirname(os.path.abspath(save_path)) - if parent_dir: - os.makedirs(parent_dir, exist_ok=True) - pil_image = _to_pil_image(img_data, Image, io, base64) - pil_image.save(save_path, "PNG") - image_paths.append(save_path) - - model_label = "Nano Banana 2" - - elif provider == "openai": - from openai import OpenAI - - if safety_filter_level != "block_medium_and_above": - warnings.append( - "safety_filter_level is not supported by OpenAI and has been ignored." - ) - - # Map aspect_ratio to OpenAI size string - openai_size_map = { - "1:1": "1024x1024", - "16:9": "1536x1024", - "4:3": "1536x1024", - "9:16": "1024x1536", - "3:4": "1024x1536", + except Exception as e: + return { + "status": "error", + "image_paths": [], + "message": f"Failed to initialize image generation ({effective_provider}): {e}", + } + if not image_gen.is_initialized: + return { + "status": "error", + "image_paths": [], + "message": ( + f"Image generation provider '{effective_provider}' could not be " + "initialized — check its API key in settings.json." 
+ ), } - openai_size = openai_size_map.get(aspect_ratio, "1024x1024") - - # Map resolution to OpenAI quality - openai_quality_map = {"1K": "medium", "2K": "high", "4K": "high"} - openai_quality = openai_quality_map.get(resolution, "medium") - - # Build prompt (OpenAI has no negative_prompt param — append to prompt) - full_prompt = prompt - if negative_prompt: - full_prompt += f"\n\nAvoid: {negative_prompt}" - - client = OpenAI(api_key=api_key) - - valid_ref_paths = [p for p in reference_images if os.path.exists(p)] - if valid_ref_paths: - image_files = [open(p, "rb") for p in valid_ref_paths] - try: - response = client.images.edit( - model="gpt-image-2", - image=image_files, - prompt=full_prompt, - n=number_of_images, - size=openai_size, - ) - finally: - for f in image_files: - f.close() - else: - response = client.images.generate( - model="gpt-image-2", - prompt=full_prompt, - n=number_of_images, - size=openai_size, - quality=openai_quality, - ) - - import urllib.request as _urllib_request - - images_found = [] - for item in response.data: - if item.b64_json: - images_found.append(base64.b64decode(item.b64_json)) - elif item.url: - with _urllib_request.urlopen(item.url) as _r: - images_found.append(_r.read()) - - if not images_found: - return { - "status": "error", - "image_paths": [], - "prompt_used": prompt, - "resolution": resolution, - "message": "No images were generated. The model did not produce image output for this prompt. 
Try rephrasing your prompt.", - } - - for i, img_bytes in enumerate(images_found[:number_of_images]): - save_path = _build_save_path( - output_path, timestamp, i, number_of_images, len(images_found) - ) - parent_dir = os.path.dirname(os.path.abspath(save_path)) - if parent_dir: - os.makedirs(parent_dir, exist_ok=True) - pil_image = Image.open(io.BytesIO(img_bytes)) - pil_image.save(save_path, "PNG") - image_paths.append(save_path) - - model_label = "Images 2.0" - message = ( - f"Successfully generated {len(image_paths)} image(s) using {model_label}." + try: + paths = image_gen.generate_image( + prompt=prompt, + resolution=str(input_data.get("resolution", "1K")).upper(), + aspect_ratio=str(input_data.get("aspect_ratio", "1:1")), + number_of_images=min(max(int(input_data.get("number_of_images", 1)), 1), 4), + output_path=str(input_data.get("output_path") or ""), + negative_prompt=str(input_data.get("negative_prompt") or ""), + reference_images=list(input_data.get("reference_images") or []), + safety_filter_level=str( + input_data.get("safety_filter_level") or "block_medium_and_above" + ), ) - if warnings: - message += " Warnings: " + " ".join(warnings) - return { "status": "success", - "image_paths": image_paths, - "prompt_used": prompt, - "resolution": resolution, - "message": message, + "image_paths": paths, + "message": f"Generated {len(paths)} image(s) via {effective_provider}.", } - except Exception as e: - error_message = str(e) - - if provider == "gemini": - if "quota" in error_message.lower() or "rate" in error_message.lower(): - error_message = ( - f"Gemini API rate limit or quota exceeded: {error_message}" - ) - elif "invalid" in error_message.lower() and "key" in error_message.lower(): - error_message = f"Invalid Gemini API key: {error_message}. Please verify your Google API key is correct." - elif ( - "permission" in error_message.lower() - or "access" in error_message.lower() - ): - error_message = f"Gemini API access denied: {error_message}. 
Ensure your API key has access to the Nano Banana 2 model." - elif ( - "safety" in error_message.lower() or "blocked" in error_message.lower() - ): - error_message = f"Content blocked by Gemini safety filters: {error_message}. Try modifying your prompt." - elif "not found" in error_message.lower() or "404" in error_message: - error_message = f"Gemini model not available: {error_message}. The gemini-3.1-flash-image-preview model may not be accessible with your API key. Try using Google AI Studio to verify access." - else: - if ( - "billing" in error_message.lower() - or "insufficient_quota" in error_message.lower() - or "rate" in error_message.lower() - ): - error_message = ( - f"OpenAI API rate limit or quota exceeded: {error_message}" - ) - elif "invalid_api_key" in error_message.lower() or ( - "invalid" in error_message.lower() and "key" in error_message.lower() - ): - error_message = f"Invalid OpenAI API key: {error_message}. Please verify your OpenAI API key is correct." - elif ( - "content_policy" in error_message.lower() - or "safety" in error_message.lower() - or "blocked" in error_message.lower() - ): - error_message = f"Content blocked by OpenAI safety policy: {error_message}. Try modifying your prompt." - elif "not found" in error_message.lower() or "404" in error_message: - error_message = f"OpenAI model not available: {error_message}. The gpt-image-2 model may not be accessible with your API key." 
- return { "status": "error", "image_paths": [], - "prompt_used": prompt, - "resolution": resolution, - "message": error_message, + "message": str(e), } diff --git a/app/data/action/generate_video.py b/app/data/action/generate_video.py new file mode 100644 index 00000000..9c52e0fd --- /dev/null +++ b/app/data/action/generate_video.py @@ -0,0 +1,266 @@ +from agent_core import action +from agent_core.utils.logger import logger + + +@action( + name="generate_video", + description="""Generates a video from a text prompt using the configured video generation provider (Google Gemini Veo, OpenAI Sora, or BytePlus Seedance). +- The provider is determined by 'video_gen_provider' in settings.json (default: gemini). Supported: gemini, openai, byteplus. +- The action BLOCKS while the long-running generation completes (typically 60-300s; up to ~25 min hard cap). +- Saves the result as an MP4 file to output_path, or a timestamped temp file if omitted. +- TIP: For image-to-video, pass an absolute path to a single start frame via 'reference_image'. +- NOTE: Per-provider quirks — Veo supports last_frame + reference_images (style); Sora accepts a single reference_image (uploaded inline as the SDK file field); Seedance accepts camera_fixed and watermark flags. Audio: Veo and Sora 2 produce synchronized audio; Seedance honors with_audio on 2.0+ models (silent on older 1.0 builds).""", + default=True, + mode="CLI", + action_sets=["content_creation", "video"], + input_schema={ + "prompt": { + "type": "string", + "example": "A drone shot of a misty mountain valley at dawn, cinematic", + "description": "Text description of the video to generate.", + "required": True, + }, + "output_path": { + "type": "string", + "example": "C:/Users/user/Videos/generated.mp4", + "description": "Absolute path where the generated video will be saved. 
If omitted, saved to temp directory with a timestamped name.", + }, + "duration_seconds": { + "type": "integer", + "example": 8, + "description": "Target duration in seconds. Per-provider clamping: Sora 2 = 4/8/12; Veo = 4/6/8 (forced to 8 with 1080p+/refs); Seedance = 2-12.", + }, + "aspect_ratio": { + "type": "string", + "example": "16:9", + "description": "Aspect ratio: '16:9' (default), '9:16', '1:1', '4:3', '3:4', '21:9'. Sora and Veo only support 16:9 and 9:16.", + }, + "resolution": { + "type": "string", + "example": "1080p", + "description": "Output resolution: '480p', '720p' (default), '1080p', '4k'. Sora 2 standard tops at 720p; 4k is Veo only.", + }, + "number_of_videos": { + "type": "integer", + "example": 1, + "description": "Number of videos to generate (1-4). Default: 1. Multiple videos are submitted as independent jobs for Sora and Seedance.", + }, + "negative_prompt": { + "type": "string", + "example": "blurry, low quality, distorted", + "description": "Elements to avoid. Native on Veo and Seedance; appended to prompt for Sora.", + }, + "reference_image": { + "type": "string", + "example": "C:/Users/user/Pictures/start_frame.png", + "description": "Optional absolute path to a single start-frame image for image-to-video.", + }, + "last_frame": { + "type": "string", + "example": "C:/Users/user/Pictures/end_frame.png", + "description": "Optional absolute path to an end-frame image for frame interpolation. Veo 3.1+ only; silently dropped elsewhere.", + }, + "reference_images": { + "type": "array", + "example": ["C:/Users/user/Pictures/style1.png"], + "description": "Optional list of absolute paths to additional style-reference images. Veo 3.1+ accepts up to 3; silently dropped on Sora and Seedance.", + }, + "seed": { + "type": "integer", + "example": 42, + "description": "Optional deterministic seed.", + }, + "with_audio": { + "type": "boolean", + "example": True, + "description": "Whether to generate synchronized native audio. 
Veo 3.x and Sora 2 produce audio by default (no toggle — this flag is ignored on those providers). Honored on BytePlus Seedance 2.0+; silent on older Seedance 1.0 builds.", + }, + "person_generation": { + "type": "string", + "example": "allow_adult", + "description": "Veo only — 'allow_all', 'allow_adult' (default), or 'dont_allow'. 'allow_all' is geo-restricted (EU/UK/CH/MENA).", + }, + "camera_fixed": { + "type": "boolean", + "example": False, + "description": "BytePlus Seedance only — lock camera position throughout the clip.", + }, + "watermark": { + "type": "boolean", + "example": False, + "description": "BytePlus Seedance only — apply watermark to output.", + }, + "callback_url": { + "type": "string", + "example": "", + "description": "BytePlus Seedance only — webhook for task completion (optional).", + }, + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "'success' or 'error'.", + }, + "video_paths": { + "type": "array", + "description": "Absolute paths to the generated MP4 files.", + }, + "message": { + "type": "string", + "description": "Status message or error details.", + }, + }, + requirement=["openai", "requests"], + test_payload={ + "prompt": "A cute corgi running through autumn leaves, slow motion", + "duration_seconds": 4, + "aspect_ratio": "16:9", + "resolution": "720p", + "number_of_videos": 1, + "simulated_mode": True, + }, +) +def generate_video(input_data: dict) -> dict: + simulated_mode = input_data.get("simulated_mode", False) + if simulated_mode: + return { + "status": "success", + "video_paths": ["/tmp/simulated_video_001.mp4"], + "message": "Video generated successfully (simulated mode).", + } + + import app.internal_action_interface as iai + from agent_core.core.models.model_registry import MODEL_REGISTRY + from agent_core.core.models.types import InterfaceType + from app.config import get_api_key, get_video_gen_provider + + prompt = str(input_data.get("prompt", "")).strip() + if not prompt: 
+ return { + "status": "error", + "video_paths": [], + "message": "prompt is required.", + } + + # Fallback priority when the configured provider can't generate videos: + # prefer Gemini Veo, then OpenAI Sora, then BytePlus Seedance. + _VIDEO_GEN_PRIORITY = ["gemini", "openai", "byteplus"] + + def _supports(p): + return bool(MODEL_REGISTRY.get(p, {}).get(InterfaceType.VIDEO_GEN)) + + def _has_key(p): + try: + return bool(get_api_key(p)) + except Exception: + return False + + def _resolve_video_gen_provider(configured): + """Configured provider if it can video-gen AND has a key, else highest- + priority capable provider with a key, else None.""" + if configured and _supports(configured) and _has_key(configured): + return configured + candidates = list(_VIDEO_GEN_PRIORITY) + for p, caps in MODEL_REGISTRY.items(): + if caps.get(InterfaceType.VIDEO_GEN) and p not in candidates: + candidates.append(p) + for p in candidates: + if _supports(p) and _has_key(p): + return p + return None + + configured_provider = get_video_gen_provider() + effective_provider = _resolve_video_gen_provider(configured_provider) + + if effective_provider is None: + return { + "status": "error", + "video_paths": [], + "message": ( + "No video-generation provider is available. Video generation requires " + "Google (Gemini Veo), OpenAI (Sora), or BytePlus (Seedance) with a " + "configured API key. Set 'video_gen_provider' and add the matching " + "key under 'api_keys' in settings.json." + ), + } + + # Use the live interface when it already serves the effective provider; + # otherwise build a transient interface for the fallback provider. 
+ video_gen = iai.InternalActionInterface.video_gen_interface + if ( + video_gen is None + or not getattr(video_gen, "is_initialized", False) + or video_gen.provider != effective_provider + ): + from app.video_gen_interface import VideoGenInterface + from app.config import get_video_gen_model + + if effective_provider != configured_provider: + logger.info( + f"[VIDEO_GEN] Configured provider '{configured_provider}' can't generate " + f"videos; falling back to '{effective_provider}' (has a configured key)." + ) + try: + video_gen = VideoGenInterface( + provider=effective_provider, + # Honor the configured model override only when it matches the + # configured provider; a fallback provider uses its own default. + model=( + get_video_gen_model() + if effective_provider == configured_provider + else None + ), + api_key=get_api_key(effective_provider), + deferred=False, + ) + except Exception as e: + return { + "status": "error", + "video_paths": [], + "message": f"Failed to initialize video generation ({effective_provider}): {e}", + } + if not video_gen.is_initialized: + return { + "status": "error", + "video_paths": [], + "message": ( + f"Video generation provider '{effective_provider}' could not be " + "initialized — check its API key in settings.json." 
+ ), + } + + try: + paths = video_gen.generate_video( + prompt=prompt, + duration_seconds=int(input_data.get("duration_seconds", 5)), + aspect_ratio=str(input_data.get("aspect_ratio", "16:9")), + resolution=str(input_data.get("resolution", "720p")), + number_of_videos=min(max(int(input_data.get("number_of_videos", 1)), 1), 4), + output_path=str(input_data.get("output_path") or ""), + negative_prompt=str(input_data.get("negative_prompt") or ""), + reference_image=(input_data.get("reference_image") or None), + last_frame=(input_data.get("last_frame") or None), + reference_images=list(input_data.get("reference_images") or []), + seed=( + int(input_data["seed"]) + if input_data.get("seed") not in (None, "") + else None + ), + with_audio=bool(input_data.get("with_audio", True)), + person_generation=str(input_data.get("person_generation") or "allow_adult"), + camera_fixed=bool(input_data.get("camera_fixed", False)), + watermark=bool(input_data.get("watermark", False)), + callback_url=str(input_data.get("callback_url") or ""), + ) + return { + "status": "success", + "video_paths": paths, + "message": f"Generated {len(paths)} video(s) via {effective_provider}.", + } + except Exception as e: + return { + "status": "error", + "video_paths": [], + "message": str(e), + } diff --git a/app/data/action/integrations/_helpers.py b/app/data/action/integrations/_helpers.py index 9f65c509..e29fdb65 100644 --- a/app/data/action/integrations/_helpers.py +++ b/app/data/action/integrations/_helpers.py @@ -32,6 +32,59 @@ async def send_discord_message(input_data: dict) -> dict: from typing import Any, Callable, Dict, Optional +# Common aliases the agent/user might use → canonical registered integration id. +# Google Workspace apps in particular are frequently referred to by short names +# or lumped under "google", which is not itself an integration. 
+# +# These live here (not in an action module) because action handlers are executed +# via exec() on their own extracted source — module-level names in the action +# file are NOT in scope at runtime. Handlers must import these inside the function +# body, the same way they import run_client/with_client. +INTEGRATION_ALIASES = { + "mail": "gmail", + "googlemail": "gmail", + "google mail": "gmail", + "drive": "google_drive", + "gdrive": "google_drive", + "googledrive": "google_drive", + "google drive": "google_drive", + "docs": "google_docs", + "gdocs": "google_docs", + "googledocs": "google_docs", + "google docs": "google_docs", + "google_doc": "google_docs", + "calendar": "google_calendar", + "gcal": "google_calendar", + "gcalendar": "google_calendar", + "google calendar": "google_calendar", + "youtube": "google_youtube", +} + +# Umbrella terms that aren't a single integration — Google Workspace apps are +# tracked individually, so callers must check the specific app. +GOOGLE_UMBRELLA = { + "google", + "google workspace", + "google_workspace", + "workspace", + "gsuite", + "g suite", + "google suite", +} +GOOGLE_FAMILY = ( + "gmail", + "google_drive", + "google_docs", + "google_calendar", + "google_youtube", +) + + +def normalize_integration_id(integration_id: str) -> str: + """Map a user/agent-supplied integration name through known aliases.""" + return INTEGRATION_ALIASES.get(integration_id, integration_id) + + def record_outgoing_message(platform_name: str, recipient: str, text: str) -> None: """Best-effort: record an outgoing platform message into the agent's conversation history. 
@@ -147,12 +200,22 @@ async def run_client( raw = method(**kwargs) if asyncio.iscoroutine(raw): raw = await raw - return _shape_result( + result = _shape_result( raw, unwrap_envelope=unwrap_envelope, success_message=success_message, fail_message=fail_message, ) + if result.get("status") != "error": + try: + from app.ui_layer.metrics.collector import MetricsCollector + + collector = MetricsCollector.get_instance() + if collector: + collector.record_integration_call(integration) + except Exception: + pass + return result except Exception as e: return {"status": "error", "message": str(e)} @@ -187,12 +250,22 @@ def run_client_sync( "status": "error", "message": f"{method_name!r} is async — use run_client (await) instead", } - return _shape_result( + result = _shape_result( raw, unwrap_envelope=unwrap_envelope, success_message=success_message, fail_message=fail_message, ) + if result.get("status") != "error": + try: + from app.ui_layer.metrics.collector import MetricsCollector + + collector = MetricsCollector.get_instance() + if collector: + collector.record_integration_call(integration) + except Exception: + pass + return result except Exception as e: return {"status": "error", "message": str(e)} @@ -243,6 +316,14 @@ async def with_client( result = fn(client, *args, **kwargs) if asyncio.iscoroutine(result): result = await result + try: + from app.ui_layer.metrics.collector import MetricsCollector + + collector = MetricsCollector.get_instance() + if collector: + collector.record_integration_call(integration) + except Exception: + pass return {"status": "success", "result": result} except Exception as e: return {"status": "error", "message": str(e)} diff --git a/app/data/action/integrations/google_workspace/gmail_actions.py b/app/data/action/integrations/google_workspace/gmail_actions.py index c9f15fcd..e7c82b18 100644 --- a/app/data/action/integrations/google_workspace/gmail_actions.py +++ b/app/data/action/integrations/google_workspace/gmail_actions.py @@ -78,7 +78,12 
@@ def list_gmail(input_data: dict) -> dict: @action( name="get_gmail", - description="Get details of a specific Gmail message by ID.", + description=( + "Get details of a specific Gmail message by ID. " + "When full_body=true the response includes body text and an attachments list " + "(each entry: attachment_id, filename, mimeType, size). " + "Use attachment_id and filename with download_gmail_attachment." + ), action_sets=["gmail_mail", "gmail"], input_schema={ "message_id": { @@ -88,7 +93,7 @@ def list_gmail(input_data: dict) -> dict: }, "full_body": { "type": "boolean", - "description": "Whether to include full email body.", + "description": "Whether to include full email body and attachment metadata.", "example": False, }, }, @@ -940,19 +945,29 @@ def delete_gmail_label(input_data: dict) -> dict: @action( name="download_gmail_attachment", - description="Download a Gmail attachment to a local path. Get the attachment_id from get_gmail with full_body=true (payload.parts[].body.attachmentId).", + description=( + "Download a Gmail attachment to a local path. " + "First call get_gmail with full_body=true to get the attachments list — " + "each entry has attachment_id and filename. " + "Pass save_to as a directory path and filename separately, or as a full file path." + ), action_sets=["gmail_attachments", "gmail"], input_schema={ "message_id": {"type": "string", "description": "Message ID.", "example": ""}, "attachment_id": { "type": "string", - "description": "Attachment ID from the message payload.", + "description": "Attachment ID from get_gmail(full_body=true).attachments[].attachment_id.", "example": "", }, "save_to": { "type": "string", - "description": "Local path to save to.", - "example": "C:/Users/me/downloads/file.pdf", + "description": "Local path to save to. 
May be a directory; use filename to set the file name.", + "example": "C:/Users/me/downloads/", + }, + "filename": { + "type": "string", + "description": "Filename to use when save_to is a directory. Use the filename from get_gmail attachments list.", + "example": "invoice.pdf", }, }, output_schema={"status": {"type": "string", "example": "success"}}, @@ -969,6 +984,7 @@ def download_gmail_attachment(input_data: dict) -> dict: message_id=input_data["message_id"], attachment_id=input_data["attachment_id"], save_to=input_data["save_to"], + filename=input_data.get("filename"), ) diff --git a/app/data/action/integrations/hubspot/hubspot_actions.py b/app/data/action/integrations/hubspot/hubspot_actions.py new file mode 100644 index 00000000..58d8ee36 --- /dev/null +++ b/app/data/action/integrations/hubspot/hubspot_actions.py @@ -0,0 +1,3013 @@ +"""HubSpot action surface. + +Mirrors the HubSpot client in +``craftos_integrations/integrations/hubspot/__init__.py`` 1:1. Sub-sets are +prefixed with ``hubspot_`` per the action_set convention; the ``hubspot`` +umbrella tags the high-value 20% the agent should reach for by default. + +Identifier shape (always string): HubSpot returns numeric-looking IDs that +overflow JS number range — pass them through as strings. See +``craftos_integrations/integrations/hubspot/INTEGRATION.md`` for the full +gotcha list. +""" + +from agent_core import action + + +# ================================================================== +# Contacts +# ================================================================== + + +@action( + name="list_hubspot_contacts", + description="List HubSpot contacts. 
Paginated; pass 'after' from the previous response's paging.next.after to get more.", + action_sets=["hubspot_contacts", "hubspot"], + input_schema={ + "limit": { + "type": "integer", + "description": "Max results (1-100, default 30).", + "example": 30, + }, + "after": { + "type": "string", + "description": "Pagination cursor from previous response.", + "example": "", + }, + "properties": { + "type": "string", + "description": "Comma-separated property names to include.", + "example": "email,firstname,lastname", + }, + "archived": { + "type": "boolean", + "description": "Include archived contacts.", + "example": False, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_contacts(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_contacts", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + archived=input_data.get("archived", False), + ) + + +@action( + name="get_hubspot_contact", + description="Get a HubSpot contact by ID. 
Returns properties and (if requested) associated objects.", + action_sets=["hubspot_contacts", "hubspot"], + input_schema={ + "contact_id": { + "type": "string", + "description": "HubSpot contact ID (numeric string).", + "example": "123456789", + }, + "properties": { + "type": "string", + "description": "Comma-separated property names to include.", + "example": "email,firstname,lastname,phone", + }, + "associations": { + "type": "string", + "description": "Comma-separated object types to include associations for.", + "example": "companies,deals", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_contact(input_data: dict) -> dict: + props = input_data.get("properties", "") + assocs = input_data.get("associations", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_contact", + contact_id=input_data["contact_id"], + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + associations=[a.strip() for a in assocs.split(",") if a.strip()] or None, + ) + + +@action( + name="create_hubspot_contact", + description="Create a HubSpot contact. 
'properties' is a flat dict like {email, firstname, lastname, phone, company}.", + action_sets=["hubspot_contacts", "hubspot"], + input_schema={ + "properties": { + "type": "object", + "description": "Flat property dict.", + "example": { + "email": "jane@example.com", + "firstname": "Jane", + "lastname": "Doe", + }, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_contact(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_contact", + properties=input_data["properties"], + ) + + +@action( + name="update_hubspot_contact", + description="Update a HubSpot contact's properties.", + action_sets=["hubspot_contacts", "hubspot"], + input_schema={ + "contact_id": { + "type": "string", + "description": "Contact ID.", + "example": "123456789", + }, + "properties": { + "type": "object", + "description": "Properties to update (flat dict).", + "example": {"phone": "+1-555-0100"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_contact(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_contact", + contact_id=input_data["contact_id"], + properties=input_data["properties"], + ) + + +@action( + name="delete_hubspot_contact", + description="Archive (soft-delete) a HubSpot contact. 
The record can be restored from the trash UI.", + action_sets=["hubspot_contacts"], + input_schema={ + "contact_id": { + "type": "string", + "description": "Contact ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_contact(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "delete_contact", contact_id=input_data["contact_id"] + ) + + +@action( + name="search_hubspot_contacts", + description="Search HubSpot contacts. Use 'query' for free-text or 'filter_groups' for precise property filters (operators: EQ, NEQ, GT, GTE, LT, LTE, BETWEEN, IN, NOT_IN, CONTAINS_TOKEN, HAS_PROPERTY).", + action_sets=["hubspot_contacts", "hubspot"], + input_schema={ + "query": { + "type": "string", + "description": "Free-text search across default searchable properties.", + "example": "jane@example.com", + }, + "filter_groups": { + "type": "array", + "description": "Filter groups: [{filters: [{propertyName, operator, value}]}].", + "example": [ + { + "filters": [ + { + "propertyName": "email", + "operator": "EQ", + "value": "jane@example.com", + } + ] + } + ], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties to return.", + "example": "email,firstname,lastname", + }, + "limit": { + "type": "integer", + "description": "Max results (1-100).", + "example": 30, + }, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def search_hubspot_contacts(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "search_contacts", + query=input_data.get("query") or None, + filter_groups=input_data.get("filter_groups") or None, + 
properties=[p.strip() for p in props.split(",") if p.strip()] or None, + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="batch_get_hubspot_contacts", + description="Read up to 100 contacts in a single call. Cheaper than N gets.", + action_sets=["hubspot_contacts"], + input_schema={ + "ids": { + "type": "array", + "description": "Contact IDs.", + "example": ["123", "456", "789"], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties to return.", + "example": "email,firstname", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def batch_get_hubspot_contacts(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "batch_get_contacts", + ids=input_data["ids"], + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="batch_create_hubspot_contacts", + description="Create up to 100 contacts in a single call. 'records' is a list of flat property dicts.", + action_sets=["hubspot_contacts"], + input_schema={ + "records": { + "type": "array", + "description": "List of property dicts.", + "example": [{"email": "a@x.com"}, {"email": "b@x.com"}], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def batch_create_hubspot_contacts(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "batch_create_contacts", records=input_data["records"] + ) + + +@action( + name="merge_hubspot_contacts", + description="Merge two contacts. 
The primary contact survives; the secondary is archived with associations transferred.", + action_sets=["hubspot_contacts"], + input_schema={ + "primary_id": { + "type": "string", + "description": "Contact ID that survives the merge.", + "example": "123", + }, + "id_to_merge": { + "type": "string", + "description": "Contact ID that gets merged INTO the primary.", + "example": "456", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def merge_hubspot_contacts(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "merge_contacts", + primary_id=input_data["primary_id"], + id_to_merge=input_data["id_to_merge"], + ) + + +# ================================================================== +# Companies +# ================================================================== + + +@action( + name="list_hubspot_companies", + description="List HubSpot companies. 
Paginated via 'after' cursor.", + action_sets=["hubspot_companies", "hubspot"], + input_schema={ + "limit": { + "type": "integer", + "description": "Max results (1-100).", + "example": 30, + }, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated property names.", + "example": "name,domain,industry", + }, + "archived": { + "type": "boolean", + "description": "Include archived.", + "example": False, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_companies(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_companies", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + archived=input_data.get("archived", False), + ) + + +@action( + name="get_hubspot_company", + description="Get a HubSpot company by ID.", + action_sets=["hubspot_companies"], + input_schema={ + "company_id": { + "type": "string", + "description": "Company ID (numeric string).", + "example": "123456789", + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "name,domain,industry,city", + }, + "associations": { + "type": "string", + "description": "Comma-separated association types.", + "example": "contacts,deals", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_company(input_data: dict) -> dict: + props = input_data.get("properties", "") + assocs = input_data.get("associations", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_company", + company_id=input_data["company_id"], + properties=[p.strip() for p in props.split(",") if p.strip()] or 
None, + associations=[a.strip() for a in assocs.split(",") if a.strip()] or None, + ) + + +@action( + name="create_hubspot_company", + description="Create a HubSpot company. Typical properties: name, domain, industry, city, country.", + action_sets=["hubspot_companies", "hubspot"], + input_schema={ + "properties": { + "type": "object", + "description": "Flat property dict.", + "example": {"name": "Acme Co", "domain": "acme.com"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_company(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "create_company", properties=input_data["properties"] + ) + + +@action( + name="update_hubspot_company", + description="Update a HubSpot company's properties.", + action_sets=["hubspot_companies"], + input_schema={ + "company_id": { + "type": "string", + "description": "Company ID.", + "example": "123456789", + }, + "properties": { + "type": "object", + "description": "Properties to update.", + "example": {"industry": "Software"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_company(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_company", + company_id=input_data["company_id"], + properties=input_data["properties"], + ) + + +@action( + name="delete_hubspot_company", + description="Archive (soft-delete) a HubSpot company.", + action_sets=["hubspot_companies"], + input_schema={ + "company_id": { + "type": "string", + "description": "Company ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_company(input_data: dict) -> dict: + from app.data.action.integrations._helpers import 
run_client + + return await run_client( + "hubspot", "delete_company", company_id=input_data["company_id"] + ) + + +@action( + name="search_hubspot_companies", + description="Search HubSpot companies using query or filter_groups (same shape as contact search).", + action_sets=["hubspot_companies", "hubspot"], + input_schema={ + "query": { + "type": "string", + "description": "Free-text search.", + "example": "acme", + }, + "filter_groups": { + "type": "array", + "description": "Property filter groups.", + "example": [ + { + "filters": [ + { + "propertyName": "domain", + "operator": "EQ", + "value": "acme.com", + } + ] + } + ], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties to return.", + "example": "name,domain", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def search_hubspot_companies(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "search_companies", + query=input_data.get("query") or None, + filter_groups=input_data.get("filter_groups") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="batch_get_hubspot_companies", + description="Read up to 100 companies in a single call.", + action_sets=["hubspot_companies"], + input_schema={ + "ids": { + "type": "array", + "description": "Company IDs.", + "example": ["123", "456"], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "name,domain", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def 
batch_get_hubspot_companies(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "batch_get_companies", + ids=input_data["ids"], + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="batch_create_hubspot_companies", + description="Create up to 100 companies in a single call.", + action_sets=["hubspot_companies"], + input_schema={ + "records": { + "type": "array", + "description": "List of property dicts.", + "example": [{"name": "Acme"}, {"name": "Foo"}], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def batch_create_hubspot_companies(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "batch_create_companies", records=input_data["records"] + ) + + +# ================================================================== +# Deals +# ================================================================== + + +@action( + name="list_hubspot_deals", + description="List HubSpot deals. 
Paginated.", + action_sets=["hubspot_deals", "hubspot"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "dealname,amount,dealstage,pipeline", + }, + "archived": { + "type": "boolean", + "description": "Include archived.", + "example": False, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_deals(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_deals", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + archived=input_data.get("archived", False), + ) + + +@action( + name="get_hubspot_deal", + description="Get a HubSpot deal by ID.", + action_sets=["hubspot_deals"], + input_schema={ + "deal_id": { + "type": "string", + "description": "Deal ID.", + "example": "123456789", + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "dealname,amount,dealstage,pipeline,closedate", + }, + "associations": { + "type": "string", + "description": "Comma-separated association types.", + "example": "contacts,companies", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_deal(input_data: dict) -> dict: + props = input_data.get("properties", "") + assocs = input_data.get("associations", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_deal", + deal_id=input_data["deal_id"], + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + associations=[a.strip() for a in assocs.split(",") if 
a.strip()] or None, + ) + + +@action( + name="create_hubspot_deal", + description="Create a HubSpot deal. Typical properties: dealname, amount, dealstage, pipeline, closedate, hubspot_owner_id.", + action_sets=["hubspot_deals", "hubspot"], + input_schema={ + "properties": { + "type": "object", + "description": "Flat property dict.", + "example": { + "dealname": "Q3 renewal", + "amount": "50000", + "dealstage": "qualifiedtobuy", + }, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_deal(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "create_deal", properties=input_data["properties"] + ) + + +@action( + name="update_hubspot_deal", + description="Update a HubSpot deal's properties.", + action_sets=["hubspot_deals", "hubspot"], + input_schema={ + "deal_id": { + "type": "string", + "description": "Deal ID.", + "example": "123456789", + }, + "properties": { + "type": "object", + "description": "Properties to update.", + "example": {"amount": "75000"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_deal(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_deal", + deal_id=input_data["deal_id"], + properties=input_data["properties"], + ) + + +@action( + name="delete_hubspot_deal", + description="Archive (soft-delete) a HubSpot deal.", + action_sets=["hubspot_deals"], + input_schema={ + "deal_id": { + "type": "string", + "description": "Deal ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_deal(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", 
"delete_deal", deal_id=input_data["deal_id"]) + + +@action( + name="search_hubspot_deals", + description="Search HubSpot deals via query or filter_groups.", + action_sets=["hubspot_deals"], + input_schema={ + "query": { + "type": "string", + "description": "Free-text search.", + "example": "renewal", + }, + "filter_groups": { + "type": "array", + "description": "Property filter groups.", + "example": [ + { + "filters": [ + { + "propertyName": "dealstage", + "operator": "EQ", + "value": "closedwon", + } + ] + } + ], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "dealname,amount", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def search_hubspot_deals(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "search_deals", + query=input_data.get("query") or None, + filter_groups=input_data.get("filter_groups") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="batch_create_hubspot_deals", + description="Create up to 100 deals in a single call.", + action_sets=["hubspot_deals"], + input_schema={ + "records": { + "type": "array", + "description": "List of property dicts.", + "example": [{"dealname": "A"}, {"dealname": "B"}], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def batch_create_hubspot_deals(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "batch_create_deals", records=input_data["records"] + ) + + 
+@action( + name="move_hubspot_deal_stage", + description="Move a deal to a different pipeline stage. Helper around updating the 'dealstage' property.", + action_sets=["hubspot_deals", "hubspot"], + input_schema={ + "deal_id": { + "type": "string", + "description": "Deal ID.", + "example": "123456789", + }, + "stage_id": { + "type": "string", + "description": "Target stage ID (use list_hubspot_pipeline_stages to find).", + "example": "closedwon", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def move_hubspot_deal_stage(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "move_deal_stage", + deal_id=input_data["deal_id"], + stage_id=input_data["stage_id"], + ) + + +@action( + name="list_hubspot_deals_by_pipeline", + description="List deals in a specific pipeline. Helper that wraps search with a pipeline filter.", + action_sets=["hubspot_deals"], + input_schema={ + "pipeline_id": { + "type": "string", + "description": "Pipeline ID.", + "example": "default", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_deals_by_pipeline(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_deals_by_pipeline", + pipeline_id=input_data["pipeline_id"], + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +# ================================================================== +# Tickets +# ================================================================== + + +@action( + name="list_hubspot_tickets", + description="List HubSpot support tickets. 
Paginated.", + action_sets=["hubspot_tickets", "hubspot"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "subject,content,hs_pipeline_stage,hs_ticket_priority", + }, + "archived": { + "type": "boolean", + "description": "Include archived.", + "example": False, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_tickets(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_tickets", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + archived=input_data.get("archived", False), + ) + + +@action( + name="get_hubspot_ticket", + description="Get a HubSpot ticket by ID.", + action_sets=["hubspot_tickets"], + input_schema={ + "ticket_id": { + "type": "string", + "description": "Ticket ID.", + "example": "123456789", + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "subject,content,hs_pipeline_stage", + }, + "associations": { + "type": "string", + "description": "Comma-separated association types.", + "example": "contacts,companies", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_ticket(input_data: dict) -> dict: + props = input_data.get("properties", "") + assocs = input_data.get("associations", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_ticket", + ticket_id=input_data["ticket_id"], + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + associations=[a.strip() 
for a in assocs.split(",") if a.strip()] or None, + ) + + +@action( + name="create_hubspot_ticket", + description="Create a HubSpot support ticket. Typical properties: subject, content, hs_pipeline, hs_pipeline_stage, hs_ticket_priority (LOW/MEDIUM/HIGH/URGENT).", + action_sets=["hubspot_tickets", "hubspot"], + input_schema={ + "properties": { + "type": "object", + "description": "Flat property dict.", + "example": { + "subject": "Login fails", + "content": "User can't log in", + "hs_ticket_priority": "HIGH", + }, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_ticket(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "create_ticket", properties=input_data["properties"] + ) + + +@action( + name="update_hubspot_ticket", + description="Update a HubSpot ticket's properties.", + action_sets=["hubspot_tickets"], + input_schema={ + "ticket_id": { + "type": "string", + "description": "Ticket ID.", + "example": "123456789", + }, + "properties": { + "type": "object", + "description": "Properties to update.", + "example": {"hs_ticket_priority": "URGENT"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_ticket(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_ticket", + ticket_id=input_data["ticket_id"], + properties=input_data["properties"], + ) + + +@action( + name="delete_hubspot_ticket", + description="Archive (soft-delete) a HubSpot ticket.", + action_sets=["hubspot_tickets"], + input_schema={ + "ticket_id": { + "type": "string", + "description": "Ticket ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_ticket(input_data: 
dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "delete_ticket", ticket_id=input_data["ticket_id"] + ) + + +@action( + name="search_hubspot_tickets", + description="Search HubSpot tickets via query or filter_groups.", + action_sets=["hubspot_tickets"], + input_schema={ + "query": { + "type": "string", + "description": "Free-text search.", + "example": "login", + }, + "filter_groups": { + "type": "array", + "description": "Filter groups.", + "example": [ + { + "filters": [ + { + "propertyName": "hs_ticket_priority", + "operator": "EQ", + "value": "HIGH", + } + ] + } + ], + }, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "subject,content", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def search_hubspot_tickets(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "search_tickets", + query=input_data.get("query") or None, + filter_groups=input_data.get("filter_groups") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="close_hubspot_ticket", + description="Move a ticket to its closed stage. 
Helper around updating 'hs_pipeline_stage'.", + action_sets=["hubspot_tickets", "hubspot"], + input_schema={ + "ticket_id": { + "type": "string", + "description": "Ticket ID.", + "example": "123456789", + }, + "closed_stage_id": { + "type": "string", + "description": "Closed-stage ID for this pipeline (use list_hubspot_pipeline_stages).", + "example": "4", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def close_hubspot_ticket(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "close_ticket", + ticket_id=input_data["ticket_id"], + closed_stage_id=input_data["closed_stage_id"], + ) + + +@action( + name="list_hubspot_tickets_by_pipeline", + description="List tickets in a specific pipeline. Helper that wraps search.", + action_sets=["hubspot_tickets"], + input_schema={ + "pipeline_id": { + "type": "string", + "description": "Pipeline ID.", + "example": "0", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_tickets_by_pipeline(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_tickets_by_pipeline", + pipeline_id=input_data["pipeline_id"], + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +# ================================================================== +# Engagements (tasks / notes / calls / emails / meetings) +# ================================================================== + + +@action( + name="list_hubspot_tasks", + description="List HubSpot tasks (engagements).", + action_sets=["hubspot_engagements"], + input_schema={ + "limit": {"type": "integer", "description": "Max 
results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "hs_task_subject,hs_task_status,hs_timestamp", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_tasks(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_tasks", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="create_hubspot_task", + description="Create a HubSpot task. Optionally associate it with a contact/company/deal/ticket.", + action_sets=["hubspot_engagements", "hubspot"], + input_schema={ + "subject": { + "type": "string", + "description": "Task title.", + "example": "Follow up on demo", + }, + "body": { + "type": "string", + "description": "Task description.", + "example": "Ask about pricing tier", + }, + "due_timestamp_ms": { + "type": "integer", + "description": "Due date in ms since epoch.", + "example": 1735689600000, + }, + "owner_id": { + "type": "string", + "description": "Owner (user) ID to assign.", + "example": "12345", + }, + "priority": { + "type": "string", + "description": "NONE | LOW | MEDIUM | HIGH.", + "example": "MEDIUM", + }, + "status": { + "type": "string", + "description": "NOT_STARTED | IN_PROGRESS | WAITING | COMPLETED | DEFERRED.", + "example": "NOT_STARTED", + }, + "associated_object_type": { + "type": "string", + "description": "Type of object to associate (contacts/companies/deals/tickets).", + "example": "contacts", + }, + "associated_object_id": { + "type": "string", + "description": "ID of the associated object.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": 
"success"}}, + parallelizable=False, +) +async def create_hubspot_task(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_task", + subject=input_data["subject"], + body=input_data.get("body", ""), + due_timestamp_ms=input_data.get("due_timestamp_ms"), + owner_id=input_data.get("owner_id") or None, + priority=input_data.get("priority", "NONE"), + status=input_data.get("status", "NOT_STARTED"), + associated_object_type=input_data.get("associated_object_type") or None, + associated_object_id=input_data.get("associated_object_id") or None, + ) + + +@action( + name="update_hubspot_task", + description="Update a HubSpot task. Common updates: hs_task_status, hs_task_priority, hs_task_subject.", + action_sets=["hubspot_engagements"], + input_schema={ + "task_id": { + "type": "string", + "description": "Task ID.", + "example": "123456789", + }, + "properties": { + "type": "object", + "description": "Properties to update.", + "example": {"hs_task_status": "COMPLETED"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_task(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_task", + task_id=input_data["task_id"], + properties=input_data["properties"], + ) + + +@action( + name="delete_hubspot_task", + description="Archive a HubSpot task.", + action_sets=["hubspot_engagements"], + input_schema={ + "task_id": { + "type": "string", + "description": "Task ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_task(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "delete_task", task_id=input_data["task_id"]) + + +@action( + 
name="list_hubspot_notes", + description="List HubSpot notes (engagements).", + action_sets=["hubspot_engagements"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "hs_note_body,hs_timestamp", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_notes(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_notes", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="create_hubspot_note", + description="Create a HubSpot note (typically attached to a contact/company/deal/ticket).", + action_sets=["hubspot_engagements", "hubspot"], + input_schema={ + "body": { + "type": "string", + "description": "Note content (HTML supported).", + "example": "Customer mentioned interest in Enterprise tier", + }, + "owner_id": {"type": "string", "description": "Owner ID.", "example": "12345"}, + "associated_object_type": { + "type": "string", + "description": "contacts/companies/deals/tickets.", + "example": "contacts", + }, + "associated_object_id": { + "type": "string", + "description": "ID of associated object.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_note(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_note", + body=input_data["body"], + owner_id=input_data.get("owner_id") or None, + associated_object_type=input_data.get("associated_object_type") 
or None, + associated_object_id=input_data.get("associated_object_id") or None, + ) + + +@action( + name="delete_hubspot_note", + description="Archive a HubSpot note.", + action_sets=["hubspot_engagements"], + input_schema={ + "note_id": { + "type": "string", + "description": "Note ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_note(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "delete_note", note_id=input_data["note_id"]) + + +@action( + name="list_hubspot_calls", + description="List HubSpot call engagements (logged calls).", + action_sets=["hubspot_engagements"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "hs_call_title,hs_call_duration,hs_call_direction", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_calls(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_calls", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="log_hubspot_call", + description="Log a phone call as a HubSpot engagement.", + action_sets=["hubspot_engagements", "hubspot"], + input_schema={ + "title": { + "type": "string", + "description": "Call title.", + "example": "Discovery call", + }, + "body": { + "type": "string", + "description": "Call notes.", + "example": "Discussed pricing", + }, + "timestamp_ms": { + "type": "integer", + "description": "When the 
call happened (ms epoch). Defaults to now.", + "example": 1735689600000, + }, + "duration_ms": { + "type": "integer", + "description": "Call duration in ms.", + "example": 600000, + }, + "from_number": { + "type": "string", + "description": "Caller phone.", + "example": "+1-555-0100", + }, + "to_number": { + "type": "string", + "description": "Callee phone.", + "example": "+1-555-0200", + }, + "direction": { + "type": "string", + "description": "INBOUND | OUTBOUND.", + "example": "OUTBOUND", + }, + "disposition": { + "type": "string", + "description": "Outcome ID (configured per portal).", + "example": "", + }, + "owner_id": {"type": "string", "description": "Owner ID.", "example": "12345"}, + "associated_object_type": { + "type": "string", + "description": "contacts/companies/deals/tickets.", + "example": "contacts", + }, + "associated_object_id": { + "type": "string", + "description": "Associated object ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def log_hubspot_call(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "log_call", + title=input_data["title"], + body=input_data.get("body", ""), + timestamp_ms=input_data.get("timestamp_ms"), + duration_ms=input_data.get("duration_ms"), + from_number=input_data.get("from_number") or None, + to_number=input_data.get("to_number") or None, + direction=input_data.get("direction", "OUTBOUND"), + disposition=input_data.get("disposition") or None, + owner_id=input_data.get("owner_id") or None, + associated_object_type=input_data.get("associated_object_type") or None, + associated_object_id=input_data.get("associated_object_id") or None, + ) + + +@action( + name="list_hubspot_emails", + description="List HubSpot email engagements (logged emails — not marketing email sends).", + action_sets=["hubspot_engagements"], + input_schema={ + "limit": 
{"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "hs_email_subject,hs_email_direction", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_emails(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_emails", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="log_hubspot_email", + description="Log an email as a HubSpot engagement (for record-keeping; doesn't actually send).", + action_sets=["hubspot_engagements"], + input_schema={ + "subject": { + "type": "string", + "description": "Email subject.", + "example": "Re: Pricing", + }, + "text_body": { + "type": "string", + "description": "Plain-text body.", + "example": "Here's the proposal", + }, + "html_body": { + "type": "string", + "description": "HTML body (optional).", + "example": "", + }, + "timestamp_ms": { + "type": "integer", + "description": "When sent (ms epoch).", + "example": 1735689600000, + }, + "direction": { + "type": "string", + "description": "EMAIL (outgoing) | INCOMING_EMAIL | FORWARDED_EMAIL.", + "example": "EMAIL", + }, + "from_email": { + "type": "string", + "description": "Sender.", + "example": "you@yourdomain.com", + }, + "to_email": { + "type": "string", + "description": "Recipient.", + "example": "customer@example.com", + }, + "owner_id": {"type": "string", "description": "Owner ID.", "example": "12345"}, + "associated_object_type": { + "type": "string", + "description": "contacts/companies/deals/tickets.", + "example": "contacts", + }, + "associated_object_id": { + "type": "string", + 
"description": "Associated object ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def log_hubspot_email(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "log_email", + subject=input_data["subject"], + text_body=input_data.get("text_body", ""), + html_body=input_data.get("html_body", ""), + timestamp_ms=input_data.get("timestamp_ms"), + direction=input_data.get("direction", "EMAIL"), + from_email=input_data.get("from_email") or None, + to_email=input_data.get("to_email") or None, + owner_id=input_data.get("owner_id") or None, + associated_object_type=input_data.get("associated_object_type") or None, + associated_object_id=input_data.get("associated_object_id") or None, + ) + + +@action( + name="list_hubspot_meetings", + description="List HubSpot meeting engagements.", + action_sets=["hubspot_engagements"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + "properties": { + "type": "string", + "description": "Comma-separated properties.", + "example": "hs_meeting_title,hs_meeting_start_time", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_meetings(input_data: dict) -> dict: + props = input_data.get("properties", "") + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_meetings", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + properties=[p.strip() for p in props.split(",") if p.strip()] or None, + ) + + +@action( + name="create_hubspot_meeting", + description="Create a HubSpot meeting engagement record.", + action_sets=["hubspot_engagements"], + input_schema={ + "title": { + "type": "string", + "description": 
"Meeting title.", + "example": "Quarterly review", + }, + "body": { + "type": "string", + "description": "Description / agenda.", + "example": "Review Q3 numbers", + }, + "start_timestamp_ms": { + "type": "integer", + "description": "Start time (ms epoch).", + "example": 1735689600000, + }, + "end_timestamp_ms": { + "type": "integer", + "description": "End time (ms epoch).", + "example": 1735693200000, + }, + "location": { + "type": "string", + "description": "Where (URL or address).", + "example": "https://zoom.us/j/123", + }, + "meeting_outcome": { + "type": "string", + "description": "Outcome ID (configured per portal).", + "example": "", + }, + "owner_id": {"type": "string", "description": "Owner ID.", "example": "12345"}, + "associated_object_type": { + "type": "string", + "description": "contacts/companies/deals/tickets.", + "example": "deals", + }, + "associated_object_id": { + "type": "string", + "description": "Associated object ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_meeting(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_meeting", + title=input_data["title"], + body=input_data.get("body", ""), + start_timestamp_ms=input_data["start_timestamp_ms"], + end_timestamp_ms=input_data["end_timestamp_ms"], + location=input_data.get("location") or None, + meeting_outcome=input_data.get("meeting_outcome") or None, + owner_id=input_data.get("owner_id") or None, + associated_object_type=input_data.get("associated_object_type") or None, + associated_object_id=input_data.get("associated_object_id") or None, + ) + + +@action( + name="delete_hubspot_meeting", + description="Archive a HubSpot meeting engagement.", + action_sets=["hubspot_engagements"], + input_schema={ + "meeting_id": { + "type": "string", + "description": "Meeting ID.", + "example": 
"123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_meeting(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "delete_meeting", meeting_id=input_data["meeting_id"] + ) + + +# ================================================================== +# Lists +# ================================================================== + + +@action( + name="list_hubspot_lists", + description="List/search HubSpot lists. Optionally filter to specific list IDs.", + action_sets=["hubspot_lists"], + input_schema={ + "limit": { + "type": "integer", + "description": "Max results (1-500).", + "example": 30, + }, + "list_ids": { + "type": "array", + "description": "Optional: specific list IDs to fetch.", + "example": [], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_lists(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_lists", + limit=input_data.get("limit", 30), + list_ids=input_data.get("list_ids") or None, + ) + + +@action( + name="get_hubspot_list", + description="Get a HubSpot list by ID.", + action_sets=["hubspot_lists"], + input_schema={ + "list_id": {"type": "string", "description": "List ID.", "example": "1"}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_list(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "get_list", list_id=input_data["list_id"]) + + +@action( + name="create_hubspot_list", + description="Create a HubSpot list. 
processing_type=MANUAL for static (you add contacts yourself); DYNAMIC for filter-based.", + action_sets=["hubspot_lists"], + input_schema={ + "name": { + "type": "string", + "description": "List name.", + "example": "Q3 prospects", + }, + "object_type_id": { + "type": "string", + "description": "Object type ID (0-1=contact, 0-2=company, 0-3=deal, 0-5=ticket).", + "example": "0-1", + }, + "processing_type": { + "type": "string", + "description": "MANUAL or DYNAMIC.", + "example": "MANUAL", + }, + "filter_branch": { + "type": "object", + "description": "Filter tree for DYNAMIC lists.", + "example": {}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_list(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_list", + name=input_data["name"], + object_type_id=input_data.get("object_type_id", "0-1"), + processing_type=input_data.get("processing_type", "MANUAL"), + filter_branch=input_data.get("filter_branch") or None, + ) + + +@action( + name="delete_hubspot_list", + description="Delete a HubSpot list.", + action_sets=["hubspot_lists"], + input_schema={ + "list_id": {"type": "string", "description": "List ID.", "example": "1"}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_list(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "delete_list", list_id=input_data["list_id"]) + + +@action( + name="add_contacts_to_hubspot_list", + description="Add contact IDs to a static (MANUAL) list. 
No-op on DYNAMIC lists.", + action_sets=["hubspot_lists"], + input_schema={ + "list_id": {"type": "string", "description": "List ID.", "example": "1"}, + "contact_ids": { + "type": "array", + "description": "Contact IDs to add.", + "example": ["123", "456"], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def add_contacts_to_hubspot_list(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "add_contacts_to_list", + list_id=input_data["list_id"], + contact_ids=input_data["contact_ids"], + ) + + +@action( + name="remove_contacts_from_hubspot_list", + description="Remove contact IDs from a static (MANUAL) list.", + action_sets=["hubspot_lists"], + input_schema={ + "list_id": {"type": "string", "description": "List ID.", "example": "1"}, + "contact_ids": { + "type": "array", + "description": "Contact IDs to remove.", + "example": ["123", "456"], + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def remove_contacts_from_hubspot_list(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "remove_contacts_from_list", + list_id=input_data["list_id"], + contact_ids=input_data["contact_ids"], + ) + + +# ================================================================== +# Pipelines +# ================================================================== + + +@action( + name="list_hubspot_pipelines", + description="List all pipelines for an object type (typically 'deals' or 'tickets').", + action_sets=["hubspot_pipelines"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type: deals or tickets.", + "example": "deals", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_pipelines(input_data: dict) -> dict: + 
from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "list_pipelines", object_type=input_data["object_type"] + ) + + +@action( + name="get_hubspot_pipeline", + description="Get a pipeline definition (including stages).", + action_sets=["hubspot_pipelines"], + input_schema={ + "object_type": { + "type": "string", + "description": "deals or tickets.", + "example": "deals", + }, + "pipeline_id": { + "type": "string", + "description": "Pipeline ID.", + "example": "default", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_pipeline(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_pipeline", + object_type=input_data["object_type"], + pipeline_id=input_data["pipeline_id"], + ) + + +@action( + name="create_hubspot_pipeline", + description="Create a new pipeline. 'stages' is a list of {label, displayOrder, metadata:{probability,...}} dicts.", + action_sets=["hubspot_pipelines"], + input_schema={ + "object_type": { + "type": "string", + "description": "deals or tickets.", + "example": "deals", + }, + "label": { + "type": "string", + "description": "Pipeline name.", + "example": "Renewals", + }, + "stages": { + "type": "array", + "description": "Stage definitions.", + "example": [ + {"label": "New", "displayOrder": 0, "metadata": {"probability": "0.1"}} + ], + }, + "display_order": { + "type": "integer", + "description": "Display order among pipelines.", + "example": 0, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_pipeline(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_pipeline", + object_type=input_data["object_type"], + label=input_data["label"], + stages=input_data["stages"], + 
display_order=input_data.get("display_order", 0), + ) + + +@action( + name="list_hubspot_pipeline_stages", + description="List the stages of a pipeline. Returns stage IDs needed for move_hubspot_deal_stage / close_hubspot_ticket.", + action_sets=["hubspot_pipelines"], + input_schema={ + "object_type": { + "type": "string", + "description": "deals or tickets.", + "example": "deals", + }, + "pipeline_id": { + "type": "string", + "description": "Pipeline ID.", + "example": "default", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_pipeline_stages(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_pipeline_stages", + object_type=input_data["object_type"], + pipeline_id=input_data["pipeline_id"], + ) + + +@action( + name="update_hubspot_pipeline_stage", + description="Update a pipeline stage's properties (label, displayOrder, metadata).", + action_sets=["hubspot_pipelines"], + input_schema={ + "object_type": { + "type": "string", + "description": "deals or tickets.", + "example": "deals", + }, + "pipeline_id": { + "type": "string", + "description": "Pipeline ID.", + "example": "default", + }, + "stage_id": { + "type": "string", + "description": "Stage ID.", + "example": "qualifiedtobuy", + }, + "properties": { + "type": "object", + "description": "Stage fields to update.", + "example": {"label": "Qualified — Buying"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_pipeline_stage(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_pipeline_stage", + object_type=input_data["object_type"], + pipeline_id=input_data["pipeline_id"], + stage_id=input_data["stage_id"], + properties=input_data["properties"], + ) + + +# 
================================================================== +# Owners +# ================================================================== + + +@action( + name="list_hubspot_owners", + description="List HubSpot users (owners). Use this to find owner IDs for assignment.", + action_sets=["hubspot_owners", "hubspot"], + input_schema={ + "email": { + "type": "string", + "description": "Optional: filter to one owner by email.", + "example": "", + }, + "limit": { + "type": "integer", + "description": "Max results (1-500).", + "example": 100, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_owners(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_owners", + email=input_data.get("email") or None, + limit=input_data.get("limit", 100), + ) + + +@action( + name="get_hubspot_owner", + description="Get a HubSpot owner (user) by ID.", + action_sets=["hubspot_owners"], + input_schema={ + "owner_id": {"type": "string", "description": "Owner ID.", "example": "12345"}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_owner(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "get_owner", owner_id=input_data["owner_id"]) + + +# ================================================================== +# Properties (custom-field schema management) +# ================================================================== + + +@action( + name="list_hubspot_properties", + description="List all defined properties for an object type. 
Use this to discover custom-field names before reading/writing them.", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "contacts/companies/deals/tickets or custom schema name.", + "example": "contacts", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_properties(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "list_properties", object_type=input_data["object_type"] + ) + + +@action( + name="get_hubspot_property", + description="Get a property definition (type, options, group).", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type.", + "example": "contacts", + }, + "property_name": { + "type": "string", + "description": "Property internal name.", + "example": "firstname", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_property(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_property", + object_type=input_data["object_type"], + property_name=input_data["property_name"], + ) + + +@action( + name="create_hubspot_property", + description="Create a new custom property. 
'definition' must include name, label, type, fieldType, groupName.", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type.", + "example": "contacts", + }, + "definition": { + "type": "object", + "description": "Property definition.", + "example": { + "name": "favorite_color", + "label": "Favorite color", + "type": "string", + "fieldType": "text", + "groupName": "contactinformation", + }, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_property(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_property", + object_type=input_data["object_type"], + definition=input_data["definition"], + ) + + +@action( + name="update_hubspot_property", + description="Update an existing property's definition (label, description, options).", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type.", + "example": "contacts", + }, + "property_name": { + "type": "string", + "description": "Property internal name.", + "example": "favorite_color", + }, + "definition": { + "type": "object", + "description": "Fields to update.", + "example": {"label": "Color preference"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def update_hubspot_property(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "update_property", + object_type=input_data["object_type"], + property_name=input_data["property_name"], + definition=input_data["definition"], + ) + + +@action( + name="delete_hubspot_property", + description="Delete a custom property. 
Built-in HubSpot properties cannot be deleted.", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type.", + "example": "contacts", + }, + "property_name": { + "type": "string", + "description": "Property internal name.", + "example": "favorite_color", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_property(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "delete_property", + object_type=input_data["object_type"], + property_name=input_data["property_name"], + ) + + +@action( + name="list_hubspot_property_groups", + description="List property groups for an object type (the visual sections grouping properties in HubSpot UI).", + action_sets=["hubspot_properties"], + input_schema={ + "object_type": { + "type": "string", + "description": "Object type.", + "example": "contacts", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_property_groups(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_property_groups", + object_type=input_data["object_type"], + ) + + +# ================================================================== +# Associations (object-to-object links) +# ================================================================== + + +@action( + name="create_hubspot_association", + description="Link two objects (e.g. attach a contact to a deal). 
Leaves association_type_id empty for the default association between the pair.", + action_sets=["hubspot_associations", "hubspot"], + input_schema={ + "from_object_type": { + "type": "string", + "description": "Source object type.", + "example": "deals", + }, + "from_object_id": { + "type": "string", + "description": "Source object ID.", + "example": "123", + }, + "to_object_type": { + "type": "string", + "description": "Target object type.", + "example": "contacts", + }, + "to_object_id": { + "type": "string", + "description": "Target object ID.", + "example": "456", + }, + "association_type_id": { + "type": "integer", + "description": "Optional: specific association type ID (use list_hubspot_association_types).", + "example": 0, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_association(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_association", + from_object_type=input_data["from_object_type"], + from_object_id=input_data["from_object_id"], + to_object_type=input_data["to_object_type"], + to_object_id=input_data["to_object_id"], + association_type_id=input_data.get("association_type_id") or None, + ) + + +@action( + name="list_hubspot_associations", + description="List all objects of a given type associated with a source object.", + action_sets=["hubspot_associations"], + input_schema={ + "from_object_type": { + "type": "string", + "description": "Source object type.", + "example": "deals", + }, + "from_object_id": { + "type": "string", + "description": "Source object ID.", + "example": "123", + }, + "to_object_type": { + "type": "string", + "description": "Target object type to look up.", + "example": "contacts", + }, + "limit": { + "type": "integer", + "description": "Max results (1-500).", + "example": 100, + }, + "after": {"type": "string", "description": "Pagination cursor.", 
"example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_associations(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_associations", + from_object_type=input_data["from_object_type"], + from_object_id=input_data["from_object_id"], + to_object_type=input_data["to_object_type"], + limit=input_data.get("limit", 100), + after=input_data.get("after") or None, + ) + + +@action( + name="delete_hubspot_association", + description="Remove an association between two objects.", + action_sets=["hubspot_associations"], + input_schema={ + "from_object_type": { + "type": "string", + "description": "Source type.", + "example": "deals", + }, + "from_object_id": { + "type": "string", + "description": "Source ID.", + "example": "123", + }, + "to_object_type": { + "type": "string", + "description": "Target type.", + "example": "contacts", + }, + "to_object_id": { + "type": "string", + "description": "Target ID.", + "example": "456", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_association(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "delete_association", + from_object_type=input_data["from_object_type"], + from_object_id=input_data["from_object_id"], + to_object_type=input_data["to_object_type"], + to_object_id=input_data["to_object_id"], + ) + + +@action( + name="list_hubspot_association_types", + description="List the available association types between two object types (used when you need a specific labeled association).", + action_sets=["hubspot_associations"], + input_schema={ + "from_object_type": { + "type": "string", + "description": "Source type.", + "example": "deals", + }, + "to_object_type": { + "type": "string", + "description": "Target type.", + 
"example": "contacts", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_association_types(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_association_types", + from_object_type=input_data["from_object_type"], + to_object_type=input_data["to_object_type"], + ) + + +# ================================================================== +# Forms +# ================================================================== + + +@action( + name="list_hubspot_forms", + description="List HubSpot forms (marketing v3).", + action_sets=["hubspot_forms"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_forms(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_forms", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="get_hubspot_form", + description="Get a HubSpot form definition by ID.", + action_sets=["hubspot_forms"], + input_schema={ + "form_id": { + "type": "string", + "description": "Form GUID.", + "example": "abc12345-6789-0abc-def0-123456789abc", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_form(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "get_form", form_id=input_data["form_id"]) + + +@action( + name="submit_hubspot_form", + description="Programmatically submit a HubSpot form. 
'fields' is a list of {name, value} dicts.", + action_sets=["hubspot_forms"], + input_schema={ + "portal_id": { + "type": "string", + "description": "Portal/hub ID.", + "example": "12345678", + }, + "form_guid": { + "type": "string", + "description": "Form GUID.", + "example": "abc12345-6789-0abc-def0-123456789abc", + }, + "fields": { + "type": "array", + "description": "Form fields to submit.", + "example": [ + {"name": "email", "value": "jane@example.com"}, + {"name": "firstname", "value": "Jane"}, + ], + }, + "context": { + "type": "object", + "description": "Optional context (hutk, pageUrl, pageName, ipAddress).", + "example": {"pageName": "Demo Request"}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def submit_hubspot_form(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "submit_form", + portal_id=input_data["portal_id"], + form_guid=input_data["form_guid"], + fields=input_data["fields"], + context=input_data.get("context") or None, + ) + + +@action( + name="list_hubspot_form_submissions", + description="List submissions for a HubSpot form.", + action_sets=["hubspot_forms"], + input_schema={ + "form_guid": { + "type": "string", + "description": "Form GUID.", + "example": "abc12345-6789-0abc-def0-123456789abc", + }, + "limit": { + "type": "integer", + "description": "Max results (1-50).", + "example": 30, + }, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_form_submissions(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_form_submissions", + form_guid=input_data["form_guid"], + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +# 
================================================================== +# Marketing email +# ================================================================== + + +@action( + name="list_hubspot_marketing_emails", + description="List marketing email campaigns.", + action_sets=["hubspot_marketing_email"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_marketing_emails(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_marketing_emails", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="get_hubspot_marketing_email", + description="Get a marketing email campaign by ID.", + action_sets=["hubspot_marketing_email"], + input_schema={ + "email_id": { + "type": "string", + "description": "Marketing email ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_marketing_email(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "get_marketing_email", email_id=input_data["email_id"] + ) + + +@action( + name="send_hubspot_single_send", + description="Send a one-off transactional email based on a pre-built marketing email template.", + action_sets=["hubspot_marketing_email", "hubspot"], + input_schema={ + "email_id": { + "type": "string", + "description": "Marketing email template ID.", + "example": "123456789", + }, + "to_email": { + "type": "string", + "description": "Recipient email.", + "example": "jane@example.com", + }, + "custom_properties": { + "type": "object", + "description": "Optional template variables.", + "example": {"first_name": "Jane"}, 
+ }, + "contact_properties": { + "type": "object", + "description": "Optional contact-property overrides.", + "example": {}, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def send_hubspot_single_send(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "send_single_email", + email_id=input_data["email_id"], + to_email=input_data["to_email"], + custom_properties=input_data.get("custom_properties") or None, + contact_properties=input_data.get("contact_properties") or None, + ) + + +@action( + name="get_hubspot_marketing_email_statistics", + description="Get aggregated send/open/click statistics for a marketing email.", + action_sets=["hubspot_marketing_email"], + input_schema={ + "email_id": { + "type": "string", + "description": "Marketing email ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_marketing_email_statistics(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "get_marketing_email_statistics", + email_id=input_data["email_id"], + ) + + +# ================================================================== +# Files +# ================================================================== + + +@action( + name="upload_hubspot_file", + description="Upload a local file to the HubSpot file manager. 
'access' controls visibility: PUBLIC_INDEXABLE / PUBLIC_NOT_INDEXABLE / HIDDEN / PRIVATE.", + action_sets=["hubspot_files"], + input_schema={ + "file_path": { + "type": "string", + "description": "Local path to the file.", + "example": "/tmp/contract.pdf", + }, + "folder_path": { + "type": "string", + "description": "HubSpot folder path.", + "example": "/", + }, + "access": { + "type": "string", + "description": "PUBLIC_INDEXABLE | PUBLIC_NOT_INDEXABLE | HIDDEN | PRIVATE.", + "example": "PRIVATE", + }, + "overwrite": { + "type": "boolean", + "description": "Overwrite existing file with the same name.", + "example": False, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def upload_hubspot_file(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "upload_file", + file_path=input_data["file_path"], + folder_path=input_data.get("folder_path", "/"), + access=input_data.get("access", "PRIVATE"), + overwrite=input_data.get("overwrite", False), + ) + + +@action( + name="get_hubspot_file", + description="Get a file's metadata (including URL).", + action_sets=["hubspot_files"], + input_schema={ + "file_id": { + "type": "string", + "description": "File ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def get_hubspot_file(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "get_file", file_id=input_data["file_id"]) + + +@action( + name="delete_hubspot_file", + description="Delete a file from the HubSpot file manager.", + action_sets=["hubspot_files"], + input_schema={ + "file_id": { + "type": "string", + "description": "File ID.", + "example": "123456789", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def 
delete_hubspot_file(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client("hubspot", "delete_file", file_id=input_data["file_id"]) + + +@action( + name="list_hubspot_folders", + description="List folders in the HubSpot file manager.", + action_sets=["hubspot_files"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_folders(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_folders", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +# ================================================================== +# Conversations (Inbox) +# ================================================================== + + +@action( + name="list_hubspot_conversations", + description="List conversation threads in the HubSpot Inbox.", + action_sets=["hubspot_conversations"], + input_schema={ + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_conversations(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_conversations", + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="get_hubspot_conversation", + description="Get a conversation thread by ID.", + action_sets=["hubspot_conversations"], + input_schema={ + "thread_id": { + "type": "string", + "description": "Thread ID.", + "example": "123456789", + }, + }, + output_schema={"status": 
{"type": "string", "example": "success"}}, +) +async def get_hubspot_conversation(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", "get_conversation", thread_id=input_data["thread_id"] + ) + + +@action( + name="list_hubspot_conversation_messages", + description="List messages in a conversation thread.", + action_sets=["hubspot_conversations"], + input_schema={ + "thread_id": { + "type": "string", + "description": "Thread ID.", + "example": "123456789", + }, + "limit": {"type": "integer", "description": "Max results.", "example": 30}, + "after": {"type": "string", "description": "Pagination cursor.", "example": ""}, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_conversation_messages(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_conversation_messages", + thread_id=input_data["thread_id"], + limit=input_data.get("limit", 30), + after=input_data.get("after") or None, + ) + + +@action( + name="send_hubspot_conversation_message", + description="Send a message into a conversation thread. 
Requires the channel + channel-account IDs from the thread metadata.", + action_sets=["hubspot_conversations"], + input_schema={ + "thread_id": { + "type": "string", + "description": "Thread ID.", + "example": "123456789", + }, + "text": { + "type": "string", + "description": "Message body.", + "example": "Thanks for reaching out!", + }, + "channel_id": { + "type": "string", + "description": "Channel ID (from thread metadata).", + "example": "1000", + }, + "channel_account_id": { + "type": "string", + "description": "Channel account ID (from thread metadata).", + "example": "12345", + }, + "recipients": { + "type": "array", + "description": "Recipient list [{actorId, deliveryIdentifier:{type,value}}].", + "example": [ + { + "actorId": "V-123", + "deliveryIdentifier": { + "type": "HS_EMAIL_ADDRESS", + "value": "jane@example.com", + }, + } + ], + }, + "sender_actor_id": { + "type": "string", + "description": "Optional sender actor ID.", + "example": "", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def send_hubspot_conversation_message(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "send_conversation_message", + thread_id=input_data["thread_id"], + text=input_data["text"], + channel_id=input_data["channel_id"], + channel_account_id=input_data["channel_account_id"], + recipients=input_data["recipients"], + sender_actor_id=input_data.get("sender_actor_id") or None, + ) + + +# ================================================================== +# Webhooks (App-level — requires HubSpot App ID, not portal ID) +# ================================================================== + + +@action( + name="list_hubspot_webhook_subscriptions", + description="List webhook subscriptions for a HubSpot App. 
Requires the App ID from the developer console.", + action_sets=["hubspot_webhooks"], + input_schema={ + "app_id": { + "type": "string", + "description": "HubSpot App ID (developer console).", + "example": "1234567", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, +) +async def list_hubspot_webhook_subscriptions(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "list_webhook_subscriptions", + app_id=input_data["app_id"], + ) + + +@action( + name="create_hubspot_webhook_subscription", + description="Subscribe a HubSpot App to an event type (e.g. contact.creation, contact.propertyChange).", + action_sets=["hubspot_webhooks"], + input_schema={ + "app_id": { + "type": "string", + "description": "HubSpot App ID.", + "example": "1234567", + }, + "event_type": { + "type": "string", + "description": "Event type to subscribe to.", + "example": "contact.creation", + }, + "property_name": { + "type": "string", + "description": "Property name (only for *.propertyChange event types).", + "example": "", + }, + "active": { + "type": "boolean", + "description": "Whether the subscription is active.", + "example": True, + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def create_hubspot_webhook_subscription(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "create_webhook_subscription", + app_id=input_data["app_id"], + event_type=input_data["event_type"], + property_name=input_data.get("property_name") or None, + active=input_data.get("active", True), + ) + + +@action( + name="delete_hubspot_webhook_subscription", + description="Delete a webhook subscription.", + action_sets=["hubspot_webhooks"], + input_schema={ + "app_id": { + "type": "string", + "description": "HubSpot App ID.", + "example": "1234567", + }, + 
"subscription_id": { + "type": "string", + "description": "Subscription ID.", + "example": "abc123", + }, + }, + output_schema={"status": {"type": "string", "example": "success"}}, + parallelizable=False, +) +async def delete_hubspot_webhook_subscription(input_data: dict) -> dict: + from app.data.action.integrations._helpers import run_client + + return await run_client( + "hubspot", + "delete_webhook_subscription", + app_id=input_data["app_id"], + subscription_id=input_data["subscription_id"], + ) + + +# ================================================================== +# Intentionally NOT exposed as actions (and why) +# ================================================================== +# These HubSpot REST categories are admin / niche / non-user-facing and are +# excluded from this action surface. Add them later if a real use case appears. +# +# - Workflows / Automation API +# Workflow CRUD is admin-heavy and requires deep knowledge of HubSpot's +# visual builder semantics. The agent should USE existing workflows +# (via property writes that trigger them), not author new ones. +# - CMS Hub (pages, blogs, themes, modules, HubL templates) +# Site-author surface, not an agent surface. CraftBot is not a CMS. +# - CTAs (legacy + new) +# Marketing creative surface; rarely useful for agents. +# - Settings (users, teams, business units, brand kits, integration installs) +# Admin endpoints. Adding/removing users via an agent is rarely safe. +# - Quotes / Line Items / Products +# Commerce primitives; complex inter-object dependencies. Skip until a +# specific use case justifies the surface. +# - Payments / Subscriptions / Invoices (HubSpot Payments) +# Money-moving operations. Should require an explicit guarded action +# surface, not a default one. +# - Custom Objects / Custom Object Schemas (definitional) +# Schema authoring is admin-only and rare. 
Reading/writing instances +# of an existing custom object works via the generic /crm/v3/objects/{type} +# endpoints — already covered. +# - Analytics (events, custom behavioral events, attribution) +# Analytics ingestion + reporting is a category of its own; not useful +# for the conversational agent flow. +# - Email Subscriptions / Subscription Preferences +# Compliance-sensitive; the agent should not be flipping consent bits. +# - Single-Send API for marketing emails (legacy v1) +# Superseded by /marketing/v3/transactional/single-email/send — exposed. +# - Calling Extensions / Video Conferencing Extensions +# Provider plugins, not user-facing. diff --git a/app/data/action/integrations/integration_management.py b/app/data/action/integrations/integration_management.py index 0c2d5604..b416c566 100644 --- a/app/data/action/integrations/integration_management.py +++ b/app/data/action/integrations/integration_management.py @@ -9,6 +9,12 @@ from agent_core import action +# NOTE: integration alias/umbrella constants live in +# app.data.action.integrations._helpers and are imported INSIDE each handler. +# Action handlers run via exec() on their own extracted source, so module-level +# names defined here would NOT be in scope at runtime (NameError). + + @action( name="list_available_integrations", description=( @@ -93,8 +99,11 @@ def list_available_integrations(input_data: dict) -> dict: "integration_id": { "type": "string", "description": ( - "The integration to connect. Valid values: slack, discord, telegram, " - "whatsapp, whatsapp_business, google, notion, linkedin." + "The integration to connect, using its exact id. Valid values: slack, " + "discord, telegram, whatsapp, whatsapp_business, notion, linkedin, and " + "the Google Workspace apps as SEPARATE ids — gmail, google_drive, " + "google_docs, google_calendar, google_youtube (there is no single " + "'google' integration). Call list_available_integrations if unsure." 
), "example": "telegram", }, @@ -161,7 +170,10 @@ def connect_integration(input_data: dict) -> dict: if input_data.get("simulated_mode"): return {"status": "success", "message": "Simulated mode", "auth_type": "token"} + from app.data.action.integrations._helpers import normalize_integration_id + integration_id = input_data.get("integration_id", "").strip().lower() + integration_id = normalize_integration_id(integration_id) credentials = input_data.get("credentials", {}) or {} auth_method = input_data.get("auth_method", "").strip().lower() @@ -378,8 +390,14 @@ def connect_integration(input_data: dict) -> dict: input_schema={ "integration_id": { "type": "string", - "description": "The integration to check status for.", - "example": "telegram", + "description": ( + "The integration to check status for, using its exact id. Google " + "Workspace apps are SEPARATE integrations — use 'gmail', " + "'google_drive', 'google_docs', 'google_calendar', or " + "'google_youtube', NOT 'google'. Call list_available_integrations " + "if unsure of the exact id." + ), + "example": "gmail", }, "session_id": { "type": "string", @@ -421,12 +439,37 @@ def check_integration_status(input_data: dict) -> dict: "message": "Simulated", } + from app.data.action.integrations._helpers import ( + GOOGLE_FAMILY, + GOOGLE_UMBRELLA, + normalize_integration_id, + ) + integration_id = input_data.get("integration_id", "").strip().lower() session_id = input_data.get("session_id", "").strip() if not integration_id: return {"status": "error", "message": "integration_id is required."} + # Normalize common aliases (e.g. 'gdrive' → 'google_drive'). + integration_id = normalize_integration_id(integration_id) + + # 'google' / 'google workspace' is not a single integration — the Workspace + # apps are tracked separately. Guide the caller to the specific app instead + # of failing with a bare "unknown integration". 
+ if integration_id in GOOGLE_UMBRELLA: + return { + "status": "error", + "connected": False, + "accounts": [], + "message": ( + "'google' is not a single integration — Google Workspace apps are " + "tracked separately. Check the specific app instead: " + + ", ".join(GOOGLE_FAMILY) + + "." + ), + } + try: # If a session_id is provided, check WhatsApp QR session status if session_id and integration_id == "whatsapp": @@ -456,11 +499,22 @@ def check_integration_status(input_data: dict) -> dict: info = get_integration_info(integration_id) if not info: + # List the valid ids so the agent can self-correct instead of + # repeating an invalid guess. + try: + from craftos_integrations import list_all + + valid = ", ".join(sorted(list_all())) + except Exception: + valid = "" + message = f"Unknown integration: '{integration_id}'." + if valid: + message += f" Valid integrations: {valid}." return { "status": "error", "connected": False, "accounts": [], - "message": f"Unknown integration: '{integration_id}'.", + "message": message, } return { @@ -525,7 +579,10 @@ def disconnect_integration(input_data: dict) -> dict: if input_data.get("simulated_mode"): return {"status": "success", "message": "Simulated mode"} + from app.data.action.integrations._helpers import normalize_integration_id + integration_id = input_data.get("integration_id", "").strip().lower() + integration_id = normalize_integration_id(integration_id) account_id = input_data.get("account_id", "").strip() or None if not integration_id: diff --git a/app/data/action/living_ui_actions.py b/app/data/action/living_ui_actions.py index f5f2919f..4ea8105f 100644 --- a/app/data/action/living_ui_actions.py +++ b/app/data/action/living_ui_actions.py @@ -3,6 +3,149 @@ from agent_core import action +@action( + name="living_ui_scaffold", + description=( + "Create and register a new Living UI project from the template. " + "Call this FIRST when building a Living UI from a chat request — i.e. 
" + "when your task instruction does NOT already contain a 'Project ID' and " + "'Project Path' (those come pre-scaffolded from the Create Living UI modal). " + "This copies the project template (backend/, frontend/, config/), allocates " + "ports, and registers the project so it appears in the user's Living UI list. " + "Returns the project_id and an absolute project_path — use project_path as the " + "base for ALL subsequent file operations so files land in the right folders." + ), + default=False, + mode="CLI", + action_sets=["living_ui"], + parallelizable=False, + input_schema={ + "name": { + "type": "string", + "example": "Stock Forecaster", + "description": "Display name for the Living UI project.", + }, + "description": { + "type": "string", + "example": "A dashboard that forecasts stock performance.", + "description": "Short description of what the app does.", + }, + "features": { + "type": "array", + "example": ["watchlist", "forecasts", "alerts"], + "description": "Optional list of high-level features requested by the user.", + }, + "theme": { + "type": "string", + "enum": ["light", "dark", "system"], + "example": "system", + "description": "UI theme. Defaults to 'system'.", + }, + }, + output_schema={ + "status": { + "type": "string", + "example": "success", + "description": "Result: 'success' or 'error'.", + }, + "project_id": { + "type": "string", + "example": "abc12345", + "description": "The created project ID. Pass this to living_ui_notify_ready.", + }, + "project_path": { + "type": "string", + "example": "/workspace/living_ui/stock_forecaster_abc12345", + "description": "Absolute base path. 
Use this for ALL file operations.", + }, + "frontend_port": {"type": "integer", "description": "Allocated frontend port."}, + "backend_port": {"type": "integer", "description": "Allocated backend port."}, + "message": { + "type": "string", + "description": "Guidance on how to use the returned path.", + }, + }, + test_payload={ + "name": "Test App", + "description": "A test Living UI.", + "simulated_mode": True, + }, +) +async def living_ui_scaffold(input_data: dict) -> dict: + """Create, register, and associate a new Living UI project from the template.""" + name = input_data.get("name", "").strip() + description = input_data.get("description", "").strip() + features = input_data.get("features") or [] + theme = input_data.get("theme", "system") + # _session_id is injected by the ActionManager; for a Living UI task it equals + # the task id, which the progress/todo broadcast hooks key off of. + session_id = input_data.get("_session_id") + simulated_mode = input_data.get("simulated_mode", False) + + if not name or not description: + return {"status": "error", "message": "name and description are required"} + + if simulated_mode: + return { + "status": "success", + "project_id": "abc12345", + "project_path": "/workspace/living_ui/test_app_abc12345", + "frontend_port": 3100, + "backend_port": 3101, + "message": "Scaffolded. Use project_path for all file operations.", + } + + try: + from app.living_ui import get_living_ui_manager, broadcast_living_ui_created + + manager = get_living_ui_manager() + if not manager: + return { + "status": "error", + "message": ( + "Living UI manager not initialized. Living UI creation requires " + "the CraftBot desktop/browser app to be running." + ), + } + + # Tolerate a comma-separated string if the model passes one. 
+ if isinstance(features, str): + features = [f.strip() for f in features.split(",") if f.strip()] + + project = await manager.create_project( + name=name, + description=description, + features=features, + theme=theme, + ) + + # Associate the project with the running task so the agent's todos and + # progress stream to the Living UI view, then mark it as in-progress. + if session_id: + manager.set_project_task(project.id, session_id) + manager.update_project_status(project.id, "creating") + + # Register it in the browser's project list immediately (modal-parity). + await broadcast_living_ui_created(project.to_dict()) + + return { + "status": "success", + "project_id": project.id, + "project_path": project.path, + "frontend_port": project.port, + "backend_port": project.backend_port, + "message": ( + f"Project '{project.name}' scaffolded at {project.path}. " + f"Use this absolute path as the base for ALL file operations " + f"(e.g. {project.path}/backend/models.py, {project.path}/frontend/). " + f"Do NOT write to bare relative paths. When the build is complete, " + f'call living_ui_notify_ready(project_id="{project.id}").' + ), + } + except Exception as e: + return {"status": "error", "message": f"Failed to scaffold project: {str(e)}"} + + @action( name="living_ui_notify_ready", description=( @@ -332,6 +475,15 @@ async def living_ui_report_progress(input_data: dict) -> dict: "description": "Env var name for port injection (e.g., PORT). Empty if app uses command-line flag.", "example": "PORT", }, + "project_id": { + "type": "string", + "description": ( + "If the task instruction provided a pre-created project_id " + "(a tab already shown to the user), pass it here so the import " + "populates that tab. Omit otherwise." 
+ ), + "example": "a1b2c3d4", + }, }, output_schema={ "status": {"type": "string", "example": "success"}, @@ -357,6 +509,7 @@ async def living_ui_import_external(input_data: dict) -> dict: health_strategy=input_data.get("health_strategy", "tcp"), health_url=input_data.get("health_url", ""), port_env_var=input_data.get("port_env_var", "PORT"), + project_id=input_data.get("project_id") or None, ) return result except Exception as e: @@ -383,6 +536,15 @@ async def living_ui_import_external(input_data: dict) -> dict: "description": "Display name for the imported project (optional, auto-detected from manifest).", "example": "My App", }, + "project_id": { + "type": "string", + "description": ( + "If the task instruction provided a pre-created project_id " + "(a tab already shown to the user), pass it here so the import " + "populates that tab. Omit otherwise." + ), + "example": "a1b2c3d4", + }, }, output_schema={ "status": {"type": "string", "example": "success"}, @@ -401,11 +563,12 @@ async def living_ui_import_zip(input_data: dict) -> dict: zip_path = input_data.get("zip_path", "") name = input_data.get("name", "") + project_id = input_data.get("project_id") or None if not zip_path: return {"status": "error", "message": "zip_path is required."} - project = await manager.import_project_zip(zip_path, name) + project = await manager.import_project_zip(zip_path, name, project_id) # Clean up the ZIP file after successful import import os diff --git a/app/data/action/send_message.py b/app/data/action/send_message.py index 120752f0..b6ff597a 100644 --- a/app/data/action/send_message.py +++ b/app/data/action/send_message.py @@ -53,6 +53,17 @@ async def send_message(input_data: dict) -> dict: message, session_id=session_id ) + # Mirror a "waiting for reply" question onto the Living UI creation + # screen (no-op unless this session is a Living UI creation task) so the + # user can answer from the Living UI page even with the chat panel closed. 
+ if wait_for_user_reply and session_id: + try: + from app.living_ui import broadcast_living_ui_question + + await broadcast_living_ui_question(session_id, message) + except Exception: + pass + fire_at_delay = 10800 if wait_for_user_reply else 0 # Return 'success' for test compatibility, but keep 'ok' in production if needed status = "success" if simulated_mode else "ok" diff --git a/app/image_gen_interface.py b/app/image_gen_interface.py new file mode 100644 index 00000000..0f2fa9ce --- /dev/null +++ b/app/image_gen_interface.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +""" +Image generation interface for CraftBot. + +Re-exports ImageGenInterface from agent_core with CraftBot-specific hooks +for state access (using STATE singleton) and usage reporting. +""" + +from typing import Optional + +from agent_core.core.impl.image_gen import ImageGenInterface as _ImageGenInterface +from agent_core.core.hooks.types import UsageEventData +from app.state.agent_state import get_session_props + + +def _get_token_count() -> int: + return get_session_props().get_property("token_count", 0) + + +def _set_token_count(count: int) -> None: + get_session_props().set_property("token_count", count) + + +async def _report_usage(event: UsageEventData) -> None: + from app.usage import get_usage_reporter + + await get_usage_reporter().report(event) + + +class ImageGenInterface(_ImageGenInterface): + """ImageGenInterface configured for CraftBot's STATE singleton. + + Automatically injects the get_token_count and set_token_count hooks + that use CraftBot's global STATE object. 
+ """ + + def __init__( + self, + *, + provider: Optional[str] = None, + model: Optional[str] = None, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + deferred: bool = False, + ) -> None: + super().__init__( + provider=provider, + model=model, + api_key=api_key, + base_url=base_url, + deferred=deferred, + get_token_count=_get_token_count, + set_token_count=_set_token_count, + report_usage=_report_usage, + ) diff --git a/app/internal_action_interface.py b/app/internal_action_interface.py index 0da8dc96..5136a88f 100644 --- a/app/internal_action_interface.py +++ b/app/internal_action_interface.py @@ -10,6 +10,8 @@ from typing import Dict, Any, Optional, List, TYPE_CHECKING from app.llm import LLMInterface, LLMCallType from app.vlm_interface import VLMInterface +from app.image_gen_interface import ImageGenInterface +from app.video_gen_interface import VideoGenInterface from app.task.task_manager import TaskManager from app.task import Task from app.state.state_manager import StateManager @@ -43,6 +45,8 @@ class InternalActionInterface: task_manager: Optional[TaskManager] = None state_manager: Optional[StateManager] = None vlm_interface: Optional[VLMInterface] = None + image_gen_interface: Optional[ImageGenInterface] = None + video_gen_interface: Optional[VideoGenInterface] = None context_engine: Optional["ContextEngine"] = None gui_module: Optional["GUIModule"] = None memory_manager: Optional[MemoryManager] = None @@ -57,6 +61,8 @@ def initialize( task_manager: TaskManager, state_manager: StateManager, vlm_interface: Optional[VLMInterface] = None, + image_gen_interface: Optional[ImageGenInterface] = None, + video_gen_interface: Optional[VideoGenInterface] = None, context_engine: Optional["ContextEngine"] = None, gui_module: Optional["GUIModule"] = None, memory_manager: MemoryManager | None = None, @@ -74,6 +80,8 @@ def initialize( cls.task_manager = task_manager cls.state_manager = state_manager cls.vlm_interface = vlm_interface + 
cls.image_gen_interface = image_gen_interface + cls.video_gen_interface = video_gen_interface cls.context_engine = context_engine cls.gui_module = gui_module cls.memory_manager = memory_manager @@ -110,6 +118,44 @@ def describe_image(cls, image_path: str, prompt: Optional[str] = None) -> str: ) return cls.vlm_interface.describe_image(image_path, user_prompt=prompt) + @classmethod + def generate_image(cls, **kwargs) -> List[str]: + """Generate image(s) from a prompt using the image generation interface. + + Delegates all arguments to ImageGenInterface.generate_image(). + + Returns: + List of absolute file paths to the generated images. + + Raises: + RuntimeError: If image_gen_interface is not initialized or generation fails. + """ + if cls.image_gen_interface is None: + raise RuntimeError( + "InternalActionInterface not initialized with ImageGenInterface." + ) + return cls.image_gen_interface.generate_image(**kwargs) + + @classmethod + def generate_video(cls, **kwargs) -> List[str]: + """Generate video(s) from a prompt using the video generation interface. + + Delegates all arguments to VideoGenInterface.generate_video(). Blocks + until the long-running generation completes (or the executor's action + timeout kills it). + + Returns: + List of absolute file paths to the generated MP4 files. + + Raises: + RuntimeError: If video_gen_interface is not initialized or generation fails. + """ + if cls.video_gen_interface is None: + raise RuntimeError( + "InternalActionInterface not initialized with VideoGenInterface." 
+ ) + return cls.video_gen_interface.generate_video(**kwargs) + @classmethod def perform_ocr(cls, image_path: str, user_prompt: Optional[str] = None) -> dict: """ diff --git a/app/living_ui/__init__.py b/app/living_ui/__init__.py index 509a7755..27572e7d 100644 --- a/app/living_ui/__init__.py +++ b/app/living_ui/__init__.py @@ -19,7 +19,9 @@ from .broadcast import ( register_broadcast_callbacks, broadcast_living_ui_ready, + broadcast_living_ui_created, broadcast_living_ui_progress, + broadcast_living_ui_question, dispatch_living_ui_data_changed, make_todo_broadcast_hook, ) @@ -32,7 +34,9 @@ "set_living_ui_manager", "register_broadcast_callbacks", "broadcast_living_ui_ready", + "broadcast_living_ui_created", "broadcast_living_ui_progress", + "broadcast_living_ui_question", "dispatch_living_ui_data_changed", "make_todo_broadcast_hook", "restart_living_ui", diff --git a/app/living_ui/broadcast.py b/app/living_ui/broadcast.py index dd30b78e..3cc79d45 100644 --- a/app/living_ui/broadcast.py +++ b/app/living_ui/broadcast.py @@ -21,6 +21,9 @@ # Registered async callbacks into the browser adapter. _broadcast_ready_callback: Optional[Callable[[str, str, int], Awaitable[bool]]] = None +_broadcast_created_callback: Optional[Callable[[Dict[str, Any]], Awaitable[None]]] = ( + None +) _broadcast_progress_callback: Optional[ Callable[[str, str, int, str], Awaitable[None]] ] = None @@ -28,6 +31,9 @@ Callable[[str, List[Dict[str, Any]]], Awaitable[None]] ] = None _broadcast_data_changed_callback: Optional[Callable[[str], Awaitable[None]]] = None +_broadcast_question_callback: Optional[Callable[[str, str, str], Awaitable[None]]] = ( + None +) # Captured at register time so cross-thread dispatchers (action handlers # running on a worker thread pool) can schedule coroutines onto the main loop. 
async def broadcast_living_ui_created(project: Dict[str, Any]) -> bool:
    """Tell the registered browser callback that a Living UI project exists.

    Args:
        project: Payload handed verbatim to the callback (presumably a
            serialised LivingUIProject -- confirm with the caller).

    Returns:
        True when a callback was registered and has been awaited; False,
        after logging a warning, when no callback is registered.
    """
    if _broadcast_created_callback:
        await _broadcast_created_callback(project)
        return True
    logger.warning(
        f"[LIVING_UI] broadcast_living_ui_created called but callback is None "
        f"(manager={get_living_ui_manager() is not None})"
    )
    return False


async def broadcast_living_ui_question(session_id: str, message: str) -> bool:
    """Mirror an agent question onto the Living UI creation screen.

    The project is resolved from *session_id* through the Living UI manager
    and the question is forwarded only while that project's ``status`` is
    ``"creating"``. In every other situation the call is a no-op.

    Args:
        session_id: Task/session id used to look the project up; it is also
            forwarded to the callback together with the project id.
        message: Question text forwarded to the callback.

    Returns:
        True if the callback was awaited, otherwise False (empty session id,
        no callback, no manager, lookup failure, no project, or a project
        that is not in the "creating" state).
    """
    if not session_id:
        return False
    callback = _broadcast_question_callback
    if not callback:
        return False
    manager = get_living_ui_manager()
    if not manager:
        return False
    try:
        project = manager.get_project_by_task_id(session_id)
    except Exception as exc:
        # Mirroring stays best-effort, but a failing lookup used to vanish
        # without a trace; leave a debug breadcrumb before giving up.
        logger.debug(
            f"[LIVING_UI] question mirror skipped: project lookup failed "
            f"for session {session_id}: {exc}"
        )
        return False
    if not project or getattr(project, "status", None) != "creating":
        return False
    await callback(project.id, session_id, message)
    return True
+ + Used by the import (ZIP/GitHub) and marketplace flows so they behave + like the form-create flow (which registers its project synchronously). + The actual importer — import_project_zip / import_external_app / + install_from_marketplace — must adopt this id (pass project_id=...) so + it overwrites this entry instead of creating a second tab. + + Intentionally NOT persisted to disk: a placeholder that never gets + adopted (e.g. the import task fails) is dropped on the next restart + rather than leaving a broken "creating" tab behind. The adopting + importer calls _save_projects() once it fills in the real fields. + """ + project_id = self._generate_id() + project = LivingUIProject( + id=project_id, + name=name or "Importing…", + description=description, + path="", # filled in when the real import adopts this id + status="creating", + ) + self.projects[project_id] = project + logger.info( + f"[LIVING_UI] Registered placeholder project: {name} ({project_id})" + ) + return project + def _replace_placeholders( self, directory: Path, replacements: Dict[str, str] ) -> None: @@ -2292,6 +2323,7 @@ async def install_from_marketplace( app_description: str, custom_fields: Optional[Dict[str, str]] = None, repo_url: str = "https://github.com/CraftOS-dev/living-ui-marketplace", + project_id: Optional[str] = None, ) -> Dict[str, Any]: """ Install a pre-built Living UI app from the marketplace. @@ -2313,7 +2345,9 @@ async def install_from_marketplace( import zipfile import io - project_id = self._generate_id() + # Adopt a pre-created placeholder id when provided (so the tab spawned + # at request time becomes this project), else allocate a fresh one. 
+ project_id = project_id or self._generate_id() sanitized_name = self._sanitize_name(app_name) project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" @@ -2880,9 +2914,12 @@ async def import_external_app( health_strategy: str = "tcp", health_url: str = "", port_env_var: str = "PORT", + project_id: Optional[str] = None, ) -> Dict[str, Any]: """Import an external app as a Living UI project.""" - project_id = self._generate_id() + # Adopt the placeholder id when provided so the tab spawned at request + # time becomes this project instead of a second tab appearing. + project_id = project_id or self._generate_id() sanitized_name = self._sanitize_name(name) project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" @@ -2955,6 +2992,11 @@ async def import_external_app( app_runtime=app_runtime, ) + # Preserve the task link from an adopted placeholder so todo/question + # broadcasts (keyed by task id) keep targeting this tab. + existing = self.projects.get(project_id) + if existing and existing.task_id: + project.task_id = existing.task_id self.projects[project_id] = project self._save_projects() @@ -3171,12 +3213,15 @@ def export_project_zip(self, project_id: str) -> Path: return zip_path async def import_project_zip( - self, zip_path: str, name: str = "" + self, zip_path: str, name: str = "", project_id: Optional[str] = None ) -> "LivingUIProject": """Import a Living UI project from a ZIP file. The ZIP should contain a project directory structure with at least - a config/manifest.json. A new project ID and ports are allocated. + a config/manifest.json. Ports are allocated automatically. When + project_id is provided, the import adopts that id (overwriting the + placeholder tab spawned at request time) instead of generating a new + one — preventing a duplicate tab. 
""" zip_file = Path(zip_path) if not zip_file.exists(): @@ -3213,8 +3258,8 @@ async def import_project_zip( if not name: name = "imported_project" - # Generate new ID and project path - project_id = self._generate_id() + # Adopt the placeholder id when provided, else generate a new one + project_id = project_id or self._generate_id() sanitized_name = self._sanitize_name(name) project_path = self.living_ui_dir / f"{sanitized_name}_{project_id}" @@ -3268,6 +3313,11 @@ async def import_project_zip( app_runtime=app_runtime, ) + # Preserve the task link from an adopted placeholder so todo/question + # broadcasts (keyed by task id) keep targeting this tab. + existing = self.projects.get(project_id) + if existing and existing.task_id: + project.task_id = existing.task_id self.projects[project_id] = project self._save_projects() diff --git a/app/main.py b/app/main.py index 37f4b981..02455d5b 100644 --- a/app/main.py +++ b/app/main.py @@ -65,10 +65,12 @@ def _suppress_console_logging_early() -> None: from app.config import ( get_llm_provider, get_vlm_provider, + get_image_gen_provider, get_api_key, get_base_url, get_llm_model, get_vlm_model, + get_image_gen_model, ) from app.agent_base import AgentBase @@ -116,7 +118,8 @@ def _initial_settings() -> tuple: """Determine initial provider, API key, and base URL from settings.json. Returns: - Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, has_valid_key) + Tuple of (provider, api_key, base_url, model, vlm_provider, vlm_model, + image_gen_provider, image_gen_model, has_valid_key) where has_valid_key indicates if a working API key was found. 
""" # Read directly from settings.json @@ -126,11 +129,23 @@ def _initial_settings() -> tuple: model = get_llm_model() # None → use registry default for the provider vlm_prov = get_vlm_provider() vlm_mod = get_vlm_model() + img_prov = get_image_gen_provider() + img_mod = get_image_gen_model() # Remote (Ollama) doesn't require API key has_key = bool(api_key) or provider == "remote" - return provider, api_key, base_url, model, vlm_prov, vlm_mod, has_key + return ( + provider, + api_key, + base_url, + model, + vlm_prov, + vlm_mod, + img_prov, + img_mod, + has_key, + ) async def main_async() -> None: @@ -139,9 +154,17 @@ async def main_async() -> None: browser_mode = cli_args.get("browser", False) # Get settings from settings.json - provider, api_key, base_url, model, vlm_prov, vlm_mod, has_valid_key = ( - _initial_settings() - ) + ( + provider, + api_key, + base_url, + model, + vlm_prov, + vlm_mod, + img_prov, + img_mod, + has_valid_key, + ) = _initial_settings() # CLI args override settings.json if provided if cli_args.get("provider"): @@ -166,6 +189,8 @@ async def main_async() -> None: llm_model=model, vlm_provider=vlm_prov, vlm_model=vlm_mod, + image_gen_provider=img_prov, + image_gen_model=img_mod, deferred_init=not has_valid_key, ) diff --git a/app/scheduler/manager.py b/app/scheduler/manager.py index eb9b67f3..32fb3be1 100644 --- a/app/scheduler/manager.py +++ b/app/scheduler/manager.py @@ -29,6 +29,12 @@ class SchedulerManager: Fires triggers into the TriggerQueue when schedules are due. """ + # A one-time task firing more than this many seconds after its scheduled + # time is treated as a "catch-up" (it became overdue while CraftBot was + # offline) and the executing agent is given staleness context so it can + # decide whether to proceed, confirm with the user, or skip. 
+ CATCHUP_THRESHOLD_SECONDS = 120 + def __init__(self): self._schedules: Dict[str, ScheduledTask] = {} self._scheduler_tasks: Dict[str, asyncio.Task] = {} @@ -543,12 +549,14 @@ async def _fire_schedule(self, schedule: ScheduledTask) -> None: ) return + now = time.time() + # Update runtime state - schedule.last_run = time.time() + schedule.last_run = now schedule.run_count += 1 # Create unique session ID for this run - session_id = f"scheduled_{schedule.id}_{int(time.time())}" + session_id = f"scheduled_{schedule.id}_{int(now)}" # Build trigger payload payload = { @@ -562,11 +570,59 @@ async def _fire_schedule(self, schedule: ScheduledTask) -> None: **schedule.payload, # Merge custom payload } + description = f"[Scheduled] {schedule.name}: {schedule.instruction}" + + # Catch-up handling: a one-time task can become overdue while CraftBot is + # offline. Rather than apply a hard drop/fire cutoff, fire it but hand + # the executing agent the staleness context so it can use judgment — + # proceed if only slightly late, otherwise confirm with the user or skip + # if no longer relevant. + if ( + schedule.schedule.schedule_type == "once" + and schedule.schedule.fire_at is not None + ): + overdue = now - schedule.schedule.fire_at + if overdue > self.CATCHUP_THRESHOLD_SECONDS: + scheduled_for = datetime.fromtimestamp( + schedule.schedule.fire_at + ).strftime("%Y-%m-%d %H:%M:%S") + overdue_human = self._format_duration(overdue) + catch_up_note = ( + f"NOTE: This one-time task was scheduled for {scheduled_for} " + f"but is running about {overdue_human} late because CraftBot " + f"was offline at the scheduled time. Use your judgment: if it " + f"is only slightly late and still relevant, carry it out " + f"normally. If it is significantly late, or the action is " + f"time-sensitive or irreversible (e.g. sending a message or " + f"email), confirm with the user before proceeding, or skip it " + f"if it is no longer relevant." 
+ ) + payload["is_catch_up"] = True + payload["overdue_seconds"] = overdue + payload["originally_scheduled_for"] = scheduled_for + payload["catch_up_note"] = catch_up_note + description = f"{description}\n\n{catch_up_note}" + logger.info( + f"[SCHEDULER] One-time task {schedule.id} is overdue by " + f"{overdue_human}; firing as catch-up with agent-judgment note" + ) + + # One-time tasks: remove from the persisted config BEFORE enqueueing so a + # crash/restart between firing and removal can never re-fire them. The + # in-memory trigger queue is not persisted, so once it's enqueued the + # config entry is no longer needed. + if not schedule.recurring: + self._schedules.pop(schedule.id, None) + self._save_config() + logger.info( + f"[SCHEDULER] One-time task fired, removed from config: {schedule.id}" + ) + # Create trigger trigger = Trigger( - fire_at=time.time(), + fire_at=now, priority=schedule.priority, - next_action_description=f"[Scheduled] {schedule.name}: {schedule.instruction}", + next_action_description=description, payload=payload, session_id=session_id, ) @@ -579,15 +635,23 @@ async def _fire_schedule(self, schedule: ScheduledTask) -> None: f"(run #{schedule.run_count})" ) - # Auto-remove non-recurring (immediate) tasks after firing - if not schedule.recurring: - logger.info(f"[SCHEDULER] One-time task fired, removing: {schedule.id}") - asyncio.create_task(self._remove_after_fire(schedule.id)) - - async def _remove_after_fire(self, schedule_id: str) -> None: - """Remove a one-time schedule after it has fired.""" - await asyncio.sleep(1) # Brief delay to ensure trigger is processed - self.remove_schedule(schedule_id) + @staticmethod + def _format_duration(seconds: float) -> str: + """Format a duration in seconds into a short human-readable string.""" + seconds = int(seconds) + if seconds < 60: + unit = "second" + value = seconds + elif seconds < 3600: + unit = "minute" + value = seconds // 60 + elif seconds < 86400: + unit = "hour" + value = seconds // 3600 
+ else: + unit = "day" + value = seconds // 86400 + return f"{value} {unit}{'s' if value != 1 else ''}" def _load_config(self) -> SchedulerConfig: """Load configuration from file.""" @@ -610,7 +674,27 @@ def _load_config(self) -> SchedulerConfig: expression = schedule_data.get("schedule", "") parsed_schedule = ScheduleParser.parse(expression) + # One-time tasks: restore the persisted absolute fire time + # instead of re-anchoring the raw expression to "now". Re-parsing + # "in 10 minutes" on every restart pushed the fire time forward, + # delaying the task indefinitely across restarts. + if parsed_schedule.schedule_type == "once": + stored_fire_at = schedule_data.get("fire_at") + if stored_fire_at is not None: + parsed_schedule.fire_at = stored_fire_at + task = ScheduledTask.from_dict(schedule_data, parsed_schedule) + + # Skip one-time tasks that already fired in a previous run but + # weren't removed before a crash/restart. Prevents the task from + # being executed (e.g. an email sent) a second time. 
def _make_on_task_remove_persist(event_stream_manager: EventStreamManager):
    """Build the hook that runs once a task reaches a terminal status.

    The returned callable looks up the task's event stream on
    *event_stream_manager* and, when one exists, hands it to
    ``persist_event_stream`` on the session storage. It never calls
    ``remove_task``: per the original author's note, the stored task row
    has to survive so the task can be resumed later.

    Any exception raised while persisting is logged as a warning and
    swallowed, so a storage failure does not propagate to the caller.
    """

    def on_task_remove_persist(task: Task) -> None:
        try:
            from app.usage.session_storage import get_session_storage

            session_store = get_session_storage()
            # Per the original author's note the stream-removal hook has not
            # run at this point, so the stream can still be fetched by id.
            final_stream = event_stream_manager.get_stream_by_id(task.id)
            if final_stream is None:
                return
            session_store.persist_event_stream(task.id, final_stream)
        except Exception as e:
            logger.warning(
                f"[TaskManager] Failed to persist final event stream for {task.id}: {e}"
            )

    return on_task_remove_persist
on_task_created_chatserver=None, diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index 705081d0..57f8a42d 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -987,6 +987,8 @@ def __init__( broadcast_progress=self.broadcast_living_ui_progress, broadcast_todos=self.broadcast_living_ui_todos, broadcast_data_changed=self.broadcast_living_ui_data_changed, + broadcast_created=self.broadcast_living_ui_created, + broadcast_question=self.broadcast_living_ui_question, ) # Subscribe the Living UI module to TaskManager todo updates so that @@ -1495,6 +1497,15 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: task_id = data.get("taskId", "") await self._handle_task_cancel(task_id) + elif msg_type == "task_complete": + task_id = data.get("taskId", "") + await self._handle_task_complete(task_id) + + elif msg_type == "task_resume": + task_id = data.get("taskId", "") + message = data.get("message", "") or "" + await self._handle_task_resume(task_id, message) + elif msg_type == "option_click": value = data.get("value", "") session_id = data.get("sessionId", "") @@ -2649,6 +2660,18 @@ async def _handle_living_ui_create(self, data: Dict[str, Any]) -> None: } ) + # Mirror the new project into chat as a system message so the + # request is visible in the conversation (not just the new tab). 
async def broadcast_living_ui_created(self, project: Dict[str, Any]) -> None:
    """Announce a newly created Living UI project to connected clients.

    Sends a single ``living_ui_create`` message through ``self._broadcast``
    carrying a success flag, the project's ``id`` (empty string when the
    key is missing) and the project dict itself.
    """
    project_id = project.get("id", "")
    payload = {
        "success": True,
        "projectId": project_id,
        "project": project,
    }
    await self._broadcast({"type": "living_ui_create", "data": payload})


async def broadcast_living_ui_question(
    self, project_id: str, session_id: str, message: str
) -> None:
    """Push an agent question to clients as a ``living_ui_question`` message.

    The message carries the project id, the session id (which, per the
    original author's note, the client echoes back as the reply target)
    and the question text.
    """
    payload = {
        "projectId": project_id,
        "sessionId": session_id,
        "message": message,
    }
    await self._broadcast({"type": "living_ui_question", "data": payload})
+ + Reads the task + persisted event stream from sessions.db (kept around + on task end specifically for this flow), reinstates them in memory, + flips the action panel row back to running, optionally injects a + continuation user message, and enqueues a trigger so the react loop + picks up where it left off. Token counters accumulate across resumes. + """ + try: + if not task_id: + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": False, + "error": "Missing taskId", + }, + } + ) + return + + from app.usage.session_storage import get_session_storage + from agent_core.core.task import Task + from agent_core.core.impl.event_stream.event_stream import ( + get_cached_token_count, + ) + from app.state.agent_state import STATE + from app.trigger import Trigger + import time as _time + + agent = self._controller.agent + task_manager = agent.task_manager + + # Refuse if the task is still live (already in memory) — resume + # only applies to terminated tasks. + if task_id in task_manager.tasks: + live = task_manager.tasks[task_id] + if live.status not in ("completed", "error", "cancelled"): + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": False, + "error": "Task is already running", + }, + } + ) + return + + storage = get_session_storage() + task_dict = storage.get_task(task_id) + if not task_dict: + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": False, + "error": ( + "Task context is no longer available. It may " + "have been purged after 24h — please start a " + "new task." + ), + }, + } + ) + return + + # Reject internal/system workflows: their post-completion side + # effects already ran and resuming them produces inconsistent + # state. Mirrors the existing Create Skill gate. 
+ wf_id = task_dict.get("workflow_id") or "" + selected_skills = task_dict.get("selected_skills") or [] + if wf_id in self._INTERNAL_WORKFLOW_IDS or any( + s in self._INTERNAL_SKILL_NAMES for s in selected_skills + ): + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": False, + "error": "Internal workflow tasks cannot be resumed", + }, + } + ) + return + + # Rebuild the Task and reset terminal fields. Token counters and + # action_count stay as-is — a resume is a continuation, not a + # restart. Capture the prior terminal status BEFORE the reset so + # the resume system event can anchor the LLM with it. + task = Task.from_dict(task_dict) + prior_status = task.status + task.status = "running" + task.ended_at = None + task.final_summary = None + task.errors = [] + task.waiting_for_user_reply = False + + # Fresh empty temp dir (the old one was rmtree'd at task end). + temp_dir = task_manager._prepare_task_temp_dir(task_id) + task.temp_dir = str(temp_dir) + + # Re-insert into the live task map BEFORE wiring up the event + # stream so subsequent log() calls route to the correct task. + task_manager.tasks[task_id] = task + task_manager._current_session_id = task_id + + # Restore the persisted event stream so the LLM sees the full + # prior conversation. head_summary + tail_events were written + # by _make_on_task_remove_persist at task end. + stream = agent.event_stream_manager.create_stream(task_id, temp_dir) + t_head, t_records = storage.get_event_stream(task_id) + stream.head_summary = t_head + stream.tail_events = t_records + stream._total_tokens = sum(get_cached_token_count(r) for r in t_records) + + # Mark restored events as already-seen by the UI controller's + # polling loop. Without this, `_watch_agent_events` treats every + # restored event as new and re-emits ACTION_START into the + # action panel — which flips pre-resume actions from 'completed' + # back to 'running'. 
The matching ACTION_END for terminal + # actions (paired with task_end) was never persisted to the + # stream in the first place, so the flip is never undone and + # the action stays stuck spinning. Same dedup key shape used by + # the bootstrap loop in UIController._watch_agent_events. + store = self._controller.state_store + for record in t_records: + ev = record.event + store.dispatch("MARK_EVENT_SEEN", (ev.iso_ts, ev.kind, ev.message)) + + # Sync with state_manager and rebuild session caches so the LLM + # is set up the same way create_task would set it up. + if agent.state_manager: + agent.state_manager.on_task_created(task) + agent.state_manager.add_to_active_task(task=task) + task_manager._create_session_caches(task_id) + + # Mark as the current task on the global state property. + STATE.set_agent_property("current_task_id", task_id) + + # Persist the now-running task back to sessions.db (status flip). + try: + if task_manager._on_task_persist: + task_manager._on_task_persist(task) + except Exception: + pass + + # Log a system event so the resumed transcript has a clear + # marker, then optionally log the user's continuation message + # so the next LLM call sees it. + # + # Two messages here, one event: + # - `message` is what the LLM sees in the event stream — rich + # framing that anchors it as a *continuation*. Without this + # the model tends to re-execute the task from scratch + # because the task name reads like an imperative. + # - `display_message` is what the user sees in chat — the + # short, friendly "Task '' resumed by user." line. + llm_message = ( + f"Task '{task.name}' was previously {prior_status} and the user " + f"has now reopened it to continue. Do NOT repeat this task's " + f"full prior history. Do NOT call task_end immediately. " + f"Review the history, decide whether the task is incomplete and " + f"requires continuation or whether the user's intent has shifted, " + f"and act on that. 
If the task was previously completed, you MUST " + f"ask the user for their intent FIRST before taking any action." + ) + agent.event_stream_manager.log( + "system", + llm_message, + display_message=f"Task '{task.name}' resumed by user.", + task_id=task_id, + ) + if message.strip(): + agent.state_manager.record_user_message( + message.strip(), + session_id=task_id, + ) + + # Flip the action panel row back to running so the UI reflects + # the new state in both surfaces. + for item in self._action_panel._items: + if item.id == task_id: + item.status = "running" + item.completed_at = None + item.error_message = None + self._action_panel._persist_item(item) + await self._broadcast( + { + "type": "action_update", + "data": { + "id": task_id, + "status": "running", + "duration": None, + "error": None, + }, + } + ) + break + + # Enqueue a trigger so the react loop picks up the task. We use + # complex-task priority (7) for non-simple tasks, matching what + # _create_new_trigger does post-action. + is_simple = getattr(task, "mode", "complex") == "simple" + resume_priority = 5 if is_simple else 7 + await agent.triggers.put( + Trigger( + fire_at=_time.time(), + priority=resume_priority, + next_action_description=( + "Task was resumed by the user. Review the event stream " + "history. Do NOT call task_end immediately. If the task " + "was previously completed, you MUST ask the user for " + "their intent FIRST before taking any action." 
+ ), + session_id=task_id, + payload={"gui_mode": STATE.gui_mode}, + ), + skip_merge=True, + ) + + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": True, + "status": "running", + }, + } + ) + except Exception as e: + logger.warning(f"[task_resume] Failed to resume {task_id}: {e}") + await self._broadcast( + { + "type": "task_resume_response", + "data": { + "taskId": task_id, + "success": False, + "error": str(e), + }, + } + ) + + async def _handle_task_complete(self, task_id: str) -> None: + """Mark a running task as completed at the user's request.""" + try: + agent = self._controller.agent + task_manager = agent.task_manager + + task = ( + task_manager.get_task_by_id(task_id) if task_id else task_manager.active + ) + if not task: + await self._broadcast( + { + "type": "task_complete_response", + "data": { + "taskId": task_id, + "success": False, + "error": "Task not found", + }, + } + ) + return + + await task_manager.mark_task_completed( + message="Marked completed by user", + task_id=task.id, + ) + + await self._broadcast( + { + "type": "task_complete_response", + "data": { + "taskId": task.id, + "success": True, + "status": "completed", + }, + } + ) + except Exception as e: + await self._broadcast( + { + "type": "task_complete_response", + "data": { + "taskId": task_id, + "success": False, + "error": str(e), + }, + } + ) + async def _handle_option_click( self, value: str, session_id: str, message_id: str ) -> None: @@ -4659,6 +5014,8 @@ async def _handle_model_settings_update(self, data: Dict[str, Any]) -> None: try: new_provider = data.get("llmProvider") vlm_provider = data.get("vlmProvider") + image_gen_provider = data.get("imageGenProvider") + video_gen_provider = data.get("videoGenProvider") api_key = data.get("apiKey") provider_for_key = data.get("providerForKey") base_url = data.get("baseUrl") @@ -4717,12 +5074,41 @@ async def _handle_model_settings_update(self, data: Dict[str, Any]) -> None: ) 
return + # Capture the current image-gen provider/model BEFORE saving, so a + # failed reinitialize below can roll the persisted values back and + # keep settings.json consistent with the still-live interface. + prev_image_gen_provider = None + prev_image_gen_model = None + if image_gen_provider: + from app.config import ( + get_image_gen_provider as _get_ig_provider, + get_image_gen_model as _get_ig_model, + ) + + prev_image_gen_provider = _get_ig_provider() + prev_image_gen_model = _get_ig_model() + + prev_video_gen_provider = None + prev_video_gen_model = None + if video_gen_provider: + from app.config import ( + get_video_gen_provider as _get_vg_provider, + get_video_gen_model as _get_vg_model, + ) + + prev_video_gen_provider = _get_vg_provider() + prev_video_gen_model = _get_vg_model() + # Step 3: Now save settings (validation and connection test passed) result = update_model_settings( llm_provider=new_provider, vlm_provider=vlm_provider, + image_gen_provider=image_gen_provider, + video_gen_provider=video_gen_provider, llm_model=data.get("llmModel"), vlm_model=data.get("vlmModel"), + image_gen_model=data.get("imageGenModel"), + video_gen_model=data.get("videoGenModel"), api_key=api_key, provider_for_key=provider_for_key, base_url=base_url, @@ -4744,6 +5130,65 @@ async def _handle_model_settings_update(self, data: Dict[str, Any]) -> None: f"Settings saved but LLM reinitialization failed: {e}" ) + # Reinitialize image gen interface when its provider changes. + # Settings are already persisted above, and reinitialize_image_gen + # only swaps the live interface on success — so if it fails (e.g. + # the new provider has no API key) we must roll the saved image-gen + # provider/model back to match the still-live interface. Otherwise + # settings.json would advertise a provider the running interface + # can't serve. 
+ if result.get("success") and image_gen_provider: + reinit_ok = False + try: + agent = self._controller.agent + reinit_ok = agent.reinitialize_image_gen(image_gen_provider) + except Exception as e: + logger.warning(f"[BROWSER] Failed to reinitialize image gen: {e}") + + if reinit_ok: + logger.info( + f"[BROWSER] Image gen reinitialized with provider: {image_gen_provider}" + ) + else: + # Roll persisted image-gen settings back to the previous + # (still-live) values to avoid a settings/interface mismatch. + update_model_settings( + image_gen_provider=prev_image_gen_provider, + image_gen_model=prev_image_gen_model, + ) + msg = ( + f"Image generation provider '{image_gen_provider}' could not be " + f"initialized — check its API key. Kept '{prev_image_gen_provider}'." + ) + logger.warning(f"[BROWSER] {msg}") + result["warning"] = result.get("warning") or msg + + # Reinitialize video gen interface when its provider changes. + # Mirrors the image gen pattern: roll back on reinit failure. + if result.get("success") and video_gen_provider: + reinit_vid_ok = False + try: + agent = self._controller.agent + reinit_vid_ok = agent.reinitialize_video_gen(video_gen_provider) + except Exception as e: + logger.warning(f"[BROWSER] Failed to reinitialize video gen: {e}") + + if reinit_vid_ok: + logger.info( + f"[BROWSER] Video gen reinitialized with provider: {video_gen_provider}" + ) + else: + update_model_settings( + video_gen_provider=prev_video_gen_provider, + video_gen_model=prev_video_gen_model, + ) + msg = ( + f"Video generation provider '{video_gen_provider}' could not be " + f"initialized — check its API key. Kept '{prev_video_gen_provider}'." 
+ ) + logger.warning(f"[BROWSER] {msg}") + result["warning"] = result.get("warning") or msg + await self._broadcast( { "type": "model_settings_update", @@ -5915,22 +6360,69 @@ async def _handle_marketplace_install( ) return + # Spawn a placeholder tab immediately so the user sees the install is + # underway (the install itself is synchronous and can take a while). + # install_from_marketplace adopts this id so the same tab becomes the + # running app. + placeholder = self._living_ui_manager.create_placeholder_project( + app_name, app_description + ) + project_id = placeholder.id + await self.broadcast_living_ui_created(placeholder.to_dict()) + await self._broadcast( + { + "type": "living_ui_status", + "data": { + "projectId": project_id, + "phase": "initializing", + "progress": 10, + "message": "Installing from marketplace...", + }, + } + ) + result = await self._living_ui_manager.install_from_marketplace( app_id=app_id, app_name=app_name, app_description=app_description, custom_fields=custom_fields, + project_id=project_id, ) if result.get("status") == "success": - # Also broadcast as living_ui_create so the sidebar updates + # The project already exists as a tab (placeholder adopted) — flip + # it to running so the iframe loads. await self._broadcast( { - "type": "living_ui_create", + "type": "living_ui_ready", "data": { - "success": True, - "projectId": result["project"]["id"], - "project": result["project"], + "projectId": project_id, + "url": result.get("url"), + "port": result["project"].get("port"), + }, + } + ) + + # Mirror the install into chat as a system message so the request + # is visible in the conversation (not just the new tab). 
+ body = f"{app_description}\n\n" if app_description else "" + try: + await self._display_chat_message( + "System", + f"**Living UI: {app_name}**\n\n{body}" + "Installed from the marketplace — open it in the new tab.", + "system", + ) + except Exception as e: + logger.debug(f"[LIVING_UI] marketplace chat message failed: {e}") + else: + # Install failed — surface the error on the spawned tab. + await self._broadcast( + { + "type": "living_ui_error", + "data": { + "projectId": project_id, + "error": result.get("error", "Marketplace install failed"), }, } ) @@ -5938,7 +6430,7 @@ async def _handle_marketplace_install( await self._broadcast( { "type": "living_ui_marketplace_install", - "data": {**result, "appId": app_id}, + "data": {**result, "projectId": project_id, "appId": app_id}, } ) @@ -5949,13 +6441,39 @@ async def _handle_living_ui_import(self, source: str, name: str) -> None: is_zip = source.lower().endswith(".zip") + # Spawn a placeholder tab immediately so the user sees the import is + # underway (mirrors the form-create flow). The importer skill adopts + # this project_id so the same tab transitions to the running app. + placeholder = self._living_ui_manager.create_placeholder_project(name) + project_id = placeholder.id + await self.broadcast_living_ui_created(placeholder.to_dict()) + await self._broadcast( + { + "type": "living_ui_status", + "data": { + "projectId": project_id, + "phase": "initializing", + "progress": 10, + "message": "Importing project...", + }, + } + ) + + adopt_note = ( + f"A tab has already been created for this import with " + f'project_id="{project_id}". You MUST pass project_id="{project_id}" ' + f"to the import action so it populates that existing tab instead of " + f"creating a duplicate.\n\n" + ) + if is_zip: task_instruction = ( f"Import this Living UI project from a ZIP file:\n" f"ZIP path: {source}\n" f"Name: {name}\n\n" + f"{adopt_note}" f"Steps:\n" - f"1. Call living_ui_import_zip to extract and register the project\n" + f'1. 
Call living_ui_import_zip (project_id="{project_id}") to extract and register the project\n' f"2. Review the project structure and manifest\n" f"3. Install dependencies if needed\n" f"4. Launch the app and verify it works\n" @@ -5966,11 +6484,12 @@ async def _handle_living_ui_import(self, source: str, name: str) -> None: f"Import this external app as a Living UI:\n" f"Source: {source}\n" f"Name: {name}\n\n" + f"{adopt_note}" f"Follow the living-ui-importer skill instructions:\n" f"1. Clone/copy the source code\n" f"2. Detect the app type (Go, Node, Python, etc.) — NEVER use Docker if native build is possible\n" f"3. Determine build/install command, start command, port config, and health check\n" - f"4. Call living_ui_import_external with the detected configuration\n" + f'4. Call living_ui_import_external with the detected configuration and project_id="{project_id}"\n' f"5. Launch the app and verify it works\n" f"6. Create LIVING_UI.md documenting the app" ) @@ -5987,6 +6506,10 @@ async def _handle_living_ui_import(self, source: str, name: str) -> None: from app.trigger import Trigger import time + # Link the task to the placeholder so question-mirroring and todo + # broadcasts (keyed by task id) target this tab. + self._living_ui_manager.set_project_task(project_id, task_id) + trigger = Trigger( fire_at=time.time(), priority=50, @@ -5995,6 +6518,30 @@ async def _handle_living_ui_import(self, source: str, name: str) -> None: payload={"type": "living_ui_import", "source": source}, ) await self._controller.agent.triggers.put(trigger) + else: + # Couldn't create the task — don't leave a stuck "creating" tab. + await self._broadcast( + { + "type": "living_ui_error", + "data": { + "projectId": project_id, + "error": "Failed to create import task", + }, + } + ) + + # Mirror the import into chat as a system message so the request is + # visible in the conversation (not just the new tab). 
+ origin = "uploaded ZIP file" if is_zip else source + try: + await self._display_chat_message( + "System", + f"**Living UI: {name}**\n\nImporting from {origin}.\n\n" + "Setting up your app now — track progress in the new tab.", + "system", + ) + except Exception as e: + logger.debug(f"[LIVING_UI] import chat message failed: {e}") await self._broadcast( { @@ -7452,10 +7999,9 @@ async def _agent_profile_picture_handler( from aiohttp import web from app.ui_layer.settings.general_settings import ( - AGENT_PROFILE_DIR, - AGENT_PROFILE_DEFAULT_FILENAME, EXT_TO_MIME, _user_profile_picture_path, + get_default_picture_path, ) from app.onboarding import onboarding_manager @@ -7470,10 +8016,10 @@ async def _agent_profile_picture_handler( mime_type = EXT_TO_MIME.get(ext.lower(), "application/octet-stream") if target is None: - default_path = AGENT_PROFILE_DIR / AGENT_PROFILE_DEFAULT_FILENAME - if default_path.exists(): - target = default_path - mime_type = "image/png" + # Falls back to the bundled default (sys._MEIPASS) when the per-user + # data dir lacks it — e.g. the packaged macOS app (issue #254). + target = get_default_picture_path() + mime_type = "image/png" if target is None: raise web.HTTPNotFound(reason="Avatar not available") diff --git a/app/ui_layer/browser/frontend/src/components/layout/NavBar.tsx b/app/ui_layer/browser/frontend/src/components/layout/NavBar.tsx index 6b3457ad..c0e05a26 100644 --- a/app/ui_layer/browser/frontend/src/components/layout/NavBar.tsx +++ b/app/ui_layer/browser/frontend/src/components/layout/NavBar.tsx @@ -167,7 +167,7 @@ export function NavBar() { title={project.name} > - {project.status === 'creating' + {project.status === 'creating' || project.status === 'launching' || project.status === 'stopping' ? 
: } @@ -215,8 +215,9 @@ export function NavBar() { isOpen={showCreateModal} onClose={() => setShowCreateModal(false)} onSubmit={handleCreateSubmit} - onInstalled={(projectId) => navigate(`/living-ui/${projectId}`)} /> + {/* No onInstalled/navigate: marketplace installs just spawn a tab in the + navbar (like form-create) — the user opens it themselves. */} ) } diff --git a/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx b/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx index 96eeb1aa..a5fb608b 100644 --- a/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx +++ b/app/ui_layer/browser/frontend/src/contexts/WebSocketContext.tsx @@ -30,12 +30,16 @@ import { import { setLoadingOlder as tasksSetLoadingOlder, setCancellingTaskId as tasksSetCancellingTaskId, + setCompletingTaskId as tasksSetCompletingTaskId, + setResumingTaskId as tasksSetResumingTaskId, } from '../store/slices/tasksSlice' import { selectAllActions, selectHasMoreActions, selectLoadingOlderActions, selectCancellingTaskId, + selectCompletingTaskId, + selectResumingTaskId, selectOldestTaskCreatedAt, } from '../store/selectors/tasks' import { @@ -61,6 +65,8 @@ import { import { selectLocalLlm } from '../store/selectors/localLlm' import { setActiveId as livingUiSetActiveId, + markLaunching as livingUiMarkLaunching, + markStopping as livingUiMarkStopping, } from '../store/slices/livingUiSlice' import { selectLivingUiProjects, @@ -141,6 +147,8 @@ interface WebSocketContextType extends WebSocketState { hasMoreActions: boolean loadingOlderActions: boolean cancellingTaskId: string | null + completingTaskId: string | null + resumingTaskId: string | null // Slice-backed (dashboardSlice). 
dashboardMetrics: DashboardMetrics | null filteredMetricsCache: Record @@ -171,6 +179,8 @@ interface WebSocketContextType extends WebSocketState { sendCommand: (command: string) => void clearMessages: () => void cancelTask: (taskId: string) => void + completeTask: (taskId: string) => void + resumeTask: (taskId: string, message?: string) => void openFile: (path: string) => void openFolder: (path: string) => void requestFilteredMetrics: (period: MetricsTimePeriod) => void @@ -250,6 +260,8 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { const hasMoreActions = useAppSelector(selectHasMoreActions) const loadingOlderActions = useAppSelector(selectLoadingOlderActions) const cancellingTaskId = useAppSelector(selectCancellingTaskId) + const completingTaskId = useAppSelector(selectCompletingTaskId) + const resumingTaskId = useAppSelector(selectResumingTaskId) const oldestTaskCreatedAt = useAppSelector(selectOldestTaskCreatedAt) const dashboardMetrics = useAppSelector(selectDashboardMetrics) const filteredMetricsCache = useAppSelector(selectFilteredMetricsCache) @@ -415,6 +427,24 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { } }, [dispatch]) + const completeTask = useCallback((taskId: string) => { + if (client.isConnected) { + dispatch(tasksSetCompletingTaskId(taskId)) + client.sendString(JSON.stringify({ type: 'task_complete', taskId })) + } + }, [dispatch]) + + const resumeTask = useCallback((taskId: string, message?: string) => { + if (client.isConnected) { + dispatch(tasksSetResumingTaskId(taskId)) + client.sendString(JSON.stringify({ + type: 'task_resume', + taskId, + message: message || '', + })) + } + }, [dispatch]) + const sendOptionClick = useCallback((value: string, sessionId?: string, messageId?: string) => { // Optimistically record the selection in local state so the UI lock // survives virtualizer remounts, WS reconnects, and parent re-renders @@ -596,24 +626,29 @@ export function WebSocketProvider({ 
children }: { children: ReactNode }) { const launchLivingUI = useCallback((projectId: string) => { if (client.isConnected) { - // The backend response (living_ui_launch) will flip status to running. - // No optimistic transition here — the existing 'launching' literal - // wasn't part of LivingUIStatus and was a no-op for the UI. + // Optimistically flip to 'launching' so the button shows a spinner and + // the content swaps to the launching screen immediately — launch can + // take many seconds (install/build/start). The backend response + // (living_ui_launch) resolves it to running or error. + dispatch(livingUiMarkLaunching({ projectId })) client.sendString(JSON.stringify({ type: 'living_ui_launch', projectId, })) } - }, []) + }, [dispatch]) const stopLivingUI = useCallback((projectId: string) => { if (client.isConnected) { + // Optimistically flip to 'stopping' for immediate feedback; the backend + // response (living_ui_stop) resolves it to stopped (or reverts on error). + dispatch(livingUiMarkStopping({ projectId })) client.sendString(JSON.stringify({ type: 'living_ui_stop', projectId, })) } - }, []) + }, [dispatch]) const deleteLivingUI = useCallback((projectId: string) => { if (client.isConnected) { @@ -641,6 +676,8 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { hasMoreActions, loadingOlderActions, cancellingTaskId, + completingTaskId, + resumingTaskId, dashboardMetrics, filteredMetricsCache, onboardingStep, @@ -665,6 +702,8 @@ export function WebSocketProvider({ children }: { children: ReactNode }) { sendCommand, clearMessages, cancelTask, + completeTask, + resumeTask, openFile, openFolder, requestFilteredMetrics, diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css index db85a730..a16c697e 100644 --- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css +++ 
b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.module.css @@ -419,6 +419,40 @@ background: var(--color-error-light); } +/* Task mark-complete button - same hover-reveal pattern as cancel */ +.taskCompleteBtn { + opacity: 0; + flex-shrink: 0; + color: var(--text-muted); + transition: opacity var(--transition-fast), color var(--transition-fast); +} + +.taskItem:hover .taskCompleteBtn { + opacity: 1; +} + +.taskCompleteBtn:hover { + color: var(--color-success); + background: var(--color-success-light); +} + +/* Task resume button - shown on hover for terminal (ended) tasks */ +.taskResumeBtn { + opacity: 0; + flex-shrink: 0; + color: var(--text-muted); + transition: opacity var(--transition-fast), color var(--transition-fast); +} + +.taskItem:hover .taskResumeBtn { + opacity: 1; +} + +.taskResumeBtn:hover { + color: var(--color-success); + background: var(--color-success-light); +} + .spinning { animation: spin 1s linear infinite; } diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx index d33ba4ae..00e20451 100644 --- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Chat/ChatPage.tsx @@ -1,5 +1,5 @@ import React, { useState, useRef, useEffect, useCallback, useMemo } from 'react' -import { X, Loader2, Reply } from 'lucide-react' +import { Check, X, Loader2, Reply, RotateCw } from 'lucide-react' import { useWebSocket } from '../../contexts/WebSocketContext' import { IconButton, StatusIndicator } from '../../components/ui' import { Chat } from '../../components/Chat' @@ -19,6 +19,10 @@ export function ChatPage() { messages, cancelTask, cancellingTaskId, + completeTask, + completingTaskId, + resumeTask, + resumingTaskId, setReplyTarget, loadOlderActions, hasMoreActions, @@ -167,6 +171,24 @@ export function ChatPage() { icon={} /> )} + { + e.stopPropagation() + completeTask(task.id) + }} + disabled={completingTaskId === task.id 
|| cancellingTaskId === task.id} + title="Mark Task Complete" + icon={ + completingTaskId === task.id ? ( + + ) : ( + + ) + } + /> )} + {(task.status === 'completed' || task.status === 'cancelled' || task.status === 'error') && ( + { + e.stopPropagation() + resumeTask(task.id) + }} + disabled={resumingTaskId === task.id} + title="Continue Task" + icon={ + resumingTaskId === task.id ? ( + + ) : ( + + ) + } + /> + )} {isExpanded && (
diff --git a/app/ui_layer/browser/frontend/src/pages/Dashboard/DashboardPage.tsx b/app/ui_layer/browser/frontend/src/pages/Dashboard/DashboardPage.tsx index 3a5dd717..8962443d 100644 --- a/app/ui_layer/browser/frontend/src/pages/Dashboard/DashboardPage.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Dashboard/DashboardPage.tsx @@ -12,10 +12,10 @@ import { Timer, PlayCircle, Hammer, - Wrench, Bot, Building2, - Hash + Hash, + Globe } from 'lucide-react' import { useWebSocket } from '../../contexts/WebSocketContext' import { Badge, StatusIndicator } from '../../components/ui' @@ -113,9 +113,10 @@ export function DashboardPage() { const [tokenPeriod, setTokenPeriod] = useState('total') const [usagePeriod, setUsagePeriod] = useState('total') - // Expand/collapse state for top tools/skills lists + // Expand/collapse state for top tools/skills/integrations lists const [showAllTools, setShowAllTools] = useState(false) const [showAllSkills, setShowAllSkills] = useState(false) + const [showAllIntegrations, setShowAllIntegrations] = useState(false) // Request filtered metrics when period changes (for all periods including 'total') const handlePeriodChange = useCallback(( @@ -139,10 +140,10 @@ export function DashboardPage() { // Request 'total' metrics on initial load useEffect(() => { - if (!filteredMetricsCache['total']) { + if (connected && !filteredMetricsCache['total']) { requestFilteredMetrics('total') } - }, [requestFilteredMetrics, filteredMetricsCache]) + }, [connected, requestFilteredMetrics, filteredMetricsCache]) // Calculate statistics from actions const tasks = useMemo(() => actions.filter(a => a.itemType === 'task'), [actions]) @@ -218,6 +219,11 @@ export function DashboardPage() { const skillTotalInvocations = metrics?.skill?.totalInvocations ?? 0 const topSkills = metrics?.skill?.topSkills ?? [] + // Integration metrics + const integrationConnected = metrics?.integration?.connectedIntegrations ?? 
0 + const integrationTotalCalls = metrics?.integration?.totalCalls ?? 0 + const topIntegrations = metrics?.integration?.topIntegrations ?? [] + // Model metrics const modelProvider = metrics?.model?.provider ?? '' const modelId = metrics?.model?.modelId ?? '' @@ -575,6 +581,52 @@ export function DashboardPage() {
+ {/* Integrations Panel */} +
+
+ +

Integrations

+
+
+
+
+ + {integrationConnected} + Connected +
+
+ + {integrationTotalCalls} + Total Calls +
+
+
+
Top Integrations
+ {topIntegrations.length > 0 ? ( +
+ {(showAllIntegrations ? topIntegrations : topIntegrations.slice(0, 3)).map((intg, index) => ( +
+ #{index + 1} + {intg.name} + {intg.count} +
+ ))} + {topIntegrations.length > 3 && ( + + )} +
+ ) : ( +
No usage yet
+ )} +
+
+
+ {/* Model Information Panel */}
diff --git a/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.module.css b/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.module.css new file mode 100644 index 00000000..c8e5a041 --- /dev/null +++ b/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.module.css @@ -0,0 +1,155 @@ +/* Living UI creation question form — shown when the agent asks a question + (send_message with wait_for_user_reply) during creation. */ + +.surface { + flex: 1; + display: flex; + align-items: center; + justify-content: center; + height: 100%; + width: 100%; + padding: var(--space-4); + background: var(--bg-primary); + overflow-y: auto; +} + +.questionWrap { + width: 100%; + max-width: 560px; +} + +.questionCard { + display: flex; + flex-direction: column; + gap: var(--space-3); + padding: var(--space-4); + background: var(--bg-secondary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-lg); +} + +.brandRow { + display: flex; + align-items: center; + gap: var(--space-2); +} + +.brandIcon { + color: var(--color-primary); +} + +.buildingTitle { + font-size: var(--text-lg); + font-weight: var(--font-semibold); + color: var(--text-primary); +} + +.questionPrompt { + margin: 0; + font-size: var(--text-sm); + color: var(--text-secondary); + line-height: 1.5; +} + +.questionList { + display: flex; + flex-direction: column; + gap: var(--space-4); +} + +.questionField { + display: flex; + flex-direction: column; + gap: var(--space-2); +} + +.questionLabel { + display: flex; + align-items: flex-start; + gap: var(--space-2); + font-size: var(--text-sm); + color: var(--text-primary); + line-height: 1.4; +} + +.questionNum { + flex-shrink: 0; + display: inline-flex; + align-items: center; + justify-content: center; + width: 20px; + height: 20px; + border-radius: var(--radius-full); + background: var(--color-primary-subtle); + color: var(--color-primary); + font-size: var(--text-xs); + font-weight: 
var(--font-semibold); +} + +.questionInput { + width: 100%; + box-sizing: border-box; + padding: var(--space-3); + background: var(--bg-primary); + border: 1px solid var(--border-primary); + border-radius: var(--radius-md); + color: var(--text-primary); + font-family: inherit; + font-size: var(--text-sm); + line-height: 1.5; + resize: vertical; + transition: border-color var(--transition-fast); +} + +.questionInput::placeholder { + color: var(--text-muted); +} + +.questionInput:focus { + outline: none; + border-color: var(--color-primary); +} + +.questionTrailer { + margin: 0; + font-size: var(--text-sm); + color: var(--text-secondary); + line-height: 1.5; +} + +.questionActions { + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--space-3); + margin-top: var(--space-1); +} + +.questionHint { + font-size: var(--text-xs); + color: var(--text-muted); +} + +.questionSubmit { + display: inline-flex; + align-items: center; + gap: var(--space-2); + padding: var(--space-2) var(--space-4); + background: var(--color-primary); + border: none; + border-radius: var(--radius-md); + color: #fff; + font-size: var(--text-sm); + font-weight: var(--font-semibold); + cursor: pointer; + transition: background var(--transition-fast); +} + +.questionSubmit:hover:not(:disabled) { + background: var(--color-primary-hover); +} + +.questionSubmit:disabled { + opacity: 0.5; + cursor: not-allowed; +} diff --git a/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.tsx b/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.tsx new file mode 100644 index 00000000..3c754bef --- /dev/null +++ b/app/ui_layer/browser/frontend/src/pages/LivingUI/CreationQuestionForm.tsx @@ -0,0 +1,144 @@ +import { useMemo, useState, type KeyboardEvent as ReactKeyboardEvent } from 'react' +import { ArrowRight, MessagesSquare } from 'lucide-react' +import styles from './CreationQuestionForm.module.css' + +interface Props { + projectName: string + 
message: string + onAnswer?: (text: string) => void +} + +interface ParsedQuestions { + preamble: string + items: { label: string; text: string }[] + trailer: string +} + +/** + * Split an agent question message into its parts. A numbered list ("1. … 2. …") + * becomes one field per question; preamble/trailer are the framing text around + * it. A message with no numbered list parses to zero items (single-box mode). + */ +function parseQuestions(message: string): ParsedQuestions { + const preamble: string[] = [] + const items: { label: string; text: string }[] = [] + const trailer: string[] = [] + let current: { label: string; text: string } | null = null + let seen = false + for (const raw of message.split('\n')) { + const m = raw.match(/^\s*(\d+)[.)]\s+(.*)$/) + if (m) { + seen = true + current = { label: m[1], text: m[2].trim() } + items.push(current) + } else if (!seen) { + if (raw.trim()) preamble.push(raw.trim()) + } else if (raw.trim() === '') { + current = null // a blank line ends the current question; rest is trailer + } else if (current) { + current.text += ' ' + raw.trim() // wrapped continuation of a question + } else { + trailer.push(raw.trim()) + } + } + return { preamble: preamble.join(' '), items, trailer: trailer.join(' ') } +} + +/** + * Form shown on the Living UI creation screen when the agent asks a question + * (a send_message with wait_for_user_reply). Mirrors the chat question so the + * user can answer with the chat panel closed. The answer is sent back through + * the normal reply path, resuming the task — so answering here or in chat are + * equivalent (whichever lands first wins). + */ +export function CreationQuestionForm({ projectName, message, onAnswer }: Props) { + const parsed = useMemo(() => parseQuestions(message), [message]) + const multi = parsed.items.length > 0 + const [single, setSingle] = useState('') + const [answers, setAnswers] = useState(() => parsed.items.map(() => '')) + + const canSend = multi ? 
answers.some(a => a.trim()) : single.trim().length > 0 + + const submit = () => { + if (multi) { + // Recombine into the numbered reply the agent expects (e.g. "1. …\n2. …"). + const parts = parsed.items + .map((q, i) => ({ label: q.label, a: answers[i].trim() })) + .filter(x => x.a) + if (!parts.length) return + onAnswer?.(parts.map(x => `${x.label}. ${x.a}`).join('\n')) + } else { + const t = single.trim() + if (!t) return + onAnswer?.(t) + } + } + + const onKey = (e: ReactKeyboardEvent) => { + if (e.key === 'Enter' && (e.metaKey || e.ctrlKey)) { + e.preventDefault() + submit() + } + } + + return ( +
+
+
+
+ + Building {projectName} +
+ + {multi ? ( + <> + {parsed.preamble &&

{parsed.preamble}

} +
+ {parsed.items.map((q, i) => ( +
+ +