From 65a1f672ca7de6a091866438791838e61f9716ae Mon Sep 17 00:00:00 2001 From: aimanmalib <84276911+aimanmalib@users.noreply.github.com> Date: Fri, 5 Jun 2026 07:21:01 +0000 Subject: [PATCH 1/6] refactor(core): make LLM backend provider-agnostic Introduce generic LLMConfig + LLMClient supporting any OpenAI-compatible endpoint (OpenAI, OpenRouter, Ollama, MiMo, llama.cpp) with bearer or api-key auth. MiMoConfig/MiMoClient kept as backward-compat aliases. All 96 existing tests still pass. --- src/contentforge/core/config.py | 123 +++++++++++-- src/contentforge/core/llm_client.py | 254 +++++++++++++++++++++++++ src/contentforge/core/mimo_client.py | 265 +++------------------------ 3 files changed, 382 insertions(+), 260 deletions(-) create mode 100644 src/contentforge/core/llm_client.py diff --git a/src/contentforge/core/config.py b/src/contentforge/core/config.py index 24b0bd6..11f1b59 100644 --- a/src/contentforge/core/config.py +++ b/src/contentforge/core/config.py @@ -1,4 +1,10 @@ -"""Configuration management for ContentForge pipeline.""" +"""Configuration management for ContentForge pipeline. + +ContentForge speaks the OpenAI-compatible ``/chat/completions`` protocol, so it +works with any provider that exposes that API: OpenAI, OpenRouter, Ollama, +local llama.cpp servers, Xiaomi MiMo Token Plan, and more. Pick a provider via +``LLMConfig(provider=...)`` or point ``base_url`` at any compatible endpoint. 
+""" from __future__ import annotations @@ -7,18 +13,57 @@ from typing import Optional import yaml -from pydantic import BaseModel, Field - - -class MiMoConfig(BaseModel): - """MiMo API connection settings.""" - api_key: str = Field(default_factory=lambda: os.environ.get("MIMO_API_KEY", "")) - base_url: str = Field( - default_factory=lambda: os.environ.get( - "MIMO_BASE_URL", "https://token-plan-sgp.xiaomimimo.com/v1" - ) - ) - model: str = "mimo-v2.5-pro" +from pydantic import BaseModel, Field, model_validator + +# Provider presets: base_url, auth header style, default model, and the env +# vars used to populate api_key / base_url when they aren't set explicitly. +# auth_style is "bearer" (Authorization: Bearer) or "api-key" (api-key header). +PROVIDER_PRESETS: dict[str, dict[str, str]] = { + "mimo": { + "base_url": "https://token-plan-sgp.xiaomimimo.com/v1", + "auth_style": "api-key", + "model": "mimo-v2.5-pro", + "env_key": "MIMO_API_KEY", + "env_base": "MIMO_BASE_URL", + }, + "openai": { + "base_url": "https://api.openai.com/v1", + "auth_style": "bearer", + "model": "gpt-4o-mini", + "env_key": "OPENAI_API_KEY", + "env_base": "OPENAI_BASE_URL", + }, + "openrouter": { + "base_url": "https://openrouter.ai/api/v1", + "auth_style": "bearer", + "model": "openai/gpt-4o-mini", + "env_key": "OPENROUTER_API_KEY", + "env_base": "OPENROUTER_BASE_URL", + }, + "ollama": { + "base_url": "http://localhost:11434/v1", + "auth_style": "bearer", + "model": "llama3.1", + "env_key": "OLLAMA_API_KEY", + "env_base": "OLLAMA_BASE_URL", + }, +} + +DEFAULT_PROVIDER = "mimo" + + +class LLMConfig(BaseModel): + """Connection settings for any OpenAI-compatible chat completions endpoint. + + Empty ``api_key`` / ``base_url`` / ``model`` / ``auth_style`` fields are + resolved from the selected provider preset (and its env vars) after init. 
+ """ + + provider: str = DEFAULT_PROVIDER + api_key: str = "" + base_url: str = "" + model: str = "" + auth_style: str = "" # "bearer" | "api-key" — resolved from provider if blank max_tokens: int = 4096 temperature: float = 0.7 top_p: float = 0.9 @@ -26,12 +71,38 @@ class MiMoConfig(BaseModel): max_retries: int = 3 retry_delay: float = 1.0 + @model_validator(mode="after") + def _resolve_provider_defaults(self) -> "LLMConfig": + preset = PROVIDER_PRESETS.get(self.provider, PROVIDER_PRESETS[DEFAULT_PROVIDER]) + if not self.api_key: + self.api_key = os.environ.get(preset["env_key"], "") + if not self.base_url: + self.base_url = os.environ.get(preset["env_base"], preset["base_url"]) + if not self.model: + self.model = preset["model"] + if not self.auth_style: + self.auth_style = preset["auth_style"] + return self + @property def headers(self) -> dict[str, str]: - return { - "api-key": self.api_key, # MiMo uses api-key, NOT Authorization: Bearer - "Content-Type": "application/json", - } + """Auth + content headers, matching the provider's expected auth style.""" + headers = {"Content-Type": "application/json"} + if self.auth_style == "bearer": + headers["Authorization"] = f"Bearer {self.api_key}" + else: + headers["api-key"] = self.api_key + return headers + + +class MiMoConfig(LLMConfig): + """Backward-compatible alias defaulting to the Xiaomi MiMo Token Plan API. + + Retained so existing configs/tests keep working. New code should prefer + :class:`LLMConfig` with an explicit ``provider``. 
+ """ + + provider: str = "mimo" class PipelineConfig(BaseModel): @@ -60,13 +131,27 @@ class AgentConfig(BaseModel): class ContentForgeConfig(BaseModel): """Root configuration.""" - mimo: MiMoConfig = Field(default_factory=MiMoConfig) + llm: LLMConfig = Field(default_factory=LLMConfig) pipeline: PipelineConfig = Field(default_factory=PipelineConfig) agents: list[AgentConfig] = Field(default_factory=list) log_level: str = "INFO" output_dir: str = "./output" cache_dir: str = "./.cache" + @model_validator(mode="before") + @classmethod + def _accept_legacy_mimo_key(cls, data): + """Map a legacy top-level ``mimo:`` block onto ``llm`` for old configs.""" + if isinstance(data, dict) and "mimo" in data and "llm" not in data: + data = dict(data) + data["llm"] = data.pop("mimo") + return data + + @property + def mimo(self) -> LLMConfig: + """Deprecated alias for :attr:`llm` (kept for backward compatibility).""" + return self.llm + @classmethod def from_yaml(cls, path: str | Path) -> "ContentForgeConfig": with open(path) as f: @@ -75,7 +160,7 @@ def from_yaml(cls, path: str | Path) -> "ContentForgeConfig": @classmethod def from_env(cls) -> "ContentForgeConfig": - return cls(mimo=MiMoConfig()) + return cls(llm=LLMConfig()) def get_agent_config(self, name: str) -> AgentConfig: for agent in self.agents: diff --git a/src/contentforge/core/llm_client.py b/src/contentforge/core/llm_client.py new file mode 100644 index 0000000..94fd1df --- /dev/null +++ b/src/contentforge/core/llm_client.py @@ -0,0 +1,254 @@ +"""OpenAI-compatible chat completions client with streaming, retries, token tracking. + +Works with any provider that speaks the OpenAI ``/chat/completions`` protocol +(OpenAI, OpenRouter, Ollama, llama.cpp, Xiaomi MiMo Token Plan, ...). The auth +header style (bearer vs api-key) comes from :class:`LLMConfig`. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import time +from dataclasses import dataclass, field +from typing import AsyncIterator, Optional + +import httpx + +from .config import LLMConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class TokenUsage: + """Track token consumption per call and cumulative.""" + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cached_tokens: int = 0 + + def add(self, other: "TokenUsage") -> None: + self.prompt_tokens += other.prompt_tokens + self.completion_tokens += other.completion_tokens + self.total_tokens += other.total_tokens + self.cached_tokens += other.cached_tokens + + +@dataclass +class ChatMessage: + role: str + content: str + reasoning_content: Optional[str] = None + + +@dataclass +class ChatResponse: + content: str + reasoning_content: Optional[str] = None + usage: TokenUsage = field(default_factory=TokenUsage) + model: str = "" + finish_reason: str = "" + latency_ms: float = 0.0 + + +@dataclass +class StreamChunk: + delta: str + reasoning_delta: Optional[str] = None + finish_reason: Optional[str] = None + usage: Optional[TokenUsage] = None + + +class LLMClient: + """Async client for any OpenAI-compatible chat completions endpoint. + + Auth style (``Authorization: Bearer`` vs ``api-key`` header) and the + base URL are taken from the supplied :class:`LLMConfig`, so the same client + serves OpenAI, OpenRouter, Ollama, MiMo Token Plan, etc. 
+ """ + + def __init__(self, config: LLMConfig): + self.config = config + self._client: Optional[httpx.AsyncClient] = None + self.total_usage = TokenUsage() + self._call_count = 0 + self._total_latency_ms = 0.0 + + async def __aenter__(self) -> "LLMClient": + self._client = httpx.AsyncClient( + base_url=self.config.base_url, + headers=self.config.headers, + timeout=httpx.Timeout(self.config.timeout), + ) + return self + + async def __aexit__(self, *args) -> None: + if self._client: + await self._client.aclose() + + @property + def avg_latency_ms(self) -> float: + if self._call_count == 0: + return 0.0 + return self._total_latency_ms / self._call_count + + async def chat( + self, + messages: list[ChatMessage], + *, + model: Optional[str] = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + stream: bool = False, + ) -> ChatResponse: + """Send a chat completion request.""" + if not self._client: + raise RuntimeError("Client not initialized. Use async with.") + + payload = { + "model": model or self.config.model, + "messages": [{"role": m.role, "content": m.content} for m in messages], + "max_tokens": max_tokens or self.config.max_tokens, + "temperature": temperature or self.config.temperature, + "top_p": self.config.top_p, + "stream": stream, + } + + start = time.monotonic() + + for attempt in range(self.config.max_retries): + try: + if stream: + return await self._stream_chat(payload) + else: + resp = await self._client.post("/chat/completions", json=payload) + resp.raise_for_status() + data = resp.json() + break + except (httpx.HTTPStatusError, httpx.ConnectError) as e: + logger.warning(f"LLM API attempt {attempt+1} failed: {e}") + if attempt < self.config.max_retries - 1: + await asyncio.sleep(self.config.retry_delay * (2 ** attempt)) + else: + raise + + latency = (time.monotonic() - start) * 1000 + self._call_count += 1 + self._total_latency_ms += latency + + choice = data["choices"][0] + usage_data = data.get("usage", {}) + + usage 
= TokenUsage( + prompt_tokens=usage_data.get("prompt_tokens", 0), + completion_tokens=usage_data.get("completion_tokens", 0), + total_tokens=usage_data.get("total_tokens", 0), + cached_tokens=usage_data.get("prompt_tokens_details", {}).get("cached_tokens", 0), + ) + self.total_usage.add(usage) + + msg = choice.get("message", {}) + + return ChatResponse( + content=msg.get("content", ""), + reasoning_content=msg.get("reasoning_content"), + usage=usage, + model=data.get("model", ""), + finish_reason=choice.get("finish_reason", ""), + latency_ms=latency, + ) + + async def _stream_chat(self, payload: dict) -> ChatResponse: + """Handle streaming SSE response.""" + payload["stream"] = True + content_parts: list[str] = [] + reasoning_parts: list[str] = [] + usage = TokenUsage() + + async with self._client.stream( + "POST", "/chat/completions", json=payload + ) as resp: + resp.raise_for_status() + async for line in resp.aiter_lines(): + if not line.startswith("data: "): + continue + line_data = line[6:].strip() + if line_data == "[DONE]": + break + try: + chunk = json.loads(line_data) + delta = chunk["choices"][0].get("delta", {}) + if "content" in delta and delta["content"]: + content_parts.append(delta["content"]) + if "reasoning_content" in delta and delta["reasoning_content"]: + reasoning_parts.append(delta["reasoning_content"]) + if "usage" in chunk: + usage = TokenUsage( + prompt_tokens=chunk["usage"].get("prompt_tokens", 0), + completion_tokens=chunk["usage"].get("completion_tokens", 0), + total_tokens=chunk["usage"].get("total_tokens", 0), + ) + except (json.JSONDecodeError, KeyError): + continue + + self.total_usage.add(usage) + + return ChatResponse( + content="".join(content_parts), + reasoning_content="".join(reasoning_parts) if reasoning_parts else None, + usage=usage, + model=payload.get("model", ""), + finish_reason="stop", + ) + + async def stream_chunks( + self, + messages: list[ChatMessage], + *, + model: Optional[str] = None, + temperature: 
Optional[float] = None, + max_tokens: Optional[int] = None, + ) -> AsyncIterator[StreamChunk]: + """Yield streaming chunks for real-time display.""" + if not self._client: + raise RuntimeError("Client not initialized. Use async with.") + + payload = { + "model": model or self.config.model, + "messages": [{"role": m.role, "content": m.content} for m in messages], + "max_tokens": max_tokens or self.config.max_tokens, + "temperature": temperature or self.config.temperature, + "top_p": self.config.top_p, + "stream": True, + } + + async with self._client.stream( + "POST", "/chat/completions", json=payload + ) as resp: + resp.raise_for_status() + async for line in resp.aiter_lines(): + if not line.startswith("data: "): + continue + line_data = line[6:].strip() + if line_data == "[DONE]": + break + try: + chunk = json.loads(line_data) + delta = chunk["choices"][0].get("delta", {}) + finish = chunk["choices"][0].get("finish_reason") + usage_data = chunk.get("usage") + + yield StreamChunk( + delta=delta.get("content", "") or "", + reasoning_delta=delta.get("reasoning_content"), + finish_reason=finish, + usage=TokenUsage(**usage_data) if usage_data else None, + ) + except (json.JSONDecodeError, KeyError): + continue + + +# Backward-compatible alias. New code should use LLMClient. +MiMoClient = LLMClient diff --git a/src/contentforge/core/mimo_client.py b/src/contentforge/core/mimo_client.py index 9d675d2..714d889 100644 --- a/src/contentforge/core/mimo_client.py +++ b/src/contentforge/core/mimo_client.py @@ -1,244 +1,27 @@ -"""MiMo V2.5 Pro API client with streaming, retries, and token tracking.""" +"""Backward-compatibility shim. 
-from __future__ import annotations - -import asyncio -import json -import logging -import time -from dataclasses import dataclass, field -from typing import AsyncIterator, Optional - -import httpx - -from .config import MiMoConfig - -logger = logging.getLogger(__name__) - - -@dataclass -class TokenUsage: - """Track token consumption per call and cumulative.""" - prompt_tokens: int = 0 - completion_tokens: int = 0 - total_tokens: int = 0 - cached_tokens: int = 0 - - def add(self, other: "TokenUsage") -> None: - self.prompt_tokens += other.prompt_tokens - self.completion_tokens += other.completion_tokens - self.total_tokens += other.total_tokens - self.cached_tokens += other.cached_tokens - - -@dataclass -class ChatMessage: - role: str - content: str - reasoning_content: Optional[str] = None - - -@dataclass -class ChatResponse: - content: str - reasoning_content: Optional[str] = None - usage: TokenUsage = field(default_factory=TokenUsage) - model: str = "" - finish_reason: str = "" - latency_ms: float = 0.0 - - -@dataclass -class StreamChunk: - delta: str - reasoning_delta: Optional[str] = None - finish_reason: Optional[str] = None - usage: Optional[TokenUsage] = None - - -class MiMoClient: - """Async client for Xiaomi MiMo V2.5 Pro Token Plan API. - - Uses api-key header (NOT Authorization: Bearer) per MiMo Token Plan spec. - Endpoint: https://token-plan-sgp.xiaomimimo.com/v1/chat/completions - """ - - def __init__(self, config: MiMoConfig): - self.config = config - self._client: Optional[httpx.AsyncClient] = None - self.total_usage = TokenUsage() - self._call_count = 0 - self._total_latency_ms = 0.0 - - async def __aenter__(self) -> "MiMoClient": - self._client = httpx.AsyncClient( - base_url=self.config.base_url, - headers=self.config.headers, - timeout=httpx.Timeout(self.config.timeout), - ) - return self +The implementation moved to :mod:`contentforge.core.llm_client` when ContentForge +became provider-agnostic. 
This module re-exports the public names so existing +imports (``from contentforge.core.mimo_client import MiMoClient``) keep working. +Prefer importing from ``contentforge.core.llm_client`` in new code. +""" - async def __aexit__(self, *args) -> None: - if self._client: - await self._client.aclose() - - @property - def avg_latency_ms(self) -> float: - if self._call_count == 0: - return 0.0 - return self._total_latency_ms / self._call_count - - async def chat( - self, - messages: list[ChatMessage], - *, - model: Optional[str] = None, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - stream: bool = False, - ) -> ChatResponse: - """Send a chat completion request to MiMo API.""" - if not self._client: - raise RuntimeError("Client not initialized. Use async with.") - - payload = { - "model": model or self.config.model, - "messages": [{"role": m.role, "content": m.content} for m in messages], - "max_tokens": max_tokens or self.config.max_tokens, - "temperature": temperature or self.config.temperature, - "top_p": self.config.top_p, - "stream": stream, - } - - start = time.monotonic() - - for attempt in range(self.config.max_retries): - try: - if stream: - return await self._stream_chat(payload) - else: - resp = await self._client.post("/chat/completions", json=payload) - resp.raise_for_status() - data = resp.json() - break - except (httpx.HTTPStatusError, httpx.ConnectError) as e: - logger.warning(f"MiMo API attempt {attempt+1} failed: {e}") - if attempt < self.config.max_retries - 1: - await asyncio.sleep(self.config.retry_delay * (2 ** attempt)) - else: - raise - - latency = (time.monotonic() - start) * 1000 - self._call_count += 1 - self._total_latency_ms += latency - - choice = data["choices"][0] - usage_data = data.get("usage", {}) - - usage = TokenUsage( - prompt_tokens=usage_data.get("prompt_tokens", 0), - completion_tokens=usage_data.get("completion_tokens", 0), - total_tokens=usage_data.get("total_tokens", 0), - 
cached_tokens=usage_data.get("prompt_tokens_details", {}).get("cached_tokens", 0), - ) - self.total_usage.add(usage) - - msg = choice.get("message", {}) - - return ChatResponse( - content=msg.get("content", ""), - reasoning_content=msg.get("reasoning_content"), - usage=usage, - model=data.get("model", ""), - finish_reason=choice.get("finish_reason", ""), - latency_ms=latency, - ) - - async def _stream_chat(self, payload: dict) -> ChatResponse: - """Handle streaming SSE response from MiMo.""" - payload["stream"] = True - content_parts: list[str] = [] - reasoning_parts: list[str] = [] - usage = TokenUsage() - - async with self._client.stream( - "POST", "/chat/completions", json=payload - ) as resp: - resp.raise_for_status() - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - line_data = line[6:].strip() - if line_data == "[DONE]": - break - try: - chunk = json.loads(line_data) - delta = chunk["choices"][0].get("delta", {}) - if "content" in delta and delta["content"]: - content_parts.append(delta["content"]) - if "reasoning_content" in delta and delta["reasoning_content"]: - reasoning_parts.append(delta["reasoning_content"]) - if "usage" in chunk: - usage = TokenUsage( - prompt_tokens=chunk["usage"].get("prompt_tokens", 0), - completion_tokens=chunk["usage"].get("completion_tokens", 0), - total_tokens=chunk["usage"].get("total_tokens", 0), - ) - except (json.JSONDecodeError, KeyError): - continue - - self.total_usage.add(usage) - - return ChatResponse( - content="".join(content_parts), - reasoning_content="".join(reasoning_parts) if reasoning_parts else None, - usage=usage, - model=payload.get("model", ""), - finish_reason="stop", - ) - - async def stream_chunks( - self, - messages: list[ChatMessage], - *, - model: Optional[str] = None, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, - ) -> AsyncIterator[StreamChunk]: - """Yield streaming chunks for real-time display.""" - if not self._client: - raise 
RuntimeError("Client not initialized. Use async with.") - - payload = { - "model": model or self.config.model, - "messages": [{"role": m.role, "content": m.content} for m in messages], - "max_tokens": max_tokens or self.config.max_tokens, - "temperature": temperature or self.config.temperature, - "top_p": self.config.top_p, - "stream": True, - } - - async with self._client.stream( - "POST", "/chat/completions", json=payload - ) as resp: - resp.raise_for_status() - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - line_data = line[6:].strip() - if line_data == "[DONE]": - break - try: - chunk = json.loads(line_data) - delta = chunk["choices"][0].get("delta", {}) - finish = chunk["choices"][0].get("finish_reason") - usage_data = chunk.get("usage") +from __future__ import annotations - yield StreamChunk( - delta=delta.get("content", "") or "", - reasoning_delta=delta.get("reasoning_content"), - finish_reason=finish, - usage=TokenUsage(**usage_data) if usage_data else None, - ) - except (json.JSONDecodeError, KeyError): - continue +from .llm_client import ( # noqa: F401 + ChatMessage, + ChatResponse, + LLMClient, + MiMoClient, + StreamChunk, + TokenUsage, +) + +__all__ = [ + "ChatMessage", + "ChatResponse", + "LLMClient", + "MiMoClient", + "StreamChunk", + "TokenUsage", +] From e8bd11a2789f6c1b2bcd9da65d1369e19e455d28 Mon Sep 17 00:00:00 2001 From: aimanmalib <84276911+aimanmalib@users.noreply.github.com> Date: Fri, 5 Jun 2026 07:22:49 +0000 Subject: [PATCH 2/6] test(core): add 16 multi-backend config tests; clean dead vars Cover provider presets (openai/openrouter/ollama/mimo), bearer vs api-key auth, env-var resolution, and MiMo backward-compat paths. Remove unused locals in 'agents' CLI command. Ruff clean, 112 tests pass. 
--- src/contentforge/cli.py | 10 +--- tests/unit/test_llm_config.py | 109 ++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 tests/unit/test_llm_config.py diff --git a/src/contentforge/cli.py b/src/contentforge/cli.py index a1460bb..0d8a421 100644 --- a/src/contentforge/cli.py +++ b/src/contentforge/cli.py @@ -5,7 +5,6 @@ import asyncio import json import sys -from pathlib import Path import click from rich.console import Console @@ -118,14 +117,7 @@ def generate( @click.pass_context def agents(ctx: click.Context) -> None: """List all available agents.""" - config: ContentForgeConfig = ctx.obj.get("config") or ContentForgeConfig.from_env() - - from .core.mimo_client import MiMoClient - from .core.token_tracker import TokenTracker - - tracker = TokenTracker() - # Create a temporary client just for listing - client_config = config.mimo + _config: ContentForgeConfig = ctx.obj.get("config") or ContentForgeConfig.from_env() table = Table(title="ContentForge Agents", border_style="cyan") table.add_column("Name", style="bold") diff --git a/tests/unit/test_llm_config.py b/tests/unit/test_llm_config.py new file mode 100644 index 0000000..4b9c5b6 --- /dev/null +++ b/tests/unit/test_llm_config.py @@ -0,0 +1,109 @@ +"""Unit tests for multi-backend LLM configuration. + +Covers the provider-agnostic LLMConfig: preset resolution, bearer vs api-key +auth styles, env-var fallbacks, and backward-compatible MiMoConfig behaviour. 
+""" + + +from contentforge.core.config import ( + DEFAULT_PROVIDER, + PROVIDER_PRESETS, + ContentForgeConfig, + LLMConfig, + MiMoConfig, +) + + +class TestProviderPresets: + def test_known_providers_present(self): + for provider in ("mimo", "openai", "openrouter", "ollama"): + assert provider in PROVIDER_PRESETS + + def test_default_provider_is_mimo(self): + assert DEFAULT_PROVIDER == "mimo" + + +class TestLLMConfigPresetResolution: + def test_openai_preset(self): + config = LLMConfig(provider="openai", api_key="sk-test") + assert config.base_url == "https://api.openai.com/v1" + assert config.model == "gpt-4o-mini" + assert config.auth_style == "bearer" + + def test_openrouter_preset(self): + config = LLMConfig(provider="openrouter", api_key="or-test") + assert "openrouter.ai" in config.base_url + assert config.auth_style == "bearer" + + def test_ollama_preset(self): + config = LLMConfig(provider="ollama") + assert "localhost:11434" in config.base_url + assert config.model == "llama3.1" + + def test_unknown_provider_falls_back_to_default(self): + config = LLMConfig(provider="does-not-exist", api_key="x") + assert "xiaomimimo.com" in config.base_url # mimo default + + def test_explicit_values_override_preset(self): + config = LLMConfig( + provider="openai", + base_url="https://proxy.local/v1", + model="custom-model", + ) + assert config.base_url == "https://proxy.local/v1" + assert config.model == "custom-model" + + +class TestAuthStyles: + def test_bearer_auth_header(self): + config = LLMConfig(provider="openai", api_key="sk-abc") + headers = config.headers + assert headers["Authorization"] == "Bearer sk-abc" + assert "api-key" not in headers + + def test_api_key_auth_header(self): + config = LLMConfig(provider="mimo", api_key="mimo-secret") + headers = config.headers + assert headers["api-key"] == "mimo-secret" + assert "Authorization" not in headers + + def test_explicit_auth_style_override(self): + config = LLMConfig(provider="mimo", api_key="k", 
auth_style="bearer") + assert config.headers["Authorization"] == "Bearer k" + + +class TestEnvResolution: + def test_openai_env_key(self, monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "sk-from-env") + config = LLMConfig(provider="openai") + assert config.api_key == "sk-from-env" + + def test_openrouter_env_base(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-env") + monkeypatch.setenv("OPENROUTER_BASE_URL", "https://env.openrouter/v1") + config = LLMConfig(provider="openrouter") + assert config.api_key == "or-env" + assert config.base_url == "https://env.openrouter/v1" + + +class TestMiMoBackwardCompat: + def test_mimo_config_defaults_to_mimo_provider(self): + config = MiMoConfig(api_key="test") + assert config.provider == "mimo" + assert config.model == "mimo-v2.5-pro" + assert "xiaomimimo.com" in config.base_url + assert config.headers["api-key"] == "test" + + def test_root_config_mimo_property_aliases_llm(self): + config = ContentForgeConfig() + assert config.mimo is config.llm + + def test_legacy_mimo_yaml_block_maps_to_llm(self): + config = ContentForgeConfig(**{"mimo": {"api_key": "legacy", "model": "mimo-v2.5-pro"}}) + assert config.llm.api_key == "legacy" + assert config.mimo.api_key == "legacy" + + def test_new_llm_yaml_block(self): + config = ContentForgeConfig(**{"llm": {"provider": "openai", "api_key": "sk-x"}}) + assert config.llm.provider == "openai" + assert config.llm.auth_style == "bearer" From 4db904ba84ac67276a3e925af6ace3f22f4a4ae1 Mon Sep 17 00:00:00 2001 From: aimanmalib <84276911+aimanmalib@users.noreply.github.com> Date: Fri, 5 Jun 2026 07:25:56 +0000 Subject: [PATCH 3/6] docs: rebrand as provider-agnostic content pipeline Reposition from 'MiMo-exclusive' to 'any OpenAI-compatible LLM'. Add Supported Providers table (OpenAI/OpenRouter/Ollama/MiMo), update config examples to llm: block with provider field, switch API reference to LLMClient. MiMo now listed as one provider among many. 
--- README.md | 78 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index c903fe7..ed53693 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,15 @@ -# MiMo ContentForge +# ContentForge -> **8-Agent AI Content Pipeline powered by Xiaomi MiMo V2.5 Pro** +> **8-Agent AI Content Pipeline for any OpenAI-compatible LLM** > > From topic to published article in minutes — research, write, optimize, translate, and publish with 8 specialized AI agents orchestrated through a single pipeline. [](https://www.python.org/downloads/) [](LICENSE) -[](tests/) -[](src/) +[](tests/) +[](src/) + +Works with **OpenAI, OpenRouter, Ollama, llama.cpp, Xiaomi MiMo**, or any endpoint that speaks the OpenAI `/chat/completions` protocol. Pick a provider with one config line — no code changes. --- @@ -15,8 +17,8 @@ ``` ┌─────────────────────────────────────────────────────────────────┐ -│ MiMo ContentForge Pipeline │ -│ Powered by Xiaomi MiMo V2.5 Pro │ +│ ContentForge Pipeline │ +│ Any OpenAI-compatible LLM backend │ ├─────────────────────────────────────────────────────────────────┤ │ │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ @@ -36,9 +38,9 @@ │ │ Per-agent consumption · Cache hit rate · Latency │ │ │ └──────────────────────────────────────────────────────┘ │ │ │ -│ API: token-plan-sgp.xiaomimimo.com/v1/chat/completions │ -│ Auth: api-key header (Token Plan format) │ -│ Model: mimo-v2.5-pro · Streaming SSE · reasoning_content │ +│ Protocol: OpenAI-compatible /chat/completions │ +│ Auth: bearer token or api-key header (per provider) │ +│ Streaming SSE · optional reasoning_content support │ └─────────────────────────────────────────────────────────────────┘ ``` @@ -57,15 +59,18 @@ **Total per pipeline run: ~9,400 tokens** (single language) -## Why MiMo V2.5 Pro? +## Supported Providers + +ContentForge talks to any OpenAI-compatible `/chat/completions` endpoint. 
Built-in presets: -We specifically chose MiMo over Claude/GPT for this pipeline because: +| Provider | `provider=` | Default model | Auth | Env vars | +|----------|-------------|---------------|------|----------| +| OpenAI | `openai` | `gpt-4o-mini` | Bearer | `OPENAI_API_KEY`, `OPENAI_BASE_URL` | +| OpenRouter | `openrouter` | `openai/gpt-4o-mini` | Bearer | `OPENROUTER_API_KEY`, `OPENROUTER_BASE_URL` | +| Ollama (local) | `ollama` | `llama3.1` | Bearer | `OLLAMA_API_KEY`, `OLLAMA_BASE_URL` | +| Xiaomi MiMo | `mimo` | `mimo-v2.5-pro` | api-key | `MIMO_API_KEY`, `MIMO_BASE_URL` | -1. **Long-chain reasoning** — The Quality Agent's 8-dimension scoring benefits from MiMo's `reasoning_content` field, which shows the model's step-by-step evaluation process -2. **Streaming SSE quality** — Real-time token-by-token output for the Writer Agent allows live preview without buffering delays -3. **Chinese/Malay proficiency** — The Translator Agent produces natural zh/ms/id output without the awkward phrasing common in Western models -4. **Cost efficiency** — Token Plan pricing at `token-plan-sgp.xiaomimimo.com` makes high-volume content production viable (~$0.20/M cache hit) -5. **Structured output** — MiMo reliably produces valid JSON for Research, Outline, SEO, and Quality agents without schema enforcement +Point `base_url` at any other compatible endpoint (llama.cpp, vLLM, LM Studio, a local proxy) and set `auth_style` (`bearer` or `api-key`) if it differs from the selected provider's preset; an unrecognized `provider` falls back to the `mimo` preset. The pipeline benefits from models that expose a `reasoning_content` field (used by the Quality Agent's 8-dimension scoring) and strong multilingual output (used by the Translator Agent), but neither is required. ## Quick Start @@ -73,10 +78,10 @@ We specifically chose MiMo over Claude/GPT for this pipeline because: # Install pip install -e ".[dev]" -# Set API key -export MIMO_API_KEY="your-token-plan-key" +# Pick any provider — set its API key (OpenAI shown here) +export OPENAI_API_KEY="sk-..." 
-# Generate content +# Generate content (the default provider is mimo; set llm.provider to openai in your config to use the OpenAI key) contentforge generate "AI in Healthcare" --words 2000 --output ./output # With translation @@ -153,10 +158,12 @@ Daily estimate: **50-100 pipeline runs** = ~500K–1M tokens/day ```yaml # contentforge.yaml -mimo: - api_key: ${MIMO_API_KEY} - base_url: https://token-plan-sgp.xiaomimimo.com/v1 - model: mimo-v2.5-pro +llm: + provider: openai # openai | openrouter | ollama | mimo + api_key: ${OPENAI_API_KEY} + # base_url and model default from the provider preset; override if needed + # base_url: https://api.openai.com/v1 + # model: gpt-4o-mini max_tokens: 4096 temperature: 0.7 max_retries: 3 @@ -198,19 +205,20 @@ pytest -m integration pytest -v ``` -**96 tests** covering: -- Configuration management (16 tests) +**112 tests** covering: +- Configuration management (17 tests) +- Multi-backend LLM config: presets, auth styles, env resolution (16 tests) - Token tracking & reporting (14 tests) - Text utilities (12 tests) - Export utilities (4 tests) - Agent base class & all 8 agents (34 tests) - Pipeline orchestration (8 tests) -- Error handling & edge cases (8 tests) +- Error handling & edge cases (7 tests) ## Project Structure ``` -mimo-contentforge/ +contentforge/ ├── src/contentforge/ │ ├── __init__.py │ ├── cli.py # Click CLI with Rich output @@ -227,8 +235,9 @@ mimo-contentforge/ │ │ └── publisher.py # Agent 8: Publisher │ ├── core/ │ │ ├── __init__.py -│ │ ├── config.py # Pydantic config management -│ │ ├── mimo_client.py # MiMo API client (SSE streaming) +│ │ ├── config.py # Pydantic config (multi-provider presets) +│ │ ├── llm_client.py # OpenAI-compatible client (SSE streaming) +│ │ ├── mimo_client.py # Backward-compat shim → llm_client │ │ └── token_tracker.py # Per-agent token metrics │ ├── pipeline/ │ │ ├── __init__.py @@ -260,12 +269,12 @@ mimo-contentforge/ ## API Reference -### MiMoClient +### LLMClient ```python -from contentforge.core.mimo_client import 
MiMoClient, ChatMessage +from contentforge.core.llm_client import LLMClient, ChatMessage -async with MiMoClient(config) as client: +async with LLMClient(config) as client: # Non-streaming response = await client.chat([ ChatMessage(role="system", content="You are helpful."), @@ -279,7 +288,7 @@ async with MiMoClient(config) as client: print(chunk.delta, end="", flush=True) ``` -**Important**: MiMo Token Plan uses `api-key` header, NOT `Authorization: Bearer`. +**Auth styles**: `provider="openai"` (and openrouter/ollama) use `Authorization: Bearer`; `provider="mimo"` uses the `api-key` header. The right style is selected automatically from the provider preset. `MiMoClient` remains importable as a backward-compatible alias of `LLMClient`. ### TokenTracker @@ -301,5 +310,4 @@ MIT License — see [LICENSE](LICENSE) for details. --- -**Built with Xiaomi MiMo V2.5 Pro** via Token Plan API -`token-plan-sgp.xiaomimimo.com/v1` +**Provider-agnostic** — works with OpenAI, OpenRouter, Ollama, llama.cpp, Xiaomi MiMo, or any OpenAI-compatible `/chat/completions` endpoint. From b73a3e12526e636f70cea6f0cd596098f4d5e638 Mon Sep 17 00:00:00 2001 From: aimanmalib <84276911+aimanmalib@users.noreply.github.com> Date: Fri, 5 Jun 2026 07:26:42 +0000 Subject: [PATCH 4/6] ci: add GitHub Actions workflow + apply ruff format Add .github/workflows/ci.yml (ruff lint + format check + pytest with coverage across Python 3.10/3.11/3.12). Apply ruff format across the codebase so the format gate passes. 112 tests green. 
--- .github/workflows/ci.yml | 38 +++++++++++++++ src/contentforge/agents/__init__.py | 17 +++++-- src/contentforge/agents/base.py | 3 +- src/contentforge/agents/quality.py | 1 + src/contentforge/agents/translator.py | 3 +- src/contentforge/core/config.py | 3 ++ src/contentforge/core/llm_client.py | 13 ++---- src/contentforge/core/token_tracker.py | 12 +++-- src/contentforge/pipeline/orchestrator.py | 24 +++++----- src/contentforge/utils/export.py | 12 ++--- src/contentforge/utils/text.py | 11 +++-- tests/conftest.py | 3 +- tests/integration/test_pipeline.py | 36 +++++++++------ tests/unit/test_agents.py | 56 ++++++++++++----------- tests/unit/test_config.py | 5 +- tests/unit/test_llm_config.py | 1 - tests/unit/test_token_tracker.py | 10 ++-- tests/unit/test_utils.py | 2 - 18 files changed, 152 insertions(+), 98 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..430d263 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,38 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Lint with ruff + run: ruff check src/ tests/ + + - name: Check formatting with ruff + run: ruff format --check src/ tests/ + + - name: Run tests + run: pytest --cov=contentforge --cov-report=term-missing diff --git a/src/contentforge/agents/__init__.py b/src/contentforge/agents/__init__.py index bf7cff0..0e75647 100644 --- a/src/contentforge/agents/__init__.py +++ b/src/contentforge/agents/__init__.py @@ -21,14 +21,25 @@ 
"publisher": PublisherAgent, } + def get_agent(name: str, **kwargs) -> BaseAgent: cls = AGENT_REGISTRY.get(name) if not cls: raise ValueError(f"Unknown agent: {name}. Available: {list(AGENT_REGISTRY)}") return cls(**kwargs) + __all__ = [ - "BaseAgent", "AgentResult", "AGENT_REGISTRY", "get_agent", - "ResearchAgent", "OutlineAgent", "WriterAgent", "SEOAgent", - "EditorAgent", "TranslatorAgent", "QualityAgent", "PublisherAgent", + "BaseAgent", + "AgentResult", + "AGENT_REGISTRY", + "get_agent", + "ResearchAgent", + "OutlineAgent", + "WriterAgent", + "SEOAgent", + "EditorAgent", + "TranslatorAgent", + "QualityAgent", + "PublisherAgent", ] diff --git a/src/contentforge/agents/base.py b/src/contentforge/agents/base.py index 1e70c8f..10034ef 100644 --- a/src/contentforge/agents/base.py +++ b/src/contentforge/agents/base.py @@ -8,7 +8,7 @@ from dataclasses import dataclass, field from typing import Any, Optional -from ..core.config import AgentConfig, ContentForgeConfig +from ..core.config import ContentForgeConfig from ..core.mimo_client import ChatMessage, ChatResponse, MiMoClient from ..core.token_tracker import TokenTracker @@ -18,6 +18,7 @@ @dataclass class AgentResult: """Standardized result from any agent.""" + agent_name: str status: str = "success" # success | partial | failed content: str = "" diff --git a/src/contentforge/agents/quality.py b/src/contentforge/agents/quality.py index aa6edfc..5a017ba 100644 --- a/src/contentforge/agents/quality.py +++ b/src/contentforge/agents/quality.py @@ -85,6 +85,7 @@ async def execute( overall_score = 0 try: import json + parsed = json.loads(response.content) overall_score = parsed.get("overall_score", 0) except (json.JSONDecodeError, AttributeError): diff --git a/src/contentforge/agents/translator.py b/src/contentforge/agents/translator.py index 698f169..382e17f 100644 --- a/src/contentforge/agents/translator.py +++ b/src/contentforge/agents/translator.py @@ -44,8 +44,7 @@ async def execute( keywords_note = "" if 
preserve_keywords: keywords_note = ( - f"\nPreserve these keywords in original form: " - f"{', '.join(preserve_keywords)}" + f"\nPreserve these keywords in original form: {', '.join(preserve_keywords)}" ) prompt = f"""Translate the following article from {source_language} to {target_language}. diff --git a/src/contentforge/core/config.py b/src/contentforge/core/config.py index 11f1b59..55cf236 100644 --- a/src/contentforge/core/config.py +++ b/src/contentforge/core/config.py @@ -107,6 +107,7 @@ class MiMoConfig(LLMConfig): class PipelineConfig(BaseModel): """Pipeline execution settings.""" + topic: str = "" target_word_count: int = 2000 language: str = "en" @@ -121,6 +122,7 @@ class PipelineConfig(BaseModel): class AgentConfig(BaseModel): """Per-agent configuration overrides.""" + name: str enabled: bool = True model_override: Optional[str] = None @@ -131,6 +133,7 @@ class AgentConfig(BaseModel): class ContentForgeConfig(BaseModel): """Root configuration.""" + llm: LLMConfig = Field(default_factory=LLMConfig) pipeline: PipelineConfig = Field(default_factory=PipelineConfig) agents: list[AgentConfig] = Field(default_factory=list) diff --git a/src/contentforge/core/llm_client.py b/src/contentforge/core/llm_client.py index 94fd1df..de6602b 100644 --- a/src/contentforge/core/llm_client.py +++ b/src/contentforge/core/llm_client.py @@ -24,6 +24,7 @@ @dataclass class TokenUsage: """Track token consumption per call and cumulative.""" + prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 @@ -128,9 +129,9 @@ async def chat( data = resp.json() break except (httpx.HTTPStatusError, httpx.ConnectError) as e: - logger.warning(f"LLM API attempt {attempt+1} failed: {e}") + logger.warning(f"LLM API attempt {attempt + 1} failed: {e}") if attempt < self.config.max_retries - 1: - await asyncio.sleep(self.config.retry_delay * (2 ** attempt)) + await asyncio.sleep(self.config.retry_delay * (2**attempt)) else: raise @@ -167,9 +168,7 @@ async def _stream_chat(self, 
payload: dict) -> ChatResponse: reasoning_parts: list[str] = [] usage = TokenUsage() - async with self._client.stream( - "POST", "/chat/completions", json=payload - ) as resp: + async with self._client.stream("POST", "/chat/completions", json=payload) as resp: resp.raise_for_status() async for line in resp.aiter_lines(): if not line.startswith("data: "): @@ -224,9 +223,7 @@ async def stream_chunks( "stream": True, } - async with self._client.stream( - "POST", "/chat/completions", json=payload - ) as resp: + async with self._client.stream("POST", "/chat/completions", json=payload) as resp: resp.raise_for_status() async for line in resp.aiter_lines(): if not line.startswith("data: "): diff --git a/src/contentforge/core/token_tracker.py b/src/contentforge/core/token_tracker.py index 9cde4a3..b322d76 100644 --- a/src/contentforge/core/token_tracker.py +++ b/src/contentforge/core/token_tracker.py @@ -4,7 +4,7 @@ import json import time -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path from typing import Optional @@ -12,6 +12,7 @@ @dataclass class AgentMetrics: """Per-agent token and performance metrics.""" + agent_name: str call_count: int = 0 prompt_tokens: int = 0 @@ -140,11 +141,13 @@ def report(self) -> str: lines.append(f" Pipeline Duration: {self.pipeline_duration_s:.1f}s") lines.append(f" Total Tokens: {self.total_tokens:,}") lines.append(f" Prompt: {total_prompt:,} | Completion: {total_comp:,}") - lines.append(f" Cache Hit: {total_cached:,} ({total_cached/max(total_prompt,1):.1%})") + lines.append(f" Cache Hit: {total_cached:,} ({total_cached / max(total_prompt, 1):.1%})") lines.append(f" Total API Calls: {self.total_calls}") lines.append("") - lines.append(f" {'Agent':<20} {'Calls':>6} {'Tokens':>10} {'Avg/call':>10} {'Latency':>10}") + lines.append( + f" {'Agent':<20} {'Calls':>6} {'Tokens':>10} {'Avg/call':>10} {'Latency':>10}" + ) lines.append(" " + "-" * 58) for name, m in sorted(self._agents.items(), 
key=lambda x: -x[1].total_tokens): @@ -155,8 +158,7 @@ def report(self) -> str: lines.append(" " + "-" * 58) lines.append( - f" {'TOTAL':<20} {self.total_calls:>6} {self.total_tokens:>10,} " - f"{'':>10} {'':>10}" + f" {'TOTAL':<20} {self.total_calls:>6} {self.total_tokens:>10,} {'':>10} {'':>10}" ) lines.append("") lines.append("=" * 60) diff --git a/src/contentforge/pipeline/orchestrator.py b/src/contentforge/pipeline/orchestrator.py index 4b6cc87..1e67b53 100644 --- a/src/contentforge/pipeline/orchestrator.py +++ b/src/contentforge/pipeline/orchestrator.py @@ -2,8 +2,6 @@ from __future__ import annotations -import asyncio -import json import logging from dataclasses import dataclass, field from typing import Any, Optional @@ -20,6 +18,7 @@ @dataclass class PipelineResult: """Final result from the complete pipeline.""" + status: str = "success" article: str = "" research: str = "" @@ -68,8 +67,14 @@ class PipelineOrchestrator: """ AGENT_ORDER = [ - "research", "outline", "writer", "seo", - "editor", "quality", "translator", "publisher", + "research", + "outline", + "writer", + "seo", + "editor", + "quality", + "translator", + "publisher", ] def __init__(self, config: ContentForgeConfig): @@ -142,9 +147,7 @@ async def run(self, topic: str, **kwargs) -> PipelineResult: # Step 5 + 6: Editor → Quality loop for iteration in range(self.config.pipeline.max_iterations): - logger.info( - f"[Pipeline] Step 5/8: Editor (iteration {iteration + 1})" - ) + logger.info(f"[Pipeline] Step 5/8: Editor (iteration {iteration + 1})") editor_agent = get_agent( "editor", config=self.config, client=client, tracker=self.tracker ) @@ -155,9 +158,7 @@ async def run(self, topic: str, **kwargs) -> PipelineResult: result.agent_results["editor"] = editor_result current_content = editor_result.content - logger.info( - f"[Pipeline] Step 6/8: Quality (iteration {iteration + 1})" - ) + logger.info(f"[Pipeline] Step 6/8: Quality (iteration {iteration + 1})") quality_agent = get_agent( "quality", 
config=self.config, client=client, tracker=self.tracker ) @@ -229,6 +230,5 @@ async def run(self, topic: str, **kwargs) -> PipelineResult: def get_agent_list(self) -> list[dict[str, str]]: """Return list of all available agents with descriptions.""" return [ - {"name": name, "description": cls.description} - for name, cls in AGENT_REGISTRY.items() + {"name": name, "description": cls.description} for name, cls in AGENT_REGISTRY.items() ] diff --git a/src/contentforge/utils/export.py b/src/contentforge/utils/export.py index b041355..5049f4c 100644 --- a/src/contentforge/utils/export.py +++ b/src/contentforge/utils/export.py @@ -41,12 +41,12 @@ def export_html(content: str, metadata: dict, output_path: str | Path) -> Path: # Simple Markdown to HTML conversion html_body = content - html_body = re.sub(r'^### (.+)$', r'