Skip to content

Commit efd442a

Browse files
committed
feat(api,ui): add AI model admin console with Ollama support (#162)
1 parent d7783e2 commit efd442a

27 files changed

Lines changed: 2704 additions & 37 deletions

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ RAG_HNSW_EF_CONSTRUCTION=64
6464
# - openai: GPT models (gpt-4o, gpt-4o-mini, etc.)
6565
# - google-gla: Gemini models via Google AI Studio (gemini-2-5-flash, gemini-3-flash, gemini-3-pro)
6666
# - google-vertex: Gemini models via Vertex AI (gemini-*) [requires GCP auth]
67+
# - ollama: local models via Ollama's OpenAI-compatible endpoint (no API key)
68+
# e.g. AGENT_DEFAULT_MODEL=ollama:llama3.1 (requires `ollama serve` + `ollama pull llama3.1`)
69+
# Runtime-editable: the /admin "AI Models" tab persists overrides in the
70+
# app_config table and applies them live — no .env edit or restart needed.
6771
AGENT_DEFAULT_MODEL=anthropic:claude-sonnet-4-5
6872
AGENT_FALLBACK_MODEL=openai:gpt-4o
6973

PRPs/PRP-18-ai-model-admin-console.md

Lines changed: 674 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Portfolio-grade end-to-end retail demand forecasting system.
1212
- **RAG Knowledge Base**: Postgres pgvector embeddings + evidence-grounded answers with citations
1313
- **Agentic Layer**: PydanticAI agents for autonomous experimentation and evidence-grounded Q&A with human-in-the-loop approval
1414
- **Data Seeder (The Forge)**: Reproducible synthetic data generator with realistic time-series patterns, scenario presets, and retail effects
15+
- **AI Models Console**: `/admin` → AI Models tab — swap the agent LLM (incl. fully-local Ollama), the RAG embedding model, and provider API keys at runtime; changes apply live with no restart
1516

1617
## Quick Start
1718

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""create app_config table
2+
3+
Revision ID: 378c112e4b32
4+
Revises: a8b9c0d1e234
5+
Create Date: 2026-05-18 12:38:56.878929
6+
7+
"""
8+
9+
from __future__ import annotations
10+
11+
from collections.abc import Sequence
12+
13+
import sqlalchemy as sa
14+
from alembic import op
15+
from sqlalchemy.dialects import postgresql
16+
17+
# revision identifiers, used by Alembic.
18+
revision: str = "378c112e4b32"
19+
down_revision: str | None = "a8b9c0d1e234"
20+
branch_labels: str | Sequence[str] | None = None
21+
depends_on: str | Sequence[str] | None = None
22+
23+
24+
def upgrade() -> None:
    """Apply migration: create the ``app_config`` key/value override table.

    The table has a string ``key`` (primary key, max 100 chars), a non-null
    JSONB ``value``, and a timezone-aware ``updated_at`` that the database
    defaults to ``now()``.
    """
    key_column = sa.Column("key", sa.String(length=100), nullable=False)
    value_column = sa.Column(
        "value",
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=False,
    )
    updated_at_column = sa.Column(
        "updated_at",
        sa.DateTime(timezone=True),
        server_default=sa.text("now()"),
        nullable=False,
    )
    primary_key = sa.PrimaryKeyConstraint("key")

    op.create_table(
        "app_config",
        key_column,
        value_column,
        updated_at_column,
        primary_key,
    )
42+
43+
44+
def downgrade() -> None:
    """Revert migration: drop the ``app_config`` table created by ``upgrade``."""
    op.drop_table("app_config")

app/core/config.py

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,58 @@
66
from pydantic import field_validator
77
from pydantic_settings import BaseSettings, SettingsConfigDict
88

9+
# Valid agent LLM provider prefixes for a "provider:model-name" identifier.
10+
# "ollama" runs the agent fully local via Ollama's OpenAI-compatible endpoint.
11+
VALID_MODEL_PROVIDERS: tuple[str, ...] = (
    "anthropic",
    "openai",
    "google-gla",
    "google-vertex",
    "ollama",
)


def validate_model_identifier(v: str) -> str:
    """Check that ``v`` is a well-formed ``provider:model-name`` identifier.

    The checks run in a fixed order: a ``:`` separator must be present, the
    text after the first ``:`` must contain something other than whitespace,
    and the text before it must be one of :data:`VALID_MODEL_PROVIDERS`.

    Args:
        v: Candidate identifier, e.g. ``anthropic:claude-sonnet-4-5`` or
            ``ollama:llama3.1``.

    Returns:
        ``v`` itself, unmodified, when every check passes.

    Raises:
        ValueError: If the separator is missing, the model name is empty or
            blank, or the provider prefix is not recognised.
    """
    format_hint = (
        "Expected format: 'provider:model-name' "
        "(e.g., 'anthropic:claude-sonnet-4-5', 'ollama:llama3.1')"
    )

    # partition() splits on the first ':' only, so model names may contain ':'.
    provider, separator, model_name = v.partition(":")

    if not separator:
        raise ValueError(f"Invalid model identifier '{v}'. {format_hint}")

    # An empty string also strips to "", so one test covers empty and blank.
    if not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{v}'. "
            "Model name after ':' cannot be empty or blank. "
            f"{format_hint}"
        )

    if provider in VALID_MODEL_PROVIDERS:
        return v

    raise ValueError(
        f"Unknown provider '{provider}'. Valid providers: {list(VALID_MODEL_PROVIDERS)}"
    )
60+
961

1062
class Settings(BaseSettings):
1163
"""Application settings loaded from environment variables."""
@@ -130,39 +182,9 @@ class Settings(BaseSettings):
130182

131183
@field_validator("agent_default_model", "agent_fallback_model")
132184
@classmethod
133-
def validate_model_identifier(cls, v: str) -> str:
134-
"""Validate model identifier format (provider:model-name).
135-
136-
Args:
137-
v: Model identifier string.
138-
139-
Returns:
140-
Validated model identifier.
141-
142-
Raises:
143-
ValueError: If format is invalid or model name is missing.
144-
"""
145-
if ":" not in v:
146-
raise ValueError(
147-
f"Invalid model identifier '{v}'. "
148-
"Expected format: 'provider:model-name' "
149-
"(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
150-
)
151-
provider, model_name = v.split(":", 1)
152-
153-
# Validate model name is non-empty and not just whitespace
154-
if not model_name or not model_name.strip():
155-
raise ValueError(
156-
f"Invalid model identifier '{v}'. "
157-
"Model name after ':' cannot be empty or blank. "
158-
"Expected format: 'provider:model-name' "
159-
"(e.g., 'anthropic:claude-sonnet-4-5', 'google-gla:gemini-3-flash')"
160-
)
161-
162-
valid_providers = ["anthropic", "openai", "google-gla", "google-vertex"]
163-
if provider not in valid_providers:
164-
raise ValueError(f"Unknown provider '{provider}'. Valid providers: {valid_providers}")
165-
return v
185+
def _validate_agent_model(cls, v: str) -> str:
186+
"""Validate agent model identifiers via :func:`validate_model_identifier`."""
187+
return validate_model_identifier(v)
166188

167189
@property
168190
def is_development(self) -> bool:

app/features/agents/agents/base.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,54 @@
99
from typing import Any
1010

1111
import structlog
12+
from pydantic_ai.models import Model
13+
from pydantic_ai.models.openai import OpenAIChatModel
14+
from pydantic_ai.providers.ollama import OllamaProvider
1215

1316
from app.core.config import get_settings
1417

1518
logger = structlog.get_logger()
1619

1720

21+
def build_agent_model(identifier: str) -> str | Model:
    """Build the PydanticAI ``model`` argument for an agent identifier.

    Any identifier whose provider prefix is not ``ollama`` is returned as the
    plain ``provider:model-name`` string. For ``ollama`` the function returns
    an :class:`OpenAIChatModel` bound to an :class:`OllamaProvider` whose base
    URL is the configured ``ollama_base_url`` with the ``/v1`` path that
    Ollama's OpenAI-compatible API is served under.

    Args:
        identifier: Model identifier (e.g. ``anthropic:claude-sonnet-4-5``,
            ``ollama:llama3.1``).

    Returns:
        The identifier string unchanged for non-Ollama providers, or a
        configured :class:`OpenAIChatModel` for the ``ollama`` provider.

    Raises:
        ValueError: If the provider is ``ollama`` but no model name follows
            the ``:`` (e.g. ``"ollama"`` or ``"ollama:"``).
    """
    # Parse once; partition() never raises, even when ':' is missing.
    provider, _, model_name = identifier.partition(":")
    if provider != "ollama":
        return identifier

    # Fail with a clear message instead of an IndexError / empty model name.
    if not model_name.strip():
        raise ValueError(
            f"Invalid model identifier '{identifier}'. "
            "Expected format: 'ollama:model-name' (e.g., 'ollama:llama3.1')"
        )

    settings = get_settings()
    # CRITICAL: Ollama's OpenAI-compatible base ends in /v1. Append it only
    # when missing so a base URL already configured with /v1 (with or without
    # a trailing slash) does not become .../v1/v1.
    base_url = settings.ollama_base_url.rstrip("/")
    if not base_url.endswith("/v1"):
        base_url += "/v1"
    return OpenAIChatModel(model_name, provider=OllamaProvider(base_url=base_url))
45+
46+
47+
def reset_agent_caches() -> None:
    """Clear every cached agent singleton so the next build re-reads config.

    Resets the experiment agent first, then the RAG assistant agent. Intended
    to be invoked after a runtime model or API-key change.
    """
    # Deferred imports: the agent modules import from this module, so pulling
    # them in at module load time would create an import cycle.
    from app.features.agents.agents.experiment import reset_experiment_agent
    from app.features.agents.agents.rag_assistant import reset_rag_assistant_agent

    for drop_cached_agent in (reset_experiment_agent, reset_rag_assistant_agent):
        drop_cached_agent()
58+
59+
1860
def get_model_identifier() -> str:
1961
"""Get the configured model identifier for agents.
2062
@@ -68,6 +110,11 @@ def validate_api_key_for_model(model: str) -> None:
68110
settings = get_settings()
69111
provider = model.split(":")[0]
70112

113+
if provider == "ollama":
114+
# Local Ollama runs without an API key — nothing to validate or export.
115+
logger.debug("agents.api_key_validated", provider=provider, model=model)
116+
return
117+
71118
if provider == "anthropic":
72119
if not settings.anthropic_api_key:
73120
raise ValueError(

app/features/agents/agents/experiment.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
SAFETY_INSTRUCTIONS,
2020
SYSTEM_PROMPT_HEADER,
2121
TOOL_USAGE_INSTRUCTIONS,
22+
build_agent_model,
2223
get_model_identifier,
2324
get_model_settings,
2425
requires_approval,
@@ -74,8 +75,9 @@ def create_experiment_agent() -> Agent[AgentDeps, ExperimentReport]:
7475
Returns:
7576
Configured Agent instance with tools registered.
7677
"""
77-
model = get_model_identifier()
78-
validate_api_key_for_model(model) # Fail-fast validation
78+
identifier = get_model_identifier()
79+
validate_api_key_for_model(identifier) # Fail-fast validation
80+
model = build_agent_model(identifier) # str for cloud, Model object for ollama
7981

8082
agent: Agent[AgentDeps, ExperimentReport] = Agent(
8183
model=model,
@@ -351,3 +353,13 @@ def get_experiment_agent() -> Agent[AgentDeps, ExperimentReport]:
351353
if _experiment_agent is None:
352354
_experiment_agent = create_experiment_agent()
353355
return _experiment_agent
356+
357+
358+
def reset_experiment_agent() -> None:
    """Forget the cached experiment agent.

    Sets the module-level ``_experiment_agent`` back to ``None``; the next
    ``get_experiment_agent()`` call then creates a fresh agent, so a runtime
    model or key change applies without restarting the process.
    """
    global _experiment_agent
    _experiment_agent = None

app/features/agents/agents/rag_assistant.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from app.features.agents.agents.base import (
1919
SAFETY_INSTRUCTIONS,
2020
SYSTEM_PROMPT_HEADER,
21+
build_agent_model,
2122
get_model_identifier,
2223
get_model_settings,
2324
validate_api_key_for_model,
@@ -75,8 +76,9 @@ def create_rag_assistant_agent() -> Agent[AgentDeps, RAGAnswer]:
7576
Returns:
7677
Configured Agent instance with tools registered.
7778
"""
78-
model = get_model_identifier()
79-
validate_api_key_for_model(model) # Fail-fast validation
79+
identifier = get_model_identifier()
80+
validate_api_key_for_model(identifier) # Fail-fast validation
81+
model = build_agent_model(identifier) # str for cloud, Model object for ollama
8082

8183
agent: Agent[AgentDeps, RAGAnswer] = Agent(
8284
model=model,
@@ -209,3 +211,13 @@ def get_rag_assistant_agent() -> Agent[AgentDeps, RAGAnswer]:
209211
if _rag_assistant_agent is None:
210212
_rag_assistant_agent = create_rag_assistant_agent()
211213
return _rag_assistant_agent
214+
215+
216+
def reset_rag_assistant_agent() -> None:
    """Forget the cached RAG assistant agent.

    Sets the module-level ``_rag_assistant_agent`` back to ``None``; the next
    ``get_rag_assistant_agent()`` call then creates a fresh agent, so a
    runtime model or key change applies without restarting the process.
    """
    global _rag_assistant_agent
    _rag_assistant_agent = None
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Unit tests for agent base helpers (Ollama-aware model factory)."""
2+
3+
from collections.abc import Iterator
4+
5+
import pytest
6+
from pydantic_ai.models.openai import OpenAIChatModel
7+
8+
from app.core.config import get_settings
9+
from app.features.agents.agents.base import build_agent_model, validate_api_key_for_model
10+
11+
12+
@pytest.fixture(autouse=True)
def _reset_settings() -> Iterator[None]:
    """Clear the ``get_settings`` cache before and after every test.

    Tests in this module mutate the cached settings object; clearing on both
    sides of the ``yield`` keeps those mutations from leaking across tests.
    """
    clear_cache = get_settings.cache_clear
    clear_cache()
    yield
    clear_cache()
18+
19+
20+
def test_build_agent_model_cloud_returns_string():
    """An anthropic identifier passes through as the same plain string."""
    identifier = "anthropic:claude-sonnet-4-5"
    assert build_agent_model(identifier) == identifier
23+
24+
25+
def test_build_agent_model_openai_returns_string():
    """An openai identifier passes through as the same plain string."""
    identifier = "openai:gpt-4o"
    assert build_agent_model(identifier) == identifier
28+
29+
30+
def test_build_agent_model_ollama_returns_model_object():
    """An ollama identifier is turned into an ``OpenAIChatModel`` instance."""
    built = build_agent_model("ollama:llama3.1")
    assert isinstance(built, OpenAIChatModel)
34+
35+
36+
def test_validate_api_key_for_model_ollama_skips_key_check():
    """With every cloud API key blanked, validating an ollama model must not raise."""
    settings = get_settings()
    # Blank the keys on the cached settings object in the original order.
    for key_field in ("anthropic_api_key", "openai_api_key", "google_api_key"):
        setattr(settings, key_field, "")
    # No assertion needed: the call passing without an exception is the test.
    validate_api_key_for_model("ollama:llama3.1")

app/features/config/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""Runtime-editable application configuration slice.
2+
3+
Exposes the ``app_config`` key/value override store, the ``/config`` REST
4+
surface, and the service that applies persisted overrides onto the live
5+
``Settings`` singleton (agent LLM model, RAG embedding model, provider keys).
6+
"""

0 commit comments

Comments
 (0)