diff --git a/.jshintrc b/.jshintrc
new file mode 100644
index 000000000..7a6412586
--- /dev/null
+++ b/.jshintrc
@@ -0,0 +1,8 @@
+{
+ "esversion": 11,
+ "undef": true,
+ "globals": {
+ "$": false,
+ "document": false,
+ }
+}
\ No newline at end of file
diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py
new file mode 100644
index 000000000..38d9405e0
--- /dev/null
+++ b/backend/workers/llm_manager.py
@@ -0,0 +1,102 @@
+"""
+Manage LLM models
+"""
+from backend.lib.worker import BasicWorker
+from common.lib.llm.llm_client import LLMProviderClient
+
+class LLMProviderManager(BasicWorker):
+    """
+    Manages LLM models
+
+    Periodically refreshes the list of available models from an LLM provider.
+    Can also pull or delete models on demand when queued with a specific task.
+
+    Job details:
+    - task: "refresh" (default), "pull", or "delete"
+    - provider: the URL of the LLM provider, as configured in the
+      llm.providers setting. if not given, run on all providers
+
+    Job remote_id:
+    - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand)
+    - For pull/delete: the model name to pull or delete
+    """
+    type = "manage-llm"
+    max_workers = 1
+    client = None
+
+    @classmethod
+    def ensure_job(cls, config=None):
+        """
+        Ensure the daily refresh job is always scheduled
+
+        :param config:  Not used by this worker
+        :return:  Job parameters for the worker
+        """
+        return {"remote_id": "manage-llm-refresh", "interval": 86400}
+
+    def work(self):
+        """
+        Run the queued task on all matching LLM providers
+
+        For "pull" and "delete" jobs the model named by the job's remote ID is
+        pulled or deleted first. After a successful pull/delete, or when asked
+        to refresh, the provider's model list is read again. The combined list
+        is stored in `llm.available_models`, and models that are no longer
+        available are dropped from `llm.enabled_models`.
+        """
+        details = self.job.details or {}
+        task = details.get("task", "refresh")
+        provider = details.get("provider", "")
+        # the remote ID only names a model for pull/delete jobs; for refresh
+        # jobs it is merely the job identifier
+        model_name = self.job.data["remote_id"] if task in ("pull", "delete") else None
+        available_models = None
+
+        for provider_config in self.config.get("llm.providers", []):
+            if provider and provider != provider_config["url"]:
+                continue
+
+            try:
+                client = LLMProviderClient.get_client(self.config, provider_config)
+            except ValueError:
+                self.log.debug(f"{self.__class__.__name__}: invalid provider type: {provider_config['type']}, skipping")
+                continue
+
+            # note that technically it is possible to pull/delete a model on
+            # multiple providers at once (if a model_name is defined but no
+            # provider). may not be a problem? may be useful one day?
+
+            # None means the task was not attempted on this client (unknown
+            # task, or the client lacks the matching method)
+            success = None
+            if task == "pull" and hasattr(client, "pull_model"):
+                success = client.pull_model(model_name)
+
+            elif task == "delete" and hasattr(client, "delete_model"):
+                success = client.delete_model(model_name)
+
+            if success or task == "refresh":
+                # refresh models after pulling/deleting, or when asked to
+                if available_models is None:
+                    available_models = {}
+
+                for model in client.list_models():
+                    model = client.build_model_entry(model)
+                    available_models[model["id"]] = model
+
+                self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})")
+
+            elif success is None:
+                self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client")
+            else:
+                self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}")
+
+        if available_models is not None:
+            # NOTE(review): available_models only holds models of the providers
+            # handled in this run, so a run limited to one provider replaces
+            # the whole setting with that provider's models - confirm intended
+            enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", []))
+            self.config.set("llm.available_models", available_models)
+            self.config.set("llm.enabled_models", list(enabled_and_available))
+
+        self.job.finish()
diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py
index 8a56c213f..7ab11645d 100644
--- a/backend/workers/refresh_items.py
+++ b/backend/workers/refresh_items.py
@@ -1,78 +1,26 @@
"""
Refresh items
"""
-import json
-
-import requests
-
from backend.lib.worker import BasicWorker
class ItemUpdater(BasicWorker):
"""
Refresh 4CAT items
- Refreshes settings that are dependent on external factors
+ Refreshes settings that are dependent on external factors.
+ LLM model refreshing is handled by the LLMProviderManager worker.
"""
type = "refresh-items"
max_workers = 1
- @classmethod
- def ensure_job(cls, config=None):
- """
- Ensure that the refresher is always running
-
- This is used to ensure that the refresher is always running, and if it is
- not, it will be started by the WorkerManager.
-
- :return: Job parameters for the worker
- """
- return {"remote_id": "refresh-items", "interval": 60}
+ # ensure_job is intentionally disabled: this worker currently does nothing
+ # and would only create unnecessary job queue churn. Re-enable when work()
+ # has actual tasks to perform.
+ # @classmethod
+ # def ensure_job(cls, config=None):
+ # return {"remote_id": "refresh-items", "interval": 60}
def work(self):
- # Refresh items
- self.refresh_settings()
-
+ # Placeholder – no tasks implemented yet.
self.job.finish()
-
- def refresh_settings(self):
- """
- Refresh settings
- """
- # LLM server settings
- llm_provider = self.config.get("llm.provider_type", "none").lower()
- llm_server = self.config.get("llm.server", "")
-
- # For now we only support the Ollama API
- if llm_provider == "ollama" and llm_server:
- headers = {"Content-Type": "application/json"}
- llm_api_key = self.config.get("llm.api_key", "")
- llm_auth_type = self.config.get("llm.auth_type", "")
- if llm_api_key and llm_auth_type:
- headers[llm_auth_type] = llm_api_key
-
- available_models = {}
- try:
- response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10)
- if response.status_code == 200:
- settings = response.json()
- for model in settings.get("models", []):
- model = model["name"]
- try:
- model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json()
- available_models[model] = {
- "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)",
- "model_card": f"https://ollama.com/library/{model}",
- "provider": "local"
- }
-
- except (requests.RequestException, json.JSONDecodeError, KeyError) as e:
- self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})")
-
- self.config.set("llm.available_models", available_models)
- self.log.debug("Refreshed LLM server settings cache")
- else:
- self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}")
-
- except requests.RequestException as e:
- self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}")
\ No newline at end of file
diff --git a/common/assets/llms.json b/common/assets/llms.json
index 835dbaa09..c17351488 100644
--- a/common/assets/llms.json
+++ b/common/assets/llms.json
@@ -1,128 +1,140 @@
-{
- "none": {
- "name": "",
- "model_card": "",
- "provider": "",
- "default": true
- },
- "custom": {
- "name": "[custom]",
- "model_card": "",
- "provider": ""
- },
- "gpt-5.4": {
- "name": "[OpenAI] GPT-5.4",
- "model_card": "https://platform.openai.com/docs/models/gpt-5.4",
- "provider": "openai"
- },
- "gpt-5-mini": {
- "name": "[OpenAI] GPT-5 mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-5-mini",
- "provider": "openai"
- },
- "gpt-5-nano": {
- "name": "[OpenAI] GPT-5 nano",
- "model_card": "https://platform.openai.com/docs/models/gpt-5-nano",
- "provider": "openai"
- },
- "gpt-5.4-pro": {
- "name": "[OpenAI] GPT-5.4 Pro",
- "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro",
- "provider": "openai"
- },
- "gpt-4.1-mini": {
- "name": "[OpenAI] GPT-4.1 mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini",
- "provider": "openai"
- },
- "gpt-4.1-nano": {
- "name": "[OpenAI] GPT-4.1 nano",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano",
- "provider": "openai"
- },
- "gpt-4.1": {
- "name": "[OpenAI] GPT-4.1",
- "model_card": "https://platform.openai.com/docs/models/gpt-4.1",
- "provider": "openai"
- },
- "gpt-4o-mini": {
- "name": "[OpenAI] GPT-4o mini",
- "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini",
- "provider": "openai"
- },
- "gpt-4o": {
- "name": "[OpenAI] GPT-4o",
- "model_card": "https://platform.openai.com/docs/models/gpt-4o",
- "provider": "openai"
- },
- "gemini-3.1-pro-preview": {
- "name": "[Google] Gemini 3.1 Pro",
- "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro",
- "provider": "google"
- },
- "gemini-3-flash-preview": {
- "name": "[Google] Gemini 3 Flash",
- "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash",
- "provider": "google"
- },
- "gemini-3.1-flash-lite-preview": {
- "name": "[Google] Gemini 3.1 Flash Lite",
- "provider": "google",
- "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite"
- },
- "claude-opus-4-6": {
- "name": "[Anthropic] Claude Opus 4.6 (latest)",
- "model_card": "https://www.anthropic.com/claude/opus",
- "provider": "anthropic"
- },
- "claude-sonnet-4-6": {
- "name": "[Anthropic] Claude Sonnet 4.6 (latest)",
- "model_card": "https://www.anthropic.com/claude/sonnet",
- "provider": "anthropic"
- },
- "claude-4-5-haiku": {
- "name": "[Anthropic] Claude 4.5 Haiku (latest)",
- "model_card": "https://www.anthropic.com/claude/haiku",
- "provider": "anthropic"
- },
- "magistral-small-2509": {
- "name": "[Mistral] Magistral Small 1.2 (25.09)",
- "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09",
- "provider": "mistral"
- },
- "magistral-medium-2509": {
- "name": "[Mistral] Magistral Medium 1.2 (25.09)",
- "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09",
- "provider": "mistral"
- },
- "mistral-small-2506": {
- "name": "[Mistral] Mistral Small 3.2 (25.06)",
- "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06",
- "provider": "mistral"
- },
- "mistral-medium-2508": {
- "name": "[Mistral] Mistral Medium 3.1 (25.08)",
- "model_card": "mistral-medium-2508",
- "provider": "mistral"
- },
- "mistral-large-2512": {
- "name": "[Mistral] Mistral Large 3 (25.12)",
- "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12",
- "provider": "mistral"
- },
- "open-mistral-nemo-2407": {
- "name": "[Mistral] Mistral Nemo 12B",
- "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07",
- "provider": "mistral"
- },
- "deepseek-chat": {
- "name": "[DeepSeek] DeepSeek latest (non-reasoning)",
- "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
- "provider": "deepseek"
- },
- "deepseek-reasoner": {
- "name": "[DeepSeek] DeepSeek latest (reasoning)",
- "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
- "provider": "deepseek"
- }
-}
\ No newline at end of file
+[
+ {
+ "model": "gpt-5.4",
+ "name": "[OpenAI] GPT-5.4",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5.4",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5-mini",
+ "name": "[OpenAI] GPT-5 mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5-nano",
+ "name": "[OpenAI] GPT-5 nano",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5-nano",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-5.4-pro",
+ "name": "[OpenAI] GPT-5.4 Pro",
+ "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1-mini",
+ "name": "[OpenAI] GPT-4.1 mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1-nano",
+ "name": "[OpenAI] GPT-4.1 nano",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4.1",
+ "name": "[OpenAI] GPT-4.1",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4.1",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4o-mini",
+ "name": "[OpenAI] GPT-4o mini",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini",
+ "provider": "openai"
+ },
+ {
+ "model": "gpt-4o",
+ "name": "[OpenAI] GPT-4o",
+ "model_card": "https://platform.openai.com/docs/models/gpt-4o",
+ "provider": "openai"
+ },
+ {
+ "model": "gemini-3.1-pro-preview",
+ "name": "[Google] Gemini 3.1 Pro",
+ "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro",
+ "provider": "google"
+ },
+ {
+ "model": "gemini-3-flash-preview",
+ "name": "[Google] Gemini 3 Flash",
+ "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash",
+ "provider": "google"
+ },
+ {
+ "model": "gemini-3.1-flash-lite-preview",
+ "name": "[Google] Gemini 3.1 Flash Lite",
+ "provider": "google",
+ "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite"
+ },
+ {
+ "model": "claude-opus-4-6",
+ "name": "[Anthropic] Claude Opus 4.6 (latest)",
+ "model_card": "https://www.anthropic.com/claude/opus",
+ "provider": "anthropic"
+ },
+ {
+ "model": "claude-sonnet-4-6",
+ "name": "[Anthropic] Claude Sonnet 4.6 (latest)",
+ "model_card": "https://www.anthropic.com/claude/sonnet",
+ "provider": "anthropic"
+ },
+ {
+ "model": "claude-4-5-haiku",
+ "name": "[Anthropic] Claude 4.5 Haiku (latest)",
+ "model_card": "https://www.anthropic.com/claude/haiku",
+ "provider": "anthropic"
+ },
+ {
+ "model": "magistral-small-2509",
+ "name": "[Mistral] Magistral Small 1.2 (25.09)",
+ "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09",
+ "provider": "mistral"
+ },
+ {
+ "model": "magistral-medium-2509",
+ "name": "[Mistral] Magistral Medium 1.2 (25.09)",
+ "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-small-2506",
+ "name": "[Mistral] Mistral Small 3.2 (25.06)",
+ "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-medium-2508",
+ "name": "[Mistral] Mistral Medium 3.1 (25.08)",
+ "model_card": "mistral-medium-2508",
+ "provider": "mistral"
+ },
+ {
+ "model": "mistral-large-2512",
+ "name": "[Mistral] Mistral Large 3 (25.12)",
+ "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12",
+ "provider": "mistral"
+ },
+ {
+ "model": "open-mistral-nemo-2407",
+ "name": "[Mistral] Mistral Nemo 12B",
+ "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07",
+ "provider": "mistral"
+ },
+ {
+ "model": "deepseek-chat",
+ "name": "[DeepSeek] DeepSeek latest (non-reasoning)",
+ "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
+ "provider": "deepseek"
+ },
+ {
+ "model": "deepseek-reasoner",
+ "name": "[DeepSeek] DeepSeek latest (reasoning)",
+ "model_card": "https://api-docs.deepseek.com/quick_start/pricing",
+ "provider": "deepseek"
+ }
+]
\ No newline at end of file
diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py
index aef363e04..78cd1b7dc 100644
--- a/common/lib/config_definition.py
+++ b/common/lib/config_definition.py
@@ -575,53 +575,77 @@
# allows 4CAT LLM processors to connect to a local or remote LLM server
"llm.intro": {
"type": UserInput.OPTION_INFO,
- "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect "
- "to local or remote LLM servers. You can also set up your own LLM server using open source software such as "
- "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users."
- },
- "llm.host_name": {
- "type": UserInput.OPTION_TEXT,
- "default": "4CAT LLM Server",
- "help": "Name of LLM Server in UI",
- "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).",
- "global": True
- },
- "llm.provider_type": {
- "type": UserInput.OPTION_CHOICE,
- "help": "LLM Provider Type",
- "default": "none",
- "options": {
- "ollama": "Ollama",
- "none": "None",
- },
- "global": True,
- },
- "llm.server": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LLM Server URL",
- "tooltip": "The URL of the LLM server, e.g. http://localhost:5000",
- "global": True
- },
- "llm.auth_type": {
- "type": UserInput.OPTION_TEXT,
- "help": "LLM Server Authentication Type",
- "default": "",
- "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.",
+ "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as "
+ "connect to local or remote LLM servers. You can also set up your own LLM server using open source "
+ "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for "
+ "your users. After configuring providers you can enable and disable available models via the 'LLMs & "
+ "Providers' page in the Control Panel."
+ },
+ "llm.providers": {
+ "type": UserInput.OPTION_MULTI_OPTION,
+ "default": [
+ {
+ "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)",
+ "type": "api",
+ "url": "",
+ "auth_header": "",
+ "auth_key": ""
+ }
+ ],
"global": True,
- },
- "llm.api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LLM Server API Key",
- "tooltip": "The API key to access the LLM server, if required.",
- "global": True
+ "help": "LLM providers",
+ "options": {
+ "name": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "Name of LLM Server in UI",
+ "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).",
+ },
+ "type": {
+ "type": UserInput.OPTION_CHOICE,
+ "help": "LLM Provider Type",
+ "default": "none",
+ "options": {
+ "ollama": "Ollama",
+ "litellm": "LiteLLM",
+ "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)",
+ "api": "Third-party models from OpenAI, Anthropic, Mistral, etc",
+ "none": "None",
+ },
+ },
+ "url": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "LLM Server URL",
+ "tooltip": "The URL of the LLM server, e.g. http://localhost:5000",
+ },
+ "auth_header": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Authentication Header",
+ "default": "",
+ "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.",
+ },
+ "auth_key": {
+ "type": UserInput.OPTION_TEXT,
+ "default": "",
+ "help": "Authentication Key",
+ "tooltip": "The API key to access the LLM server, if required.",
+ },
+ }
},
"llm.available_models": {
"type": UserInput.OPTION_TEXT_JSON,
"default": {},
"help": "Available LLM models",
- "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.",
+ "tooltip": "A JSON dictionary of available LLM models on the server. Refreshed daily by the OllamaManager worker.",
+ "indirect": True,
+ "global": True
+ },
+ "llm.enabled_models": {
+ "type": UserInput.OPTION_TEXT_JSON,
+ "default": [],
+ "help": "Enabled LLM models",
+ "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.",
"indirect": True,
"global": True
},
@@ -739,5 +763,5 @@
"proxies": "Proxied HTTP requests",
"image-visuals": "Image visualization",
"extensions": "Extensions",
- "llm": "LLM Server Settings"
+ "llm": "LLM Providers"
}
diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py
similarity index 57%
rename from common/lib/llm.py
rename to common/lib/llm/adapter.py
index 0901194d1..9fe80eb49 100644
--- a/common/lib/llm.py
+++ b/common/lib/llm/adapter.py
@@ -1,9 +1,10 @@
import json
import base64
import mimetypes
-import requests
+
from pathlib import Path
from typing import List, Optional, Union
+
from pydantic import SecretStr
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.language_models.chat_models import BaseChatModel
@@ -18,111 +19,86 @@
class LLMAdapter:
def __init__(
self,
- provider: str,
- model: str,
+ config,
+ model,
api_key: Optional[str] = None,
- base_url: Optional[str] = None,
temperature: float = 0.1,
max_tokens: int = 1000,
client_kwargs: Optional[dict] = None,
):
"""
- provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek'
- model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.)
- api_key: API key if required (OpenAI, Claude, Google, Mistral)
- base_url: for local models or Mistral custom endpoints
- temperature: temperature hyperparameter,
- max_tokens: how many output tokens may be used
- client_kwargs: additional client parameters
+ Instantiate an adapter to interface with an LLM model
+
+ :param config: 4CAT config reader
+ :param model: Model metadata (as in `llm.available_models` 4CAT setting)
+ :param api_key: API key, if needed
+ :param temperature: Temperature hyperparameter
+ :param max_tokens: Max tokens to generate
+ :param client_kwargs: Optional parameters for the LLM adapter class
"""
- self.provider = provider.lower()
+ known_providers = {p['url']: p for p in config.get("llm.providers")}
+
self.model = model
+ self.provider = known_providers.get(model['provider'])
self.api_key = api_key
- self.base_url = base_url
self.temperature = temperature
self.structured_output = False
self.parser = None
self.max_tokens = max_tokens
self.client_kwargs = dict(client_kwargs) if client_kwargs else {}
+
self.llm: BaseChatModel = self._load_llm()
def _load_llm(self) -> BaseChatModel:
- if self.provider == "openai":
- kwargs = {}
- if "o3" not in self.model:
- kwargs["temperature"] = self.temperature # temperature not supported for all models
- return ChatOpenAI(
- model=self.model,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url or "https://api.openai.com/v1",
- max_tokens=self.max_tokens,
- **kwargs
- )
- elif self.provider == "google":
- return ChatGoogleGenerativeAI(
- model=self.model,
- temperature=self.temperature,
- google_api_key=self.api_key,
- max_tokens=self.max_tokens
- )
- elif self.provider == "anthropic":
- return ChatAnthropic(
- model_name=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- max_tokens=self.max_tokens,
- timeout=100,
- stop=None
- )
- elif self.provider == "mistral":
- return ChatMistralAI(
- model_name=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url, # Optional override
- max_tokens=self.max_tokens,
- )
- elif self.provider == "deepseek":
- return ChatDeepSeek(
- model=self.model,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url,
- max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192,
- )
- elif self.provider == "ollama":
- ollama_adapter = ChatOllama(
- model=self.model,
- temperature=self.temperature,
- base_url=self.base_url or "http://localhost:11434",
- max_tokens=self.max_tokens,
- client_kwargs=self.client_kwargs
- )
- self.model = ollama_adapter.model
- return ollama_adapter
- elif self.provider in {"vllm", "lmstudio"}:
- # OpenAI-compatible local servers
- if self.provider == "lmstudio" and not self.api_key:
- self.api_key = "lm-studio"
-
- # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately.
- if self.provider == "vllm" and self.model=="vllm_model":
- model_name = self.get_vllm_model_name(self.base_url, self.api_key)
- self.model = model_name
- else:
- model_name = self.model if self.model else "lmstudio-model"
-
- llm = ChatOpenAI(
- model=model_name,
- temperature=self.temperature,
- api_key=SecretStr(self.api_key),
- base_url=self.base_url,
- max_tokens=self.max_tokens,
- )
- self.model = llm.model_name
- return llm
+ chat_params = {
+ "model": self.model["local_id"],
+ "api_key": SecretStr(self.api_key),
+ "base_url": self.provider["url"],
+ "max_tokens": self.max_tokens,
+ "temperature": self.temperature,
+ }
+
+ if self.provider["type"] == "openai":
+ if "o3" in self.model:
+ del chat_params["temperature"]
+ adapter_class = ChatOpenAI
+
+ elif self.provider["type"] == "google":
+ adapter_class = ChatGoogleGenerativeAI
+
+ elif self.provider["type"] == "anthropic":
+ chat_params.update({"timeout": 100, "stop": None})
+ adapter_class = ChatAnthropic
+
+ elif self.provider["type"] == "mistral":
+ adapter_class = ChatMistralAI
+
+ elif self.provider["type"] == "deepseek":
+ chat_params["max_tokens"] = min(self.max_tokens, 8192)
+ adapter_class = ChatDeepSeek
+
+ elif self.provider["type"] == "ollama":
+ adapter_class = ChatOllama
+ chat_params.update({"client_kwargs": self.client_kwargs})
+
+ elif self.provider["type"] in {"litellm", "openai-like"}:
+ url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url']
+ url += "v1/" if not url.endswith("v1/") else ""
+
+ chat_params.update({"base_url": url})
+ if self.provider["auth_header"]:
+ chat_params.update({
+ "default_headers": {
+ self.provider["auth_header"]: self.provider["auth_key"]
+ }
+ })
+
+ adapter_class = ChatOpenAI
+
else:
- raise ValueError(f"Unsupported LLM provider: {self.provider}")
+ raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}")
+
+ return adapter_class(**chat_params)
def generate_text(
self,
@@ -161,7 +137,8 @@ def generate_text(
lc_messages = messages
kwargs = {"temperature": temperature}
- if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model:
+ if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[
+ "local_id"]:
kwargs = {}
try:
@@ -172,10 +149,10 @@ def generate_text(
return response
def create_multimodal_content(
- self,
- text: str,
- media_urls: Optional[List[str]] = None,
- media_files: Optional[List[Union[str, Path]]] = None,
+ self,
+ text: str,
+ media_urls: Optional[List[str]] = None,
+ media_files: Optional[List[Union[str, Path]]] = None,
) -> List[dict]:
"""
Create multimodal content structure for LangChain messages with media URLs
@@ -224,11 +201,11 @@ def create_multimodal_content(
return content
def _format_media_block(
- self,
- url: Optional[str] = None,
- b64_data: Optional[str] = None,
- mime_type: str = "image/jpeg",
- media_category: str = "image",
+ self,
+ url: Optional[str] = None,
+ b64_data: Optional[str] = None,
+ mime_type: str = "image/jpeg",
+ media_category: str = "image",
) -> dict:
"""
Format a single media block for the appropriate provider.
@@ -304,31 +281,6 @@ def set_structure(self, json_schema):
self.llm = self.llm.with_structured_output(json_schema)
self.structured_output = True
- @staticmethod
- def get_model_options(config) -> dict:
- """
- Returns model choice options for UserInput
- """
- models = LLMAdapter.get_models(config)
- if not models:
- return {}
- options = {model_id: model_values["name"] for model_id, model_values in models.items()}
- return options
-
- @staticmethod
- def get_model_providers(config) -> dict:
- """
- Returns available model providers through APIs
- """
- models = LLMAdapter.get_models(config)
- if not models:
- return {}
- providers = list(set([model_values.get("provider", "") for model_values in models.values()]))
- if not providers:
- return {}
- options = {provider: provider.capitalize() for provider in providers if provider}
- return options
-
@staticmethod
def get_models(config) -> dict:
"""
@@ -337,36 +289,6 @@ def get_models(config) -> dict:
:returns dict, A dict with model IDs as keys and details as values
"""
- with (
- config.get("PATH_ROOT")
- .joinpath("common/assets/llms.json")
- .open() as available_models
- ):
- available_models = json.loads(available_models.read())
- return available_models
-
-
- @staticmethod
- def get_vllm_model_name(base_url: str, api_key: str = None) -> str:
- """
- Query vLLM server to get the name of the served model.
- """
-
- try:
- # vLLM exposes available models at /v1/models endpoint
- models_url = f"{base_url.rstrip('/')}/models"
- headers = {}
- if api_key:
- headers["Authorization"] = f"Bearer {api_key}"
-
- response = requests.get(models_url, headers=headers, timeout=10)
- response.raise_for_status()
- models_data = response.json()
-
- # Get the first available model
- if models_data.get("data") and len(models_data["data"]) > 0:
- return models_data["data"][0]["id"]
- else:
- raise ValueError("No models found on vLLM server")
- except Exception as e:
- raise ValueError(f"Could not retrieve model name from vLLM server: {e}")
+ available_models = config.get("llm.available_models", {})
+ enabled_models = config.get("llm.enabled_models", {})
+ return {k: v for k, v in available_models.items() if k in enabled_models}
diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py
new file mode 100644
index 000000000..4287ca861
--- /dev/null
+++ b/common/lib/llm/clients/__init__.py
@@ -0,0 +1 @@
+#
\ No newline at end of file
diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py
new file mode 100644
index 000000000..cf65497ff
--- /dev/null
+++ b/common/lib/llm/clients/litellm_client.py
@@ -0,0 +1,74 @@
+"""
+Client for communicating with a LiteLLM server.
+
+Builds on the shared LLMProviderClient and supplies the LiteLLM-specific
+parts: the model info endpoint and response keys, capability parsing, and
+display-name formatting.
+
+This class is primarily intended for interfacing with LiteLLM, but since
+LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface
+with the OpenAI API as well.
+"""
+from common.lib.llm.llm_client import LLMProviderClient
+
+class LiteLLMClient(LLMProviderClient):
+    """
+    LLM provider client for LiteLLM servers
+    """
+    type = "litellm"
+
+    # NOTE(review): presumably read by LLMProviderClient to fetch and parse the model list - confirm
+    _models_info_path = "/model/info"
+    _models_info_key = "data"
+    _model_id_key = "model_name"
+
+    def parse_supported_media_types(self, meta: dict) -> list[str]:
+        """
+        Derive the media types a model supports from its LiteLLM metadata.
+
+        :param meta: Model info dict for one model, or ``None``.
+        :returns: Supported media types, in the fixed order ``text``,
+          ``image``, ``audio``. Returns ``[]`` when ``meta`` is ``None`` or
+          has no ``model_info``.
+        """
+        if meta is None or not meta.get("model_info"):
+            return []
+
+        model_info = meta["model_info"]
+
+        # a list rather than a set, so the order is the same on every call
+        media_types = ["text"]  # far as I can tell, text is always supported
+        if model_info.get("supports_vision"):
+            media_types.append("image")
+
+        if model_info.get("supports_audio_input"):
+            media_types.append("audio")
+
+        # no way to tell if model supports embeddings input as far as I can see...
+
+        return media_types
+
+    def format_display_name(self, meta: dict) -> str:
+        """
+        Build a human-readable display name for a model.
+
+        Uses, in order of preference: the model from ``litellm_params`` with
+        its leading ``provider/`` segment removed, ``model_name``, and finally
+        the result of ``get_global_model_id()``.
+
+        :param meta: Model info dict for one model.
+        :returns: Human-readable display name string.
+        """
+        model_name = self.get_global_model_id(meta)
+
+        if meta.get("model_name"):
+            model_name = meta["model_name"]
+
+        routed_model = (meta.get("litellm_params") or {}).get("model")
+        if routed_model:
+            # drop the leading "provider/" segment, but keep names without a
+            # slash intact instead of reducing them to an empty string
+            _, separator, remainder = routed_model.partition("/")
+            model_name = remainder if separator and remainder else routed_model
+
+        return model_name
\ No newline at end of file
diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py
new file mode 100644
index 000000000..e21297448
--- /dev/null
+++ b/common/lib/llm/clients/ollama_client.py
@@ -0,0 +1,182 @@
+"""
+Centralized HTTP client for communicating with an Ollama server.
+
+This class owns all direct HTTP calls to Ollama's REST API and provides shared static
+helpers for capability parsing, display-name formatting, and building canonical
+llm.available_models entries. It is a plain helper with no 4CAT base-class dependency.
+"""
+import requests
+
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class OllamaClient(LLMProviderClient):
+ type = "ollama"
+
+ _models_info_path = "/api/tags"
+ _models_info_key = "models"
+ _model_id_key = "model"
+
+ def list_models(self) -> list[dict]:
+     """
+     List all models available.
+
+     For Ollama, get some additional model info via an extra API request.
+
+     :return list[dict]: Available models; models whose extra info cannot be fetched are skipped.
+     """
+     models = super().list_models()
+     result = []
+     for model in models:
+         try:
+             response = self._session.post(
+                 f"{self.base_url}/api/show",
+                 json={"model": model[self._model_id_key]},
+                 headers=self._headers, timeout=self.timeout,
+             )
+             response.raise_for_status()  # without this, HTTP errors are never raised
+             result.append({**model, "metadata": response.json()})
+         except (requests.RequestException, KeyError, ValueError) as e:
+             if self.log:  # the logger is optional
+                 self.log.warning(f"{self.__class__.__name__}: failed to fetch additional model info for model {model.get(self._model_id_key)}: {e}")
+
+     return result
+
+
+
+ def parse_supported_media_types(self, meta: dict) -> list[str]:
+     """Derive the media types a model supports from its Ollama metadata.
+
+     **Primary path**: reads ``meta["metadata"]["capabilities"]``:
+     - ``"completion"`` → ``"text"``
+     - ``"vision"`` → ``"image"``
+     - ``"embedding"`` → ``"embedding"``
+
+     **Fallback path** (used when the capabilities do not yield ``"image"``):
+     inspects GGUF ``model_info`` / ``projector_info`` / ``details`` for vision
+     signals and adds ``"image"`` if any are found.
+
+     :param meta: Model entry with the ``/api/show`` response under ``"metadata"``, or ``None``.
+     :returns: Ordered list of supported media type strings.
+     Returns ``[]`` when the metadata is missing (unknown — callers
+     should include the model, not block it).
+     """
+     if meta is None or not meta.get("metadata"):
+         return []
+
+     show_info = meta["metadata"]
+     media_types: list[str] = []
+
+     _cap_map = {
+         "completion": "text",
+         "vision": "image",
+         "embedding": "embedding",
+     }
+     for cap in show_info.get("capabilities") or []:
+         mapped = _cap_map.get(cap)
+         if mapped and mapped not in media_types:
+             media_types.append(mapped)
+
+     # Fallback: GGUF-level vision signals; these live in the /api/show response
+     if "image" not in media_types:
+         details = show_info.get("details") or meta.get("details") or {}
+         model_info = show_info.get("model_info") or {}
+         projector_info = show_info.get("projector_info")
+
+         has_clip_family = "clip" in (details.get("families") or [])
+         has_vision_keys = any(k.startswith("vision.") for k in model_info)
+         has_projector = bool(projector_info)
+
+         if has_clip_family or has_vision_keys or has_projector:
+             media_types.append("image")
+
+     return media_types
+
+ def format_display_name(self, meta: dict) -> str:
+     """
+     Build a human-readable display name for a model.
+
+     :param meta: Model entry, with the ``/api/show`` response under ``"metadata"``.
+     :returns: Human-readable display name string.
+     """
+     model_name = self.get_model_id(meta)
+
+     extra_bits = []
+     if meta.get("metadata") and meta["metadata"].get("model_info"):
+         more_meta = meta["metadata"]["model_info"]
+         if more_meta.get("general.basename"):
+             model_name = more_meta["general.basename"]
+
+         if more_meta.get("general.finetune"):
+             extra_bits.append(more_meta["general.finetune"])
+
+         if more_meta.get("general.size_label"):
+             extra_bits.append(more_meta["general.size_label"])
+
+     elif meta.get("details") and meta["details"].get("parameter_size"):  # NOTE(review): nesting reconstructed - confirm
+         extra_bits.append(f"{meta['details']['parameter_size']} parameters")
+
+     if extra_bits:  # avoid a dangling " ()" when there is nothing to add
+         model_name += f" ({', '.join(extra_bits)})"
+
+     return model_name
+
+ def get_model_card_url(self, meta: dict) -> str:
+ """
+ Get a URL for a model card for a given model
+
+ :param meta: Model metadata
+ :return str: Model card URL (empty string if unavailable)
+ """
+ return f"https://ollama.com/library/{meta['model']}"
+
+ def pull_model(self, model_id: str, stream: bool = False) -> bool:
+ """Pull a model from the Ollama registry.
+
+ :param model_id: Model name (e.g. ``"llama3:8b"``).
+ :param stream: Whether to stream the response (default ``False``).
+ :returns: ``True`` on success, ``False`` on failure.
+ """
+ try:
+ r = self._session.post(
+ f"{self.base_url}/api/pull",
+ headers=self._headers,
+ json={"model": model_id, "stream": stream},
+ timeout=600,
+ )
+
+ if r.status_code != 200 and self.log:
+ self.log.warning(
+ f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}")
+
+ return r.status_code == 200
+
+ except requests.RequestException as e:
+ if self.log:
+ self.log.warning(
+ f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}")
+
+ return False
+
+ def delete_model(self, model_id: str) -> bool:
+ """Delete a model from the Ollama server.
+
+ :param model_id: Model name (e.g. ``"llama3:8b"``).
+ :returns: ``True`` on success, ``False`` on failure.
+ """
+ try:
+ r = self._session.delete(
+ f"{self.base_url}/api/delete",
+ headers=self._headers,
+ json={"model": model_id},
+ timeout=30,
+ )
+ if r.status_code != 200 and self.log:
+ self.log.warning(
+ f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}")
+ return r.status_code == 200
+ except requests.RequestException as e:
+ if self.log:
+ self.log.warning(
+ f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}")
+ return False
diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py
new file mode 100644
index 000000000..f8701dd7c
--- /dev/null
+++ b/common/lib/llm/clients/openai_client.py
@@ -0,0 +1,61 @@
+"""
+Centralized HTTP client for communicating with an OpenAI compatible server.
+
+This class owns all direct HTTP calls to an OpenAI style REST API and provides shared
+static helpers for capability parsing, display-name formatting, and building
+canonical llm.available_models entries. It is a plain helper with no 4CAT
+base-class dependency.
+"""
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class LMStudioClient(LLMProviderClient):
+ type = "openai-like"
+
+ _models_info_path = "/api/v1/models"
+ _models_info_key = "models"
+ _model_id_key = "key"
+
+ def parse_supported_media_types(self, meta: dict) -> list[str]:
+     """
+     Derive the media types a model supports from its model list metadata.
+
+     :param meta: Model entry from the models endpoint, or ``None``.
+     :returns: Supported media types in a fixed order (text, image).
+     Returns ``["text"]`` when ``meta`` is ``None`` or lists no capabilities.
+     """
+     media_types = ["text"]  # far as I can tell, text is always supported
+
+     if meta is None or not meta.get("capabilities"):
+         return media_types
+
+     if meta["capabilities"].get("vision"):
+         media_types.append("image")
+
+     # no way to tell if model supports embeddings input as far as I can see...
+
+     return media_types
+
+ def format_display_name(self, meta: dict) -> str:
+     """
+     Build a human-readable display name for a model.
+
+     :param meta: Model entry from the models endpoint.
+     :returns: Display name, followed by publisher and parameter string in brackets if known.
+     """
+     model_name = self.get_model_id(meta)
+
+     if meta.get("display_name"):
+         model_name = meta["display_name"]
+
+     extra_bits = []
+     if meta.get("publisher"):
+         extra_bits.append(meta["publisher"])
+
+     if meta.get("params_string"):
+         extra_bits.append(meta["params_string"])
+
+     if extra_bits:  # avoid a dangling " ()" when there is nothing to add
+         model_name += f" ({', '.join(extra_bits)})"
+
+     return model_name
diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py
new file mode 100644
index 000000000..e1df93d45
--- /dev/null
+++ b/common/lib/llm/clients/thirdparty_client.py
@@ -0,0 +1,66 @@
+"""
+Fake 'client' that reads from a local store of known third-party, API-based
+LLMs that can be used with 4CAT
+"""
+import json
+
+from common.lib.llm.llm_client import LLMProviderClient
+
+
+class ThirdPartyClient(LLMProviderClient):
+ type = "api"
+
+ _models_info_key = "models"
+ _model_id_key = "model"
+
+ def get_status(self):
+ return 200
+
+ def list_models(self) -> dict:
+     # JSON is UTF-8 by specification; do not depend on the platform's default encoding
+     models_file = self.config.get("PATH_ROOT").joinpath("common/assets/llms.json")
+     with models_file.open(encoding="utf-8") as infile:
+         return json.load(infile)
+
+ def parse_supported_media_types(self, meta: dict) -> list[str]:
+     """
+     Read the media types a model supports from its stored metadata.
+
+     :param meta: Model metadata dict.
+     :returns: The ``supported_media_types`` value of the metadata, or
+     ``["text"]`` if the metadata does not have one.
+     """
+     return meta.get("supported_media_types", ["text"])
+
+ def format_display_name(self, meta: dict) -> str:
+     """
+     Build a human-readable display name for a model.
+
+     :param meta: Model metadata dict; must have a ``name`` key.
+     :returns: The ``name`` value of the metadata.
+     :raises KeyError: If the metadata has no ``name`` key.
+     """
+     return meta["name"]
+
+ def build_model_entry(self, meta: dict) -> dict:
+     """
+     Build a canonical ``llm.available_models`` entry for a model.
+
+     Starts from the entry built by the parent class and then overwrites
+     its ``provider`` value with the one given in the model metadata.
+     :param meta: Model metadata dict; must have a ``provider`` key.
+     :returns: Dict ready to store under ``llm.available_models[model_id]``.
+     """
+     entry = super().build_model_entry(meta)
+     entry["provider"] = meta["provider"]
+
+     return entry
+
+ def get_model_card_url(self, meta: dict) -> str:
+     """
+     Get a URL for a model card for a given model
+
+     :param meta: Model metadata
+     :return str: Model card URL (empty string if unavailable)
+     """
+     return meta.get("model_card") or ""
diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py
new file mode 100644
index 000000000..b59d38ab0
--- /dev/null
+++ b/common/lib/llm/llm_client.py
@@ -0,0 +1,196 @@
+"""
+Centralized HTTP client for communicating with an LLM provider.
+
+This class owns all direct HTTP calls to the provider's REST API and provides
+shared static helpers for capability parsing, display-name formatting, and
+building canonical llm.available_models entries. It is a plain helper with no
+4CAT base-class dependency.
+"""
+
+from abc import abstractmethod
+
+import requests
+
+
+class LLMProviderClient:
+ _headers = {}
+ _meta = {}
+
+ @staticmethod
+ def get_client(config, provider_config: dict) -> "LLMProviderClient":
+ """
+ Get a client for an LLM provider
+
+ Returns the appropriate sub-class depending on the provider type.
+
+ :param config: 4CAT config reader
+ :param dict provider_config: Provider parameters, as configured in
+ 4CAT
+ :return LLMProviderClient:
+ """
+ # in-line import because we otherwise get circular import shenanigans
+ from common.lib.llm.clients.ollama_client import OllamaClient
+ from common.lib.llm.clients.litellm_client import LiteLLMClient
+ from common.lib.llm.clients.openai_client import LMStudioClient
+ from common.lib.llm.clients.thirdparty_client import ThirdPartyClient
+
+ for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient):
+ if client_type.type == provider_config["type"]:
+ return client_type(config, provider_config)
+
+ raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}")
+
+ def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None:
+     """
+     HTTP client for an LLM Provider
+
+     :param config: 4CAT config reader
+     :param dict provider_config: Provider parameters, as configured in 4CAT
+     :param int timeout: Default request timeout in seconds.
+     :param Logger log: 4CAT log handler
+     """
+     self.config = config
+
+     self._meta = provider_config
+
+     self.timeout = timeout
+     self.auth_type = provider_config.get("auth_header")
+     self.auth_key = provider_config.get("auth_key")
+
+     self.base_url = provider_config["url"].rstrip("/")
+     if self.base_url.endswith("/v1"):
+         # get rid of the '/v1' (slash included) - we'll add this in the path
+         self.base_url = self.base_url[:-3]
+
+     self._session = requests.Session()
+     self._headers = {"Content-Type": "application/json"}
+
+     if self.auth_type:
+         self._headers[self.auth_type] = self.auth_key
+
+     self.log = log
+
+ def get_status(self) -> bool | int:
+ """
+ Check if the server is reachable and responding to requests
+
+ :return: `False` if the server is not responding, or an HTTP status code.
+ """
+ try:
+ r = self._session.get(
+ f"{self.base_url}{self._models_info_path}",
+ headers=self._headers,
+ timeout=self.timeout,
+ )
+ if self.log and r.status_code != 200:
+ self.log.warning(
+ f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}")
+ return r.status_code
+ except requests.RequestException as e:
+ if self.log:
+ self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}")
+ return False
+
+ def list_models(self) -> list[dict]:
+     """List available models from the provider.
+
+     :returns: List of model dicts, or ``[]`` on failure.
+     """
+     try:
+         r = self._session.get(
+             f"{self.base_url}{self._models_info_path}",
+             headers=self._headers,
+             timeout=self.timeout,
+         )
+         if r.status_code == 200:
+             return r.json().get(self._models_info_key, [])
+         if self.log:
+             self.log.warning(
+                 f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}")
+     except requests.RequestException as e:
+         if self.log:
+             self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}")
+     return []
+
+ def build_model_entry(self, meta: dict) -> dict:
+     """
+     Build a canonical ``llm.available_models`` entry for a model.
+
+     IDs, name, model card URL and media types are derived from the
+     metadata; provider type and URL come from the provider configuration.
+     :param meta: Model metadata, in the provider's own format.
+     :returns: Dict ready to store under ``llm.available_models[model_id]``.
+     """
+     return {
+         "id": self.get_global_model_id(meta),
+         "local_id": self.get_model_id(meta),
+         "name": self.format_display_name(meta),
+         "model_card": self.get_model_card_url(meta),
+         "provider_type": self._meta["type"],
+         "provider": self._meta["url"],
+         "supported_media_types": self.parse_supported_media_types(meta),
+         "metadata": meta,
+     }
+
+ def get_model_card_url(self, meta: dict) -> str:
+ """
+ Get a URL for a model card for a given model
+
+ :param meta: Model metadata
+ :return str: Model card URL (empty string if unavailable)
+ """
+ return ""
+
+ @abstractmethod
+ def parse_supported_media_types(self, meta: dict) -> list[str]:
+     """Derive the media types a model supports from its metadata.
+
+     Used for the ``supported_media_types`` value of a model entry.
+     Each provider describes model capabilities in its own way, so
+     sub-classes are expected to implement this for their metadata format.
+     NOTE(review): the class does not derive from ``abc.ABC``, so the
+     ``@abstractmethod`` marker is not enforced on instantiation.
+
+     Values used by the clients in this package:
+     ``"text"``, ``"image"``, ``"audio"``, ``"embedding"``.
+
+     :param meta: Model metadata in the provider's own format, or ``None``.
+     :returns: List of supported media type strings.
+     Implementations differ in what they return for missing
+     metadata (``[]`` or ``["text"]``).
+     """
+     pass
+
+ @abstractmethod
+ def format_display_name(self, meta: dict) -> str:
+ """
+ Build a human-readable display name for a model.
+
+ :param dict meta: Model metadata
+ :returns str: Human-readable display name string.
+ """
+ pass
+
+ def get_model_id(self, meta: dict) -> str:
+ """
+ Choose a model identifier based on model metadata.
+
+ This is the ID within the provider context, i.e. it is not guaranteed
+ to be globally unique (use `get_global_model_id()` instead).
+
+ :param dict meta: Model metadata
+ :return str: Model ID
+ """
+ return meta[self._model_id_key]
+
+ def get_global_model_id(self, meta: dict) -> str:
+ """
+ Choose a model identifier based on model metadata.
+
+ This needs to be a *globally* unique ID, i.e. if multiple providers
+ provide the same model, the ID should still be unique per provider.
+
+ :param dict meta: Model metadata
+ :return str: Model ID
+ """
+ return "-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta)))
\ No newline at end of file
diff --git a/common/lib/user_input.py b/common/lib/user_input.py
index 7fcb6bcb9..16a583d74 100644
--- a/common/lib/user_input.py
+++ b/common/lib/user_input.py
@@ -26,6 +26,7 @@ class UserInput:
OPTION_TEXT = "string" # simple string or integer (input text)
OPTION_MULTI = "multi" # multiple values out of a list (select multiple)
OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple)
+ OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls
OPTION_INFO = "info" # just a bit of text, not actual input
OPTION_TEXT_LARGE = "textarea" # longer text
OPTION_TEXT_JSON = "json" # text, but should be valid JSON
@@ -70,6 +71,8 @@ def parse_all(options, input, silently_correct=True):
if type(input) is not dict and type(input) is not ImmutableMultiDict:
raise TypeError("input must be a dictionary or ImmutableMultiDict")
+ print(input)
+
if type(input) is ImmutableMultiDict:
# we are not using to_dict, because that messes up multi-selects
input = {key: input.getlist(key) for key in input}
@@ -181,6 +184,41 @@ def parse_all(options, input, silently_correct=True):
parsed_input[option] = table_input
+ elif settings.get("type") == UserInput.OPTION_MULTI_OPTION:
+ # these are collections of other input options that can be
+ # repeated an arbitrary amount of times and are saved as a
+ # list of these values
+ # i.e. forms within forms!!!
+ item_options = settings["options"]
+ input_items = {}
+ for key, value in input.items():
+ if key_match := re.match(f"{option}-([0-9]+)-(.+)", key):
+ input_index = int(key_match[1])
+ # note: the index is just used to match inputs to items
+ # it is not used for ordering
+ option_item = key_match[2]
+ if option_item not in item_options:
+ continue
+
+ if input_index not in input_items:
+ input_items[input_index] = {}
+
+ print(key, value)
+ input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct)
+
+ # discard items that are only default values
+ parsed_input[option] = []
+ for input_index, item in input_items.items():
+ only_default = True
+ for key, value in item.items():
+ if value != item_options[key]["default"]:
+ only_default = False
+
+ if not only_default:
+ parsed_input[option].append(item)
+
+ print(parsed_input[option])
+
elif option not in input:
# not provided? use default
parsed_input[option] = settings.get("default", None)
diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml
new file mode 100644
index 000000000..020b12c96
--- /dev/null
+++ b/docker-compose_ollama.yml
@@ -0,0 +1,54 @@
+# Use this file as an override to add a local Ollama instance to your 4CAT stack.
+#
+# Usage:
+# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+#
+# Once running, configure 4CAT via the Control Panel → Settings → LLM:
+# LLM Provider Type : ollama
+# LLM Server URL : http://ollama:11434
+#
+# GPU support (NVIDIA):
+# Uncomment the `deploy.resources` block in the ollama service below and
+# ensure the NVIDIA Container Toolkit is installed on your host.
+# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+#
+# GPU support (Apple Silicon / AMD):
+# Pass the appropriate device through your host's Docker settings instead.
+# Ollama will automatically detect the GPU when it is available inside the container.
+
+services:
+ ollama:
+ image: ollama/ollama:latest
+ container_name: 4cat_ollama
+ restart: unless-stopped
+ volumes:
+ - 4cat_ollama:/root/.ollama
+ # Expose the Ollama API on the host for optional external access or
+ # management with the Ollama CLI. Remove this block if you want to keep
+ # Ollama accessible only within the Docker network.
+ ports:
+ - "127.0.0.1:11434:11434"
+ healthcheck:
+ test: ["CMD", "ollama", "ls"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ # --- NVIDIA GPU support (uncomment to enable) ---
+ # deploy:
+ # resources:
+ # reservations:
+ # devices:
+ # - driver: nvidia
+ # count: all
+ # capabilities: [gpu]
+
+ # Make the 4CAT backend wait for Ollama to be healthy before starting.
+ # This prevents initial model-refresh failures on first boot.
+ backend:
+ depends_on:
+ ollama:
+ condition: service_healthy
+
+volumes:
+ 4cat_ollama:
+ name: 4cat_ollama_data
diff --git a/docker/README.md b/docker/README.md
index 00f0862fc..31843b2ce 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -42,3 +42,76 @@ https://github.com/docker/buildx/issues/426
https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0
4. More errors coming soon! (No doubt)
+
+---
+
+## Running a local Ollama instance alongside 4CAT
+
+4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors.
+A Docker Compose override file (`docker-compose_ollama.yml`) is included to add
+Ollama as a sidecar service so you do not need to run it separately on the host.
+
+### Quick start
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+```
+
+This starts the standard 4CAT stack plus an `ollama` container that is only
+accessible within the Docker network (and optionally on `localhost:11434` on
+the host via the exposed port).
+
+### Configuring 4CAT to use Ollama
+
+#### Automatic configuration (fresh Docker install with sidecar)
+
+When you start 4CAT for the first time using the Ollama override file, the
+`docker_setup.py` initialisation script automatically detects the `ollama`
+sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access**
+for you. You can skip to step 2 below.
+
+#### Manual configuration (or to verify/change settings)
+
+1. Log in as admin and open **Control Panel → Settings**.
+2. Confirm or set the following LLM fields:
+
+ | Setting | Value |
+ |---|---|
+ | LLM Provider Type | `ollama` |
+ | LLM Server URL | `http://ollama:11434` |
+ | LLM Access | enabled |
+
+3. Save settings.
+4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled).
+5. Use the **Refresh** button to load available models, then **Pull** a model
+ (e.g. `llama3.2:3b`) to download it from the Ollama library.
+6. Enable the models you want to make available to users.
+
+### GPU support (NVIDIA)
+
+Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and
+ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+is installed on your host. Then restart the stack with the override:
+
+```bash
+docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d
+```
+
+### Persisting models
+
+Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume.
+They survive container restarts and re-creations unless you explicitly remove
+the volume (`docker volume rm 4cat_ollama_data`).
+
+### Using an external Ollama server
+
+If you already run Ollama on the host or elsewhere, skip the override file and
+point 4CAT directly at that server:
+
+- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL.
+- **Remote server**: use the server's reachable URL and configure any required
+ API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings.
+
+In both cases, configure the LLM settings manually via **Control Panel → Settings**
+(see *Manual configuration* above), using the appropriate server URL instead of
+`http://ollama:11434`.
diff --git a/docker/docker_setup.py b/docker/docker_setup.py
index 450684602..aea641c12 100644
--- a/docker/docker_setup.py
+++ b/docker/docker_setup.py
@@ -207,6 +207,35 @@ def _format_host(host: str) -> str:
f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\""
)
+ # If an Ollama container is available on the Docker network, configure 4CAT to use it.
+ ollama_url = 'http://ollama:11434'
+ try:
+ import requests
+ try:
+ resp = requests.get(f"{ollama_url}/api/tags", timeout=2)
+ if resp.status_code == 200:
+ current_llm_server = config.get("llm.server")
+ if current_llm_server == ollama_url:
+ print("Ollama server already configured in 4CAT settings.")
+ elif current_llm_server and current_llm_server != ollama_url:
+ # Previously configured LLM server is different; log a warning but do not overwrite user settings
+ print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.")
+ else:
+ # set basic LLM settings so the initial admin user does not need to
+ # configure them manually for local development environments that
+ # include the Ollama sidecar.
+ config.set('llm.provider_type', 'ollama')
+ config.set('llm.server', ollama_url)
+ config.set('llm.access', True)
+ config.db.commit()
+ print('Detected Ollama on Docker network; configured LLM settings to use it.')
+ except requests.RequestException:
+ # Ollama not available; do nothing
+ pass
+ except Exception:
+ # requests other error; skip automatic Ollama configuration
+ pass
+
print(f"\nStarting app\n"
f"4CAT is accessible at:\n"
f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n")
diff --git a/extensions b/extensions
new file mode 120000
index 000000000..c25d13e68
--- /dev/null
+++ b/extensions
@@ -0,0 +1 @@
+/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions
\ No newline at end of file
diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py
index c2bd0d02e..7026aa6c3 100644
--- a/processors/machine_learning/llm_prompter.py
+++ b/processors/machine_learning/llm_prompter.py
@@ -16,7 +16,7 @@
from common.lib.item_mapping import MappedItem
from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException
from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict
-from common.lib.llm import LLMAdapter
+from common.lib.llm.adapter import LLMAdapter
from backend.lib.processor import BasicProcessor
class LLMPrompter(BasicProcessor):
@@ -55,46 +55,39 @@ def get_queue_id(cls, remote_id, details, dataset) -> str:
local_queue = "local_models"
if not dataset:
return local_queue
+
+ model = dataset.parameters.get("model")
+ if model.startswith("api"):
+ # API-based models have their own queue - no local resources being
+ # used so can be concurrent
+ return f"llm-api-{dataset.key}"
else:
- if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]:
- # Hosted models also go in the local queue since they use the same shared LLM server
- return local_queue
-
- # Queue per model/API type
- return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}"
+ # use the model URL as the queue ID (extracted from the model
+ # global ID)
+ # this is not fool-proof, but does mean not more than one dataset
+ # runs per API server - in the scenario of these running locally,
+ # it means things do not run concurrently (which is good)
+ return f"llm-local-{dataset.parameters.get('model').split('-')[1]}"
@classmethod
def get_options(cls, parent_dataset=None, config=None) -> dict:
# Check if 4CAT wide LLM server is available
- if config.get("llm.access", False) and config.get("llm.server", ""):
- shared_llm_name = config.get("llm.host_name", "4CAT LLM Server")
- shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()}
- shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else ""
- else:
- shared_llm_name = False
- shared_llm_default = ""
- shared_llm_models = {}
+ available_models = config.get("llm.available_models", [])
+ enabled_model_ids = config.get("llm.enabled_models", [])
+ if not config.get("llm.access"):
+ enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")]
+
+ enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids}
# Determine if the parent dataset is a media archive (zip with images/video/audio)
is_media_parent = False
media_type = "media"
- hosted_and_local_available = True
if parent_dataset:
parent_extension = parent_dataset.get_extension()
parent_media_type = parent_dataset.get_media_type()
if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"):
is_media_parent = True
media_type = parent_media_type
- if parent_media_type in ("video", "audio"):
- # Ollama and LM Studio currently only support text and image
- hosted_and_local_available = False
-
- # Add additional sources for LLM Models
- api_or_local_options = {"api": "API"}
- if hosted_and_local_available:
- api_or_local_options["local"] = "Local"
- if shared_llm_name:
- api_or_local_options["hosted"] = shared_llm_name
options = {
"ethics_warning1": {
@@ -102,21 +95,14 @@ def get_options(cls, parent_dataset=None, config=None) -> dict:
"help": "Always test your prompt on a sample of rows, for instance by first using the "
"Random filter processor.",
},
- "api_or_local": {
- "type": UserInput.OPTION_CHOICE,
- "help": "Local or API",
- "options": api_or_local_options,
- "default": "api" if not shared_llm_name else "hosted",
- "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid "
- "and accessible URL through which the model can be reached.",
- },
- "api_model": {
+ "model": {
"type": UserInput.OPTION_CHOICE,
"help": "API model",
- "options": LLMAdapter.get_model_options(config),
+ "options": {
+ model_id: model["name"] for model_id, model in enabled_models.items()
+ },
"default": "none",
"tooltip": "Select from the predefined model list or insert manually",
- "requires": "api_or_local==api",
},
"api_key": {
"type": UserInput.OPTION_TEXT,
@@ -124,282 +110,194 @@ def get_options(cls, parent_dataset=None, config=None) -> dict:
"help": "API key",
"tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization"
"/api-keys). Note that this often involves billing.",
- "requires": "api_or_local==api",
+ "requires": "api_model^=api",
"sensitive": True,
- },
- "api_custom_model_provider": {
- "type": UserInput.OPTION_CHOICE,
- "help": "Model provider",
- "requires": "api_model==custom",
- "options": LLMAdapter.get_model_providers(config),
- "tooltip": "API provider. Currently limited to this list.",
- },
- "api_custom_model_id": {
- "type": UserInput.OPTION_TEXT,
- "help": "Model ID",
- "requires": "api_model==custom",
- "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. "
- "Fine-tuned models often require more info; OpenAI for instance requires the following "
- "format: ft:[modelname]:[org_id]:[custom_suffix]:",
- "default": "",
- },
- "local_info": {
- "type": UserInput.OPTION_INFO,
- "requires": "api_or_local==local",
- "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by "
- "this 4CAT server, e.g. by running them on the same machine. For LM Studio and vLLM, "
- "use the Base URL to interface with any OpenAI-like API endpoint.",
- },
- "local_provider": {
- "type": UserInput.OPTION_CHOICE,
- "requires": "api_or_local==local",
- "options": {
- "none": "",
- "lmstudio": "LM Studio",
- "ollama": "Ollama",
- "vllm": "vLLM",
- },
- "default": "none",
- "help": "Local LLM provider",
- },
- "lmstudio-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==lmstudio",
- "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local "
- "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/"
- "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top "
- "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). "
- "4CAT will use the top-most model you have loaded. ",
- },
- "ollama-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==ollama",
- "help": "Ollama is a simple command-line application that lets you interface with a range of open-"
- "source LLMs and that you can run as a local server. See [this link]"
- "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.",
- },
- "vllm-info": {
- "type": UserInput.OPTION_INFO,
- "requires": "local_provider==ollama",
- "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux "
- "systems capable of fast inference with a single LLM. Communication is done through an "
- "OpenAI-like API endpoint. Just change the base URL below and insert an optional API key.",
- },
- "local_base_url": {
- "type": UserInput.OPTION_TEXT,
- "requires": "api_or_local==local",
- "default": "",
- "help": "Base URL",
- "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM "
- "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).",
- },
- "lmstudio_api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "LM Studio API key",
- "tooltip": "[optional] Uses `lm-studio` by default.",
- "requires": "local_provider==lmstudio",
- "sensitive": True,
- },
- "vllm_api_key": {
- "type": UserInput.OPTION_TEXT,
- "default": "",
- "help": "vLLM API key",
- "tooltip": "[optional] Empty by default.",
- "requires": "local_provider==vllm",
- "sensitive": True,
- },
- "ollama_model": {
- "type": UserInput.OPTION_TEXT,
- "requires": "local_provider==ollama",
- "default": "",
- "help": "Ollama model name",
- "tooltip": "[required] for example 'llama3.2'",
- },
- "hosted_llm_model": {
- "type": UserInput.OPTION_CHOICE,
- "help": "LLM model",
- "options": shared_llm_models,
- "default": shared_llm_default,
- "requires": "api_or_local==hosted",
- },
+ }
}
if is_media_parent:
# Media-specific options: show info about media files being attached
- options["media_info"] = {
- "type": UserInput.OPTION_INFO,
- "help": f"The parent dataset contains {media_type} files that will be sent "
- f"to the LLM with each prompt. Make sure to use a model that supports "
- f"{media_type} input (e.g. vision models for images).
"
- f"Not all models support all media types. If the model cannot process "
- f"{media_type} files, an error will be returned during processing.",
- }
- options["system_prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "System prompt",
- "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
- "on the tone of the text. This processor may edit the system prompt to "
- "ensure correct output. System prompts are included in the results file.",
- "default": "",
- }
- options["prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "User prompt",
- "tooltip": f"Describe what the model should do with each {media_type} file. "
- f"No column brackets needed — {media_type} files are attached automatically.",
- "default": "",
- }
+ options.update({
+ "media_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": f"The parent dataset contains {media_type} files that will be sent "
+ f"to the LLM with each prompt. Make sure to use a model that supports "
+ f"{media_type} input (e.g. vision models for images).
"
+ f"Not all models support all media types. If the model cannot process "
+ f"{media_type} files, an error will be returned during processing.",
+ },
+ "system_prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "System prompt",
+ "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
+ "on the tone of the text. This processor may edit the system prompt to "
+ "ensure correct output. System prompts are included in the results file.",
+ "default": "",
+ },
+ "prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "User prompt",
+ "tooltip": f"Describe what the model should do with each {media_type} file. "
+ f"No column brackets needed — {media_type} files are attached automatically.",
+ "default": "",
+ }
+ })
+
else:
- # Text-based dataset options: column brackets, media URL toggle, batching
- options["prompt_info"] = {
+ options.update({
+ # Text-based dataset options: column brackets, media URL toggle, batching
+ "prompt_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": "How to prompt
"
+ "Use `[brackets]` with column names to insert dataset items in the prompt. You "
+ "can place column brackets in different parts of the prompt or use multiple column names within"
+ ' a single column bracket to merge items.
Example 1: "Describe the topic '
+ 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: '
+ "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' "
+ 'to the following text: `[body]`"
Prompting is a delicate art. See '
+ "processor references on best prompting practices.
For predefined research prompts, see "
+ "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) "
+ "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/"
+ "library).",
+ },
+ "system_prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "System prompt",
+ "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
+ "on the tone of the text. This processor may edit the system prompt to "
+ "ensure correct output. System prompts are included in the results file.",
+ "default": "",
+ },
+ "prompt": {
+ "type": UserInput.OPTION_TEXT_LARGE,
+ "help": "User prompt",
+ "tooltip": "Use [brackets] with columns names.",
+ "default": "",
+ },
+ "use_media": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Add images",
+ "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.",
+ "default": False,
+ },
+ "media_columns": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Columns with image URL(s)",
+ "default": "",
+ "inline": True,
+ "tooltip": "Multiple columns can be selected.",
+ "requires": "use_media==true",
+ }
+ })
+
+ # Common options for both text and media datasets
+ options.update({
+ "structured_output": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Output structured JSON",
+ "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support "
+ "structured output.",
+ "default": False,
+ },
+ "json_schema_info": {
"type": UserInput.OPTION_INFO,
- "help": "How to prompt
"
- "Use `[brackets]` with column names to insert dataset items in the prompt. You "
- "can place column brackets in different parts of the prompt or use multiple column names within"
- ' a single column bracket to merge items.
Example 1: "Describe the topic '
- 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: '
- "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' "
- 'to the following text: `[body]`"
Prompting is a delicate art. See '
- "processor references on best prompting practices.
For predefined research prompts, see "
- "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) "
- "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/"
- "library).",
- }
- options["system_prompt"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "System prompt",
- "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance "
- "on the tone of the text. This processor may edit the system prompt to "
- "ensure correct output. System prompts are included in the results file.",
- "default": "",
- }
- options["prompt"] = {
+ "help": "Insert a JSON Schema for structured outputs. These define the output that "
+ "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]"
+ "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]"
+ "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).",
+ "requires": "structured_output==true",
+ },
+ "json_schema": {
"type": UserInput.OPTION_TEXT_LARGE,
- "help": "User prompt",
- "tooltip": "Use [brackets] with columns names.",
+ "help": "JSON schema",
+ "tooltip": "[required] A JSON schema that the structured output will adhere to",
+ "requires": "structured_output==true",
"default": "",
- }
- options["use_media"] = {
- "type": UserInput.OPTION_TOGGLE,
- "help": "Add images",
- "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.",
- "default": False,
- }
- options["media_columns"] = {
+ },
+ "temperature": {
"type": UserInput.OPTION_TEXT,
- "help": "Columns with image URL(s)",
- "default": "",
- "inline": True,
- "tooltip": "Multiple columns can be selected.",
- "requires": "use_media==true",
+ "help": "Temperature",
+ "default": 0.1,
+ "coerce_type": float,
+ "max": 2.0,
+ "tooltip": "Temperature indicates how strict the model will gravitate towards the most "
+ "probable next token. A score close to 0 returns more predictable "
+ "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.",
}
-
- # Common options for both text and media datasets
- options["structured_output"] = {
- "type": UserInput.OPTION_TOGGLE,
- "help": "Output structured JSON",
- "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support "
- "structured output.",
- "default": False,
- }
- options["json_schema_info"] = {
- "type": UserInput.OPTION_INFO,
- "help": "Insert a JSON Schema for structured outputs. These define the output that "
- "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]"
- "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]"
- "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).",
- "requires": "structured_output==true",
- }
- options["json_schema"] = {
- "type": UserInput.OPTION_TEXT_LARGE,
- "help": "JSON schema",
- "tooltip": "[required] A JSON schema that the structured output will adhere to",
- "requires": "structured_output==true",
- "default": "",
- }
- options["temperature"] = {
- "type": UserInput.OPTION_TEXT,
- "help": "Temperature",
- "default": 0.1,
- "coerce_type": float,
- "max": 2.0,
- "tooltip": "Temperature indicates how strict the model will gravitate towards the most "
- "probable next token. A score close to 0 returns more predictable "
- "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.",
- }
+ })
if not is_media_parent:
- options["truncate_input"] = {
+ options.update({
+ "truncate_input": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Max chars in input value",
+ "default": 0,
+ "coerce_type": int,
+ "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.",
+ "requires": "use_media==false",
+ },
+ "max_tokens": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Max output tokens",
+ "default": 10000,
+ "coerce_type": int,
+ "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of "
+ "text for common English text. This includes tokens spent for reasoning.",
+ },
+ "batches": {
+ "type": UserInput.OPTION_TEXT,
+ "help": "Items per prompt",
+ "coerce_type": int,
+ "default": 1,
+ "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list "
+ "wherever the column brackets are used (e.g. '[body]').",
+ "requires": "use_media==false",
+ },
+ "batch_info": {
+ "type": UserInput.OPTION_INFO,
+ "help": "Note on batching: Batching may increase speed but reduce accuracy. Models "
+ "need to support structured output for batching. This processor uses JSON schemas to ensure "
+ "symmetry between input and output lengths, but models may struggle to match input and output "
+ "values. Describe the dataset values in plurals in your prompt when batching. If you use "
+ "multiple column brackets in your prompt, rows with any empty values are skipped.",
+ "requires": "use_media==false",
+ }
+ })
+
+ options.update({
+ "ethics_warning3": {
+ "type": UserInput.OPTION_INFO,
+ "requires": "api_or_local==api",
+ "help": "When using LLMs through commercial parties, always consider anonymising your data and "
+ "whether local open-source LLMs are also an option.",
+ },
+ "save_annotations": {
+ "type": UserInput.OPTION_ANNOTATION,
+ "label": "prompt outputs",
+ "default": False,
+ },
+ "hide_think": {
+ "type": UserInput.OPTION_TOGGLE,
+ "help": "Hide reasoning",
+ "default": False,
+ "tooltip": "Some models include reasoning in their output, between
@@ -29,7 +51,7 @@
{% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +77,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %}@@ -63,7 +85,7 @@
{% endif %} {% elif settings.type == "choice" %} -