diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 000000000..7a6412586 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,8 @@ +{ + "esversion": 11, + "undef": true, + "globals": { + "$": false, + "document": false, + } +} \ No newline at end of file diff --git a/backend/workers/llm_manager.py b/backend/workers/llm_manager.py new file mode 100644 index 000000000..38d9405e0 --- /dev/null +++ b/backend/workers/llm_manager.py @@ -0,0 +1,83 @@ +""" +Manage LLM models +""" +from backend.lib.worker import BasicWorker +from common.lib.llm.llm_client import LLMProviderClient + +class LLMProviderManager(BasicWorker): + """ + Manages LLM models + + Periodically refreshes the list of available models from an LLM provider. + Can also pull or delete models on demand when queued with a specific task. + + Job details: + - task: "refresh" (default), "pull", or "delete" + - provider: the URL of the LLM provider, as configured in the + llm.providers setting. if not given, run on all providers + + Job remote_id: + - For refresh: "manage-llm-refresh" (periodic) or "manage-llm-manual" (on-demand) + - For pull/delete: the model name to pull or delete + """ + type = "manage-llm" + max_workers = 1 + client = None + + @classmethod + def ensure_job(cls, config=None): + """ + Ensure the daily refresh job is always scheduled + + :return: Job parameters for the worker + """ + return {"remote_id": "manage-llm-refresh", "interval": 86400} + + def work(self): + task = self.job.details.get("task", "refresh") if self.job.details else "refresh" + provider = self.job.details.get("provider", "") if self.job.details else None + model_name = self.job.data["remote_id"] + available_models = None + + for provider_config in self.config.get("llm.providers", []): + if provider and provider != provider_config["url"]: + continue + + try: + client = LLMProviderClient.get_client(self.config, provider_config) + except ValueError: + self.log.debug(f"{self.__class__.__name__}: invalid provider type: 
{provider_config['type']}, skipping") + continue + + # note that technically it is possible to pull/delete a model on + # multiple providers at once (if a model_name is defined but no + # provider). may not be a problem? may be useful one day? + success = False + if task == "pull" and hasattr(client, "pull_model"): + success = client.pull_model(model_name) + + elif task == "delete" and hasattr(client, "delete_model"): + success = client.delete_model(model_name) + + if success or task == "refresh": + # refresh models after pulling/deleting, or when asked to + if available_models is None: + available_models = {} + + for model in client.list_models(): + model = client.build_model_entry(model) + available_models[model["id"]] = model + + self.log.debug(f"{self.__class__.__name__}: ran task '{task}' (model name: {model_name or 'N/A'})") + + elif success is None: + self.log.warning(f"{self.__class__.__name__}: task '{task}' unknown or not supported by client") + else: + self.log.warning(f"{self.__class__.__name__}: task '{task}' failed for model {model_name}") + + if available_models is not None: + enabled_and_available = set(available_models.keys()) & set(self.config.get("llm.enabled_models", [])) + self.config.set("llm.available_models", available_models) + self.config.set("llm.enabled_models", list(enabled_and_available)) + + self.job.finish() diff --git a/backend/workers/refresh_items.py b/backend/workers/refresh_items.py index 8a56c213f..7ab11645d 100644 --- a/backend/workers/refresh_items.py +++ b/backend/workers/refresh_items.py @@ -1,78 +1,26 @@ """ Refresh items """ -import json - -import requests - from backend.lib.worker import BasicWorker class ItemUpdater(BasicWorker): """ Refresh 4CAT items - Refreshes settings that are dependent on external factors + Refreshes settings that are dependent on external factors. + LLM model refreshing is handled by the OllamaManager worker. 
""" type = "refresh-items" max_workers = 1 - @classmethod - def ensure_job(cls, config=None): - """ - Ensure that the refresher is always running - - This is used to ensure that the refresher is always running, and if it is - not, it will be started by the WorkerManager. - - :return: Job parameters for the worker - """ - return {"remote_id": "refresh-items", "interval": 60} + # ensure_job is intentionally disabled: this worker currently does nothing + # and would only create unnecessary job queue churn. Re-enable when work() + # has actual tasks to perform. + # @classmethod + # def ensure_job(cls, config=None): + # return {"remote_id": "refresh-items", "interval": 60} def work(self): - # Refresh items - self.refresh_settings() - + # Placeholder – no tasks implemented yet. self.job.finish() - - def refresh_settings(self): - """ - Refresh settings - """ - # LLM server settings - llm_provider = self.config.get("llm.provider_type", "none").lower() - llm_server = self.config.get("llm.server", "") - - # For now we only support the Ollama API - if llm_provider == "ollama" and llm_server: - headers = {"Content-Type": "application/json"} - llm_api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - if llm_api_key and llm_auth_type: - headers[llm_auth_type] = llm_api_key - - available_models = {} - try: - response = requests.get(f"{llm_server}/api/tags", headers=headers, timeout=10) - if response.status_code == 200: - settings = response.json() - for model in settings.get("models", []): - model = model["name"] - try: - model_metadata = requests.post(f"{llm_server}/api/show", headers=headers, json={"model": model}, timeout=10).json() - available_models[model] = { - "name": f"{model_metadata['model_info'].get('general.basename', model)} ({model_metadata['details']['parameter_size']} parameters)", - "model_card": f"https://ollama.com/library/{model}", - "provider": "local" - } - - except (requests.RequestException, 
json.JSONDecodeError, KeyError) as e: - self.log.debug(f"Could not get metadata for model {model} from Ollama - skipping (error: {e})") - - self.config.set("llm.available_models", available_models) - self.log.debug("Refreshed LLM server settings cache") - else: - self.log.warning(f"Could not refresh LLM server settings cache - server returned status code {response.status_code}") - - except requests.RequestException as e: - self.log.warning(f"Could not refresh LLM server settings cache - request error: {str(e)}") \ No newline at end of file diff --git a/common/assets/llms.json b/common/assets/llms.json index 835dbaa09..c17351488 100644 --- a/common/assets/llms.json +++ b/common/assets/llms.json @@ -1,128 +1,140 @@ -{ - "none": { - "name": "", - "model_card": "", - "provider": "", - "default": true - }, - "custom": { - "name": "[custom]", - "model_card": "", - "provider": "" - }, - "gpt-5.4": { - "name": "[OpenAI] GPT-5.4", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4", - "provider": "openai" - }, - "gpt-5-mini": { - "name": "[OpenAI] GPT-5 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", - "provider": "openai" - }, - "gpt-5-nano": { - "name": "[OpenAI] GPT-5 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", - "provider": "openai" - }, - "gpt-5.4-pro": { - "name": "[OpenAI] GPT-5.4 Pro", - "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", - "provider": "openai" - }, - "gpt-4.1-mini": { - "name": "[OpenAI] GPT-4.1 mini", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", - "provider": "openai" - }, - "gpt-4.1-nano": { - "name": "[OpenAI] GPT-4.1 nano", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", - "provider": "openai" - }, - "gpt-4.1": { - "name": "[OpenAI] GPT-4.1", - "model_card": "https://platform.openai.com/docs/models/gpt-4.1", - "provider": "openai" - }, - "gpt-4o-mini": { - "name": "[OpenAI] GPT-4o mini", - "model_card": 
"https://platform.openai.com/docs/models/gpt-4o-mini", - "provider": "openai" - }, - "gpt-4o": { - "name": "[OpenAI] GPT-4o", - "model_card": "https://platform.openai.com/docs/models/gpt-4o", - "provider": "openai" - }, - "gemini-3.1-pro-preview": { - "name": "[Google] Gemini 3.1 Pro", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", - "provider": "google" - }, - "gemini-3-flash-preview": { - "name": "[Google] Gemini 3 Flash", - "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", - "provider": "google" - }, - "gemini-3.1-flash-lite-preview": { - "name": "[Google] Gemini 3.1 Flash Lite", - "provider": "google", - "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" - }, - "claude-opus-4-6": { - "name": "[Anthropic] Claude Opus 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/opus", - "provider": "anthropic" - }, - "claude-sonnet-4-6": { - "name": "[Anthropic] Claude Sonnet 4.6 (latest)", - "model_card": "https://www.anthropic.com/claude/sonnet", - "provider": "anthropic" - }, - "claude-4-5-haiku": { - "name": "[Anthropic] Claude 4.5 Haiku (latest)", - "model_card": "https://www.anthropic.com/claude/haiku", - "provider": "anthropic" - }, - "magistral-small-2509": { - "name": "[Mistral] Magistral Small 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", - "provider": "mistral" - }, - "magistral-medium-2509": { - "name": "[Mistral] Magistral Medium 1.2 (25.09)", - "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", - "provider": "mistral" - }, - "mistral-small-2506": { - "name": "[Mistral] Mistral Small 3.2 (25.06)", - "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", - "provider": "mistral" - }, - "mistral-medium-2508": { - "name": "[Mistral] Mistral Medium 3.1 (25.08)", - "model_card": "mistral-medium-2508", - "provider": 
"mistral" - }, - "mistral-large-2512": { - "name": "[Mistral] Mistral Large 3 (25.12)", - "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", - "provider": "mistral" - }, - "open-mistral-nemo-2407": { - "name": "[Mistral] Mistral Nemo 12B", - "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", - "provider": "mistral" - }, - "deepseek-chat": { - "name": "[DeepSeek] DeepSeek latest (non-reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - }, - "deepseek-reasoner": { - "name": "[DeepSeek] DeepSeek latest (reasoning)", - "model_card": "https://api-docs.deepseek.com/quick_start/pricing", - "provider": "deepseek" - } -} \ No newline at end of file +[ + { + "model": "gpt-5.4", + "name": "[OpenAI] GPT-5.4", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4", + "provider": "openai" + }, + { + "model": "gpt-5-mini", + "name": "[OpenAI] GPT-5 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-5-mini", + "provider": "openai" + }, + { + "model": "gpt-5-nano", + "name": "[OpenAI] GPT-5 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-5-nano", + "provider": "openai" + }, + { + "model": "gpt-5.4-pro", + "name": "[OpenAI] GPT-5.4 Pro", + "model_card": "https://platform.openai.com/docs/models/gpt-5.4-pro", + "provider": "openai" + }, + { + "model": "gpt-4.1-mini", + "name": "[OpenAI] GPT-4.1 mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-mini", + "provider": "openai" + }, + { + "model": "gpt-4.1-nano", + "name": "[OpenAI] GPT-4.1 nano", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1-nano", + "provider": "openai" + }, + { + "model": "gpt-4.1", + "name": "[OpenAI] GPT-4.1", + "model_card": "https://platform.openai.com/docs/models/gpt-4.1", + "provider": "openai" + }, + { + "model": "gpt-4o-mini", + "name": "[OpenAI] GPT-4o mini", + "model_card": "https://platform.openai.com/docs/models/gpt-4o-mini", + 
"provider": "openai" + }, + { + "model": "gpt-4o", + "name": "[OpenAI] GPT-4o", + "model_card": "https://platform.openai.com/docs/models/gpt-4o", + "provider": "openai" + }, + { + "model": "gemini-3.1-pro-preview", + "name": "[Google] Gemini 3.1 Pro", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-pro", + "provider": "google" + }, + { + "model": "gemini-3-flash-preview", + "name": "[Google] Gemini 3 Flash", + "model_card": "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash", + "provider": "google" + }, + { + "model": "gemini-3.1-flash-lite-preview", + "name": "[Google] Gemini 3.1 Flash Lite", + "provider": "google", + "model_card": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-1-flash-lite" + }, + { + "model": "claude-opus-4-6", + "name": "[Anthropic] Claude Opus 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/opus", + "provider": "anthropic" + }, + { + "model": "claude-sonnet-4-6", + "name": "[Anthropic] Claude Sonnet 4.6 (latest)", + "model_card": "https://www.anthropic.com/claude/sonnet", + "provider": "anthropic" + }, + { + "model": "claude-4-5-haiku", + "name": "[Anthropic] Claude 4.5 Haiku (latest)", + "model_card": "https://www.anthropic.com/claude/haiku", + "provider": "anthropic" + }, + { + "model": "magistral-small-2509", + "name": "[Mistral] Magistral Small 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-small-1-2-25-09", + "provider": "mistral" + }, + { + "model": "magistral-medium-2509", + "name": "[Mistral] Magistral Medium 1.2 (25.09)", + "model_card": "https://docs.mistral.ai/models/magistral-medium-1-2-25-09", + "provider": "mistral" + }, + { + "model": "mistral-small-2506", + "name": "[Mistral] Mistral Small 3.2 (25.06)", + "model_card": "https://docs.mistral.ai/models/mistral-small-3-2-25-06", + "provider": "mistral" + }, + { + "model": "mistral-medium-2508", + "name": "[Mistral] Mistral Medium 3.1 
(25.08)", + "model_card": "mistral-medium-2508", + "provider": "mistral" + }, + { + "model": "mistral-large-2512", + "name": "[Mistral] Mistral Large 3 (25.12)", + "model_card": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "provider": "mistral" + }, + { + "model": "open-mistral-nemo-2407", + "name": "[Mistral] Mistral Nemo 12B", + "model_card": "https://docs.mistral.ai/models/mistral-nemo-12b-24-07", + "provider": "mistral" + }, + { + "model": "deepseek-chat", + "name": "[DeepSeek] DeepSeek latest (non-reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + }, + { + "model": "deepseek-reasoner", + "name": "[DeepSeek] DeepSeek latest (reasoning)", + "model_card": "https://api-docs.deepseek.com/quick_start/pricing", + "provider": "deepseek" + } +] \ No newline at end of file diff --git a/common/lib/config_definition.py b/common/lib/config_definition.py index aef363e04..78cd1b7dc 100644 --- a/common/lib/config_definition.py +++ b/common/lib/config_definition.py @@ -575,53 +575,77 @@ # allows 4CAT LLM processors to connect to a local or remote LLM server "llm.intro": { "type": UserInput.OPTION_INFO, - "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as connect " - "to local or remote LLM servers. You can also set up your own LLM server using open source software such as " - "[Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for your users." 
- }, - "llm.host_name": { - "type": UserInput.OPTION_TEXT, - "default": "4CAT LLM Server", - "help": "Name of LLM Server in UI", - "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", - "global": True - }, - "llm.provider_type": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM Provider Type", - "default": "none", - "options": { - "ollama": "Ollama", - "none": "None", - }, - "global": True, - }, - "llm.server": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server URL", - "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", - "global": True - }, - "llm.auth_type": { - "type": UserInput.OPTION_TEXT, - "help": "LLM Server Authentication Type", - "default": "", - "tooltip": "The authentication type required to connect to the server (e.g. 'X-API-KEY', 'Authorization'). Passed in the request header with the API key.", + "help": "4CAT LLM processors allow users to utilize common APIs (e.g. OpenAI, Google, Anthropic) as well as " + "connect to local or remote LLM servers. You can also set up your own LLM server using open source " + "software such as [Ollama](https://ollama.com/) and connect 4CAT to it using the settings below for " + "your users. After configuring providers you can enable and disable available models via the 'LLMs & " + "Providers' page in the Control Panel." 
+ }, + "llm.providers": { + "type": UserInput.OPTION_MULTI_OPTION, + "default": [ + { + "name": "Third-party APIs (OpenAI, Google, Claude, Mistral, etc)", + "type": "api", + "url": "", + "auth_header": "", + "auth_key": "" + } + ], "global": True, - }, - "llm.api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LLM Server API Key", - "tooltip": "The API key to access the LLM server, if required.", - "global": True + "help": "LLM providers", + "options": { + "name": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Name of LLM Server in UI", + "tooltip": "The name that will be shown to users in the interface when selecting an LLM server (or API or custom).", + }, + "type": { + "type": UserInput.OPTION_CHOICE, + "help": "LLM Provider Type", + "default": "none", + "options": { + "ollama": "Ollama", + "litellm": "LiteLLM", + "openai-like": "OpenAI compatible API (LM Studio, vLLM, etc)", + "api": "Third-party models from OpenAI, Anthropic, Mistral, etc", + "none": "None", + }, + }, + "url": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "LLM Server URL", + "tooltip": "The URL of the LLM server, e.g. http://localhost:5000", + }, + "auth_header": { + "type": UserInput.OPTION_TEXT, + "help": "Authentication Header", + "default": "", + "tooltip": "The HTTP header used to authenticate with the server (e.g. 'X-API-KEY', 'Authorization'). Passed with the Authentication Key as value.", + }, + "auth_key": { + "type": UserInput.OPTION_TEXT, + "default": "", + "help": "Authentication Key", + "tooltip": "The API key to access the LLM server, if required.", + }, + } }, "llm.available_models": { "type": UserInput.OPTION_TEXT_JSON, "default": {}, "help": "Available LLM models", - "tooltip": "A JSON dictionary of available LLM models on the server. 4CAT will query the LLM server for available models periodically.", + "tooltip": "A JSON dictionary of available LLM models on the server. 
Refreshed daily by the OllamaManager worker.", + "indirect": True, + "global": True + }, + "llm.enabled_models": { + "type": UserInput.OPTION_TEXT_JSON, + "default": [], + "help": "Enabled LLM models", + "tooltip": "List of model keys enabled for use. Managed via the LLM Server settings panel.", "indirect": True, "global": True }, @@ -739,5 +763,5 @@ "proxies": "Proxied HTTP requests", "image-visuals": "Image visualization", "extensions": "Extensions", - "llm": "LLM Server Settings" + "llm": "LLM Providers" } diff --git a/common/lib/llm/__init__.py b/common/lib/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/common/lib/llm.py b/common/lib/llm/adapter.py similarity index 57% rename from common/lib/llm.py rename to common/lib/llm/adapter.py index 0901194d1..9fe80eb49 100644 --- a/common/lib/llm.py +++ b/common/lib/llm/adapter.py @@ -1,9 +1,10 @@ import json import base64 import mimetypes -import requests + from pathlib import Path from typing import List, Optional, Union + from pydantic import SecretStr from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel @@ -18,111 +19,86 @@ class LLMAdapter: def __init__( self, - provider: str, - model: str, + config, + model, api_key: Optional[str] = None, - base_url: Optional[str] = None, temperature: float = 0.1, max_tokens: int = 1000, client_kwargs: Optional[dict] = None, ): """ - provider: 'openai', 'google', 'mistral', 'ollama', 'lmstudio', 'anthropic', 'deepseek' - model: model name (e.g., 'gpt-4o-mini', 'claude-3-opus', 'mistral-small', etc.) 
- api_key: API key if required (OpenAI, Claude, Google, Mistral) - base_url: for local models or Mistral custom endpoints - temperature: temperature hyperparameter, - max_tokens: how many output tokens may be used - client_kwargs: additional client parameters + Instantiate an adapter to interface with an LLM model + + :param config: 4CAT config reader + :param model: Model metadata (as in `llm.available_models` 4CAT setting) + :param api_key: API key, if needed + :param temperature: Temperature hyperparameter + :param max_tokens: Max tokens to generate + :param client_kwargs: Optional parameters for the LLM adapter class """ - self.provider = provider.lower() + known_providers = {p['url']: p for p in config.get("llm.providers")} + self.model = model + self.provider = known_providers.get(model['provider']) self.api_key = api_key - self.base_url = base_url self.temperature = temperature self.structured_output = False self.parser = None self.max_tokens = max_tokens self.client_kwargs = dict(client_kwargs) if client_kwargs else {} + self.llm: BaseChatModel = self._load_llm() def _load_llm(self) -> BaseChatModel: - if self.provider == "openai": - kwargs = {} - if "o3" not in self.model: - kwargs["temperature"] = self.temperature # temperature not supported for all models - return ChatOpenAI( - model=self.model, - api_key=SecretStr(self.api_key), - base_url=self.base_url or "https://api.openai.com/v1", - max_tokens=self.max_tokens, - **kwargs - ) - elif self.provider == "google": - return ChatGoogleGenerativeAI( - model=self.model, - temperature=self.temperature, - google_api_key=self.api_key, - max_tokens=self.max_tokens - ) - elif self.provider == "anthropic": - return ChatAnthropic( - model_name=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - max_tokens=self.max_tokens, - timeout=100, - stop=None - ) - elif self.provider == "mistral": - return ChatMistralAI( - model_name=self.model, - temperature=self.temperature, - 
api_key=SecretStr(self.api_key), - base_url=self.base_url, # Optional override - max_tokens=self.max_tokens, - ) - elif self.provider == "deepseek": - return ChatDeepSeek( - model=self.model, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens if self.max_tokens <= 8192 else 8192, - ) - elif self.provider == "ollama": - ollama_adapter = ChatOllama( - model=self.model, - temperature=self.temperature, - base_url=self.base_url or "http://localhost:11434", - max_tokens=self.max_tokens, - client_kwargs=self.client_kwargs - ) - self.model = ollama_adapter.model - return ollama_adapter - elif self.provider in {"vllm", "lmstudio"}: - # OpenAI-compatible local servers - if self.provider == "lmstudio" and not self.api_key: - self.api_key = "lm-studio" - - # For vLLM, query the server to get the actual model name. We can't leave this empty, unfortunately. - if self.provider == "vllm" and self.model=="vllm_model": - model_name = self.get_vllm_model_name(self.base_url, self.api_key) - self.model = model_name - else: - model_name = self.model if self.model else "lmstudio-model" - - llm = ChatOpenAI( - model=model_name, - temperature=self.temperature, - api_key=SecretStr(self.api_key), - base_url=self.base_url, - max_tokens=self.max_tokens, - ) - self.model = llm.model_name - return llm + chat_params = { + "model": self.model["local_id"], + "api_key": SecretStr(self.api_key), + "base_url": self.provider["url"], + "max_tokens": self.max_tokens, + "temperature": self.temperature, + } + + if self.provider["type"] == "openai": + if "o3" in self.model: + del chat_params["temperature"] + adapter_class = ChatOpenAI + + elif self.provider["type"] == "google": + adapter_class = ChatGoogleGenerativeAI + + elif self.provider["type"] == "anthropic": + chat_params.update({"timeout": 100, "stop": None}) + adapter_class = ChatAnthropic + + elif self.provider["type"] == "mistral": + adapter_class = ChatMistralAI + + elif 
self.provider["type"] == "deepseek": + chat_params["max_tokens"] = min(self.max_tokens, 8192) + adapter_class = ChatDeepSeek + + elif self.provider["type"] == "ollama": + adapter_class = ChatOllama + chat_params.update({"client_kwargs": self.client_kwargs}) + + elif self.provider["type"] in {"litellm", "openai-like"}: + url = f"{self.provider['url']}/" if not self.provider["url"].endswith("/") else self.provider['url'] + url += "v1/" if not url.endswith("v1/") else "" + + chat_params.update({"base_url": url}) + if self.provider["auth_header"]: + chat_params.update({ + "default_headers": { + self.provider["auth_header"]: self.provider["auth_key"] + } + }) + + adapter_class = ChatOpenAI + else: - raise ValueError(f"Unsupported LLM provider: {self.provider}") + raise ValueError(f"{self.__class__.__name__} Unsupported LLM provider type: {self.provider['type']}") + + return adapter_class(**chat_params) def generate_text( self, @@ -161,7 +137,8 @@ def generate_text( lc_messages = messages kwargs = {"temperature": temperature} - if self.provider in ("google", "ollama") or "o3" in self.model or "gpt-5" in self.model: + if self.provider["type"] in ("google", "ollama") or "o3" in self.model["local_id"] or "gpt-5" in self.model[ + "local_id"]: kwargs = {} try: @@ -172,10 +149,10 @@ def generate_text( return response def create_multimodal_content( - self, - text: str, - media_urls: Optional[List[str]] = None, - media_files: Optional[List[Union[str, Path]]] = None, + self, + text: str, + media_urls: Optional[List[str]] = None, + media_files: Optional[List[Union[str, Path]]] = None, ) -> List[dict]: """ Create multimodal content structure for LangChain messages with media URLs @@ -224,11 +201,11 @@ def create_multimodal_content( return content def _format_media_block( - self, - url: Optional[str] = None, - b64_data: Optional[str] = None, - mime_type: str = "image/jpeg", - media_category: str = "image", + self, + url: Optional[str] = None, + b64_data: Optional[str] = None, + 
mime_type: str = "image/jpeg", + media_category: str = "image", ) -> dict: """ Format a single media block for the appropriate provider. @@ -304,31 +281,6 @@ def set_structure(self, json_schema): self.llm = self.llm.with_structured_output(json_schema) self.structured_output = True - @staticmethod - def get_model_options(config) -> dict: - """ - Returns model choice options for UserInput - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - options = {model_id: model_values["name"] for model_id, model_values in models.items()} - return options - - @staticmethod - def get_model_providers(config) -> dict: - """ - Returns available model providers through APIs - """ - models = LLMAdapter.get_models(config) - if not models: - return {} - providers = list(set([model_values.get("provider", "") for model_values in models.values()])) - if not providers: - return {} - options = {provider: provider.capitalize() for provider in providers if provider} - return options - @staticmethod def get_models(config) -> dict: """ @@ -337,36 +289,6 @@ def get_models(config) -> dict: :returns dict, A dict with model IDs as keys and details as values """ - with ( - config.get("PATH_ROOT") - .joinpath("common/assets/llms.json") - .open() as available_models - ): - available_models = json.loads(available_models.read()) - return available_models - - - @staticmethod - def get_vllm_model_name(base_url: str, api_key: str = None) -> str: - """ - Query vLLM server to get the name of the served model. 
- """ - - try: - # vLLM exposes available models at /v1/models endpoint - models_url = f"{base_url.rstrip('/')}/models" - headers = {} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" - - response = requests.get(models_url, headers=headers, timeout=10) - response.raise_for_status() - models_data = response.json() - - # Get the first available model - if models_data.get("data") and len(models_data["data"]) > 0: - return models_data["data"][0]["id"] - else: - raise ValueError("No models found on vLLM server") - except Exception as e: - raise ValueError(f"Could not retrieve model name from vLLM server: {e}") + available_models = config.get("llm.available_models", {}) + enabled_models = config.get("llm.enabled_models", {}) + return {k: v for k, v in available_models.items() if k in enabled_models} diff --git a/common/lib/llm/clients/__init__.py b/common/lib/llm/clients/__init__.py new file mode 100644 index 000000000..4287ca861 --- /dev/null +++ b/common/lib/llm/clients/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/common/lib/llm/clients/litellm_client.py b/common/lib/llm/clients/litellm_client.py new file mode 100644 index 000000000..cf65497ff --- /dev/null +++ b/common/lib/llm/clients/litellm_client.py @@ -0,0 +1,60 @@ +""" +Centralized HTTP client for communicating with a LiteLLM server. + +This class owns all direct HTTP calls to LiteLLM's REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. + +This class is primarily intended for interfacing with LiteLLM, but since +LiteLLM itself is mostly OpenAI API-compatible, this can be used to interface +with the OpenAI API as well. 
+""" +from common.lib.llm.llm_client import LLMProviderClient + +class LiteLLMClient(LLMProviderClient): + type = "litellm" + + _models_info_path = "/model/info" + _models_info_key = "data" + _model_id_key = "model_name" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + if meta is None or not meta.get("model_info"): + return [] + + media_types = {"text"} # far as I can tell, text is always supported + if meta["model_info"].get("supports_vision"): + media_types.add("image") + + if meta["model_info"].get("supports_audio_input"): + media_types.add("audio") + + # no way to tell if model supports embeddings input as far as I can see... + + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``). + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + model_name = self.get_global_model_id(meta) + + if meta.get("model_name"): + model_name = meta["model_name"] + + if meta["litellm_params"].get("model"): + model_name = "/".join(meta["litellm_params"].get("model").split("/")[1:]) + + return model_name \ No newline at end of file diff --git a/common/lib/llm/clients/ollama_client.py b/common/lib/llm/clients/ollama_client.py new file mode 100644 index 000000000..e21297448 --- /dev/null +++ b/common/lib/llm/clients/ollama_client.py @@ -0,0 +1,182 @@ +""" +Centralized HTTP client for communicating with an Ollama server. + +This class owns all direct HTTP calls to Ollama's REST API and provides shared static +helpers for capability parsing, display-name formatting, and building canonical +llm.available_models entries. 
It is a plain helper with no 4CAT base-class dependency. +""" +import requests + +from common.lib.llm.llm_client import LLMProviderClient + + +class OllamaClient(LLMProviderClient): + type = "ollama" + + _models_info_path = "/api/tags" + _models_info_key = "models" + _model_id_key = "model" + + def list_models(self) -> list[dict]: + """ + List all models available. + + For Ollama, get some additional model info via an extra API request. + + :return list[dict]: List of models available.: + """ + models = super().list_models() + result = [] + for model in models: + try: + model_info = self._session.post( + f"{self.base_url}/api/show", + json={"model": model[self._model_id_key]}, + headers=self._headers, + timeout=self.timeout, + ).json() + result.append({**model, "metadata": model_info}) + except (requests.exceptions.HTTPError, KeyError) as e: + self.log.warning( + f"{self.__class__.__name__}: failed to fetch additional model info for model {model[self._model_id_key]}: {e}") + + return result + + + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). 
+ """ + if meta is None or not meta.get("metadata"): + return [] + + capabilities = meta["metadata"].get("capabilities") or [] + media_types: list[str] = [] + + _cap_map = { + "completion": "text", + "vision": "image", + "embedding": "embedding", + } + for cap in capabilities: + mapped = _cap_map.get(cap) + if mapped and mapped not in media_types: + media_types.append(mapped) + + # Fallback: GGUF-level vision signals (from the /api/show payload under "metadata") when capabilities list gives no image info + if "image" not in media_types: + details = meta["metadata"].get("details") or meta.get("details") or {} + model_info = meta["metadata"].get("model_info") or {} + projector_info = meta["metadata"].get("projector_info") + + has_clip_family = "clip" in (details.get("families") or []) + has_vision_keys = any(k.startswith("vision.") or ".vision." in k for k in model_info) # keys are usually "<arch>.vision.*" + has_projector = bool(projector_info) + + if has_clip_family or has_vision_keys or has_projector: + media_types.append("image") + + return media_types + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``).
+ """ + model_name = self.get_model_id(meta) + + extra_bits = [] + if meta.get("metadata") and meta["metadata"].get("model_info"): + more_meta = meta["metadata"]["model_info"] + if more_meta.get("general.basename"): + model_name = more_meta["general.basename"] + + if more_meta.get("general.finetune"): + extra_bits.append(more_meta["general.finetune"]) + + if more_meta.get("general.size_label"): + extra_bits.append(more_meta["general.size_label"]) + + elif meta.get("details") and meta["details"].get("parameter_size"): + extra_bits.append(f"{meta['details']['parameter_size']} parameters") + + model_name += f" ({', '.join(extra_bits)})" + + return model_name + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return f"https://ollama.com/library/{meta['model']}" + + def pull_model(self, model_id: str, stream: bool = False) -> bool: + """Pull a model from the Ollama registry. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :param stream: Whether to stream the response (default ``False``). + :returns: ``True`` on success, ``False`` on failure. + """ + try: + r = self._session.post( + f"{self.base_url}/api/pull", + headers=self._headers, + json={"model": model_id, "stream": stream}, + timeout=600, + ) + + if r.status_code != 200 and self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + + return r.status_code == 200 + + except requests.RequestException as e: + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to pull model {model_id} from {self.base_url}: {e}") + + return False + + def delete_model(self, model_id: str) -> bool: + """Delete a model from the Ollama server. + + :param model_id: Model name (e.g. ``"llama3:8b"``). + :returns: ``True`` on success, ``False`` on failure. 
+ """ + try: + r = self._session.delete( + f"{self.base_url}/api/delete", + headers=self._headers, + json={"model": model_id}, + timeout=30, + ) + if r.status_code != 200 and self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}, status code {r.status_code}: {r.text}") + return r.status_code == 200 + except requests.RequestException as e: + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to delete model {model_id} from {self.base_url}: {e}") + return False diff --git a/common/lib/llm/clients/openai_client.py b/common/lib/llm/clients/openai_client.py new file mode 100644 index 000000000..f8701dd7c --- /dev/null +++ b/common/lib/llm/clients/openai_client.py @@ -0,0 +1,61 @@ +""" +Centralized HTTP client for communicating with an OpenAI compatible server. + +This class owns all direct HTTP calls to an OpenAI style REST API and provides shared +static helpers for capability parsing, display-name formatting, and building +canonical llm.available_models entries. It is a plain helper with no 4CAT +base-class dependency. +""" +from common.lib.llm.llm_client import LLMProviderClient + + +class LMStudioClient(LLMProviderClient): + type = "openai-like" + + _models_info_path = "/api/v1/models" + _models_info_key = "models" + _model_id_key = "key" + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its LiteLLM metadata. + + :param meta: ``model info`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` + """ + media_types = {"text"} # far as I can tell, text is always supported + + if meta is None or not meta.get("capabilities"): + return list(media_types) + + if meta["capabilities"].get("vision"): + media_types.add("image") + + # no way to tell if model supports embeddings input as far as I can see... 
+ + return list(media_types) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param meta: Model entry from the provider's model list; ``display_name``, + ``publisher`` and ``params_string`` are used when present. + :returns: Human-readable display name string. + """ + model_name = self.get_model_id(meta) + + if meta.get("display_name"): + model_name = meta["display_name"] + + extra_bits = [] + if meta.get("publisher"): + extra_bits.append(meta["publisher"]) + + if meta.get("params_string"): + extra_bits.append(meta["params_string"]) + + model_name += f" ({', '.join(extra_bits)})" if extra_bits else "" # no dangling " ()" when there are no details + + return model_name diff --git a/common/lib/llm/clients/thirdparty_client.py b/common/lib/llm/clients/thirdparty_client.py new file mode 100644 index 000000000..e1df93d45 --- /dev/null +++ b/common/lib/llm/clients/thirdparty_client.py @@ -0,0 +1,66 @@ +""" +Fake 'client' to read from local store of known 3rd party, API-based LLMs that +can be used with 4CAT +""" +import json + +from common.lib.llm.llm_client import LLMProviderClient + + +class ThirdPartyClient(LLMProviderClient): + type = "api" + + _models_info_key = "models" + _model_id_key = "model" + + def get_status(self): + return 200 + + def list_models(self) -> dict: + with self.config.get("PATH_ROOT").joinpath("common/assets/llms.json").open() as infile: + models = json.load(infile) + + return models + + def parse_supported_media_types(self, meta: dict) -> list[str]: + """ + Derive the media types a model supports from its stored model entry. + + :param meta: Model entry dict. + :returns: The entry's ``supported_media_types`` list, + or ``["text"]`` when the entry does not define one. + """ + return meta.get("supported_media_types", ["text"]) + + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param model_id: Raw Ollama model identifier (e.g. ``"llama3:8b"``).
+ :param meta: ``/api/show`` response dict, or ``None``. + :returns: Human-readable display name string. + """ + return meta["name"] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. + """ + entry = super().build_model_entry(meta) + entry["provider"] = meta["provider"] + + return entry + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return meta["model_card"] if meta["model_card"] else "" diff --git a/common/lib/llm/llm_client.py b/common/lib/llm/llm_client.py new file mode 100644 index 000000000..b59d38ab0 --- /dev/null +++ b/common/lib/llm/llm_client.py @@ -0,0 +1,196 @@ +""" +Centralized HTTP client for communicating with an LLM provider. + +This class owns all direct HTTP calls to the provider's REST API and provides +shared static helpers for capability parsing, display-name formatting, and +building canonical llm.available_models entries. It is a plain helper with no +4CAT base-class dependency. +""" + +from abc import abstractmethod + +import requests + + +class LLMProviderClient: + _headers = {} + _meta = {} + + @staticmethod + def get_client(config, provider_config: dict) -> "LLMProviderClient": + """ + Get a client for an LLM provider + + Returns the appropriate sub-class depending on the provider type. 
+ + :param config: 4CAT config reader + :param dict provider_config: Provider parameters, as configured in + 4CAT + :return LLMProviderClient: Client for ``provider_config["type"]``; raises ``ValueError`` if that type is unknown + """ + # in-line import because we otherwise get circular import shenanigans + from common.lib.llm.clients.ollama_client import OllamaClient + from common.lib.llm.clients.litellm_client import LiteLLMClient + from common.lib.llm.clients.openai_client import LMStudioClient + from common.lib.llm.clients.thirdparty_client import ThirdPartyClient + + for client_type in (OllamaClient, LiteLLMClient, LMStudioClient, ThirdPartyClient): + if client_type.type == provider_config["type"]: + return client_type(config, provider_config) + + raise ValueError(f"LLMProviderClient: Unknown provider type {provider_config['type']}") + + def __init__(self, config, provider_config: dict, timeout: int = 10, log=None) -> None: + """ + HTTP client for an LLM Provider + + :param dict provider_config: Provider parameters, as configured in 4CAT (``url`` required; ``auth_header``/``auth_key`` optional) + :param int timeout: Default request timeout in seconds. + :param Logger log: 4CAT log handler + """ + self.config = config + + self._meta = provider_config + + self.timeout = timeout + self.auth_type = provider_config.get("auth_header") + self.auth_key = provider_config.get("auth_key") + self.timeout = timeout + + self.base_url = provider_config["url"].rstrip("/") + if self.base_url.endswith("/v1"): + # get rid of the '/v1' suffix, slash included, so paths do not start with '//' - we'll add this in the path + self.base_url = self.base_url[:-3] + + self._session = requests.Session() + self._headers = {"Content-Type": "application/json"} + + if self.auth_type: + self._headers[self.auth_type] = self.auth_key + + self.log = log + + def get_status(self) -> bool | int: + """ + Check if the server is reachable and responding to requests + + :return: `False` if the server is not responding, or an HTTP status code.
+ """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if self.log and r.status_code != 200: + self.log.warning( + f"{self.__class__.__name__}: server responded with status code {r.status_code} during availability check: {r.text}") + return r.status_code + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: server is not available at {self.base_url}: {e}") + return False + + def list_models(self) -> list[dict]: + """List available models from the Ollama server. + + :returns: List of model dicts, or ``[]`` on failure. + """ + try: + r = self._session.get( + f"{self.base_url}{self._models_info_path}", + headers=self._headers, + timeout=self.timeout, + ) + if r.status_code == 200: + return r.json().get(self._models_info_key, []) + if self.log: + self.log.warning( + f"{self.__class__.__name__}: failed to list models from {self.base_url}, status code {r.status_code}: {r.text}") + except requests.RequestException as e: + if self.log: + self.log.warning(f"{self.__class__.__name__}: failed to list models from {self.base_url}: {e}") + return [] + + def build_model_entry(self, meta: dict) -> dict: + """ + Build a canonical ``llm.available_models`` entry for a model. + + :param model_id: Raw model identifier. + :param display_name: Human-readable name (from ``format_display_name``). + :param meta: ``/api/show`` response dict, or ``None`` if unavailable. + :returns: Dict ready to store under ``llm.available_models[model_id]``. 
+ """ + return { + "id": self.get_global_model_id(meta), + "local_id": self.get_model_id(meta), + "name": self.format_display_name(meta), + "model_card": self.get_model_card_url(meta), + "provider_type": self._meta["type"], + "provider": self._meta["url"], + "supported_media_types": self.parse_supported_media_types(meta), + "metadata": meta, + } + + def get_model_card_url(self, meta: dict) -> str: + """ + Get a URL for a model card for a given model + + :param meta: Model metadata + :return str: Model card URL (empty string if unavailable) + """ + return "" + + @abstractmethod + def parse_supported_media_types(self, meta: dict) -> list[str]: + """Derive the media types a model supports from its Ollama metadata. + + **Primary path**: reads ``meta["capabilities"]``: + - ``"completion"`` → ``"text"`` + - ``"vision"`` → ``"image"`` + - ``"embedding"`` → ``"embedding"`` + + **Fallback path** (used when capabilities are absent or only yield ``"text"``): + inspects GGUF ``model_info`` / ``details`` for vision signals and adds + ``"image"`` if any are found. + + :param meta: ``/api/show`` response dict, or ``None``. + :returns: Ordered list of supported media type strings. + Returns ``[]`` when ``meta`` is ``None`` (unknown — callers + should include the model, not block it). + """ + pass + + @abstractmethod + def format_display_name(self, meta: dict) -> str: + """ + Build a human-readable display name for a model. + + :param dict meta: Model metadata + :returns str: Human-readable display name string. + """ + pass + + def get_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. + + This is the ID within the provider context, i.e. it is not guaranteed + to be globally unique (use `get_global_model_id()` instead). + + :param dict meta: Model metadata + :return str: Model ID + """ + return meta[self._model_id_key] + + def get_global_model_id(self, meta: dict) -> str: + """ + Choose a model identifier based on model metadata. 
+ + This needs to be a *globally* unique ID, i.e. if multiple providers + provide the same model, the ID should still be unique per provider. + + :param dict meta: Model metadata + :return str: Model ID + """ + return "-".join((self._meta["type"], self._meta["url"], self.get_model_id(meta))) \ No newline at end of file diff --git a/common/lib/user_input.py b/common/lib/user_input.py index 7fcb6bcb9..16a583d74 100644 --- a/common/lib/user_input.py +++ b/common/lib/user_input.py @@ -26,6 +26,7 @@ class UserInput: OPTION_TEXT = "string" # simple string or integer (input text) OPTION_MULTI = "multi" # multiple values out of a list (select multiple) OPTION_MULTI_SELECT = "multi_select" # multiple values out of a dropdown list (select multiple) + OPTION_MULTI_OPTION = "multi_option" # several instances of a collection of controls OPTION_INFO = "info" # just a bit of text, not actual input OPTION_TEXT_LARGE = "textarea" # longer text OPTION_TEXT_JSON = "json" # text, but should be valid JSON @@ -70,6 +71,8 @@ def parse_all(options, input, silently_correct=True): if type(input) is not dict and type(input) is not ImmutableMultiDict: raise TypeError("input must be a dictionary or ImmutableMultiDict") + print(input) + if type(input) is ImmutableMultiDict: # we are not using to_dict, because that messes up multi-selects input = {key: input.getlist(key) for key in input} @@ -181,6 +184,41 @@ def parse_all(options, input, silently_correct=True): parsed_input[option] = table_input + elif settings.get("type") == UserInput.OPTION_MULTI_OPTION: + # these are collections of other input options that can be + # repeated an arbitrary amount of times and are saved as a + # list of these values + # i.e. forms within forms!!! 
+ item_options = settings["options"] + input_items = {} + for key, value in input.items(): + if key_match := re.match(f"{option}-([0-9]+)-(.+)", key): + input_index = int(key_match[1]) + # note: the index is just used to match inputs to items + # it is not used for ordering + option_item = key_match[2] + if option_item not in item_options: + continue + + if input_index not in input_items: + input_items[input_index] = {} + + print(key, value) + input_items[input_index][option_item] = UserInput.parse_value(item_options[option_item], value, input_items[input_index], silently_correct) + + # discard items that are only default values + parsed_input[option] = [] + for input_index, item in input_items.items(): + only_default = True + for key, value in item.items(): + if value != item_options[key]["default"]: + only_default = False + + if not only_default: + parsed_input[option].append(item) + + print(parsed_input[option]) + elif option not in input: # not provided? use default parsed_input[option] = settings.get("default", None) diff --git a/docker-compose_ollama.yml b/docker-compose_ollama.yml new file mode 100644 index 000000000..020b12c96 --- /dev/null +++ b/docker-compose_ollama.yml @@ -0,0 +1,54 @@ +# Use this file as an override to add a local Ollama instance to your 4CAT stack. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +# +# Once running, configure 4CAT via the Control Panel → Settings → LLM: +# LLM Provider Type : ollama +# LLM Server URL : http://ollama:11434 +# +# GPU support (NVIDIA): +# Uncomment the `deploy.resources` block in the ollama service below and +# ensure the NVIDIA Container Toolkit is installed on your host. +# See: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +# +# GPU support (Apple Silicon / AMD): +# Pass the appropriate device through your host's Docker settings instead. 
+# Ollama will automatically detect the GPU when it is available inside the container. + +services: + ollama: + image: ollama/ollama:latest + container_name: 4cat_ollama + restart: unless-stopped + volumes: + - 4cat_ollama:/root/.ollama + # Expose the Ollama API on the host for optional external access or + # management with the Ollama CLI. Remove this block if you want to keep + # Ollama accessible only within the Docker network. + ports: + - "127.0.0.1:11434:11434" + healthcheck: + test: ["CMD", "ollama", "ls"] + interval: 10s + timeout: 5s + retries: 5 + # --- NVIDIA GPU support (uncomment to enable) --- + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # Make the 4CAT backend wait for Ollama to be healthy before starting. + # This prevents initial model-refresh failures on first boot. + backend: + depends_on: + ollama: + condition: service_healthy + +volumes: + 4cat_ollama: + name: 4cat_ollama_data diff --git a/docker/README.md b/docker/README.md index 00f0862fc..31843b2ce 100644 --- a/docker/README.md +++ b/docker/README.md @@ -42,3 +42,76 @@ https://github.com/docker/buildx/issues/426 https://stackoverflow.com/questions/64221861/failed-to-resolve-with-frontend-dockerfile-v0 4. More errors coming soon! (No doubt) + +--- + +## Running a local Ollama instance alongside 4CAT + +4CAT can use a local [Ollama](https://ollama.com) server for LLM-powered processors. +A Docker Compose override file (`docker-compose_ollama.yml`) is included to add +Ollama as a sidecar service so you do not need to run it separately on the host. + +### Quick start + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +This starts the standard 4CAT stack plus an `ollama` container that is only +accessible within the Docker network (and optionally on `localhost:11434` on +the host via the exposed port). 
+ +### Configuring 4CAT to use Ollama + +#### Automatic configuration (fresh Docker install with sidecar) + +When you start 4CAT for the first time using the Ollama override file, the +`docker_setup.py` initialisation script automatically detects the `ollama` +sidecar and sets **LLM Provider Type**, **LLM Server URL**, and **LLM Access** +for you. You can skip to step 2 below. + +#### Manual configuration (or to verify/change settings) + +1. Log in as admin and open **Control Panel → Settings**. +2. Confirm or set the following LLM fields: + + | Setting | Value | + |---|---| + | LLM Provider Type | `ollama` | + | LLM Server URL | `http://ollama:11434` | + | LLM Access | enabled | + +3. Save settings. +4. Open **Control Panel → LLM Server** (visible once *LLM Access* is enabled). +5. Use the **Refresh** button to load available models, then **Pull** a model + (e.g. `llama3.2:3b`) to download it from the Ollama library. +6. Enable the models you want to make available to users. + +### GPU support (NVIDIA) + +Uncomment the `deploy.resources` block in `docker-compose_ollama.yml` and +ensure the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +is installed on your host. Then restart the stack with the override: + +```bash +docker compose -f docker-compose.yml -f docker-compose_ollama.yml up -d +``` + +### Persisting models + +Models downloaded by Ollama are stored in the `4cat_ollama_data` Docker volume. +They survive container restarts and re-creations unless you explicitly remove +the volume (`docker volume rm 4cat_ollama_data`). + +### Using an external Ollama server + +If you already run Ollama on the host or elsewhere, skip the override file and +point 4CAT directly at that server: + +- **On the same host**: use `http://host.docker.internal:11434` as the LLM Server URL. 
+- **Remote server**: use the server's reachable URL and configure any required + API key in the *LLM Server API Key* and *LLM Server Authentication Type* settings. + +In both cases, configure the LLM settings manually via **Control Panel → Settings** +(see *Manual configuration* above), using the appropriate server URL instead of +`http://ollama:11434`. diff --git a/docker/docker_setup.py b/docker/docker_setup.py index 450684602..aea641c12 100644 --- a/docker/docker_setup.py +++ b/docker/docker_setup.py @@ -207,6 +207,35 @@ def _format_host(host: str) -> str: f"docker exec 4cat_backend python -c \"from common.config_manager import ConfigManager;config=ConfigManager();config.with_db();config.set('flask.server_name', '{formatted_host}:{public_port}');config.db.commit();\"" ) + # If an Ollama container is available on the Docker network, configure 4CAT to use it. + ollama_url = 'http://ollama:11434' + try: + import requests + try: + resp = requests.get(f"{ollama_url}/api/tags", timeout=2) + if resp.status_code == 200: + current_llm_server = config.get("llm.server") + if current_llm_server == ollama_url: + print("Ollama server already configured in 4CAT settings.") + elif current_llm_server and current_llm_server != ollama_url: + # Previously configured LLM server is different; log a warning but do not overwrite user settings + print(f"Warning: Detected Ollama server at {ollama_url} but llm.server is set to {current_llm_server}. To use the Ollama server, update the llm.server setting to {ollama_url} in the 4CAT Control Panel.") + else: + # set basic LLM settings so the initial admin user does not need to + # configure them manually for local development environments that + # include the Ollama sidecar. 
+ config.set('llm.provider_type', 'ollama') + config.set('llm.server', ollama_url) + config.set('llm.access', True) + config.db.commit() + print('Detected Ollama on Docker network; configured LLM settings to use it.') + except requests.RequestException: + # Ollama not available; do nothing + pass + except Exception: + # requests other error; skip automatic Ollama configuration + pass + print(f"\nStarting app\n" f"4CAT is accessible at:\n" f"{'https' if config.get('flask.https', False) else 'http'}://{config.get('flask.server_name')}\n") diff --git a/extensions b/extensions new file mode 120000 index 000000000..c25d13e68 --- /dev/null +++ b/extensions @@ -0,0 +1 @@ +/Users/stijn/surfdrive/PycharmProjects/4cat/config/extensions \ No newline at end of file diff --git a/processors/machine_learning/llm_prompter.py b/processors/machine_learning/llm_prompter.py index c2bd0d02e..7026aa6c3 100644 --- a/processors/machine_learning/llm_prompter.py +++ b/processors/machine_learning/llm_prompter.py @@ -16,7 +16,7 @@ from common.lib.item_mapping import MappedItem from common.lib.exceptions import ProcessorInterruptedException, QueryParametersException, QueryNeedsExplicitConfirmationException from common.lib.helpers import UserInput, nthify, andify, remove_nuls, flatten_dict -from common.lib.llm import LLMAdapter +from common.lib.llm.adapter import LLMAdapter from backend.lib.processor import BasicProcessor class LLMPrompter(BasicProcessor): @@ -55,46 +55,39 @@ def get_queue_id(cls, remote_id, details, dataset) -> str: local_queue = "local_models" if not dataset: return local_queue + + model = dataset.parameters.get("model") + if model.startswith("api"): + # API-based models have their own queue - no local resources being + # used so can be concurrent + return f"llm-api-{dataset.key}" else: - if dataset.parameters.get('api_or_local', 'api') in ["local", "hosted"]: - # Hosted models also go in the local queue since they use the same shared LLM server - return local_queue - - # 
Queue per model/API type - return f"{cls.type}-{dataset.parameters.get('api_or_local', 'api')}-{dataset.parameters.get('api_model', 'none')}" + # use the model URL as the queue ID (extracted from the model + # global ID) + # this is not fool-proof, but does mean not more than one dataset + # runs per API server - in the scenario of these running locally, + # it means things do not run concurrently (which is good) + return f"llm-local-{dataset.parameters.get('model').split('-')[1]}" @classmethod def get_options(cls, parent_dataset=None, config=None) -> dict: # Check if 4CAT wide LLM server is available - if config.get("llm.access", False) and config.get("llm.server", ""): - shared_llm_name = config.get("llm.host_name", "4CAT LLM Server") - shared_llm_models = {model: model_metadata.get("name") for model, model_metadata in config.get("llm.available_models", {}).items()} - shared_llm_default = list(shared_llm_models.keys())[0] if shared_llm_models else "" - else: - shared_llm_name = False - shared_llm_default = "" - shared_llm_models = {} + available_models = config.get("llm.available_models", []) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} # Determine if the parent dataset is a media archive (zip with images/video/audio) is_media_parent = False media_type = "media" - hosted_and_local_available = True if parent_dataset: parent_extension = parent_dataset.get_extension() parent_media_type = parent_dataset.get_media_type() if parent_extension == "zip" and parent_media_type in ("image", "video", "audio"): is_media_parent = True media_type = parent_media_type - if parent_media_type in ("video", "audio"): - # Ollama and LM Studio currently only support text and image - hosted_and_local_available = False - - # Add additional sources for LLM Models - 
api_or_local_options = {"api": "API"} - if hosted_and_local_available: - api_or_local_options["local"] = "Local" - if shared_llm_name: - api_or_local_options["hosted"] = shared_llm_name options = { "ethics_warning1": { @@ -102,21 +95,14 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "Always test your prompt on a sample of rows, for instance by first using the " "Random filter processor.", }, - "api_or_local": { - "type": UserInput.OPTION_CHOICE, - "help": "Local or API", - "options": api_or_local_options, - "default": "api" if not shared_llm_name else "hosted", - "tooltip": "You can use 'local' models through Ollama and LM Studio as long as you have a valid " - "and accessible URL through which the model can be reached.", - }, - "api_model": { + "model": { "type": UserInput.OPTION_CHOICE, "help": "API model", - "options": LLMAdapter.get_model_options(config), + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, "default": "none", "tooltip": "Select from the predefined model list or insert manually", - "requires": "api_or_local==api", }, "api_key": { "type": UserInput.OPTION_TEXT, @@ -124,282 +110,194 @@ def get_options(cls, parent_dataset=None, config=None) -> dict: "help": "API key", "tooltip": "Create an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "api_or_local==api", + "requires": "api_model^=api", "sensitive": True, - }, - "api_custom_model_provider": { - "type": UserInput.OPTION_CHOICE, - "help": "Model provider", - "requires": "api_model==custom", - "options": LLMAdapter.get_model_providers(config), - "tooltip": "API provider. Currently limited to this list.", - }, - "api_custom_model_id": { - "type": UserInput.OPTION_TEXT, - "help": "Model ID", - "requires": "api_model==custom", - "tooltip": "E.g. 'mistral-small-2503'. Check the API provider's documentation on what model ID to use. 
" - "Fine-tuned models often require more info; OpenAI for instance requires the following " - "format: ft:[modelname]:[org_id]:[custom_suffix]:", - "default": "", - }, - "local_info": { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==local", - "help": "You can use local LLMs with LM Studio, Ollama, and vLLM. These applications need to be reachable by " - "this 4CAT server, e.g. by running them on the same machine. For LM Studio and vLLM, " - "use the Base URL to interface with any OpenAI-like API endpoint.", - }, - "local_provider": { - "type": UserInput.OPTION_CHOICE, - "requires": "api_or_local==local", - "options": { - "none": "", - "lmstudio": "LM Studio", - "ollama": "Ollama", - "vllm": "vLLM", - }, - "default": "none", - "help": "Local LLM provider", - }, - "lmstudio-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==lmstudio", - "help": "LM Studio is a desktop application to chat with LLMs, but that you can also run as a local " - "server. See [this link for intructions on how to run LM Studio as a server](https://lmstudio.ai/docs/" - "app/api). When the server is running, the endpoint is shown in the 'Developer' tab on the top " - "right (default: `http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` in Docker). " - "4CAT will use the top-most model you have loaded. ", - }, - "ollama-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "Ollama is a simple command-line application that lets you interface with a range of open-" - "source LLMs and that you can run as a local server. See [this link]" - "(https://github.com/ollama/ollama/blob/main/README.md#quickstart) for instructions.", - }, - "vllm-info": { - "type": UserInput.OPTION_INFO, - "requires": "local_provider==ollama", - "help": "[vLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart/) is a framework for Linux " - "systems capable of fast inference with a single LLM. 
Communication is done through an " - "OpenAI-like API endpoint. Just change the base URL below and insert an optional API key.", - }, - "local_base_url": { - "type": UserInput.OPTION_TEXT, - "requires": "api_or_local==local", - "default": "", - "help": "Base URL", - "tooltip": "[optional] Leaving this empty will use default values (`http://localhost:1234/v1` or `http://host.docker.internal:1234/v1` for LM " - "Studio, `http://localhost:11434` or `http://host.docker.internal:11434` for Ollama, `http://localhost:8000` or `http://host.docker.internal:8000` for vLLM ).", - }, - "lmstudio_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "LM Studio API key", - "tooltip": "[optional] Uses `lm-studio` by default.", - "requires": "local_provider==lmstudio", - "sensitive": True, - }, - "vllm_api_key": { - "type": UserInput.OPTION_TEXT, - "default": "", - "help": "vLLM API key", - "tooltip": "[optional] Empty by default.", - "requires": "local_provider==vllm", - "sensitive": True, - }, - "ollama_model": { - "type": UserInput.OPTION_TEXT, - "requires": "local_provider==ollama", - "default": "", - "help": "Ollama model name", - "tooltip": "[required] for example 'llama3.2'", - }, - "hosted_llm_model": { - "type": UserInput.OPTION_CHOICE, - "help": "LLM model", - "options": shared_llm_models, - "default": shared_llm_default, - "requires": "api_or_local==hosted", - }, + } } if is_media_parent: # Media-specific options: show info about media files being attached - options["media_info"] = { - "type": UserInput.OPTION_INFO, - "help": f"The parent dataset contains {media_type} files that will be sent " - f"to the LLM with each prompt. Make sure to use a model that supports " - f"{media_type} input (e.g. vision models for images).
" - f"Not all models support all media types. If the model cannot process " - f"{media_type} files, an error will be returned during processing.", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": f"Describe what the model should do with each {media_type} file. " - f"No column brackets needed — {media_type} files are attached automatically.", - "default": "", - } + options.update({ + "media_info": { + "type": UserInput.OPTION_INFO, + "help": f"The parent dataset contains {media_type} files that will be sent " + f"to the LLM with each prompt. Make sure to use a model that supports " + f"{media_type} input (e.g. vision models for images).
" + f"Not all models support all media types. If the model cannot process " + f"{media_type} files, an error will be returned during processing.", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": f"Describe what the model should do with each {media_type} file. " + f"No column brackets needed — {media_type} files are attached automatically.", + "default": "", + } + }) + else: - # Text-based dataset options: column brackets, media URL toggle, batching - options["prompt_info"] = { + options.update({ + # Text-based dataset options: column brackets, media URL toggle, batching + "prompt_info": { + "type": UserInput.OPTION_INFO, + "help": "How to prompt
" + "Use `[brackets]` with column names to insert dataset items in the prompt. You " + "can place column brackets in different parts of the prompt or use multiple column names within" + ' a single column bracket to merge items.
Example 1: "Describe the topic ' + 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' + "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " + 'to the following text: `[body]`"
Prompting is a delicate art. See ' + "processor references on best prompting practices.
For predefined research prompts, see " + "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " + "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" + "library).", + }, + "system_prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "System prompt", + "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " + "on the tone of the text. This processor may edit the system prompt to " + "ensure correct output. System prompts are included in the results file.", + "default": "", + }, + "prompt": { + "type": UserInput.OPTION_TEXT_LARGE, + "help": "User prompt", + "tooltip": "Use [brackets] with columns names.", + "default": "", + }, + "use_media": { + "type": UserInput.OPTION_TOGGLE, + "help": "Add images", + "tooltip": "Add media URLs for multi-modal processing. Requires a model that supports vision.", + "default": False, + }, + "media_columns": { + "type": UserInput.OPTION_TEXT, + "help": "Columns with image URL(s)", + "default": "", + "inline": True, + "tooltip": "Multiple columns can be selected.", + "requires": "use_media==true", + } + }) + + # Common options for both text and media datasets + options.update({ + "structured_output": { + "type": UserInput.OPTION_TOGGLE, + "help": "Output structured JSON", + "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " + "structured output.", + "default": False, + }, + "json_schema_info": { "type": UserInput.OPTION_INFO, - "help": "How to prompt
" - "Use `[brackets]` with column names to insert dataset items in the prompt. You " - "can place column brackets in different parts of the prompt or use multiple column names within" - ' a single column bracket to merge items.
Example 1: "Describe the topic ' - 'of this social media post in max. 3 words: `[body, tags]`"
Example 2: ' - "\"Given the following hashtags: `[tags]`, answer whether they are 'related' or 'unrelated' " - 'to the following text: `[body]`"
Prompting is a delicate art. See ' - "processor references on best prompting practices.
For predefined research prompts, see " - "e.g. [Prompt Compass](https://github.com/ErikBorra/PromptCompass/blob/main/prompts.json#L136) " - "or the [Anthropic Prompt Library](https://docs.anthropic.com/en/resources/prompt-library/" - "library).", - } - options["system_prompt"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "System prompt", - "tooltip": "[optional] A system prompt can be used to give the LLM general instructions, for instance " - "on the tone of the text. This processor may edit the system prompt to " - "ensure correct output. System prompts are included in the results file.", - "default": "", - } - options["prompt"] = { + "help": "Insert a JSON Schema for structured outputs. These define the output that " + "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" + "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" + "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", + "requires": "structured_output==true", + }, + "json_schema": { "type": UserInput.OPTION_TEXT_LARGE, - "help": "User prompt", - "tooltip": "Use [brackets] with columns names.", + "help": "JSON schema", + "tooltip": "[required] A JSON schema that the structured output will adhere to", + "requires": "structured_output==true", "default": "", - } - options["use_media"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Add images", - "tooltip": "Add media URLs for multi-modal processing. 
Requires a model that supports vision.", - "default": False, - } - options["media_columns"] = { + }, + "temperature": { "type": UserInput.OPTION_TEXT, - "help": "Columns with image URL(s)", - "default": "", - "inline": True, - "tooltip": "Multiple columns can be selected.", - "requires": "use_media==true", + "help": "Temperature", + "default": 0.1, + "coerce_type": float, + "max": 2.0, + "tooltip": "Temperature indicates how strict the model will gravitate towards the most " + "probable next token. A score close to 0 returns more predictable " + "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", } - - # Common options for both text and media datasets - options["structured_output"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Output structured JSON", - "tooltip": "Output in a JSON format instead of text. Note that your chosen model may not support " - "structured output.", - "default": False, - } - options["json_schema_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Insert a JSON Schema for structured outputs. These define the output that " - "the LLM will adhere to. [See instructions and examples on how to write a JSON Schema]" - "(https://json-schema.org/learn/miscellaneous-examples) and [OpenAI's documentation]" - "(https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-schemas).", - "requires": "structured_output==true", - } - options["json_schema"] = { - "type": UserInput.OPTION_TEXT_LARGE, - "help": "JSON schema", - "tooltip": "[required] A JSON schema that the structured output will adhere to", - "requires": "structured_output==true", - "default": "", - } - options["temperature"] = { - "type": UserInput.OPTION_TEXT, - "help": "Temperature", - "default": 0.1, - "coerce_type": float, - "max": 2.0, - "tooltip": "Temperature indicates how strict the model will gravitate towards the most " - "probable next token. 
A score close to 0 returns more predictable " - "outputs while a score close to 1 leads to more creative outputs. Not supported by all models.", - } + }) if not is_media_parent: - options["truncate_input"] = { + options.update({ + "truncate_input": { + "type": UserInput.OPTION_TEXT, + "help": "Max chars in input value", + "default": 0, + "coerce_type": int, + "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", + "requires": "use_media==false", + }, + "max_tokens": { + "type": UserInput.OPTION_TEXT, + "help": "Max output tokens", + "default": 10000, + "coerce_type": int, + "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " + "text for common English text. This includes tokens spent for reasoning.", + }, + "batches": { + "type": UserInput.OPTION_TEXT, + "help": "Items per prompt", + "coerce_type": int, + "default": 1, + "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " + "wherever the column brackets are used (e.g. '[body]').", + "requires": "use_media==false", + }, + "batch_info": { + "type": UserInput.OPTION_INFO, + "help": "Note on batching: Batching may increase speed but reduce accuracy. Models " + "need to support structured output for batching. This processor uses JSON schemas to ensure " + "symmetry between input and output lengths, but models may struggle to match input and output " + "values. Describe the dataset values in plurals in your prompt when batching. 
If you use " + "multiple column brackets in your prompt, rows with any empty values are skipped.", + "requires": "use_media==false", + } + }) + + options.update({ + "ethics_warning3": { + "type": UserInput.OPTION_INFO, + "requires": "api_or_local==api", + "help": "When using LLMs through commercial parties, always consider anonymising your data and " + "whether local open-source LLMs are also an option.", + }, + "save_annotations": { + "type": UserInput.OPTION_ANNOTATION, + "label": "prompt outputs", + "default": False, + }, + "hide_think": { + "type": UserInput.OPTION_TOGGLE, + "help": "Hide reasoning", + "default": False, + "tooltip": "Some models include reasoning in their output, between tags. This option " + "removes this tag and its contents from the output.", + }, + "limit": { "type": UserInput.OPTION_TEXT, - "help": "Max chars in input value", + "help": "Only annotate this many items, then stop", "default": 0, "coerce_type": int, - "tooltip": "This value determines how many characters an inserted dataset value may have. 0 = unlimited.", - "requires": "use_media==false", - } - - options["max_tokens"] = { - "type": UserInput.OPTION_TEXT, - "help": "Max output tokens", - "default": 10000, - "coerce_type": int, - "tooltip": "As a rule of thumb, one token generally corresponds to ~4 characters of " - "text for common English text. This includes tokens spent for reasoning.", - } - - if not is_media_parent: - options["batches"] = { + "min": 0, + "delegated": True, + }, + "annotation_label": { "type": UserInput.OPTION_TEXT, - "help": "Items per prompt", - "coerce_type": int, - "default": 1, - "tooltip": "How many dataset items to insert into the prompt. These will be inserted as a list " - "wherever the column brackets are used (e.g. '[body]').", - "requires": "use_media==false", - } - options["batch_info"] = { - "type": UserInput.OPTION_INFO, - "help": "Note on batching: Batching may increase speed but reduce accuracy. 
Models " - "need to support structured output for batching. This processor uses JSON schemas to ensure " - "symmetry between input and output lengths, but models may struggle to match input and output " - "values. Describe the dataset values in plurals in your prompt when batching. If you use " - "multiple column brackets in your prompt, rows with any empty values are skipped.", - "requires": "use_media==false", + "help": "Label for the annotations to add to the dataset", + "default": "", + "delegated": True, } - - options["ethics_warning3"] = { - "type": UserInput.OPTION_INFO, - "requires": "api_or_local==api", - "help": "When using LLMs through commercial parties, always consider anonymising your data and " - "whether local open-source LLMs are also an option.", - } - options["save_annotations"] = { - "type": UserInput.OPTION_ANNOTATION, - "label": "prompt outputs", - "default": False, - } - options["hide_think"] = { - "type": UserInput.OPTION_TOGGLE, - "help": "Hide reasoning", - "default": False, - "tooltip": "Some models include reasoning in their output, between tags. 
This option " - "removes this tag and its contents from the output.", - } - options["limit"] = { - "type": UserInput.OPTION_TEXT, - "help": "Only annotate this many items, then stop", - "default": 0, - "coerce_type": int, - "min": 0, - "delegated": True, - } - options["annotation_label"] = { - "type": UserInput.OPTION_TEXT, - "help": "Label for the annotations to add to the dataset", - "default": "", - "delegated": True, - } + }) # Get the media columns for the select media columns option if not is_media_parent and parent_dataset and parent_dataset.get_columns(): @@ -419,20 +317,15 @@ def is_compatible_with(cls, module=None, config=None): # Text-based datasets if module.get_extension() in ["csv", "ndjson"]: return True + # Media datasets (zip archives with images, video, or audio) if module.get_extension() == "zip" and module.get_media_type() in ("image", "video", "audio"): return True return False def process(self): - self.dataset.update_status("Validating settings") - api_model = self.parameters.get("api_model") - if api_model == "none": - api_model = "" - - modal_location = self.parameters.get("api_or_local", "api") hide_think = self.parameters.get("hide_think", False) # Check if the source dataset is a media archive (zip with images/video/audio) @@ -459,85 +352,31 @@ def process(self): # Set value for batch length in prompts batches = max(1, min(self.parameters.get("batches", 1), self.source_dataset.num_rows)) - use_batches = batches > 1 - if media_columns or is_media_archive: # no batching for media files - use_batches = False + use_batches = batches > 1 and not (media_columns or is_media_archive) # no batching for media files + if not use_batches: self.dataset.delete_parameter("batches") # Set all variables through which we can reach the LLM api_key = "" - base_url = None client_kwargs = {} - if modal_location == "local": - provider = self.parameters.get("local_provider", "") - base_url = self.parameters.get("local_base_url", "") + # load model and provider 
metadata + chosen_model_id = self.parameters.get("model") + available_models = {k: v for k, v in self.config.get("llm.available_models").items() if k in self.config.get("llm.enabled_models")} + if chosen_model_id not in available_models: + return self.dataset.finish_with_error(f"Model {chosen_model_id} not supported") - if not provider: - self.dataset.finish_with_error("Choose a local model provider") - return + model = available_models[chosen_model_id] - if provider == "lmstudio": - model = "lmstudio_model" - if not base_url: - base_url = "http://127.0.0.1:1234/v1" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:1234/v1" - if not self.parameters.get("lmstudio_api_key"): - api_key = "lm-studio" - elif provider == "ollama": - model = self.parameters.get("ollama_model", "") - if not model: - self.dataset.finish_with_error("You need to provide a model name for Ollama (e.g. 'llama3.2')") - return - if not base_url: - base_url = "http://localhost:11434" if not self.config.get("USING_DOCKER", False) else "http://host.docker.internal:11434" - elif provider == "vllm": - model = "vllm_model" - api_key = self.parameters.get("vllm_api_key", "") - if not base_url: - base_url = "http://localhost:8000/v1" - else: - self.dataset.finish_with_error("Local provider not supported, choose either lmstudio or ollama") - return + if model["provider_type"] == "api" and not api_key: + return self.dataset.finish_with_error(f"No API key provided for model {chosen_model_id}") - elif modal_location == "hosted": - base_url = self.config.get("llm.server", "") - provider = self.config.get("llm.provider_type", "none").lower() - api_key = self.config.get("llm.api_key", "") - llm_auth_type = self.config.get("llm.auth_type", "") - model = self.parameters.get("hosted_llm_model", "") - if api_key and llm_auth_type: - client_kwargs = { - "headers": { - llm_auth_type: api_key - } - } - if provider == "none" or not base_url: - self.dataset.finish_with_error("4CAT LLM server 
not properly configured; contact the administrator") - return - else: - if not api_model: - self.dataset.finish_with_error("Select an API model or insert one manually") - return - # Models can be set manually - if api_model == "custom": - model = self.parameters.get("api_custom_model_id", "") - provider = self.parameters.get("api_custom_model_provider", "") - if not model: - self.dataset.finish_with_error("You must provide a valid API model name/ID") - return - if not provider: - self.dataset.finish_with_error("You must provide a valid API model provider") - return - else: - model_info = LLMAdapter.get_models(self.config).get(api_model, {}) - provider = model_info.get("provider") - model = api_model + available_providers = {p["url"]: p for p in self.config.get("llm.providers")} + if model["provider"] not in available_providers: + return self.dataset.finish_with_error(f"Model provider {model['provider']} unknown") - api_key = self.parameters.get("api_key") or self.config.get(f"api.{provider}.api_key", "") - if not api_key: - self.dataset.finish_with_error("You need to provide a valid API key") - return + provider = available_providers[model["provider"]] # Prompt validation base_prompt = self.parameters.get("prompt", "") @@ -583,14 +422,13 @@ def process(self): # Start LLM self.dataset.update_status("Connecting to LLM provider") - base_url_str = "" if not base_url else f" at base URL '{base_url}'" - self.dataset.log(f"Using LLM provider '{provider}' with model '{model}'{base_url_str}") + base_url_str = "" if not provider["url"] else f" at base URL '{provider['url']}'" + self.dataset.log(f"Using LLM provider '{model['provider_type'] if provider['url'] else provider['provider']}' with model '{model['local_id']}'{base_url_str}") try: llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -788,7 +626,7 @@ def process(self): "prompt": 
prompt, "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": "", @@ -816,7 +654,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and ( + if model["provider"] == "google" and ( output_key.endswith(".signature") or output_key.endswith(".type") ): @@ -851,7 +689,7 @@ def process(self): self.dataset.update_progress(row / max_processed) # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) if limit_reached: @@ -966,10 +804,9 @@ def process(self): json_schema = self.get_json_schema_for_batch(n_batched, custom_schema=json_schema_original) # `llm` becomes a RunnableSequence when used, so we'll need to reset it here llm = LLMAdapter( - provider=provider, + config=self.config, model=model, api_key=api_key, - base_url=base_url, temperature=temperature, max_tokens=max_tokens, client_kwargs=client_kwargs @@ -984,7 +821,7 @@ def process(self): batch_str = f" and {n_batched} items batched into the prompt" if use_batches else "" self.dataset.update_status(f"Generating text at row {row:,}/" - f"{max_processed:,} with {model}{batch_str}") + f"{max_processed:,} with {model['name']}{batch_str}") # Now finally generate some text! 
try: response = llm.generate_text( @@ -1008,15 +845,9 @@ def process(self): self.dataset.finish_with_warning(outputs, f"Not all items processed: {e}") return - # Set model name from the response for more details - if hasattr(response, "response_metadata"): - model = response.response_metadata.get("model_name", model) - if "models/" in model: - model = model.replace("models/", "") - if not response: structured_warning = " with your specified JSON schema" if structured_output else "" - warning = f"{model} could not return text{structured_warning}. Consider editing your prompt or changing settings." + warning = f"{model['name']} could not return text{structured_warning}. Consider editing your prompt or changing settings." self.dataset.finish_with_warning(outputs, warning) return @@ -1100,7 +931,7 @@ def process(self): "prompt": prompt if not use_batches else base_prompt, # Insert dataset values if not batching "temperature": temperature, "max_tokens": max_tokens, - "model": model, + "model": model["local_id"], "time_created": datetime.fromtimestamp(time_created).strftime("%Y-%m-%d %H:%M:%S"), "time_created_utc": time_created, "batch_number": n + 1 if use_batches else "", @@ -1122,7 +953,7 @@ def process(self): for output_key, output_value in annotation_output.items(): # Skip 'signature' and 'type' annotations for Google - if provider == "google" and output_key in ("extras.signature", ".type"): + if model["provider"] == "google" and output_key in ("extras.signature", ".type"): continue annotation = { @@ -1140,7 +971,7 @@ def process(self): n_batched = 0 # Rate limits for different providers - if provider == "mistral": + if model["provider"] == "mistral": time.sleep(1) # Write annotations in batches @@ -1175,7 +1006,7 @@ def process(self): # Final outputs time_end = time.time() time_progressed = str(timedelta(seconds=int(time_end - time_start))) - final_status = f"Finished, {model} generated text in {time_progressed}." 
+ final_status = f"Finished, {model['local_id']} generated text in {time_progressed}." skipped_str = None if not skipped else f" Skipped {skipped} rows because of empty values." if skipped_str: self.dataset.finish_with_warning(i, final_status + skipped_str) @@ -1261,7 +1092,8 @@ def validate_query(query, request, config): :param config: :return: """ - if query["api_or_local"] == "api" and not query.get("api_key"): + is_external_api = query["model"].startswith("api-") + if is_external_api and not query.get("api_key"): raise QueryParametersException("You need to enter an API key when using third-party models.") # For media archive datasets, use_media won't be present in the query @@ -1277,7 +1109,7 @@ def validate_query(query, request, config): raise QueryParametersException("You need to insert column name(s) in the user prompt within brackets " "(e.g. '[body]' or '[timestamp, author]')") - if query["api_or_local"] == "api" and not query.get("frontend-confirm"): + if is_external_api and not query.get("frontend-confirm"): raise QueryNeedsExplicitConfirmationException("Your data will be sent to a third-party service for " "processing, which will share your data with them and is " "likely to incur costs. Do you want to continue?") diff --git a/processors/machine_learning/prompt_compass.py b/processors/machine_learning/prompt_compass.py index 076bd916f..46386790e 100644 --- a/processors/machine_learning/prompt_compass.py +++ b/processors/machine_learning/prompt_compass.py @@ -3,7 +3,6 @@ """ from backend.lib.preset import ProcessorPreset from common.lib.helpers import UserInput -from common.lib.llm import LLMAdapter from common.lib.exceptions import ( QueryParametersException, @@ -63,25 +62,6 @@ def get_prompt_library(config): return prompt_library - @staticmethod - def get_available_models(config): - """ - Get available model providers - - Combine the list defined by the LLMAdapter with known local models. 
- - :param config: Configuration reader - :return dict: Models and metadata - """ - # get cached local models - models = config.get("llm.available_models", {}) - models = {} if models == [] else models - models.update({k: v for k, v in LLMAdapter.get_models(config).items() if k not in ("none", "custom")}) - - models = {k: v for k, v in models.items() if "model_card" in v} - - return models - @staticmethod def is_compatible_with(module=None, config=None): """ @@ -91,9 +71,7 @@ def is_compatible_with(module=None, config=None): :param ConfigManager|None config: Configuration reader (context-aware) :return bool: """ - models = PromptCompassRunner.get_available_models(config) - return (models - and module.is_top_dataset() + return (module.is_top_dataset() and module.get_extension() in ("csv", "ndjson")) @classmethod @@ -108,15 +86,22 @@ def get_options(cls, parent_dataset=None, config=None): :return: """ prompt_library = cls.get_prompt_library(config) - available_models = cls.get_available_models(config) + available_models = config.get("llm.available_models", []) + enabled_model_ids = config.get("llm.enabled_models", []) + if not config.get("llm.access"): + enabled_model_ids = [_ for _ in enabled_model_ids if _.startswith("api-")] + + enabled_models = {k: v for k, v in available_models.items() if k in enabled_model_ids} options = { "model": { "type": UserInput.OPTION_CHOICE, "help": "Model to use", "tooltip": "Third-party models require an API key to run.", - "options": {("local/" if v["provider"] == "local" else f"{v['provider']}/") + k: v["name"] for k, v in available_models.items()}, - "default": sorted(list(available_models.keys()), key=lambda k: k.startswith("local"))[-1] + "options": { + model_id: model["name"] for model_id, model in enabled_models.items() + }, + "default": sorted(list(enabled_models.keys()), key=lambda k: not k.startswith("api"))[-1] }, } @@ -136,7 +121,7 @@ def get_options(cls, parent_dataset=None, config=None): "cache": True, "tooltip": "Create 
an API key on the LLM provider's website (e.g. https://admin.mistral.ai/organization" "/api-keys). Note that this often involves billing.", - "requires": "model!^=local" + "requires": "model^=api" }, "hide_think": { "type": UserInput.OPTION_TOGGLE, diff --git a/processors/metrics/rank_attribute.py b/processors/metrics/rank_attribute.py index 2f50d623a..3c7c03286 100644 --- a/processors/metrics/rank_attribute.py +++ b/processors/metrics/rank_attribute.py @@ -86,7 +86,8 @@ def get_options(cls, parent_dataset=None, config=None): "hostnames": "Domain names", "level2-hostnames": "Second-level domain names (e.g. m.youtube.com -> youtube.com)", "hashtags": "Hashtags (words starting with #)", - "emoji": "Emoji (each used emoji in the column is counted individually)" + "emoji": "Emoji (each used emoji in the column is counted individually)", + "occurrence": "Values (the number of comma-separated values in the given field)" }, "help": "Extract from column", "tooltip": "This can be used to extract more specific values from the value of the selected column(s); for " diff --git a/webtool/__init__.py b/webtool/__init__.py index 54ac2072c..e7359c423 100644 --- a/webtool/__init__.py +++ b/webtool/__init__.py @@ -171,6 +171,7 @@ def time_this(func): import webtool.views.views_restart # noqa: E402 import webtool.views.views_admin # noqa: E402 import webtool.views.views_extensions # noqa: E402 + import webtool.views.views_llm # noqa: E402 import webtool.views.views_user # noqa: E402 import webtool.views.views_dataset # noqa: E402 import webtool.views.views_misc # noqa: E402 @@ -181,6 +182,7 @@ def time_this(func): app.register_blueprint(webtool.views.views_restart.component) app.register_blueprint(webtool.views.views_admin.component) app.register_blueprint(webtool.views.views_extensions.component) + app.register_blueprint(webtool.views.views_llm.component) app.register_blueprint(webtool.views.views_user.component) app.register_blueprint(webtool.views.views_dataset.component) 
app.register_blueprint(webtool.views.views_misc.component) @@ -239,6 +241,10 @@ def get_datasource_explorer_templates(name): [FileSystemLoader(template_paths), FunctionLoader(get_datasource_explorer_templates)] ) + # enable the {% do %} tag in jinja + app.jinja_env.add_extension("jinja2.ext.do") + app.jinja_env.add_extension("jinja2.ext.debug") + # import custom jinja2 template filters # these also benefit from current_app import webtool.lib.template_filters # noqa: E402 diff --git a/webtool/lib/template_filters.py b/webtool/lib/template_filters.py index 5682c6dbc..02e41dec6 100644 --- a/webtool/lib/template_filters.py +++ b/webtool/lib/template_filters.py @@ -195,6 +195,25 @@ def _jinja2_filter_markdown(text, trim_container=False): def _jinja2_filter_isbool(value): return isinstance(value, bool) +@current_app.template_filter('propmap') +def _jinja2_filter_propmap(data, property, default=None): + """ + Select a property from a sequence of dicts + + To map `{a: {prop: value}}` to `{a: value}` for a given `prop`. If + `data` is a dict, preserve key:value pairs. If the property does not exist + in a sequence item, use the `default` value.
+ + :param data: Sequence or dict to map + :param property: Property to use for mapping + :param default: Value to use if property does not exist in item + :return: Mapped sequence or dict + """ + if type(data) is dict: + return {k: v.get(property, default) for k, v in data.items()} + else: + return [v.get(property, default) for v in (data.values() if hasattr(data, "values") else data)] + @current_app.template_filter('json') def _jinja2_filter_json(data): return json.dumps(data) @@ -396,6 +415,18 @@ def _jinja2_filter_parameter_str(url): return params +@current_app.template_filter("hostname") +def _jinja2_filter_hostname(url: str) -> str: + """ + For a URL, return the hostname + + If no hostname is found, return the original value + + :param str url: + :return str: + """ + return ural.get_hostname(url) or url + @current_app.template_filter("explorer_css") def explorer_css(datasource, scope_class="explorer-content-container"): @@ -429,6 +460,10 @@ def explorer_css(datasource, scope_class="explorer-content-container"): def _jinja2_filter_hasattr(obj, attribute): return hasattr(obj, attribute) +@current_app.template_filter('debug') +def _jinja2_filter_debug(value): + print(value) + @current_app.context_processor def inject_now(): def uniqid(): diff --git a/webtool/static/css/stylesheet.css b/webtool/static/css/stylesheet.css index fecc5cfd7..3e2445911 100644 --- a/webtool/static/css/stylesheet.css +++ b/webtool/static/css/stylesheet.css @@ -330,6 +330,48 @@ article.small .form-element select[multiple] { top: -0.4em; } +.form-multi-option-wrapper, .form-multi-option-header { + margin: 0 1em; +} + +.form-multi-option-wrapper { + margin: 1em; +} + +.form-multi-option-header { + +} + +.form-multi-option-wrapper li { + border-left: 2px solid var(--accent); + margin-bottom: 4px; + position: relative; + margin-left: 20px; +} + +.form-multi-option-wrapper .action-button:not(.hidden) { + position: absolute; + top: 0.5em; + left: 0.5em; + padding: 0 0.4em; +} + +.form-multi-option-wrapper
import {find_parent, reset_form_elements} from "./util.js";

/**
 * Repeatable groups of form elements ("multi options")
 *
 * Each `ol.form-multi-option-wrapper` holds one `li` per item. Every item
 * gets an 'add' and a 'delete' button; items are renumbered after each change
 * so that the `-<index>-` part of their `for`, `id` and `name` attributes
 * stays unique and sequential.
 */
export const multiForm = {
    /**
     * Add action buttons to all multi-option items and number the items
     */
    init: function () {
        const actions = document.createElement('div');
        actions.className = 'multi-form-actions';

        // No listeners are attached to these template buttons: cloneNode()
        // does not copy listeners, and clicks are handled through delegation
        // on the wrapper (see handle_click).
        const add_button = document.createElement('button');
        // explicit type, so the button is never treated as a submit button
        add_button.type = 'button';
        add_button.className = 'add-button action-button';
        add_button.textContent = '+';

        const delete_button = document.createElement('button');
        delete_button.type = 'button';
        delete_button.className = 'delete-button action-button';
        delete_button.textContent = 'x';

        actions.appendChild(add_button);
        actions.appendChild(delete_button);

        document.querySelectorAll('.form-multi-option-wrapper').forEach(function (wrapper) {
            wrapper.addEventListener('click', multiForm.handle_click);
            wrapper.querySelectorAll('li').forEach(function (item) {
                item.appendChild(actions.cloneNode(true));
            });
            multiForm.renumber(wrapper);
        });
    },

    /**
     * Delegated click handler for the add/delete buttons of a wrapper
     *
     * @param e  Click event
     * @returns {boolean|undefined}  `true` if the click was not on an action
     * button and should be handled as usual
     */
    handle_click: function (e) {
        const is_add = e.target.classList.contains('add-button');
        const is_delete = e.target.classList.contains('delete-button');
        if (!is_add && !is_delete) {
            return true;
        }

        e.preventDefault();
        // same selector as add_item/delete_item, so all three agree on which
        // list is being manipulated
        const wrapper = find_parent(e.target, 'ol.form-multi-option-wrapper');
        if (is_delete) {
            multiForm.delete_item(e);
        } else {
            multiForm.add_item(e);
        }
        multiForm.renumber(wrapper);
    },

    /**
     * Append a copy of the clicked item, reset to default values, to the list
     *
     * @param e  Click event
     */
    add_item: function (e) {
        const ol = find_parent(e.target, 'ol.form-multi-option-wrapper');
        const source_li = find_parent(e.target, 'li');
        const clone = source_li.cloneNode(true);
        reset_form_elements(clone);
        ol.appendChild(clone);
    },

    /**
     * Remove the clicked item after confirmation
     *
     * The last remaining item is never removed, only reset to its defaults.
     *
     * @param e  Click event
     * @returns {boolean|undefined}  `false` if the user cancelled
     */
    delete_item: function (e) {
        if (!confirm("Are you sure?")) {
            return false;
        }
        const li = find_parent(e.target, 'li');
        const ol = find_parent(e.target, 'ol.form-multi-option-wrapper');

        if (ol.querySelectorAll('li').length > 1) {
            li.parentNode.removeChild(li);
        } else {
            // last element; do not remove, but reset to default
            reset_form_elements(li);
        }
    },

    /**
     * Number all items in a list (1-based) and hide the delete button of the
     * last one
     *
     * @param parent  List element containing the items
     */
    renumber: function (parent) {
        parent.querySelectorAll('li').forEach(function (item, position) {
            const index = position + 1;
            item.setAttribute('data-multi-option-index', index);
            item.querySelector('.delete-button')?.classList.remove('hidden');
            multiForm.renumber_items(item, index);
        });
        parent.querySelector('li:last-child .delete-button')?.classList.add('hidden');
    },

    /**
     * Recursively rewrite the item index in `for`, `id` and `name` attributes
     *
     * @param parent  Node whose descendants should be updated
     * @param index  New item index
     */
    renumber_items: function (parent, index) {
        const attributes = ['for', 'id', 'name'];
        parent.childNodes.forEach(child => {
            if (!(child instanceof HTMLElement)) {
                return;
            }
            for (const attribute of attributes) {
                if (child.hasAttribute(attribute)) {
                    // `[0-9]+` rather than `[0-9+]`: the index may have more
                    // than one digit once a list has ten or more items
                    child.setAttribute(attribute, child.getAttribute(attribute).replace(/-[0-9]+-/, `-${index}-`));
                }
            }
            multiForm.renumber_items(child, index);
        });
    }
};

export const module = multiForm;
{{ settings.help|markdown|safe }}
{% elif settings.type in ["annotation", "annotations"] %} {# pass - a datasource should never need to annotate itself; this is for processor options only #} + {% elif settings.type == "multi_option" %} +
+

{{ settings.help }}

+
+
    + {# always include an empty item #} + {% set empty_item = settings.options|propmap("default") %} + {% do settings.default.append(empty_item) %} + {% for item in settings.default %} + {% set outerloop = loop %} + {% set last_index = outerloop.index %} +
  1. + {% for sub_option, sub_settings in settings.options.items() %} + {% do sub_settings.update({"original_default": sub_settings.default, "default": item[sub_option]}) %} + {% set sub_option = option ~ "-" ~ outerloop.index ~ "-" ~ sub_option %} + {% with option=sub_option, settings=sub_settings %} + {% include "components/datasource-option.html" %} + {% endwith %} + {% endfor %} +
  2. + {% endfor %} +
{% else %}
@@ -21,7 +43,7 @@

{% endif %} {% elif settings.type == "string" %} - 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}"> + 0 %} step="{{ settings.min }}"{% elif settings.min is defined and settings.min is not none and settings.min|float == 0 %} step="any"{% endif %} type="{% if (settings.min is defined and settings.min is not none) or (settings.max is defined and settings.max is not none) %}number{% elif settings.password %}password{% else %}text{% endif %}" value="{{ settings.default }}" data-default="{{ settings.original_default }}"> {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "date" %} - + {% if "tooltip" in settings %} @@ -55,7 +77,7 @@ {% endif %} {% elif settings.type in ("json", "textarea") %} + placeholder="{{ settings.tooltip }}" data-default="{{ settings.original_default }}">{{ settings.default }} {% if "tooltip" in settings %} {% endif %} {% elif settings.type == "choice" %} - {% for value, label in settings.options.items() %} {% endfor %} @@ -105,7 +127,7 @@ {% elif settings.type == "hue" %}
{% set hue_id = uniqid() %} - diff --git a/webtool/templates/controlpanel/layout.html b/webtool/templates/controlpanel/layout.html index 33c387421..26d03df20 100644 --- a/webtool/templates/controlpanel/layout.html +++ b/webtool/templates/controlpanel/layout.html @@ -18,6 +18,8 @@ Jobs{% endif %} {% if __user_config("privileges.admin.can_restart") %} Extensions{% endif %} + {% if __user_config("privileges.admin.can_manage_settings") and __user_config("llm.access") %} + LLMs & Providers{% endif %} {% if __user_config("privileges.admin.can_manage_users") %} View logs{% endif %} {% if __user_config("privileges.admin.can_manipulate_all_datasets") %} diff --git a/webtool/templates/controlpanel/llm-server.html b/webtool/templates/controlpanel/llm-server.html new file mode 100644 index 000000000..af9ffe689 --- /dev/null +++ b/webtool/templates/controlpanel/llm-server.html @@ -0,0 +1,175 @@ +{% extends "controlpanel/layout.html" %} + +{% block title %}LLM Server{% endblock %} +{% block body_class %}plain-page admin {{ body_class }}{% endblock %} +{% block subbreadcrumbs %}{% set navigation.sub = "llm" %}{% endblock %} + +{% block body %} +
+
+

LLM Providers

+ + {% if flashes %} +
+ {% for notice in flashes %} +

{{ notice|safe }}

+ {% endfor %} +
+ {% endif %} + +

+ You can add and configure LLM providers via the 'LLM providers' tab on the Settings page. +

+ + {# Server status #} +
+ + + + + + {% if not providers %} + + + + {% else %} + {% for provider in providers %} + + + + + {% endfor %} + {% endif %} +
ServerStatus
+ No LLM providers configured. +
{{ provider.type }} {{ provider.name }} + {% if provider.status == "online" %} + Online + {% else %} + {{ provider.status }} + {% endif %} +
+
+ + {# Available models #} +

+ Available Models +
+ + +
+

+ + {% if update_running %} +

+ Models are currently being refreshed or installed - reload the page to see the up-to-date list.

+ {% endif %} + +
+ + + + + + + + + + + + + + {% if available_models %} + {% for model_id, model in available_models.items() %} + + + + + + + {% endfor %} + {% else %} + + + + {% endif %} +
NameProvider/modelCapabilitiesStatus
+ {% if model.model_card %} + {{ model.name }} + {% else %} + {{ model.name }} + {% endif %} + + {{ model.provider_type }}/{{ model.provider|hostname }}
+ {{ model.local_id }} +
+ {{ model.supported_media_types | join(", ") }} + + {% if model_id in enabled_models %} +
+ + + +
+ {% else %} +
+ + + +
+ {% endif %} + {% if model.provider_type == "ollama" %} +
+ + + +
+ {% endif %} +
+ {% if providers %} + No models found. Use the Refresh button to fetch available models, or + install a new model below with compatible providers. + {% else %} + No LLM providers configured. + {% endif %} +
+
+
+ + {# Pull a new model, if an ollama server is configured #} + {% if providers|selectattr("type", "equalto", "ollama")|list %} +
+

Install new LLMs

+

Enter a model name (e.g. llama3:8b) to make it available via the configured provider. For + Ollama, model names can be found in + model library.

+

Pulling large models may take several minutes; the job runs in the background. Note that 4CAT cannot install + models for all LLM providers; if your provider is not listed below, 4CAT may not be able to add additional + models to it, or you may need to use an external tool to add new models.

+
+ +
+ + +
+
+ + +
+
+ +
+
+
+ {% endif %} +
@component.route("/admin/llm/", methods=["GET", "POST"])
@login_required
@setting_required("privileges.admin.can_manage_settings")
def llm_panel():
    """
    LLM Server management panel

    Shows server status, available models, and controls to pull/delete/refresh
    models. Pull, delete, and refresh operations are queued as
    LLMProviderManager jobs rather than run synchronously. Enabling and
    disabling a model only updates the `llm.enabled_models` setting.

    :return:  The rendered panel (GET), a redirect back to the panel (POST),
    or a 403 error page if LLM access is not enabled
    """
    if not g.config.get("llm.access"):
        return error(403, message="LLM access is not enabled on this server.")

    if request.method == "POST":
        action = request.form.get("action", "").strip()
        provider = request.form.get("provider", "").strip()
        model_name = request.form.get("model_name", "").strip()
        details = {"provider": provider} if provider else {}

        if action == "refresh":
            # Queue a one-time manual refresh job; use a timestamp-based remote_id
            # so it is always accepted even if a periodic job already exists.
            g.queue.add_job("manage-llm", details={**details, "task": "refresh"},
                            remote_id=f"manage-llm-manual-{int(time.time())}")
            flash("Model refresh job queued.")

        elif action == "pull":
            if model_name:
                g.queue.add_job("manage-llm", details={**details, "task": "pull"}, remote_id=model_name)
                flash(f"Pull job queued for model '{model_name}'.")
            else:
                flash("Please provide a model name to pull.")

        elif action == "delete":
            if model_name:
                g.queue.add_job("manage-llm", details={**details, "task": "delete"}, remote_id=model_name)
                flash(f"Delete job queued for model '{model_name}'.")

        elif action == "enable":
            if model_name:
                enabled_models = list(g.config.get("llm.enabled_models", []) or [])
                if model_name not in enabled_models:
                    enabled_models.append(model_name)
                    g.config.set("llm.enabled_models", enabled_models)
                flash(f"Model '{model_name}' enabled.")

        elif action == "disable":
            if model_name:
                enabled_models = list(g.config.get("llm.enabled_models", []) or [])
                if model_name in enabled_models:
                    enabled_models.remove(model_name)
                    g.config.set("llm.enabled_models", enabled_models)
                flash(f"Model '{model_name}' disabled.")

        # always redirect after a POST so a page reload does not re-submit
        return redirect(url_for("llm.llm_panel"))

    # --- GET: render panel ---

    # Build annotated copies rather than writing the status into the dicts
    # returned by the configuration, so the stored setting is never altered
    # by simply viewing the page.
    providers = []
    for provider in g.config.get("llm.providers", []) or []:
        try:
            client = LLMProviderClient.get_client(g.config, provider)
        except ValueError:
            # unknown provider type; show it in the overview instead of
            # failing the whole page
            server_status = "invalid provider type"
        else:
            if provider_status := client.get_status():
                server_status = "online" if provider_status == 200 else f"error (HTTP {provider_status})"
            else:
                server_status = "unreachable"

        providers.append({**provider, "status": server_status})

    available_models = g.config.get("llm.available_models", {}) or {}
    enabled_models = list(g.config.get("llm.enabled_models", []) or [])

    # jobs without an interval are one-off (manual refresh, pull, delete)
    # jobs; the periodic refresh job does not count as a running update
    update_running = any(
        not job.data["interval"] for job in g.queue.get_all_jobs("manage-llm")
    )

    return render_template(
        "controlpanel/llm-server.html",
        flashes=get_flashed_messages(),
        providers=providers,
        available_models=available_models,
        enabled_models=enabled_models,
        update_running=update_running,
    )