From b4cb754b95326042d625fa5bfb9e89555ea7834f Mon Sep 17 00:00:00 2001
From: 827dls <1670704430@qq.com>
Date: Thu, 7 May 2026 23:34:49 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E5=A4=9A=E6=A8=A1=E6=80=81=E5=88=86?=
=?UTF-8?q?=E7=B1=BB=E4=B8=BA=E5=9B=BE=E7=89=87=E7=90=86=E8=A7=A3=E6=A8=A1?=
=?UTF-8?q?=E5=9E=8B=EF=BC=8C=E5=9B=BE=E7=89=87=E7=94=9F=E6=88=90=E6=A8=A1?=
=?UTF-8?q?=E5=9E=8B=EF=BC=8C=E8=A7=86=E9=A2=91=E7=90=86=E8=A7=A3=E6=A8=A1?=
=?UTF-8?q?=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
backend/consts/const.py | 5 +-
backend/consts/model.py | 3 +
backend/services/image_service.py | 2 +-
backend/services/model_health_service.py | 29 +-
backend/services/model_provider_service.py | 10 +-
.../services/providers/dashscope_provider.py | 26 +-
.../providers/modelengine_provider.py | 6 +-
.../services/providers/silicon_provider.py | 58 +++-
.../services/providers/tokenpony_provider.py | 22 +-
.../components/model/ModelAddDialog.tsx | 22 +-
.../components/model/ModelDeleteDialog.tsx | 46 +++-
.../components/model/ModelEditDialog.tsx | 3 +
.../models/components/modelConfig.tsx | 99 +++++--
frontend/const/modelConfig.ts | 15 ++
frontend/hooks/model/useModelList.ts | 30 +++
frontend/hooks/useConfig.ts | 9 +-
frontend/public/locales/en/common.json | 6 +
frontend/public/locales/zh/common.json | 6 +
frontend/types/modelConfig.ts | 8 +-
start_dev.sh | 19 ++
.../providers/test_silicon_provider.py | 254 ++++++++++++++++++
.../services/test_model_health_service.py | 233 ++++++++++++++--
22 files changed, 847 insertions(+), 64 deletions(-)
create mode 100644 start_dev.sh
diff --git a/backend/consts/const.py b/backend/consts/const.py
index db1e69184..89b731fa0 100644
--- a/backend/consts/const.py
+++ b/backend/consts/const.py
@@ -286,7 +286,10 @@ class VectorDatabaseType(str, Enum):
"rerank": "RERANK_ID",
"vlm": "VLM_ID",
"stt": "STT_ID",
- "tts": "TTS_ID"
+ "tts": "TTS_ID",
+ "imageUnderstanding": "IMAGE_UNDERSTANDING_ID",
+ "imageGeneration": "IMAGE_GENERATION_ID",
+ "videoUnderstanding": "VIDEO_UNDERSTANDING_ID"
}
APP_NAME = "APP_NAME"
diff --git a/backend/consts/model.py b/backend/consts/model.py
index 7cea3fdb5..a889ec848 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -155,6 +155,9 @@ class ModelConfig(BaseModel):
vlm: SingleModelConfig
stt: SingleModelConfig
tts: SingleModelConfig
+ imageUnderstanding: SingleModelConfig
+ imageGeneration: SingleModelConfig
+ videoUnderstanding: SingleModelConfig
class AppConfig(BaseModel):
diff --git a/backend/services/image_service.py b/backend/services/image_service.py
index 8decbd541..0f394b9ab 100644
--- a/backend/services/image_service.py
+++ b/backend/services/image_service.py
@@ -33,7 +33,7 @@ async def proxy_image_impl(decoded_url: str):
def get_vlm_model(tenant_id: str):
# Get the tenant config
vlm_model_config = tenant_config_manager.get_model_config(
- key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
+ key=MODEL_CONFIG_MAPPING.get("imageUnderstanding", "IMAGE_UNDERSTANDING_ID"), tenant_id=tenant_id)
if not vlm_model_config:
return None
return OpenAIVLModel(
diff --git a/backend/services/model_health_service.py b/backend/services/model_health_service.py
index 626e19007..3c9723900 100644
--- a/backend/services/model_health_service.py
+++ b/backend/services/model_health_service.py
@@ -1,5 +1,6 @@
import logging
from typing import Optional
+import httpx
from nexent.core import MessageObserver
from nexent.core.models import OpenAIModel, OpenAIVLModel
@@ -80,6 +81,8 @@ async def _perform_connectivity_check(
Returns:
bool: Connectivity check result
"""
+ logger.info(f"Performing connectivity check for model={model_name} type={model_type} url={model_base_url}")
+
if LOCALHOST_NAME in model_base_url or LOCALHOST_IP in model_base_url:
model_base_url = model_base_url.replace(
LOCALHOST_NAME, DOCKER_INTERNAL_HOST).replace(LOCALHOST_IP, DOCKER_INTERNAL_HOST)
@@ -122,7 +125,8 @@ async def _perform_connectivity_check(
ssl_verify=ssl_verify,
)
connectivity = await rerank_model.connectivity_check()
- elif model_type == "vlm":
+ elif model_type in ("vlm", "image_understanding", "video_understanding"):
+ # Vision/language models use OpenAIVLModel for connectivity check
observer = MessageObserver()
set_monitoring_operation("connectivity_check",
display_name=display_name)
@@ -133,6 +137,29 @@ async def _perform_connectivity_check(
api_key=model_api_key,
ssl_verify=ssl_verify
).check_connectivity()
+ elif model_type == "image_generation":
+ # Image generation models have a separate API endpoint
+ # Use simple HTTP request to verify the API is accessible
+ connectivity = False
+ try:
+ headers = {"Authorization": f"Bearer {model_api_key}"}
+ async with httpx.AsyncClient(verify=ssl_verify, timeout=30) as client:
+ # Test if the endpoint is reachable
+ logger.info(f"Image generation connectivity check for model={model_name} url={model_base_url}")
+ response = await client.post(
+ model_base_url,
+ headers=headers,
+ json={"model": model_name, "prompt": "test"}
+ )
+ logger.info(f"Image generation connectivity check response: status={response.status_code}")
+ # Any response (even error) means the endpoint is reachable
+ connectivity = response.status_code < 500
+ except httpx.TimeoutException as e:
+ logger.warning(f"Image generation connectivity check timeout for model={model_name} url={model_base_url}: {e}")
+ except httpx.ConnectError as e:
+ logger.warning(f"Image generation connectivity check failed (connect error) for model={model_name} url={model_base_url}: {e}")
+ except Exception as e:
+ logger.warning(f"Image generation connectivity check failed (exception) for model={model_name} url={model_base_url}: {type(e).__name__}: {e}")
elif model_type in ["tts", "stt"]:
voice_service = get_voice_service()
connectivity = await voice_service.check_voice_connectivity(model_type)
diff --git a/backend/services/model_provider_service.py b/backend/services/model_provider_service.py
index dbff17082..4889c0456 100644
--- a/backend/services/model_provider_service.py
+++ b/backend/services/model_provider_service.py
@@ -136,9 +136,15 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
if provider == ProviderEnum.DASHSCOPE.value:
model_dict["base_url"] = f"{model_url.replace('compatible-mode/v1','api/v1').rstrip('/')}/services/rerank/text-rerank/text-rerank"
else:
- model_dict["base_url"] = f"{model_url.rstrip('/')}/rerank"
+ model_dict["base_url"] = f"{model_url.rstrip('/')}/rerank"
+ elif model["model_type"] == "image_generation":
+ # For image generation models, use the images/generations endpoint
+ if provider == ProviderEnum.SILICON.value:
+ model_dict["base_url"] = f"{model_url.rstrip('/')}/images/generations"
+ else:
+ model_dict["base_url"] = model_url
else:
- # For non-embedding models
+ # For non-embedding, non-rerank, non-image-generation models
if provider == ProviderEnum.MODELENGINE.value:
# Ensure ModelEngine models have the full API path
model_dict["base_url"] = f"{model_url.rstrip('/')}/{MODEL_ENGINE_NORTH_PREFIX}"
diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py
index b9fb7ab7b..6f2c57da7 100644
--- a/backend/services/providers/dashscope_provider.py
+++ b/backend/services/providers/dashscope_provider.py
@@ -53,10 +53,13 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
current_page += 1
await asyncio.sleep(0.5)
- # Initialize containers for the 6 main categories
+ # Initialize containers for the 9 main categories
categorized_models = {
"chat": [], # Maps to "llm"
"vlm": [], # Maps to "vlm"
+ "image_understanding": [], # Maps to "image_understanding"
+ "image_generation": [], # Maps to "image_generation"
+ "video_understanding": [], # Maps to "video_understanding"
"embedding": [], # Maps to "embedding" / "multi_embedding"
"rerank": [], # Maps to "rerank"
"tts": [], # Maps to "tts"
@@ -106,11 +109,22 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
categorized_models['tts'].append(cleaned_model)
continue
- # 5. VLM
+ # 5. VLM / Image Understanding / Image Generation / Video Understanding
vision_mods = {'Image', 'Video'}
- if (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods) or '视觉' in desc:
- cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
- categorized_models['vlm'].append(cleaned_model)
+ has_vision = (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods)
+ has_image_gen = 'Image' in res_mod and 'Video' not in res_mod
+ has_video = 'Video' in req_mod or 'Video' in res_mod
+
+ if has_vision or '视觉' in desc:
+ if has_image_gen:
+ cleaned_model.update({"model_tag": "image_generation", "model_type": "image_generation"})
+ categorized_models['image_generation'].append(cleaned_model)
+ elif has_video:
+ cleaned_model.update({"model_tag": "video_understanding", "model_type": "video_understanding"})
+ categorized_models['video_understanding'].append(cleaned_model)
+ else:
+ cleaned_model.update({"model_tag": "image_understanding", "model_type": "image_understanding"})
+ categorized_models['image_understanding'].append(cleaned_model)
continue
# 6. Chat / LLM
@@ -123,6 +137,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
return categorized_models["chat"]
elif target_model_type in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
+ elif target_model_type in ("image_understanding", "image_generation", "video_understanding"):
+ return categorized_models[target_model_type]
elif target_model_type in categorized_models:
return categorized_models[target_model_type]
else:
diff --git a/backend/services/providers/modelengine_provider.py b/backend/services/providers/modelengine_provider.py
index 276f84378..d882c88df 100644
--- a/backend/services/providers/modelengine_provider.py
+++ b/backend/services/providers/modelengine_provider.py
@@ -83,7 +83,9 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
"asr": "stt",
"tts": "tts",
"rerank": "rerank",
- "multimodal": "vlm",
+ "image_understanding": "image_understanding",
+ "image_generation": "image_generation",
+ "video_understanding": "video_understanding",
}
filtered_models = []
@@ -100,7 +102,7 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
"id": model.get("id", ""),
"model_type": internal_type,
"model_tag": me_type,
- "max_tokens": DEFAULT_LLM_MAX_TOKENS if internal_type in ("llm", "vlm") else 0,
+ "max_tokens": DEFAULT_LLM_MAX_TOKENS if internal_type in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding") else 0,
"base_url": host,
"api_key": api_key,
})
diff --git a/backend/services/providers/silicon_provider.py b/backend/services/providers/silicon_provider.py
index ea41cc95d..14d5bce26 100644
--- a/backend/services/providers/silicon_provider.py
+++ b/backend/services/providers/silicon_provider.py
@@ -1,10 +1,16 @@
+import logging
import httpx
from typing import Dict, List
from consts.const import DEFAULT_LLM_MAX_TOKENS
-from consts.provider import SILICON_GET_URL
+from consts.provider import SILICON_GET_URL, SILICON_BASE_URL
from services.providers.base import AbstractModelProvider, _classify_provider_error
+logger = logging.getLogger("silicon_provider")
+
+# Silicon Flow image generation endpoint
+SILICON_IMAGE_GEN_URL = "https://api.siliconflow.cn/v1/images/generations"
+
class SiliconModelProvider(AbstractModelProvider):
"""Concrete implementation for SiliconFlow provider."""
@@ -26,7 +32,7 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
headers = {"Authorization": f"Bearer {model_api_key}"}
# Choose endpoint by model type
- if model_type in ("llm", "vlm"):
+ if model_type in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
silicon_url = f"{SILICON_GET_URL}?sub_type=chat"
elif model_type in ("embedding", "multi_embedding"):
silicon_url = f"{SILICON_GET_URL}?sub_type=embedding"
@@ -41,11 +47,51 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
model_list: List[Dict] = response.json()["data"]
# Annotate models with canonical fields expected downstream
- if model_type in ("llm", "vlm"):
+ if model_type in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
for item in model_list:
item["model_tag"] = "chat"
item["model_type"] = model_type
item["max_tokens"] = DEFAULT_LLM_MAX_TOKENS
+
+ # Filter models based on the requested model_type
+ if model_type == "llm":
+ # For LLM, exclude vision/image/video generation related models
+ model_list = [
+ m for m in model_list
+ if not any(kw in m.get("id", "").lower() for kw in [
+ "-vl", "vl-", "vision", "video", "ocr",
+ "image_gen", "img_gen", "wanx", "flux",
+ "stable-diffusion", "dall", "llava", "qwen-v"
+ ])
+ ]
+ elif model_type == "image_understanding":
+ # Only include image understanding models
+ model_list = [
+ m for m in model_list
+ if any(kw in m.get("id", "").lower() for kw in [
+ "-vl", "vl-", "ocr", "vision", "llava",
+ "qwen-v", "qwen2-v", "vit", "clip"
+ ])
+ ]
+ elif model_type == "image_generation":
+ # Only include image generation models
+ model_list = [
+ m for m in model_list
+ if any(kw in m.get("id", "").lower() for kw in [
+ "image_gen", "img_gen", "wanx", "flux",
+ "stable-diffusion", "dall", "sd-", "sdxl",
+ "midjourney", "imgen", "t2i", "text2img",
+ "qwen-img", "qwen-image", "cog", "minimax",
+ "polux", "edit", "image-edit"
+ ])
+ ]
+ logger.info(f"SiliconFlow image generation models after filter: {[m.get('id') for m in model_list]}")
+ elif model_type == "video_understanding":
+ # Only include video understanding models
+ model_list = [
+ m for m in model_list
+ if "video" in m.get("id", "").lower()
+ ]
elif model_type in ("embedding", "multi_embedding"):
for item in model_list:
item["model_tag"] = "embedding"
@@ -55,6 +101,12 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
item["model_tag"] = "rerank"
item["model_type"] = model_type
+ # For image generation models, set the correct base_url
+ if model_type == "image_generation":
+ for item in model_list:
+ # Set the image generation endpoint as base_url
+ item["base_url"] = SILICON_IMAGE_GEN_URL
+
# Return empty list to indicate successful API call but no models
if not model_list:
return []
diff --git a/backend/services/providers/tokenpony_provider.py b/backend/services/providers/tokenpony_provider.py
index ab4446c1b..c4142c5c5 100644
--- a/backend/services/providers/tokenpony_provider.py
+++ b/backend/services/providers/tokenpony_provider.py
@@ -42,10 +42,13 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
# OpenAI standard response puts the model list inside the "data" array
all_models: List[Dict] = response.json().get("data", [])
- # Initialize containers for the 6 main categories
+ # Initialize containers for the 9 main categories
categorized_models = {
"chat": [], # Maps to "llm"
"vlm": [], # Maps to "vlm"
+ "image_understanding": [], # Maps to "image_understanding"
+ "image_generation": [], # Maps to "image_generation"
+ "video_understanding": [], # Maps to "video_understanding"
"embedding": [], # Maps to "embedding" / "multi_embedding"
"rerank": [], # Maps to "rerank"
"tts": [], # Maps to "tts"
@@ -86,11 +89,16 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
cleaned_model.update({"model_tag": "tts", "model_type": "tts"})
categorized_models['tts'].append(cleaned_model)
- # 5. VLM (Vision Language Model / Image & Video Generation)
-
- elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision']):
- cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
- categorized_models['vlm'].append(cleaned_model)
+ # 5. Image Generation / Image Understanding / Video Understanding
+ elif any(keyword in m_id for keyword in ['image_gen', 'img_gen', 'wanx', 'stable-diffusion', 'dall', 'flux', 'sd-', 'sdxl', 'midjourney']):
+ cleaned_model.update({"model_tag": "image_generation", "model_type": "image_generation"})
+ categorized_models['image_generation'].append(cleaned_model)
+ elif 'video' in m_id or 'video_understanding' in m_id:
+ cleaned_model.update({"model_tag": "video_understanding", "model_type": "video_understanding"})
+ categorized_models['video_understanding'].append(cleaned_model)
+ elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision', 'qwen-vl', 'qwen2-vl']):
+ cleaned_model.update({"model_tag": "image_understanding", "model_type": "image_understanding"})
+ categorized_models['image_understanding'].append(cleaned_model)
# 6. Chat (Pure Text Conversation / Reasoning)
# Fallback check added: 'not metadata' catches standard OpenAI models that lack modality data
@@ -103,6 +111,8 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
return categorized_models["chat"]
elif target_model_type in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
+ elif target_model_type in ("image_understanding", "image_generation", "video_understanding"):
+ return categorized_models[target_model_type]
elif target_model_type in categorized_models:
return categorized_models[target_model_type]
else:
diff --git a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
index 7cbf5192e..54f43ef34 100644
--- a/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelAddDialog.tsx
@@ -715,8 +715,14 @@ export const ModelAddDialog = ({
case MODEL_TYPES.MULTI_EMBEDDING:
configUpdate = { multiEmbedding: modelConfig };
break;
- case MODEL_TYPES.VLM:
- configUpdate = { vlm: modelConfig };
+ case MODEL_TYPES.IMAGE_UNDERSTANDING:
+ configUpdate = { imageUnderstanding: modelConfig };
+ break;
+ case MODEL_TYPES.IMAGE_GENERATION:
+ configUpdate = { imageGeneration: modelConfig };
+ break;
+ case MODEL_TYPES.VIDEO_UNDERSTANDING:
+ configUpdate = { videoUnderstanding: modelConfig };
break;
case MODEL_TYPES.RERANK:
configUpdate = { rerank: modelConfig };
@@ -842,7 +848,15 @@ export const ModelAddDialog = ({
-
+
+
+
@@ -1440,7 +1454,7 @@ export const ModelAddDialog = ({
...
>
)}
- {form.type === "vlm" && !form.isBatchImport && (
+ {(form.type === MODEL_TYPES.IMAGE_UNDERSTANDING || form.type === MODEL_TYPES.IMAGE_GENERATION || form.type === MODEL_TYPES.VIDEO_UNDERSTANDING) && !form.isBatchImport && (
<>
({
},
multimodal: {
title: t("modelConfig.category.multimodal"),
- options: [{ id: MODEL_TYPES.VLM, name: t("modelConfig.option.vlmModel") }],
+ options: [
+ {
+ id: MODEL_TYPES.IMAGE_UNDERSTANDING,
+ name: t("modelConfig.option.imageUnderstandingModel"),
+ },
+ {
+ id: MODEL_TYPES.IMAGE_GENERATION,
+ name: t("modelConfig.option.imageGenerationModel"),
+ },
+ {
+ id: MODEL_TYPES.VIDEO_UNDERSTANDING,
+ name: t("modelConfig.option.videoUnderstandingModel"),
+ },
+ ],
},
voice: {
title: t("modelConfig.category.voice"),
@@ -142,7 +155,11 @@ export const ModelConfigSection = forwardRef<
llm: { main: "" },
embedding: { embedding: "", multi_embedding: "" },
reranker: { reranker: "" },
- multimodal: { vlm: "" },
+ multimodal: {
+ image_understanding: "",
+ image_generation: "",
+ video_understanding: "",
+ },
voice: { tts: "", stt: "" },
});
@@ -283,10 +300,30 @@ export const ModelConfigSection = forwardRef<
)
: true;
- const vlm = modelConfig.vlm.displayName;
- const vlmExists = vlm
+ const imageUnderstanding = modelConfig.imageUnderstanding?.displayName;
+ const imageUnderstandingExists = imageUnderstanding
+ ? allModels.some(
+ (m) =>
+ m.displayName === imageUnderstanding &&
+ m.type === MODEL_TYPES.IMAGE_UNDERSTANDING
+ )
+ : true;
+
+ const imageGeneration = modelConfig.imageGeneration?.displayName;
+ const imageGenerationExists = imageGeneration
? allModels.some(
- (m) => m.displayName === vlm && m.type === MODEL_TYPES.VLM
+ (m) =>
+ m.displayName === imageGeneration &&
+ m.type === MODEL_TYPES.IMAGE_GENERATION
+ )
+ : true;
+
+ const videoUnderstanding = modelConfig.videoUnderstanding?.displayName;
+ const videoUnderstandingExists = videoUnderstanding
+ ? allModels.some(
+ (m) =>
+ m.displayName === videoUnderstanding &&
+ m.type === MODEL_TYPES.VIDEO_UNDERSTANDING
)
: true;
@@ -317,7 +354,9 @@ export const ModelConfigSection = forwardRef<
reranker: rerankExists ? rerank : "",
},
multimodal: {
- vlm: vlmExists ? vlm : "",
+ image_understanding: imageUnderstandingExists ? imageUnderstanding : "",
+ image_generation: imageGenerationExists ? imageGeneration : "",
+ video_understanding: videoUnderstandingExists ? videoUnderstanding : "",
},
voice: {
tts: ttsExists ? tts : "",
@@ -359,8 +398,16 @@ export const ModelConfigSection = forwardRef<
configUpdates.rerank = { modelName: "", displayName: "" };
}
- if (!vlmExists && vlm) {
- configUpdates.vlm = { modelName: "", displayName: "" };
+ if (!imageUnderstandingExists && imageUnderstanding) {
+ configUpdates.imageUnderstanding = { modelName: "", displayName: "" };
+ }
+
+ if (!imageGenerationExists && imageGeneration) {
+ configUpdates.imageGeneration = { modelName: "", displayName: "" };
+ }
+
+ if (!videoUnderstandingExists && videoUnderstanding) {
+ configUpdates.videoUnderstanding = { modelName: "", displayName: "" };
}
if (!sttExists && stt) {
@@ -384,7 +431,9 @@ export const ModelConfigSection = forwardRef<
!!modelConfig.embedding.modelName ||
!!modelConfig.multiEmbedding.modelName ||
!!modelConfig.rerank.modelName ||
- !!modelConfig.vlm.modelName ||
+ !!modelConfig.imageUnderstanding?.modelName ||
+ !!modelConfig.imageGeneration?.modelName ||
+ !!modelConfig.videoUnderstanding?.modelName ||
!!modelConfig.tts.modelName ||
!!modelConfig.stt.modelName;
@@ -440,12 +489,16 @@ export const ModelConfigSection = forwardRef<
const hasLlmMain = !!modelConfig.llm.modelName;
const hasEmbedding = !!modelConfig.embedding.modelName;
const hasReranker = !!modelConfig.rerank.modelName;
- const hasVlm = !!modelConfig.vlm.modelName;
+ const hasImageUnderstanding = !!modelConfig.imageUnderstanding?.modelName;
+ const hasImageGeneration = !!modelConfig.imageGeneration?.modelName;
+ const hasVideoUnderstanding = !!modelConfig.videoUnderstanding?.modelName;
const hasTts = !!modelConfig.tts.modelName;
const hasStt = !!modelConfig.stt.modelName;
hasSelectedModels =
- hasLlmMain || hasEmbedding || hasReranker || hasVlm || hasTts || hasStt;
+ hasLlmMain || hasEmbedding || hasReranker ||
+ hasImageUnderstanding || hasImageGeneration || hasVideoUnderstanding ||
+ hasTts || hasStt;
if (hasSelectedModels) {
currentSelectedModels.llm.main = modelConfig.llm.modelName;
@@ -454,7 +507,12 @@ export const ModelConfigSection = forwardRef<
currentSelectedModels.embedding.multi_embedding =
modelConfig.multiEmbedding.modelName || "";
currentSelectedModels.reranker.reranker = modelConfig.rerank.modelName;
- currentSelectedModels.multimodal.vlm = modelConfig.vlm.modelName;
+ currentSelectedModels.multimodal.image_understanding =
+ modelConfig.imageUnderstanding?.modelName || "";
+ currentSelectedModels.multimodal.image_generation =
+ modelConfig.imageGeneration?.modelName || "";
+ currentSelectedModels.multimodal.video_understanding =
+ modelConfig.videoUnderstanding?.modelName || "";
currentSelectedModels.voice.tts = modelConfig.tts.modelName;
currentSelectedModels.voice.stt = modelConfig.stt.modelName;
} else {
@@ -492,7 +550,7 @@ export const ModelConfigSection = forwardRef<
} else if (category === "reranker") {
modelType = MODEL_TYPES.RERANK;
} else if (category === "multimodal") {
- modelType = MODEL_TYPES.VLM;
+ modelType = optionId as ModelType;
} else if (category === MODEL_TYPES.EMBEDDING) {
modelType =
optionId === MODEL_TYPES.MULTI_EMBEDDING
@@ -654,7 +712,7 @@ export const ModelConfigSection = forwardRef<
} else if (category === "reranker") {
modelType = MODEL_TYPES.RERANK;
} else if (category === "multimodal") {
- modelType = MODEL_TYPES.VLM;
+ modelType = option as ModelType;
} else if (category === MODEL_TYPES.EMBEDDING) {
modelType =
option === MODEL_TYPES.MULTI_EMBEDDING
@@ -679,7 +737,16 @@ export const ModelConfigSection = forwardRef<
) {
configKey = "multiEmbedding";
} else if (category === "multimodal") {
- configKey = MODEL_TYPES.VLM;
+ // Map multimodal option to config key
+ if (option === MODEL_TYPES.IMAGE_UNDERSTANDING) {
+ configKey = MODEL_TYPES.IMAGE_UNDERSTANDING;
+ } else if (option === MODEL_TYPES.IMAGE_GENERATION) {
+ configKey = MODEL_TYPES.IMAGE_GENERATION;
+ } else if (option === MODEL_TYPES.VIDEO_UNDERSTANDING) {
+ configKey = MODEL_TYPES.VIDEO_UNDERSTANDING;
+ } else {
+ configKey = option;
+ }
} else if (category === "reranker") {
configKey = MODEL_TYPES.RERANK;
} else if (category === "voice" && option === "tts") {
@@ -981,7 +1048,7 @@ export const ModelConfigSection = forwardRef<
? MODEL_TYPES.TTS
: MODEL_TYPES.STT
: key === "multimodal"
- ? MODEL_TYPES.VLM
+ ? (option.id as ModelType)
: key === MODEL_TYPES.EMBEDDING &&
option.id === MODEL_TYPES.MULTI_EMBEDDING
? MODEL_TYPES.MULTI_EMBEDDING
diff --git a/frontend/const/modelConfig.ts b/frontend/const/modelConfig.ts
index a79e3b16d..bed6ac17d 100644
--- a/frontend/const/modelConfig.ts
+++ b/frontend/const/modelConfig.ts
@@ -7,6 +7,9 @@ export const MODEL_TYPES = {
STT: "stt",
TTS: "tts",
VLM: "vlm",
+ IMAGE_UNDERSTANDING: "image_understanding",
+ IMAGE_GENERATION: "image_generation",
+ VIDEO_UNDERSTANDING: "video_understanding",
} as const;
// Model source constants
@@ -149,5 +152,17 @@ export const CARD_THEMES = {
borderColor: "#e6e6e6",
backgroundColor: "#ffffff",
},
+ image_understanding: {
+ borderColor: "#e6e6e6",
+ backgroundColor: "#ffffff",
+ },
+ image_generation: {
+ borderColor: "#e6e6e6",
+ backgroundColor: "#ffffff",
+ },
+ video_understanding: {
+ borderColor: "#e6e6e6",
+ backgroundColor: "#ffffff",
+ },
};
diff --git a/frontend/hooks/model/useModelList.ts b/frontend/hooks/model/useModelList.ts
index f6ff1dce1..b31f0aee5 100644
--- a/frontend/hooks/model/useModelList.ts
+++ b/frontend/hooks/model/useModelList.ts
@@ -46,6 +46,30 @@ export function useModelList(options?: { enabled?: boolean; staleTime?: number }
return models.filter((model) => model.type === "vlm" && model.connect_status === "available");
}, [models]);
+ const imageUnderstandingModels = useMemo(() => {
+ return models.filter((model) => model.type === "image_understanding");
+ }, [models]);
+
+ const availableImageUnderstandingModels = useMemo(() => {
+ return models.filter((model) => model.type === "image_understanding" && model.connect_status === "available");
+ }, [models]);
+
+ const imageGenerationModels = useMemo(() => {
+ return models.filter((model) => model.type === "image_generation");
+ }, [models]);
+
+ const availableImageGenerationModels = useMemo(() => {
+ return models.filter((model) => model.type === "image_generation" && model.connect_status === "available");
+ }, [models]);
+
+ const videoUnderstandingModels = useMemo(() => {
+ return models.filter((model) => model.type === "video_understanding");
+ }, [models]);
+
+ const availableVideoUnderstandingModels = useMemo(() => {
+ return models.filter((model) => model.type === "video_understanding" && model.connect_status === "available");
+ }, [models]);
+
return {
...query,
models,
@@ -56,6 +80,12 @@ export function useModelList(options?: { enabled?: boolean; staleTime?: number }
availableEmbeddingModels,
vlmModels,
availableVlmModels,
+ imageUnderstandingModels,
+ availableImageUnderstandingModels,
+ imageGenerationModels,
+ availableImageGenerationModels,
+ videoUnderstandingModels,
+ availableVideoUnderstandingModels,
invalidate: () => queryClient.invalidateQueries({ queryKey: ["models"] }),
};
}
diff --git a/frontend/hooks/useConfig.ts b/frontend/hooks/useConfig.ts
index 70aee0df2..1d7ec3831 100644
--- a/frontend/hooks/useConfig.ts
+++ b/frontend/hooks/useConfig.ts
@@ -263,8 +263,13 @@ export function useConfig() {
const config: GlobalConfig = (query.data as GlobalConfig | undefined) ?? defaultConfig;
- // Whether config has selected a VLM model
- const isVlmAvailable = !!(config?.models?.vlm?.modelName || config?.models?.vlm?.displayName);
+ // Whether config has selected a VLM model (image understanding, image generation, or video understanding)
+ const isVlmAvailable = !!(
+ config?.models?.vlm?.modelName || config?.models?.vlm?.displayName ||
+ config?.models?.imageUnderstanding?.modelName || config?.models?.imageUnderstanding?.displayName ||
+ config?.models?.imageGeneration?.modelName || config?.models?.imageGeneration?.displayName ||
+ config?.models?.videoUnderstanding?.modelName || config?.models?.videoUnderstanding?.displayName
+ );
// Whether config has selected an Embedding model
const isEmbeddingAvailable = !!(config?.models?.embedding?.modelName || config?.models?.embedding?.displayName);
diff --git a/frontend/public/locales/en/common.json b/frontend/public/locales/en/common.json
index eae1f6f95..b45a0322d 100644
--- a/frontend/public/locales/en/common.json
+++ b/frontend/public/locales/en/common.json
@@ -785,6 +785,9 @@
"model.type.llm": "Large Language Model",
"model.type.embedding": "Embedding Model",
"model.type.vlm": "Vision Language Model",
+ "model.type.image_understanding": "Image Understanding Model",
+ "model.type.image_generation": "Image Generation Model",
+ "model.type.video_understanding": "Video Understanding Model",
"model.type.rerank": "Rerank Model",
"model.type.stt": "Speech-to-Text Model",
"model.type.tts": "Text-to-Speech Model",
@@ -852,6 +855,9 @@
"modelConfig.option.multiEmbeddingModel": "Multimodal Embedding Model",
"modelConfig.option.rerankerModel": "Reranker Model",
"modelConfig.option.vlmModel": "Vision Language Model",
+ "modelConfig.option.imageUnderstandingModel": "Image Understanding Model",
+ "modelConfig.option.imageGenerationModel": "Image Generation Model",
+ "modelConfig.option.videoUnderstandingModel": "Video Understanding Model",
"modelConfig.option.ttsModel": "Text-to-Speech Model",
"modelConfig.option.sttModel": "Speech-to-Text Model",
"modelConfig.error.loadList": "Failed to load model list:",
diff --git a/frontend/public/locales/zh/common.json b/frontend/public/locales/zh/common.json
index fb521b68d..cdcb6a477 100644
--- a/frontend/public/locales/zh/common.json
+++ b/frontend/public/locales/zh/common.json
@@ -785,6 +785,9 @@
"model.type.llm": "大语言模型",
"model.type.embedding": "向量模型",
"model.type.vlm": "视觉语言模型",
+ "model.type.image_understanding": "图片理解模型",
+ "model.type.image_generation": "图片生成模型",
+ "model.type.video_understanding": "视频理解模型",
"model.type.rerank": "重排模型",
"model.type.stt": "语音识别模型",
"model.type.tts": "语音合成模型",
@@ -853,6 +856,9 @@
"modelConfig.option.multiEmbeddingModel": "多模态向量模型",
"modelConfig.option.rerankerModel": "重排模型",
"modelConfig.option.vlmModel": "视觉语言模型",
+ "modelConfig.option.imageUnderstandingModel": "图片理解模型",
+ "modelConfig.option.imageGenerationModel": "图片生成模型",
+ "modelConfig.option.videoUnderstandingModel": "视频理解模型",
"modelConfig.option.ttsModel": "语音合成模型",
"modelConfig.option.sttModel": "语音识别模型",
"modelConfig.error.loadList": "加载模型列表失败:",
diff --git a/frontend/types/modelConfig.ts b/frontend/types/modelConfig.ts
index 829f3f183..ab1223401 100644
--- a/frontend/types/modelConfig.ts
+++ b/frontend/types/modelConfig.ts
@@ -30,7 +30,10 @@ export type ModelType =
| "stt"
| "tts"
| "vlm"
- | "multi_embedding";
+ | "multi_embedding"
+ | "image_understanding"
+ | "image_generation"
+ | "video_understanding";
// Model option interface
export interface ModelOption {
@@ -83,6 +86,9 @@ export interface ModelConfig {
vlm: SingleModelConfig;
stt: SingleModelConfig;
tts: SingleModelConfig;
+ imageUnderstanding: SingleModelConfig;
+ imageGeneration: SingleModelConfig;
+ videoUnderstanding: SingleModelConfig;
}
// Global configuration interface
diff --git a/start_dev.sh b/start_dev.sh
new file mode 100644
index 000000000..97c0b67d6
--- /dev/null
+++ b/start_dev.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# 设置项目根目录路径(建议使用绝对路径或确保脚本在根目录运行)
+PROJECT_ROOT="F:/PythonProject/nexent-develop"
+VENV_PYTHON="backend/.venv/Scripts/python"
+
+echo "🚀 正在通过 mintty 启动 Nexent 服务..."
+
+# 1. 启动 MCP Service
+mintty -p 100,100 -t "MCP-Service" bash -c "cd $PROJECT_ROOT && source .env && $VENV_PYTHON backend/mcp_service.py; exec bash" &
+
+# 2. 启动 Config Service
+mintty -p 500,100 -t "Config-Service" bash -c "cd $PROJECT_ROOT && source .env && $VENV_PYTHON backend/config_service.py; exec bash" &
+
+# 3. 启动 Runtime Service
+mintty -p 100,500 -t "Runtime-Service" bash -c "cd $PROJECT_ROOT && source .env && $VENV_PYTHON backend/runtime_service.py; exec bash" &
+
+
+echo "✅ 3 个独立的窗口已在 $PROJECT_ROOT 路径下启动。"
\ No newline at end of file
diff --git a/test/backend/services/providers/test_silicon_provider.py b/test/backend/services/providers/test_silicon_provider.py
index b947040c3..16512e87a 100644
--- a/test/backend/services/providers/test_silicon_provider.py
+++ b/test/backend/services/providers/test_silicon_provider.py
@@ -578,3 +578,257 @@ async def test_get_models_correct_url_for_rerank(self, mocker: MockFixture):
# Verify the URL contains sub_type=reranker for rerank
call_args = mock_client.get.call_args
assert "sub_type=reranker" in call_args[0][0]
+
+
+class TestSiliconModelProviderFiltering:
+ """Tests for model filtering in SiliconModelProvider."""
+
+ @pytest.mark.asyncio
+ async def test_get_models_llm_excludes_vision_models(self, mocker: MockFixture):
+ """Test that LLM filter excludes vision/image models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "gpt-4", "name": "GPT-4"},
+ {"id": "gpt-4-vision", "name": "GPT-4 Vision"},
+ {"id": "qwen-vl-72b", "name": "Qwen VL"},
+ {"id": "stable-diffusion-xl", "name": "SDXL"},
+ ]
+ }
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ result = await provider.get_models({
+ "model_type": "llm",
+ "api_key": "test-key"
+ })
+
+ assert len(result) == 1
+ assert result[0]["id"] == "gpt-4"
+
+ @pytest.mark.asyncio
+ async def test_get_models_image_understanding_filter(self, mocker: MockFixture):
+ """Test that image_understanding filter includes only vision models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "gpt-4", "name": "GPT-4"},
+ {"id": "gpt-4-vision-preview", "name": "GPT-4V"},
+ {"id": "qwen-vl-72b", "name": "Qwen VL"},
+ {"id": "llava-1.5", "name": "LLaVA"},
+ {"id": "stable-diffusion-xl", "name": "SDXL"},
+ ]
+ }
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ result = await provider.get_models({
+ "model_type": "image_understanding",
+ "api_key": "test-key"
+ })
+
+ result_ids = [m["id"] for m in result]
+ assert "gpt-4" not in result_ids
+ assert "stable-diffusion-xl" not in result_ids
+ assert len(result) >= 1
+
+ @pytest.mark.asyncio
+ async def test_get_models_image_generation_filter(self, mocker: MockFixture):
+ """Test that image_generation filter includes only image gen models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "gpt-4", "name": "GPT-4"},
+ {"id": "stable-diffusion-xl", "name": "SDXL"},
+ {"id": "wanx-t2i", "name": "Wanx T2I"},
+ {"id": "flux-pro", "name": "Flux Pro"},
+ {"id": "qwen-vl-72b", "name": "Qwen VL"},
+ ]
+ }
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ result = await provider.get_models({
+ "model_type": "image_generation",
+ "api_key": "test-key"
+ })
+
+ result_ids = [m["id"] for m in result]
+ assert "gpt-4" not in result_ids
+ assert "qwen-vl-72b" not in result_ids
+ assert len(result) >= 1
+
+ @pytest.mark.asyncio
+ async def test_get_models_video_understanding_filter(self, mocker: MockFixture):
+ """Test that video_understanding filter includes only video models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "gpt-4", "name": "GPT-4"},
+ {"id": "video-understanding-model", "name": "Video Model"},
+ {"id": "stable-diffusion-xl", "name": "SDXL"},
+ ]
+ }
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ result = await provider.get_models({
+ "model_type": "video_understanding",
+ "api_key": "test-key"
+ })
+
+ result_ids = [m["id"] for m in result]
+ assert "gpt-4" not in result_ids
+ assert "stable-diffusion-xl" not in result_ids
+ assert len(result) == 1
+ assert result[0]["id"] == "video-understanding-model"
+
+ @pytest.mark.asyncio
+ async def test_get_models_correct_url_for_image_generation(self, mocker: MockFixture):
+ """Test that correct URL is used for image_generation models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {"data": [{"id": "stable-diffusion-xl"}]}
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ await provider.get_models({
+ "model_type": "image_generation",
+ "api_key": "test-key"
+ })
+
+ call_args = mock_client.get.call_args
+ assert "sub_type=chat" in call_args[0][0]
+
+ @pytest.mark.asyncio
+ async def test_get_models_correct_url_for_video_understanding(self, mocker: MockFixture):
+ """Test that correct URL is used for video_understanding models."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {"data": [{"id": "video-model"}]}
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+
+ provider = SiliconModelProvider()
+ await provider.get_models({
+ "model_type": "video_understanding",
+ "api_key": "test-key"
+ })
+
+ call_args = mock_client.get.call_args
+ assert "sub_type=chat" in call_args[0][0]
+
+ @pytest.mark.asyncio
+ async def test_get_models_image_generation_correct_base_url(self, mocker: MockFixture):
+ """Test that image generation models have correct base_url set."""
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {
+ "data": [
+ {"id": "stable-diffusion-xl", "name": "SDXL"},
+ {"id": "flux-pro", "name": "Flux Pro"},
+ ]
+ }
+ mock_response.raise_for_status = MagicMock()
+
+ mock_client = AsyncMock()
+ mock_client.get.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ mocker.patch(
+ "backend.services.providers.silicon_provider.httpx.AsyncClient",
+ return_value=mock_cm
+ )
+ mocker.patch(
+ "backend.services.providers.silicon_provider.SILICON_IMAGE_GEN_URL",
+ "https://api.siliconflow.cn/v1/images/generations"
+ )
+
+ provider = SiliconModelProvider()
+ result = await provider.get_models({
+ "model_type": "image_generation",
+ "api_key": "test-key"
+ })
+
+ for model in result:
+ assert model["base_url"] == "https://api.siliconflow.cn/v1/images/generations"
+ assert model["model_type"] == "image_generation"
+ assert model["model_tag"] == "chat"
diff --git a/test/backend/services/test_model_health_service.py b/test/backend/services/test_model_health_service.py
index 1858576fc..64d61695e 100644
--- a/test/backend/services/test_model_health_service.py
+++ b/test/backend/services/test_model_health_service.py
@@ -1,7 +1,10 @@
import os
import sys
+import types
from unittest import mock
+from unittest.mock import MagicMock, AsyncMock
+import httpx
import pytest
# Dynamically determine the backend path
@@ -13,7 +16,7 @@
class MockModule(mock.MagicMock):
@classmethod
def __getattr__(cls, key):
- return mock.MagicMock() # Return a regular MagicMock instead of a new MockModule
+ return mock.MagicMock()
# Mock required modules before any imports occur
@@ -27,50 +30,87 @@ def __getattr__(cls, key):
# Mock nexent packages and modules with proper hierarchy
sys.modules['nexent'] = MockModule()
-sys.modules['nexent.core'] = MockModule()
+sys.modules['nexent.core'] = types.ModuleType("nexent.core")
sys.modules['nexent.core.agents'] = MockModule()
sys.modules['nexent.core.agents.agent_model'] = MockModule()
-sys.modules['nexent.core.models'] = MockModule()
+sys.modules['nexent.core.models'] = types.ModuleType("nexent.core.models")
sys.modules['nexent.core.models.embedding_model'] = MockModule()
+sys.modules['nexent.core.models.rerank_model'] = MockModule()
-# Mock rerank_model module with proper class exports
-
-
-class MockBaseRerank:
+# Create mock classes for nexent.core
+class MockMessageObserver:
pass
+class MockOpenAIModel:
+ pass
-class MockOpenAICompatibleRerank(MockBaseRerank):
- def __init__(self, *args, **kwargs):
- pass
+class MockOpenAIVLModel:
+ pass
+sys.modules['nexent.core'].MessageObserver = MockMessageObserver
+sys.modules['nexent.core.models'].OpenAIModel = MockOpenAIModel
+sys.modules['nexent.core.models'].OpenAIVLModel = MockOpenAIVLModel
+sys.modules['nexent.core.models'].JinaEmbedding = mock.MagicMock()
+sys.modules['nexent.core.models'].OpenAICompatibleEmbedding = mock.MagicMock()
+sys.modules['nexent.core.models.rerank_model'].OpenAICompatibleRerank = mock.MagicMock()
-rerank_module = MockModule()
-rerank_module.BaseRerank = MockBaseRerank
-rerank_module.OpenAICompatibleRerank = MockOpenAICompatibleRerank
-sys.modules['nexent.core.models.rerank_model'] = rerank_module
+# Mock nexent.monitor module with the required functions
+nexent_monitor_mod = types.ModuleType("nexent.monitor")
+nexent_monitor_mod.set_monitoring_context = mock.MagicMock()
+nexent_monitor_mod.set_monitoring_operation = mock.MagicMock()
+sys.modules['nexent.monitor'] = nexent_monitor_mod
# Mock services packages
sys.modules['services'] = MockModule()
sys.modules['services.voice_service'] = MockModule()
# Define the ModelConnectStatusEnum for testing
+class _StatusEnumValue:
+ """Helper class to simulate enum value behavior."""
+ def __init__(self, val):
+ self._val = val
+ @property
+ def value(self):
+ return self._val
-class ModelConnectStatusEnum:
- AVAILABLE = "available"
- UNAVAILABLE = "unavailable"
- DETECTING = "detecting"
+ def __str__(self):
+ return self._val
-# Define a ModelResponse class for testing
+ def __eq__(self, other):
+ return str(self) == str(other)
+ def __hash__(self):
+ return hash(self._val)
+
+class ModelConnectStatusEnum:
+ AVAILABLE = _StatusEnumValue("available")
+ UNAVAILABLE = _StatusEnumValue("unavailable")
+ DETECTING = _StatusEnumValue("detecting")
+ NOT_DETECTED = _StatusEnumValue("not_detected")
+
+# Define a ModelResponse class for testing
class ModelResponse:
def __init__(self, code, message="", data=None):
self.code = code
self.message = message
self.data = data or {}
+# Mock consts modules
+sys.modules['consts'] = MockModule()
+sys.modules['consts.const'] = types.ModuleType("consts.const")
+sys.modules['consts.const'].LOCALHOST_NAME = "localhost"
+sys.modules['consts.const'].LOCALHOST_IP = "127.0.0.1"
+sys.modules['consts.const'].DOCKER_INTERNAL_HOST = "host.docker.internal"
+sys.modules['consts.model'] = types.ModuleType("consts.model")
+sys.modules['consts.model'].ModelConnectStatusEnum = ModelConnectStatusEnum
+
+# Mock httpx for image generation tests
+sys.modules['httpx'] = types.ModuleType("httpx")
+sys.modules['httpx'].AsyncClient = mock.MagicMock()
+sys.modules['httpx'].ConnectError = type('ConnectError', (Exception,), {})
+sys.modules['httpx'].TimeoutException = type('TimeoutException', (Exception,), {})
# Now import the module under test
try:
@@ -663,6 +703,161 @@ async def test_verify_model_config_connectivity_exception():
assert "Unexpected error" in response["error"]
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_image_generation_success():
+ """Test connectivity check for image_generation model."""
+ mock_response = MagicMock()
+ mock_response.status_code = 400 # Bad request is OK, as long as server responds
+
+ mock_client = AsyncMock()
+ mock_client.post.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ with mock.patch("backend.services.model_health_service.httpx.AsyncClient", return_value=mock_cm):
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen-Image-Edit",
+ "image_generation",
+ "https://api.siliconflow.cn/v1/images/generations",
+ "test-key",
+ )
+
+ assert result is True
+ mock_client.post.assert_called_once()
+ call_args = mock_client.post.call_args
+ assert call_args[0][0] == "https://api.siliconflow.cn/v1/images/generations"
+ assert call_args[1]["json"]["model"] == "Qwen/Qwen-Image-Edit"
+ assert call_args[1]["headers"]["Authorization"] == "Bearer test-key"
+
+
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_image_generation_failure():
+ """Test connectivity check failure for image_generation model."""
+ mock_response = MagicMock()
+ mock_response.status_code = 500 # Server error means connectivity failed
+
+ mock_client = AsyncMock()
+ mock_client.post.return_value = mock_response
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ with mock.patch("backend.services.model_health_service.httpx.AsyncClient", return_value=mock_cm):
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen-Image-Edit",
+ "image_generation",
+ "https://api.siliconflow.cn/v1/images/generations",
+ "test-key",
+ )
+
+ assert result is False
+
+
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_image_generation_connect_error():
+ """Test connectivity check for image_generation model with connection error."""
+ mock_client = AsyncMock()
+ mock_client.post.side_effect = httpx.ConnectError("Connection failed")
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ with mock.patch("backend.services.model_health_service.httpx.AsyncClient", return_value=mock_cm):
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen-Image-Edit",
+ "image_generation",
+ "https://api.siliconflow.cn/v1/images/generations",
+ "test-key",
+ )
+
+ assert result is False
+
+
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_image_generation_timeout():
+ """Test connectivity check for image_generation model with timeout."""
+ mock_client = AsyncMock()
+ mock_client.post.side_effect = httpx.TimeoutException("Timeout")
+
+ mock_cm = MagicMock()
+ mock_cm.__aenter__ = AsyncMock(return_value=mock_client)
+ mock_cm.__aexit__ = AsyncMock(return_value=None)
+
+ with mock.patch("backend.services.model_health_service.httpx.AsyncClient", return_value=mock_cm):
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen-Image-Edit",
+ "image_generation",
+ "https://api.siliconflow.cn/v1/images/generations",
+ "test-key",
+ )
+
+ assert result is False
+
+
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_image_understanding():
+ """Test connectivity check for image_understanding model."""
+ with mock.patch("backend.services.model_health_service.MessageObserver") as mock_observer, \
+ mock.patch("backend.services.model_health_service.OpenAIVLModel") as mock_model:
+ mock_observer_instance = mock.MagicMock()
+ mock_observer.return_value = mock_observer_instance
+
+ mock_model_instance = mock.MagicMock()
+ mock_model_instance.check_connectivity = mock.AsyncMock(return_value=True)
+ mock_model.return_value = mock_model_instance
+
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen2.5-VL",
+ "image_understanding",
+ "https://api.siliconflow.cn",
+ "test-key",
+ )
+
+ assert result is True
+ mock_model.assert_called_once_with(
+ mock_observer_instance,
+ model_id="Qwen/Qwen2.5-VL",
+ api_base="https://api.siliconflow.cn",
+ api_key="test-key",
+ ssl_verify=True
+ )
+ mock_model_instance.check_connectivity.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_perform_connectivity_check_video_understanding():
+ """Test connectivity check for video_understanding model."""
+ with mock.patch("backend.services.model_health_service.MessageObserver") as mock_observer, \
+ mock.patch("backend.services.model_health_service.OpenAIVLModel") as mock_model:
+ mock_observer_instance = mock.MagicMock()
+ mock_observer.return_value = mock_observer_instance
+
+ mock_model_instance = mock.MagicMock()
+ mock_model_instance.check_connectivity = mock.AsyncMock(return_value=True)
+ mock_model.return_value = mock_model_instance
+
+ result = await _perform_connectivity_check(
+ "Qwen/Qwen2-VL",
+ "video_understanding",
+ "https://api.siliconflow.cn",
+ "test-key",
+ )
+
+ assert result is True
+ mock_model.assert_called_once_with(
+ mock_observer_instance,
+ model_id="Qwen/Qwen2-VL",
+ api_base="https://api.siliconflow.cn",
+ api_key="test-key",
+ ssl_verify=True
+ )
+ mock_model_instance.check_connectivity.assert_called_once()
+
+
@pytest.mark.asyncio
async def test_save_config_with_error():
# This is the placeholder test function provided by the user
From b06ecded197b97f4f96b6d40144cb8b916c08786 Mon Sep 17 00:00:00 2001
From: 827dls <1670704430@qq.com>
Date: Mon, 11 May 2026 14:57:57 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=A4=9A=E6=A8=A1?=
=?UTF-8?q?=E6=80=81=E5=9B=BE=E7=89=87=E7=90=86=E8=A7=A3bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
backend/agents/create_agent_info.py | 4 +-
backend/apps/a2a_client_app.py | 6 +
backend/apps/skill_app.py | 3 +
backend/consts/model.py | 1 -
backend/consts/provider.py | 5 +
backend/database/a2a_agent_db.py | 19 ++-
backend/database/agent_db.py | 1 -
backend/database/agent_version_db.py | 78 +++++++--
backend/database/client.py | 13 +-
backend/database/db_models.py | 1 -
backend/database/skill_db.py | 71 ++++----
backend/services/a2a_client_service.py | 17 +-
backend/services/config_sync_service.py | 5 +-
backend/services/image_service.py | 7 +
.../services/providers/dashscope_provider.py | 16 +-
.../services/providers/silicon_provider.py | 141 ++++++++++++----
.../services/providers/tokenpony_provider.py | 18 +-
backend/services/skill_service.py | 4 +
docker/init.sql | 2 -
...e_context_manager_to_ag_tenant_agent_t.sql | 10 --
...510_migrate_vlm_to_image_understanding.sql | 22 +++
.../models/components/model/ModelListCard.tsx | 13 +-
.../models/components/modelConfig.tsx | 25 ++-
frontend/const/modelConfig.ts | 6 +-
frontend/hooks/model/useModelList.ts | 5 +-
frontend/hooks/useConfig.ts | 34 ++++
frontend/services/agentConfigService.ts | 1 -
frontend/types/agentConfig.ts | 1 -
.../charts/nexent-common/files/init.sql | 2 -
start_dev.sh | 5 +-
test/backend/database/test_agent_db.py | 3 +-
.../backend/database/test_agent_version_db.py | 159 +++++++++++++++---
32 files changed, 537 insertions(+), 161 deletions(-)
delete mode 100644 docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql
create mode 100644 docker/sql/v2.0.5_0510_migrate_vlm_to_image_understanding.sql
diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py
index 933fcd129..54f5e5da9 100644
--- a/backend/agents/create_agent_info.py
+++ b/backend/agents/create_agent_info.py
@@ -409,10 +409,8 @@ async def create_agent_config(
model_max_tokens = model_info["max_tokens"]
else:
model_name = "main_model"
- # Use agent-level setting for context management, default to False
- enable_context_manager = agent_info.get("enable_context_manager", False)
cm_config = ContextManagerConfig(
- enabled=enable_context_manager,
+ enabled=False,
token_threshold=model_max_tokens,
)
agent_config = AgentConfig(
diff --git a/backend/apps/a2a_client_app.py b/backend/apps/a2a_client_app.py
index db7acd108..8652c2f43 100644
--- a/backend/apps/a2a_client_app.py
+++ b/backend/apps/a2a_client_app.py
@@ -154,6 +154,12 @@ async def list_external_agents(
)
except Exception as e:
+ # Return empty list if table doesn't exist
+ if "does not exist" in str(e).lower():
+ return JSONResponse(
+ status_code=HTTPStatus.OK,
+ content={"status": "success", "data": []}
+ )
logger.error(f"List agents failed: {e}", exc_info=True)
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
diff --git a/backend/apps/skill_app.py b/backend/apps/skill_app.py
index 510a0e481..2a6e9fda5 100644
--- a/backend/apps/skill_app.py
+++ b/backend/apps/skill_app.py
@@ -35,6 +35,9 @@ async def list_skills() -> JSONResponse:
except SkillException as e:
raise HTTPException(status_code=500, detail=str(e))
except Exception as e:
+ # Return empty list if table doesn't exist
+ if "does not exist" in str(e).lower():
+ return JSONResponse(content={"skills": []})
logger.error(f"Error listing skills: {e}")
raise HTTPException(status_code=500, detail="Internal server error")
diff --git a/backend/consts/model.py b/backend/consts/model.py
index a889ec848..1fab74ca1 100644
--- a/backend/consts/model.py
+++ b/backend/consts/model.py
@@ -339,7 +339,6 @@ class AgentInfoRequest(BaseModel):
related_agent_ids: Optional[List[int]] = None
group_ids: Optional[List[int]] = None
ingroup_permission: Optional[str] = None
- enable_context_manager: Optional[bool] = None
version_no: int = 0
diff --git a/backend/consts/provider.py b/backend/consts/provider.py
index 38bbc4027..049306670 100644
--- a/backend/consts/provider.py
+++ b/backend/consts/provider.py
@@ -14,6 +14,11 @@ class ProviderEnum(str, Enum):
SILICON_BASE_URL = "https://api.siliconflow.cn/v1/"
SILICON_GET_URL = "https://api.siliconflow.cn/v1/models"
+# Silicon Flow model tags (for filtering)
+# Based on SiliconFlow website: https://cloud.siliconflow.cn/me/models
+SILICON_TAG_VISION = "VLM" # Vision tag - VLM models (e.g., Kimi-K2.6, Qwen3.6)
+SILICON_TAG_VIDEO = "视频" # Video tag - Video understanding models
+
# Dashscope
DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/"
DASHSCOPE_GET_URL = "https://dashscope.aliyuncs.com/api/v1/models"
diff --git a/backend/database/a2a_agent_db.py b/backend/database/a2a_agent_db.py
index 9becdd67b..dc73e421a 100644
--- a/backend/database/a2a_agent_db.py
+++ b/backend/database/a2a_agent_db.py
@@ -1214,12 +1214,19 @@ def get_server_agent_ids(tenant_id: str) -> set[int]:
Returns:
Set of agent IDs that have A2A Server registration.
"""
- with _get_db_session() as session:
- agent_ids = session.query(A2AServerAgent.agent_id).filter(
- A2AServerAgent.tenant_id == tenant_id,
- A2AServerAgent.delete_flag != 'Y'
- ).all()
- return {row[0] for row in agent_ids}
+ try:
+ with _get_db_session() as session:
+ agent_ids = session.query(A2AServerAgent.agent_id).filter(
+ A2AServerAgent.tenant_id == tenant_id,
+ A2AServerAgent.delete_flag != 'Y'
+ ).all()
+ return {row[0] for row in agent_ids}
+ except Exception as e:
+ # Return empty set if table doesn't exist (migration not applied)
+ if "does not exist" in str(e).lower():
+ logger.warning(f"A2A server agent table not found, returning empty set: {e}")
+ return set()
+ raise
# =============================================================================
diff --git a/backend/database/agent_db.py b/backend/database/agent_db.py
index 7d14d7b8e..3ced7625b 100644
--- a/backend/database/agent_db.py
+++ b/backend/database/agent_db.py
@@ -194,7 +194,6 @@ def create_agent(agent_info, tenant_id: str, user_id: str):
"business_logic_model_name": new_agent.business_logic_model_name,
"group_ids": new_agent.group_ids,
"is_new": new_agent.is_new,
- "enable_context_manager": new_agent.enable_context_manager,
"current_version_no": new_agent.current_version_no,
"version_no": new_agent.version_no,
"created_by": new_agent.created_by,
diff --git a/backend/database/agent_version_db.py b/backend/database/agent_version_db.py
index aea8c06dc..bc216ca9a 100644
--- a/backend/database/agent_version_db.py
+++ b/backend/database/agent_version_db.py
@@ -57,14 +57,42 @@ def query_version_list(
"""
Query version list for an agent
"""
- with get_db_session() as session:
- versions = session.query(AgentVersion).filter(
- AgentVersion.agent_id == agent_id,
- AgentVersion.tenant_id == tenant_id,
- AgentVersion.delete_flag == 'N',
- ).order_by(AgentVersion.version_no.desc()).all()
-
- return [as_dict(v) for v in versions]
+ try:
+ with get_db_session() as session:
+ versions = session.query(AgentVersion).filter(
+ AgentVersion.agent_id == agent_id,
+ AgentVersion.tenant_id == tenant_id,
+ AgentVersion.delete_flag == 'N',
+ ).order_by(AgentVersion.version_no.desc()).all()
+
+ return [as_dict(v) for v in versions]
+ except Exception as e:
+ error_str = str(e).lower()
+ # If is_a2a column doesn't exist, retry with explicit column selection
+ if "is_a2a" in str(e) and ("does not exist" in error_str or "undefinedcolumn" in error_str):
+ with get_db_session() as session:
+ from sqlalchemy import select
+ columns = [
+ AgentVersion.id,
+ AgentVersion.tenant_id,
+ AgentVersion.agent_id,
+ AgentVersion.version_no,
+ AgentVersion.version_name,
+ AgentVersion.release_note,
+ AgentVersion.source_version_no,
+ AgentVersion.source_type,
+ AgentVersion.status,
+ AgentVersion.created_by,
+ AgentVersion.create_time,
+ ]
+ versions = session.query(*columns).filter(
+ AgentVersion.agent_id == agent_id,
+ AgentVersion.tenant_id == tenant_id,
+ AgentVersion.delete_flag == 'N',
+ ).order_by(AgentVersion.version_no.desc()).all()
+
+ return [dict(zip([c.key for c in columns], v)) for v in versions]
+ raise
def query_current_version_no(
@@ -141,11 +169,35 @@ def insert_version(
Insert a new version metadata record
Returns: version id
"""
- with get_db_session() as session:
- result = session.execute(
- insert(AgentVersion).values(**version_data).returning(AgentVersion.id)
- )
- return result.scalar_one()
+ from sqlalchemy import text
+
+ # First try with full data
+ try:
+ with get_db_session() as session:
+ result = session.execute(
+ insert(AgentVersion).values(**version_data).returning(AgentVersion.id)
+ )
+ return result.scalar_one()
+ except Exception as e:
+ error_str = str(e).lower()
+ # If is_a2a column doesn't exist, retry without it using native SQL
+ if "is_a2a" in str(e) and ("does not exist" in error_str or "undefinedcolumn" in error_str):
+ logger.info("is_a2a column not found, using native SQL to insert")
+ # Build column list and parameter placeholders
+ columns = [k for k in version_data.keys() if k != 'is_a2a']
+ col_list = ', '.join(columns)
+ placeholders = ', '.join([f':{c}' for c in columns])
+ insert_sql = text(f"""
+ INSERT INTO nexent.ag_tenant_agent_version_t (id, {col_list})
+ VALUES (nextval('nexent.ag_tenant_agent_version_t_id_seq'), {placeholders})
+ RETURNING id
+ """)
+ # Build params without is_a2a
+ params = {k: v for k, v in version_data.items() if k != 'is_a2a'}
+ with get_db_session() as session:
+ result = session.execute(insert_sql, params)
+ return result.scalar_one()
+ raise
def update_version_status(
diff --git a/backend/database/client.py b/backend/database/client.py
index 05f8940b9..29f55106d 100644
--- a/backend/database/client.py
+++ b/backend/database/client.py
@@ -294,7 +294,13 @@ def get_db_session(db_session=None):
except Exception as e:
if db_session is None:
session.rollback()
- logger.error(f"Database operation failed: {str(e)}")
+ error_str = str(e).lower()
+ # For "is_a2a column does not exist" errors, just log warning and raise
+ # The caller should handle this by removing the field and retrying
+ if "is_a2a" in str(e) and "does not exist" in error_str:
+ logger.warning(f"Database operation failed (expected for missing is_a2a column): {str(e)}")
+ else:
+ logger.error(f"Database operation failed: {str(e)}")
raise e
finally:
if db_session is None:
@@ -373,6 +379,11 @@ def get_monitoring_db_session(db_session=None):
except Exception as e:
if db_session is None:
session.rollback()
+ # Silently ignore "table does not exist" errors for monitoring
+ # This allows the app to work even if the monitoring table hasn't been created yet
+ if "does not exist" in str(e).lower():
+ logger.warning(f"Monitoring table not found, skipping: {str(e)}")
+ return
logger.error(f"Monitoring database operation failed: {str(e)}")
raise
finally:
diff --git a/backend/database/db_models.py b/backend/database/db_models.py
index 947c0a812..233e136fa 100644
--- a/backend/database/db_models.py
+++ b/backend/database/db_models.py
@@ -313,7 +313,6 @@ class AgentInfo(TableBase):
is_new = Column(Boolean, default=False, doc="Whether this agent is marked as new for the user")
current_version_no = Column(Integer, nullable=True, doc="Current published version number. NULL means no version published yet")
ingroup_permission = Column(String(30), doc="In-group permission: EDIT, READ_ONLY, PRIVATE")
- enable_context_manager = Column(Boolean, default=False, doc="Whether to enable context management (compression) for this agent")
class ToolInstance(TableBase):
diff --git a/backend/database/skill_db.py b/backend/database/skill_db.py
index 2a718800b..5ff9e7631 100644
--- a/backend/database/skill_db.py
+++ b/backend/database/skill_db.py
@@ -44,40 +44,53 @@ def create_or_update_skill_by_skill_info(skill_info, tenant_id: str, user_id: st
skill_info_dict.setdefault("created_by", user_id)
skill_info_dict.setdefault("updated_by", user_id)
- with get_db_session() as session:
- query = session.query(SkillInstance).filter(
- SkillInstance.tenant_id == tenant_id,
- SkillInstance.agent_id == skill_info_dict.get('agent_id'),
- SkillInstance.delete_flag != 'Y',
- SkillInstance.skill_id == skill_info_dict.get('skill_id'),
- SkillInstance.version_no == version_no
- )
- skill_instance = query.first()
-
- if skill_instance:
- for key, value in skill_info_dict.items():
- if hasattr(skill_instance, key):
- setattr(skill_instance, key, value)
- else:
- new_skill_instance = SkillInstance(
- **filter_property(skill_info_dict, SkillInstance))
- session.add(new_skill_instance)
- session.flush()
- skill_instance = new_skill_instance
+ try:
+ with get_db_session() as session:
+ query = session.query(SkillInstance).filter(
+ SkillInstance.tenant_id == tenant_id,
+ SkillInstance.agent_id == skill_info_dict.get('agent_id'),
+ SkillInstance.delete_flag != 'Y',
+ SkillInstance.skill_id == skill_info_dict.get('skill_id'),
+ SkillInstance.version_no == version_no
+ )
+ skill_instance = query.first()
+
+ if skill_instance:
+ for key, value in skill_info_dict.items():
+ if hasattr(skill_instance, key):
+ setattr(skill_instance, key, value)
+ else:
+ new_skill_instance = SkillInstance(
+ **filter_property(skill_info_dict, SkillInstance))
+ session.add(new_skill_instance)
+ session.flush()
+ skill_instance = new_skill_instance
- return as_dict(skill_instance)
+ return as_dict(skill_instance)
+ except Exception as e:
+ # Return None if table doesn't exist (migration not applied)
+ if "relation" in str(e).lower() and "does not exist" in str(e).lower():
+ logger.warning(f"Skill instance table not found, skipping skill update: {e}")
+ return None
+ raise
def query_skill_instances_by_agent_id(agent_id: int, tenant_id: str, version_no: int = 0):
"""Query all SkillInstance for an agent (regardless of enabled status)."""
- with get_db_session() as session:
- query = session.query(SkillInstance).filter(
- SkillInstance.tenant_id == tenant_id,
- SkillInstance.agent_id == agent_id,
- SkillInstance.version_no == version_no,
- SkillInstance.delete_flag != 'Y')
- skill_instances = query.all()
- return [as_dict(skill_instance) for skill_instance in skill_instances]
+ try:
+ with get_db_session() as session:
+ query = session.query(SkillInstance).filter(
+ SkillInstance.tenant_id == tenant_id,
+ SkillInstance.agent_id == agent_id,
+ SkillInstance.version_no == version_no,
+ SkillInstance.delete_flag != 'Y')
+ skill_instances = query.all()
+ return [as_dict(skill_instance) for skill_instance in skill_instances]
+ except Exception as e:
+ # Return empty list if table doesn't exist (migration not applied)
+ if "relation" in str(e).lower() and "does not exist" in str(e).lower():
+ return []
+ raise
def query_enabled_skill_instances(agent_id: int, tenant_id: str, version_no: int = 0):
diff --git a/backend/services/a2a_client_service.py b/backend/services/a2a_client_service.py
index 14f721ffd..dfce59d75 100644
--- a/backend/services/a2a_client_service.py
+++ b/backend/services/a2a_client_service.py
@@ -369,11 +369,18 @@ def list_external_agents(
Returns:
List of agent information dicts.
"""
- return a2a_agent_db.list_external_agents(
- tenant_id=tenant_id,
- source_type=source_type,
- is_available=is_available
- )
+ try:
+ return a2a_agent_db.list_external_agents(
+ tenant_id=tenant_id,
+ source_type=source_type,
+ is_available=is_available
+ )
+ except Exception as e:
+ # Return empty list if table doesn't exist (migration not applied)
+ if "relation" in str(e).lower() and "does not exist" in str(e).lower():
+ logger.warning(f"A2A external agents table not found, returning empty list: {e}")
+ return []
+ raise
def update_agent_protocol(
self,
diff --git a/backend/services/config_sync_service.py b/backend/services/config_sync_service.py
index 9fe50813a..860ac6f40 100644
--- a/backend/services/config_sync_service.py
+++ b/backend/services/config_sync_service.py
@@ -96,8 +96,10 @@ async def save_config_impl(config, tenant_id, user_id):
continue
model_display_name = model_config.get("displayName")
-
config_key = get_env_key(model_type) + "_ID"
+
+ logger.info(f"Saving model config: type={model_type}, key={config_key}, displayName={model_display_name}")
+
model_id = get_model_id_by_display_name(
model_display_name, tenant_id)
@@ -156,6 +158,7 @@ def build_models_config(tenant_id: str) -> dict:
try:
model_config = tenant_config_manager.get_model_config(
config_key, tenant_id=tenant_id)
+ logger.info(f"build_models_config: key={model_key}, config_key={config_key}, model_config={model_config}")
models_config[model_key] = build_model_config(model_config)
except Exception as e:
logger.warning(f"Failed to get config for {config_key}: {e}")
diff --git a/backend/services/image_service.py b/backend/services/image_service.py
index 0f394b9ab..fe26c52aa 100644
--- a/backend/services/image_service.py
+++ b/backend/services/image_service.py
@@ -32,8 +32,15 @@ async def proxy_image_impl(decoded_url: str):
def get_vlm_model(tenant_id: str):
# Get the tenant config
+ # First try imageUnderstanding (newer name), then fall back to vlm (legacy name)
vlm_model_config = tenant_config_manager.get_model_config(
key=MODEL_CONFIG_MAPPING.get("imageUnderstanding", "IMAGE_UNDERSTANDING_ID"), tenant_id=tenant_id)
+
+ # If imageUnderstanding not found, try vlm (for backward compatibility)
+ if not vlm_model_config:
+ vlm_model_config = tenant_config_manager.get_model_config(
+ key=MODEL_CONFIG_MAPPING.get("vlm", "VLM_ID"), tenant_id=tenant_id)
+
if not vlm_model_config:
return None
return OpenAIVLModel(
diff --git a/backend/services/providers/dashscope_provider.py b/backend/services/providers/dashscope_provider.py
index 6f2c57da7..6b1c1129f 100644
--- a/backend/services/providers/dashscope_provider.py
+++ b/backend/services/providers/dashscope_provider.py
@@ -22,6 +22,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
"""
try:
target_model_type: str = provider_config["model_type"]
+ # Normalize model_type to snake_case for consistency
+ # Convert camelCase to snake_case (e.g., "imageUnderstanding" -> "image_understanding")
+ import re
+ model_type_normalized = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', target_model_type).lower()
model_api_key: str = provider_config["api_key"]
headers = {"Authorization": f"Bearer {model_api_key}"}
@@ -133,14 +137,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
categorized_models['chat'].append(cleaned_model)
# Return the specific list based on the requested target_model_type
- if target_model_type == "llm":
+ if model_type_normalized == "llm":
return categorized_models["chat"]
- elif target_model_type in ("embedding", "multi_embedding"):
+ elif model_type_normalized in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
- elif target_model_type in ("image_understanding", "image_generation", "video_understanding"):
- return categorized_models[target_model_type]
- elif target_model_type in categorized_models:
- return categorized_models[target_model_type]
+ elif model_type_normalized in ("image_understanding", "image_generation", "video_understanding"):
+ return categorized_models[model_type_normalized]
+ elif model_type_normalized in categorized_models:
+ return categorized_models[model_type_normalized]
else:
return []
except (httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, Exception) as e:
diff --git a/backend/services/providers/silicon_provider.py b/backend/services/providers/silicon_provider.py
index 14d5bce26..63e0f6cd8 100644
--- a/backend/services/providers/silicon_provider.py
+++ b/backend/services/providers/silicon_provider.py
@@ -3,7 +3,10 @@
from typing import Dict, List
from consts.const import DEFAULT_LLM_MAX_TOKENS
-from consts.provider import SILICON_GET_URL, SILICON_BASE_URL
+from consts.provider import (
+ SILICON_GET_URL,
+ SILICON_BASE_URL,
+)
from services.providers.base import AbstractModelProvider, _classify_provider_error
logger = logging.getLogger("silicon_provider")
@@ -27,54 +30,124 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
"""
try:
model_type: str = provider_config["model_type"]
+ # Normalize model_type to snake_case for consistency
+ # Convert camelCase to snake_case (e.g., "imageUnderstanding" -> "image_understanding")
+ import re
+ model_type_normalized = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', model_type).lower()
model_api_key: str = provider_config["api_key"]
headers = {"Authorization": f"Bearer {model_api_key}"}
- # Choose endpoint by model type
- if model_type in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
- silicon_url = f"{SILICON_GET_URL}?sub_type=chat"
- elif model_type in ("embedding", "multi_embedding"):
- silicon_url = f"{SILICON_GET_URL}?sub_type=embedding"
- elif model_type == "rerank":
- silicon_url = f"{SILICON_GET_URL}?sub_type=reranker"
- else:
- silicon_url = SILICON_GET_URL
+ # Build URL with type and tag parameters
+ silicon_url = SILICON_GET_URL
+ params = []
+
+ # Choose sub_type by model type
+ if model_type_normalized in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
+ params.append("sub_type=chat")
+ elif model_type_normalized in ("embedding", "multi_embedding"):
+ params.append("sub_type=embedding")
+ elif model_type_normalized == "rerank":
+ params.append("sub_type=reranker")
+
+ # Note: SiliconFlow API does NOT support 'tag' parameter
+ # We only use sub_type for filtering, then filter by model ID keywords after getting results
+
+ if params:
+ silicon_url = f"{silicon_url}?{'&'.join(params)}"
async with httpx.AsyncClient(verify=False) as client:
response = await client.get(silicon_url, headers=headers)
response.raise_for_status()
model_list: List[Dict] = response.json()["data"]
+ # Log the API response for debugging
+ logger.info(f"SiliconFlow API response: type={model_type}, url={silicon_url}, raw_count={len(model_list)}")
+ logger.info(f"SiliconFlow raw model IDs: {[m.get('id') for m in model_list[:20]]}") # Log first 20
+
# Annotate models with canonical fields expected downstream
- if model_type in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
+ if model_type_normalized in ("llm", "vlm", "image_understanding", "image_generation", "video_understanding"):
for item in model_list:
item["model_tag"] = "chat"
- item["model_type"] = model_type
+ item["model_type"] = model_type_normalized
item["max_tokens"] = DEFAULT_LLM_MAX_TOKENS
- # Filter models based on the requested model_type
- if model_type == "llm":
- # For LLM, exclude vision/image/video generation related models
+ # For LLM, exclude models with vision/video/image generation keywords
+ # (in case API tag filter is not precise enough)
+ if model_type_normalized == "llm":
+ llm_exclude_keywords = [
+ "-vl", "vl-", "vision", "video",
+ "image_gen", "img_gen", "wanx", "flux",
+ "stable-diffusion", "dall", "llava",
+ "qwen-v", "qwen2-v", "qwen2.5-v", "qvq",
+ "internvl", "intern-vl", "minicpm-v",
+ "glm-4v", "gpt-4v", "gpt-4o",
+ "claude-3-opus", "claude-3-sonnet",
+ "gemini",
+ ]
model_list = [
m for m in model_list
- if not any(kw in m.get("id", "").lower() for kw in [
- "-vl", "vl-", "vision", "video", "ocr",
- "image_gen", "img_gen", "wanx", "flux",
- "stable-diffusion", "dall", "llava", "qwen-v"
- ])
+ if not any(kw in m.get("id", "").lower() for kw in llm_exclude_keywords)
]
- elif model_type == "image_understanding":
- # Only include image understanding models
+ logger.info(f"SiliconFlow LLM models after filter: {[m.get('id') for m in model_list]}")
+
+ # For image_understanding (VLM), filter by VLM-related keywords in model ID
+ # SiliconFlow API doesn't support tag filter, so we filter after getting results
+ elif model_type_normalized == "image_understanding":
+ # More comprehensive VLM keywords to catch all vision models
+ vlm_keywords = [
+ # Vision/VLM keywords
+ "vl", "-vl", "vision", "vlm",
+ "qwen-v", "qwen2-v", "qwen2.5-v", "qwen2-5-v", "qvq", "qwen-vl",
+ "qwen-vl-", "qwen2.5-vl-", "qwen2.5vl-", "qwen2-vl-", # Qwen VL series with size
+ "qwen3-vl", "qwen3-vl-", # Qwen3 VL series
+ "internvl", "intern-vl", "minicpm-v",
+ "glm-4v", "glm-4v-plus", "glm-4v-max", "glm-4v-flash",
+ "gpt-4v", "gpt-4o", "gpt-4-turbo",
+ "claude-3", "claude-3.5",
+ "gemini",
+ # Moonshot/Kimi
+ "moonshot", "kimi", "kimi-vl",
+ # Llava
+ "llava",
+ # Video understanding (can also process images)
+ "video", "videovision", "video-chat",
+ # OCR models
+ "ocr", "deepseek-ocr", "paddleocr", "paddleocr-vl",
+ # Other vision models
+ "ring-flash",
+ # Cog
+ "cog", "cogvlm", "cogagent",
+ # Other multimodal
+ "qwen-vl", "qwen2.5-vl", "qwen2.5vl",
+ "step-1", "step1v",
+ "emu", "emovla",
+ # NOTE(review): the broad patterns below ("vl-", "-vl", "vision") duplicate entries
+ # already listed at the top of this list — they are redundant here; confirm and dedupe.
+ "vl-", "-vl", "vision",
+ ]
+ original_count = len(model_list)
model_list = [
m for m in model_list
- if any(kw in m.get("id", "").lower() for kw in [
- "-vl", "vl-", "ocr", "vision", "llava",
- "qwen-v", "qwen2-v", "vit", "clip"
- ])
+ if any(kw in m.get("id", "").lower() for kw in vlm_keywords)
+ ]
+ logger.info(f"SiliconFlow VLM: raw={original_count}, filtered={len(model_list)}")
+ logger.info(f"SiliconFlow VLM models after filter: {[m.get('id') for m in model_list]}")
+
+ # For video understanding, filter by video-related keywords
+ elif model_type_normalized == "video_understanding":
+ video_keywords = [
+ "video", "videovision", "video-chat",
+ "qvq", "qwen-vl", "qwen2-v",
]
- elif model_type == "image_generation":
- # Only include image generation models
+ model_list = [
+ m for m in model_list
+ if any(kw in m.get("id", "").lower() for kw in video_keywords)
+ ]
+ logger.info(f"SiliconFlow video models after filter: {[m.get('id') for m in model_list]}")
+
+ # For image generation, filter by image generation keywords
+ # (API may not have a specific tag for image generation)
+ elif model_type_normalized == "image_generation":
model_list = [
m for m in model_list
if any(kw in m.get("id", "").lower() for kw in [
@@ -86,23 +159,17 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
])
]
logger.info(f"SiliconFlow image generation models after filter: {[m.get('id') for m in model_list]}")
- elif model_type == "video_understanding":
- # Only include video understanding models
- model_list = [
- m for m in model_list
- if "video" in m.get("id", "").lower()
- ]
- elif model_type in ("embedding", "multi_embedding"):
+ elif model_type_normalized in ("embedding", "multi_embedding"):
for item in model_list:
item["model_tag"] = "embedding"
item["model_type"] = model_type
- elif model_type == "rerank":
+ elif model_type_normalized == "rerank":
for item in model_list:
item["model_tag"] = "rerank"
item["model_type"] = model_type
# For image generation models, set the correct base_url
- if model_type == "image_generation":
+ if model_type_normalized == "image_generation":
for item in model_list:
# Set the image generation endpoint as base_url
item["base_url"] = SILICON_IMAGE_GEN_URL
diff --git a/backend/services/providers/tokenpony_provider.py b/backend/services/providers/tokenpony_provider.py
index c4142c5c5..8e190c0bb 100644
--- a/backend/services/providers/tokenpony_provider.py
+++ b/backend/services/providers/tokenpony_provider.py
@@ -25,6 +25,10 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
"""
try:
target_model_type: str = provider_config["model_type"]
+ # Normalize model_type to snake_case for consistency
+ # Convert camelCase to snake_case (e.g., "imageUnderstanding" -> "image_understanding")
+ import re
+ model_type_normalized = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', target_model_type).lower()
model_api_key: str = provider_config["api_key"]
headers = {"Authorization": f"Bearer {model_api_key}"}
@@ -96,7 +100,7 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
elif 'video' in m_id or 'video_understanding' in m_id:
cleaned_model.update({"model_tag": "video_understanding", "model_type": "video_understanding"})
categorized_models['video_understanding'].append(cleaned_model)
- elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision', 'qwen-vl', 'qwen2-vl']):
+ elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision', 'qwen-vl', 'qwen2-vl', 'qwen-vl-', 'qwen2.5-vl', 'glm-4v-flash']):
cleaned_model.update({"model_tag": "image_understanding", "model_type": "image_understanding"})
categorized_models['image_understanding'].append(cleaned_model)
@@ -107,14 +111,14 @@ async def get_models(self, provider_config: Dict) -> List[Dict]:
categorized_models['chat'].append(cleaned_model)
# Return the specific list based on the requested target_model_type
- if target_model_type == "llm":
+ if model_type_normalized == "llm":
return categorized_models["chat"]
- elif target_model_type in ("embedding", "multi_embedding"):
+ elif model_type_normalized in ("embedding", "multi_embedding"):
return categorized_models["embedding"]
- elif target_model_type in ("image_understanding", "image_generation", "video_understanding"):
- return categorized_models[target_model_type]
- elif target_model_type in categorized_models:
- return categorized_models[target_model_type]
+ elif model_type_normalized in ("image_understanding", "image_generation", "video_understanding"):
+ return categorized_models[model_type_normalized]
+ elif model_type_normalized in categorized_models:
+ return categorized_models[model_type_normalized]
else:
return []
diff --git a/backend/services/skill_service.py b/backend/services/skill_service.py
index 1cccd31d6..0e37b61b6 100644
--- a/backend/services/skill_service.py
+++ b/backend/services/skill_service.py
@@ -460,6 +460,10 @@ def list_skills(self, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]:
skills = skill_db.list_skills()
return [self._overlay_params_from_local_config_yaml(s) for s in skills]
except Exception as e:
+ # Return empty list if table doesn't exist (migration not applied)
+ if "relation" in str(e).lower() and "does not exist" in str(e).lower():
+ logger.warning(f"Skills table not found, returning empty list: {e}")
+ return []
logger.error(f"Error listing skills: {e}")
raise SkillException(f"Failed to list skills: {str(e)}") from e
diff --git a/docker/init.sql b/docker/init.sql
index 2df9665c7..9a065864c 100644
--- a/docker/init.sql
+++ b/docker/init.sql
@@ -316,7 +316,6 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_t (
enabled BOOLEAN DEFAULT FALSE,
is_new BOOLEAN DEFAULT FALSE,
provide_run_summary BOOLEAN DEFAULT FALSE,
- enable_context_manager BOOLEAN DEFAULT FALSE,
version_no INTEGER DEFAULT 0 NOT NULL,
current_version_no INTEGER NULL,
ingroup_permission VARCHAR(30),
@@ -374,7 +373,6 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_t.is_new IS 'Whether this agent is mark
COMMENT ON COLUMN nexent.ag_tenant_agent_t.version_no IS 'Version number. 0 = draft/editing state, >=1 = published snapshot';
COMMENT ON COLUMN nexent.ag_tenant_agent_t.current_version_no IS 'Current published version number. NULL means no version published yet';
COMMENT ON COLUMN nexent.ag_tenant_agent_t.ingroup_permission IS 'In-group permission: EDIT, READ_ONLY, PRIVATE';
-COMMENT ON COLUMN nexent.ag_tenant_agent_t.enable_context_manager IS 'Whether to enable context management (compression) for this agent';
-- Create index for is_new queries
CREATE INDEX IF NOT EXISTS idx_ag_tenant_agent_t_is_new
diff --git a/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql b/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql
deleted file mode 100644
index b89a19e04..000000000
--- a/docker/sql/v2.0.4_0427_add_enable_context_manager_to_ag_tenant_agent_t.sql
+++ /dev/null
@@ -1,10 +0,0 @@
--- Migration: Add enable_context_manager column to ag_tenant_agent_t table
--- Date: 2025-04-27
--- Description: Add enable_context_manager field to control context management (compression) per agent
-
--- Add enable_context_manager column to ag_tenant_agent_t table
-ALTER TABLE nexent.ag_tenant_agent_t
-ADD COLUMN IF NOT EXISTS enable_context_manager BOOLEAN DEFAULT FALSE;
-
--- Add comment to the column
-COMMENT ON COLUMN nexent.ag_tenant_agent_t.enable_context_manager IS 'Whether to enable context management (compression) for this agent';
\ No newline at end of file
diff --git a/docker/sql/v2.0.5_0510_migrate_vlm_to_image_understanding.sql b/docker/sql/v2.0.5_0510_migrate_vlm_to_image_understanding.sql
new file mode 100644
index 000000000..b6b70fa9f
--- /dev/null
+++ b/docker/sql/v2.0.5_0510_migrate_vlm_to_image_understanding.sql
@@ -0,0 +1,22 @@
+-- Migration script: Migrate vlm model type to image_understanding
+-- This script updates historical data where users had configured VLM models
+-- The model type should be changed from 'vlm' to 'image_understanding'
+-- Date: 2026-05-10
+
+-- Step 1: Preview affected records (optional - can be removed in production)
+-- SELECT model_id, model_name, model_type, display_name, tenant_id
+-- FROM nexent.model_record_t
+-- WHERE model_type = 'vlm' AND delete_flag = 'N';
+
+-- Step 2: Update model_type from 'vlm' to 'image_understanding'
+UPDATE nexent.model_record_t
+SET model_type = 'image_understanding',
+ update_time = CURRENT_TIMESTAMP
+WHERE model_type = 'vlm'
+ AND delete_flag = 'N';
+
+-- Step 3: Verify the update
+-- SELECT model_type, COUNT(*) as count
+-- FROM nexent.model_record_t
+-- WHERE delete_flag = 'N'
+-- GROUP BY model_type;
diff --git a/frontend/app/[locale]/models/components/model/ModelListCard.tsx b/frontend/app/[locale]/models/components/model/ModelListCard.tsx
index 8bf6e00a6..2afdc853a 100644
--- a/frontend/app/[locale]/models/components/model/ModelListCard.tsx
+++ b/frontend/app/[locale]/models/components/model/ModelListCard.tsx
@@ -154,13 +154,22 @@ export const ModelListCard = ({
// Get filtered models by type
const getFilteredModels = (): ModelOption[] => {
- return modelsData.filter((model) => model.type === type);
+ // Support both camelCase (imageUnderstanding) and snake_case (image_understanding)
+ return modelsData.filter((model) => {
+ if (model.type === type) return true;
+ // Handle VLM type compatibility (vlm -> image_understanding/imageUnderstanding)
+ if (type === MODEL_TYPES.IMAGE_UNDERSTANDING &&
+ (model.type === 'image_understanding' || model.type === 'vlm')) {
+ return true;
+ }
+ return false;
+ });
};
// Get model source label based on source field
const getModelSource = (displayName: string): string => {
const model = modelsData.find(
- (m) => m.type === type && m.displayName === displayName
+ (m) => (m.type === type || m.type === 'image_understanding' || m.type === 'vlm') && m.displayName === displayName
);
if (!model) return t("model.source.unknown");
diff --git a/frontend/app/[locale]/models/components/modelConfig.tsx b/frontend/app/[locale]/models/components/modelConfig.tsx
index e6444214d..30b71cbc1 100644
--- a/frontend/app/[locale]/models/components/modelConfig.tsx
+++ b/frontend/app/[locale]/models/components/modelConfig.tsx
@@ -300,14 +300,24 @@ export const ModelConfigSection = forwardRef<
)
: true;
+ // vlm is now called image_understanding, but we need to support legacy vlm models
+ const legacyVlm = modelConfig.vlm?.displayName;
+ const legacyVlmExists = legacyVlm
+ ? allModels.some(
+ (m) =>
+ m.displayName === legacyVlm &&
+ (m.type === MODEL_TYPES.VLM || m.type === MODEL_TYPES.IMAGE_UNDERSTANDING)
+ )
+ : true;
+
const imageUnderstanding = modelConfig.imageUnderstanding?.displayName;
const imageUnderstandingExists = imageUnderstanding
? allModels.some(
(m) =>
m.displayName === imageUnderstanding &&
- m.type === MODEL_TYPES.IMAGE_UNDERSTANDING
+ (m.type === MODEL_TYPES.IMAGE_UNDERSTANDING || m.type === MODEL_TYPES.VLM)
)
- : true;
+ : legacyVlmExists;
const imageGeneration = modelConfig.imageGeneration?.displayName;
const imageGenerationExists = imageGeneration
@@ -398,6 +408,11 @@ export const ModelConfigSection = forwardRef<
configUpdates.rerank = { modelName: "", displayName: "" };
}
+ // Handle legacy vlm configuration - migrate to imageUnderstanding or clear
+ if (!legacyVlmExists && legacyVlm) {
+ configUpdates.vlm = { modelName: "", displayName: "" };
+ }
+
if (!imageUnderstandingExists && imageUnderstanding) {
configUpdates.imageUnderstanding = { modelName: "", displayName: "" };
}
@@ -426,6 +441,9 @@ export const ModelConfigSection = forwardRef<
}
// Check if there are configured models that need connectivity verification
+ // Handle legacy vlm configuration check
+ const hasLegacyVlmConfigured = !!modelConfig.vlm?.modelName;
+
const hasConfiguredModels =
!!modelConfig.llm.modelName ||
!!modelConfig.embedding.modelName ||
@@ -435,7 +453,8 @@ export const ModelConfigSection = forwardRef<
!!modelConfig.imageGeneration?.modelName ||
!!modelConfig.videoUnderstanding?.modelName ||
!!modelConfig.tts.modelName ||
- !!modelConfig.stt.modelName;
+ !!modelConfig.stt.modelName ||
+ hasLegacyVlmConfigured;
// Perform verification directly here instead of using setTimeout
// This ensures we use model data from the current function scope instead of relying on state updates
diff --git a/frontend/const/modelConfig.ts b/frontend/const/modelConfig.ts
index bed6ac17d..505e552b8 100644
--- a/frontend/const/modelConfig.ts
+++ b/frontend/const/modelConfig.ts
@@ -7,9 +7,9 @@ export const MODEL_TYPES = {
STT: "stt",
TTS: "tts",
VLM: "vlm",
- IMAGE_UNDERSTANDING: "image_understanding",
- IMAGE_GENERATION: "image_generation",
- VIDEO_UNDERSTANDING: "video_understanding",
+ IMAGE_UNDERSTANDING: "imageUnderstanding",
+ IMAGE_GENERATION: "imageGeneration",
+ VIDEO_UNDERSTANDING: "videoUnderstanding",
} as const;
// Model source constants
diff --git a/frontend/hooks/model/useModelList.ts b/frontend/hooks/model/useModelList.ts
index b31f0aee5..aeff2fcdc 100644
--- a/frontend/hooks/model/useModelList.ts
+++ b/frontend/hooks/model/useModelList.ts
@@ -2,6 +2,7 @@ import { useQuery, useQueryClient } from "@tanstack/react-query";
import { modelService } from "@/services/modelService";
import { ModelOption } from "@/types/modelConfig";
import { useMemo } from "react";
+import { MODEL_TYPES } from "@/const/modelConfig";
export function useModelList(options?: { enabled?: boolean; staleTime?: number }) {
const queryClient = useQueryClient();
@@ -47,11 +48,11 @@ export function useModelList(options?: { enabled?: boolean; staleTime?: number }
}, [models]);
const imageUnderstandingModels = useMemo(() => {
- return models.filter((model) => model.type === "image_understanding");
+ return models.filter((model) => model.type === MODEL_TYPES.IMAGE_UNDERSTANDING);
}, [models]);
const availableImageUnderstandingModels = useMemo(() => {
- return models.filter((model) => model.type === "image_understanding" && model.connect_status === "available");
+ return models.filter((model) => model.type === MODEL_TYPES.IMAGE_UNDERSTANDING && model.connect_status === "available");
}, [models]);
const imageGenerationModels = useMemo(() => {
diff --git a/frontend/hooks/useConfig.ts b/frontend/hooks/useConfig.ts
index 1d7ec3831..a9f5e840d 100644
--- a/frontend/hooks/useConfig.ts
+++ b/frontend/hooks/useConfig.ts
@@ -91,6 +91,31 @@ const defaultConfig: GlobalConfig = {
modelUrl: "",
},
},
+ // New multimodal model types
+ imageUnderstanding: {
+ modelName: "",
+ displayName: "",
+ apiConfig: {
+ apiKey: "",
+ modelUrl: "",
+ },
+ },
+ imageGeneration: {
+ modelName: "",
+ displayName: "",
+ apiConfig: {
+ apiKey: "",
+ modelUrl: "",
+ },
+ },
+ videoUnderstanding: {
+ modelName: "",
+ displayName: "",
+ apiConfig: {
+ apiKey: "",
+ modelUrl: "",
+ },
+ },
},
};
@@ -142,6 +167,10 @@ function transformBackendToFrontend(backendConfig: any): GlobalConfig {
vlm: transformModelEntry(backendConfig.models.vlm),
stt: transformModelEntry(backendConfig.models.stt),
tts: transformModelEntry(backendConfig.models.tts),
+ // New multimodal model types
+ imageUnderstanding: transformModelEntry(backendConfig.models.imageUnderstanding),
+ imageGeneration: transformModelEntry(backendConfig.models.imageGeneration),
+ videoUnderstanding: transformModelEntry(backendConfig.models.videoUnderstanding),
}
: defaultConfig.models;
@@ -263,6 +292,9 @@ export function useConfig() {
const config: GlobalConfig = (query.data as GlobalConfig | undefined) ?? defaultConfig;
+ // Debug log for imageUnderstanding config
+ console.log("[useConfig] imageUnderstanding config:", JSON.stringify(config?.models?.imageUnderstanding));
+
// Whether config has selected a VLM model (image understanding, image generation, or video understanding)
const isVlmAvailable = !!(
config?.models?.vlm?.modelName || config?.models?.vlm?.displayName ||
@@ -271,6 +303,8 @@ export function useConfig() {
config?.models?.videoUnderstanding?.modelName || config?.models?.videoUnderstanding?.displayName
);
+ console.log("[useConfig] isVlmAvailable:", isVlmAvailable);
+
// Whether config has selected an Embedding model
const isEmbeddingAvailable = !!(config?.models?.embedding?.modelName || config?.models?.embedding?.displayName);
diff --git a/frontend/services/agentConfigService.ts b/frontend/services/agentConfigService.ts
index 37f621e95..3315d0961 100644
--- a/frontend/services/agentConfigService.ts
+++ b/frontend/services/agentConfigService.ts
@@ -396,7 +396,6 @@ export interface UpdateAgentInfoPayload {
model_id?: number;
max_steps?: number;
provide_run_summary?: boolean;
- enable_context_manager?: boolean;
enabled?: boolean;
business_description?: string;
business_logic_model_name?: string;
diff --git a/frontend/types/agentConfig.ts b/frontend/types/agentConfig.ts
index e6d36daaf..1907e940d 100644
--- a/frontend/types/agentConfig.ts
+++ b/frontend/types/agentConfig.ts
@@ -42,7 +42,6 @@ export interface Agent {
model_id?: number;
max_step: number;
provide_run_summary: boolean;
- enable_context_manager?: boolean;
tools: Tool[];
duty_prompt?: string;
constraint_prompt?: string;
diff --git a/k8s/helm/nexent/charts/nexent-common/files/init.sql b/k8s/helm/nexent/charts/nexent-common/files/init.sql
index 806ad0074..df05b5b9e 100644
--- a/k8s/helm/nexent/charts/nexent-common/files/init.sql
+++ b/k8s/helm/nexent/charts/nexent-common/files/init.sql
@@ -316,7 +316,6 @@ CREATE TABLE IF NOT EXISTS nexent.ag_tenant_agent_t (
enabled BOOLEAN DEFAULT FALSE,
is_new BOOLEAN DEFAULT FALSE,
provide_run_summary BOOLEAN DEFAULT FALSE,
- enable_context_manager BOOLEAN DEFAULT FALSE,
version_no INTEGER DEFAULT 0 NOT NULL,
current_version_no INTEGER NULL,
ingroup_permission VARCHAR(30),
@@ -374,7 +373,6 @@ COMMENT ON COLUMN nexent.ag_tenant_agent_t.is_new IS 'Whether this agent is mark
COMMENT ON COLUMN nexent.ag_tenant_agent_t.version_no IS 'Version number. 0 = draft/editing state, >=1 = published snapshot';
COMMENT ON COLUMN nexent.ag_tenant_agent_t.current_version_no IS 'Current published version number. NULL means no version published yet';
COMMENT ON COLUMN nexent.ag_tenant_agent_t.ingroup_permission IS 'In-group permission: EDIT, READ_ONLY, PRIVATE';
-COMMENT ON COLUMN nexent.ag_tenant_agent_t.enable_context_manager IS 'Whether to enable context management (compression) for this agent';
-- Create index for is_new queries
CREATE INDEX IF NOT EXISTS idx_ag_tenant_agent_t_is_new
diff --git a/start_dev.sh b/start_dev.sh
index 97c0b67d6..fc52548a2 100644
--- a/start_dev.sh
+++ b/start_dev.sh
@@ -1,9 +1,8 @@
#!/bin/bash
# 设置项目根目录路径(建议使用绝对路径或确保脚本在根目录运行)
-PROJECT_ROOT="F:/PythonProject/nexent-develop"
+PROJECT_ROOT="F:/nexent/nexent"
VENV_PYTHON="backend/.venv/Scripts/python"
-
echo "🚀 正在通过 mintty 启动 Nexent 服务..."
# 1. 启动 MCP Service
@@ -16,4 +15,4 @@ mintty -p 500,100 -t "Config-Service" bash -c "cd $PROJECT_ROOT && source .env &
mintty -p 100,500 -t "Runtime-Service" bash -c "cd $PROJECT_ROOT && source .env && $VENV_PYTHON backend/runtime_service.py; exec bash" &
-echo "✅ 4 个独立的窗口已在 $PROJECT_ROOT 路径下启动。"
\ No newline at end of file
+echo "✅ 3 个独立的窗口已在 $PROJECT_ROOT 路径下启动。"
\ No newline at end of file
diff --git a/test/backend/database/test_agent_db.py b/test/backend/database/test_agent_db.py
index 6f2c780e5..a083a537e 100644
--- a/test/backend/database/test_agent_db.py
+++ b/test/backend/database/test_agent_db.py
@@ -121,7 +121,6 @@ def __init__(self):
self.business_description = None
self.group_ids = None
self.is_new = True
- self.enable_context_manager = False
self.current_version_no = None
self.version_no = 0
self.created_by = None
@@ -275,7 +274,7 @@ def test_query_sub_agents_id_list(monkeypatch, mock_session):
assert result == [2]
def test_create_agent_success(monkeypatch, mock_session):
- """测试成功创建agent"""
+ """Test successful agent creation."""
session, query = mock_session
session.add = MagicMock()
session.flush = MagicMock()
diff --git a/test/backend/database/test_agent_version_db.py b/test/backend/database/test_agent_version_db.py
index cd9ad8714..da99f82ec 100644
--- a/test/backend/database/test_agent_version_db.py
+++ b/test/backend/database/test_agent_version_db.py
@@ -247,26 +247,39 @@ def mock_as_dict(obj):
def mock_sqlalchemy_insert(monkeypatch):
"""Helper function to mock SQLAlchemy insert"""
from sqlalchemy.sql import Insert
-
+ from sqlalchemy.exc import ProgrammingError
+
+ # Track if we should raise an error on the first call
+ should_raise = [False]
+
def insert_wrapper(table):
"""Wrapper that accepts the actual table class (or MagicMock) and returns a mock statement"""
+ # If we need to raise an error (first call), raise ProgrammingError
+ if should_raise[0]:
+ should_raise[0] = False # Reset for next call
+ raise ProgrammingError(
+ "statement", {}, BaseException(
+ "column \"is_a2a\" of relation \"ag_tenant_agent_version_t\" does not exist"
+ )
+ )
+
# Create a mock statement that chains properly
# This bypasses SQLAlchemy's table validation by directly returning a mock
mock_stmt = MagicMock(spec=Insert)
mock_values_result = MagicMock()
mock_returning_result = MagicMock()
-
+
# Chain: .values(**kwargs) returns an object that has .returning()
mock_values_result.returning = lambda *args, **kwargs: mock_returning_result
mock_stmt.values = lambda **kwargs: mock_values_result
-
+
# The final statement is what gets executed
return mock_stmt
-
+
# Patch the imported function in agent_version_db module (this is what the code actually uses)
# We patch at the module level after import, so it overrides the imported function
monkeypatch.setattr(agent_version_db_module, "insert", insert_wrapper)
- return insert_wrapper
+ return insert_wrapper, should_raise
def mock_sqlalchemy_update(monkeypatch):
@@ -401,13 +414,15 @@ def test_query_version_list_success(monkeypatch, mock_session):
mock_version2 = MockAgentVersion()
mock_version2.version_no = 2
mock_version2.version_name = "v2.0"
-
+ mock_version2.__dict__['version_no'] = 2
+ mock_version2.__dict__['version_name'] = "v2.0"
+
mock_order_by = MagicMock()
mock_order_by.all = lambda: [mock_version2, mock_version1] # Ordered desc
mock_filter = MagicMock()
mock_filter.order_by.return_value = mock_order_by
query.filter.return_value = mock_filter
-
+
mock_ctx = MagicMock()
mock_ctx.__enter__.return_value = session
mock_ctx.__exit__.return_value = None
@@ -415,9 +430,9 @@ def test_query_version_list_success(monkeypatch, mock_session):
# This is needed because agent_version_db imports get_db_session and as_dict at module level
monkeypatch.setattr(agent_version_db_module, "get_db_session", lambda: mock_ctx)
monkeypatch.setattr(agent_version_db_module, "as_dict", mock_as_dict)
-
+
result = query_version_list(agent_id=1, tenant_id="tenant1")
-
+
assert len(result) == 2
assert result[0]["version_no"] == 2 # Should be ordered desc
assert result[1]["version_no"] == 1
@@ -639,14 +654,14 @@ def query_side_effect(model_class):
def test_insert_version_success(monkeypatch, mock_session):
"""Test successfully inserting a new version"""
session, query = mock_session
-
+
mock_result = MagicMock()
mock_result.scalar_one.return_value = 123
session.execute.return_value = mock_result
-
+
# Mock SQLAlchemy insert to avoid ArgumentError
- mock_sqlalchemy_insert(monkeypatch)
-
+    mock_sqlalchemy_insert(monkeypatch)[0]  # Return value unused; only the monkeypatch side effect matters
+
mock_ctx = MagicMock()
mock_ctx.__enter__.return_value = session
mock_ctx.__exit__.return_value = None
@@ -654,7 +669,7 @@ def test_insert_version_success(monkeypatch, mock_session):
# This is needed because agent_version_db imports get_db_session and as_dict at module level
monkeypatch.setattr(agent_version_db_module, "get_db_session", lambda: mock_ctx)
monkeypatch.setattr(agent_version_db_module, "as_dict", mock_as_dict)
-
+
version_data = {
"tenant_id": "tenant1",
"agent_id": 1,
@@ -662,13 +677,119 @@ def test_insert_version_success(monkeypatch, mock_session):
"version_name": "v1.0",
"status": STATUS_RELEASED,
}
-
+
result = insert_version(version_data)
-
+
assert result == 123
session.execute.assert_called_once()
+def test_insert_version_with_is_a2a_column_missing(monkeypatch, mock_session):
+ """Test inserting version when is_a2a column doesn't exist in database"""
+ session, query = mock_session
+
+ # Mock SQLAlchemy insert with should_raise flag to trigger error on first call
+ insert_wrapper, should_raise = mock_sqlalchemy_insert(monkeypatch)
+ should_raise[0] = True # First call should raise error
+
+ # Second call succeeds
+ mock_result = MagicMock()
+ mock_result.scalar_one.return_value = 456
+ session.execute.return_value = mock_result
+
+ mock_ctx = MagicMock()
+ mock_ctx.__enter__.return_value = session
+ mock_ctx.__exit__.return_value = None
+ monkeypatch.setattr(agent_version_db_module, "get_db_session", lambda: mock_ctx)
+ monkeypatch.setattr(agent_version_db_module, "as_dict", mock_as_dict)
+
+ # Version data includes is_a2a field
+ version_data = {
+ "tenant_id": "tenant1",
+ "agent_id": 1,
+ "version_no": 1,
+ "version_name": "v1.0",
+ "status": STATUS_RELEASED,
+ "is_a2a": False,
+ }
+
+ result = insert_version(version_data)
+
+ assert result == 456
+
+
+def test_insert_version_undefined_column_error(monkeypatch, mock_session):
+ """Test inserting version when UndefinedColumn error occurs"""
+ session, query = mock_session
+
+ # Mock SQLAlchemy insert with should_raise flag to trigger error on first call
+ insert_wrapper, should_raise = mock_sqlalchemy_insert(monkeypatch)
+ should_raise[0] = True # First call should raise error
+
+ # Second call succeeds
+ mock_result = MagicMock()
+ mock_result.scalar_one.return_value = 789
+ session.execute.return_value = mock_result
+
+ mock_ctx = MagicMock()
+ mock_ctx.__enter__.return_value = session
+ mock_ctx.__exit__.return_value = None
+ monkeypatch.setattr(agent_version_db_module, "get_db_session", lambda: mock_ctx)
+ monkeypatch.setattr(agent_version_db_module, "as_dict", mock_as_dict)
+
+ version_data = {
+ "tenant_id": "tenant1",
+ "agent_id": 1,
+ "version_no": 1,
+ "is_a2a": True,
+ }
+
+ result = insert_version(version_data)
+
+ assert result == 789
+
+
+def test_query_version_list_with_is_a2a_column_missing(monkeypatch, mock_session):
+ """Test querying version list when is_a2a column doesn't exist"""
+ session, query = mock_session
+
+ # First call raises error, second call with explicit columns succeeds
+ call_count = [0]
+ mock_version = MockAgentVersion()
+
+ def execute_side_effect(*args, **kwargs):
+ call_count[0] += 1
+ if call_count[0] == 1:
+ from sqlalchemy.exc import ProgrammingError
+ raise ProgrammingError(
+ "statement", {}, BaseException(
+ "column \"is_a2a\" does not exist"
+ )
+ )
+ # Return mock result for the fallback query
+ mock_result = MagicMock()
+ mock_result.all.return_value = [(1, "tenant1", 1, 1, "v1.0", None, None, "NORMAL", "RELEASED", "user1", "2023-01-01")]
+ return mock_result
+
+ session.execute.side_effect = execute_side_effect
+
+ mock_order_by = MagicMock()
+ mock_order_by.all = lambda: [mock_version]
+ mock_filter = MagicMock()
+ mock_filter.order_by.return_value = mock_order_by
+ query.filter.return_value = mock_filter
+
+ mock_ctx = MagicMock()
+ mock_ctx.__enter__.return_value = session
+ mock_ctx.__exit__.return_value = None
+ monkeypatch.setattr(agent_version_db_module, "get_db_session", lambda: mock_ctx)
+ monkeypatch.setattr(agent_version_db_module, "as_dict", mock_as_dict)
+
+ result = query_version_list(agent_id=1, tenant_id="tenant1")
+
+ assert len(result) == 1
+
+
def test_update_version_status_success(monkeypatch, mock_session):
"""Test successfully updating version status"""
session, query = mock_session
@@ -794,7 +915,7 @@ def test_insert_agent_snapshot_success(monkeypatch, mock_session):
session.execute = MagicMock()
# Mock SQLAlchemy insert to avoid ArgumentError
- mock_sqlalchemy_insert(monkeypatch)
+    mock_sqlalchemy_insert(monkeypatch)[0]  # Return value unused; only the monkeypatch side effect matters
mock_ctx = MagicMock()
mock_ctx.__enter__.return_value = session
@@ -823,7 +944,7 @@ def test_insert_tool_snapshot_success(monkeypatch, mock_session):
session.execute = MagicMock()
# Mock SQLAlchemy insert to avoid ArgumentError
- mock_sqlalchemy_insert(monkeypatch)
+    mock_sqlalchemy_insert(monkeypatch)[0]  # Return value unused; only the monkeypatch side effect matters
mock_ctx = MagicMock()
mock_ctx.__enter__.return_value = session
@@ -852,7 +973,7 @@ def test_insert_relation_snapshot_success(monkeypatch, mock_session):
session.execute = MagicMock()
# Mock SQLAlchemy insert to avoid ArgumentError
- mock_sqlalchemy_insert(monkeypatch)
+    mock_sqlalchemy_insert(monkeypatch)[0]  # Return value unused; only the monkeypatch side effect matters
mock_ctx = MagicMock()
mock_ctx.__enter__.return_value = session