Merged
Commits (19)
516cc95
implement DashScope and TokenPony model providers
wadecrack Mar 3, 2026
463ebd5
New Requirement: Support for provider Zhipu AI Models (LLM and Embedd…
wadecrack Mar 4, 2026
eb12b0a
New Requirement: Support for provider dashscope and tokenpony Models …
wadecrack Mar 4, 2026
36b8be9
bug fix : embedding model max_tokens changes
wadecrack Mar 5, 2026
3470662
bug fix : embedding model max_tokens changes
wadecrack Mar 5, 2026
fb16d93
create test files for the backend providers
wadecrack Mar 5, 2026
941cac2
bugfix for test files of the backend providers
wadecrack Mar 5, 2026
96a1c5b
Merge branch 'develop' into develop-xq-260225
wadecrack Mar 5, 2026
9580a2d
Merge branch 'xq/develop_models_providers' of https://github.com/Mode…
wadecrack Mar 5, 2026
57a24a4
improve codecov for testfiles
wadecrack Mar 6, 2026
00854ca
implement DashScope and TokenPony model providers
wadecrack Mar 3, 2026
97bcb3b
New Requirement: Support for provider Zhipu AI Models (LLM and Embedd…
wadecrack Mar 4, 2026
a552e12
New Requirement: Support for provider dashscope and tokenpony Models …
wadecrack Mar 4, 2026
e21157d
bug fix : embedding model max_tokens changes
wadecrack Mar 5, 2026
b70e45c
bug fix : embedding model max_tokens changes
wadecrack Mar 5, 2026
2f3af41
create test files for the backend providers
wadecrack Mar 5, 2026
0515bd3
bugfix for test files of the backend providers
wadecrack Mar 5, 2026
74e3c1a
improve codecov for testfiles
wadecrack Mar 6, 2026
019a15c
Merge remote-tracking branch 'upstream/xq/develop_models_providers' i…
wadecrack Mar 6, 2026
10 changes: 10 additions & 0 deletions backend/consts/provider.py
@@ -6,11 +6,21 @@ class ProviderEnum(str, Enum):
     SILICON = "silicon"
     OPENAI = "openai"
     MODELENGINE = "modelengine"
+    DASHSCOPE = "dashscope"
+    TOKENPONY = "tokenpony"
 
 
 # Silicon Flow
 SILICON_BASE_URL = "https://api.siliconflow.cn/v1/"
 SILICON_GET_URL = "https://api.siliconflow.cn/v1/models"
 
+# DashScope
+DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/"
+DASHSCOPE_GET_URL = "https://dashscope.aliyuncs.com/api/v1/models"
+
+# TokenPony
+TOKENPONY_BASE_URL = "https://api.tokenpony.cn/v1/"
+TOKENPONY_GET_URL = "https://api.tokenpony.cn/v1/models"
+
 # ModelEngine
 # Base URL and API key are loaded from environment variables at runtime
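For orientation: each provider gets a pair of constants. `*_GET_URL` is the model-listing endpoint queried by the provider classes added below, while `*_BASE_URL` is the OpenAI-compatible inference root stored on model records. A minimal sketch of hitting the listing endpoint (it mirrors the DashScope provider further down; the key is a placeholder):

```python
import httpx

from consts.provider import DASHSCOPE_GET_URL


async def list_dashscope_models(api_key: str) -> list[dict]:
    # DashScope's native listing API wraps results in {"output": {"models": [...]}}
    headers = {"Authorization": f"Bearer {api_key}"}
    async with httpx.AsyncClient() as client:
        resp = await client.get(DASHSCOPE_GET_URL, headers=headers)
        resp.raise_for_status()
        return resp.json().get("output", {}).get("models", [])

# e.g. models = asyncio.run(list_dashscope_models("sk-..."))  # placeholder key
```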
6 changes: 5 additions & 1 deletion backend/services/model_management_service.py
@@ -3,7 +3,7 @@
 
 from consts.const import LOCALHOST_IP, LOCALHOST_NAME, DOCKER_INTERNAL_HOST
 from consts.model import ModelConnectStatusEnum
-from consts.provider import ProviderEnum, SILICON_BASE_URL
+from consts.provider import ProviderEnum, SILICON_BASE_URL, DASHSCOPE_BASE_URL, TOKENPONY_BASE_URL
 
 from database.model_management_db import (
     create_model_record,
@@ -142,6 +142,10 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
     elif provider == ProviderEnum.MODELENGINE.value:
         # ModelEngine models carry their own base_url in each model dict
         model_url = ""
+    elif provider == ProviderEnum.DASHSCOPE.value:
+        model_url = DASHSCOPE_BASE_URL
+    elif provider == ProviderEnum.TOKENPONY.value:
+        model_url = TOKENPONY_BASE_URL
     else:
         model_url = ""
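The elif chain above can equivalently be read as a lookup table. This is only an illustrative sketch, not part of the PR; it assumes the silicon branch maps to SILICON_BASE_URL, as the import suggests:

```python
from consts.provider import (
    ProviderEnum,
    SILICON_BASE_URL,
    DASHSCOPE_BASE_URL,
    TOKENPONY_BASE_URL,
)

PROVIDER_BASE_URLS = {
    ProviderEnum.SILICON.value: SILICON_BASE_URL,
    ProviderEnum.DASHSCOPE.value: DASHSCOPE_BASE_URL,
    ProviderEnum.TOKENPONY.value: TOKENPONY_BASE_URL,
}


def resolve_base_url(provider: str) -> str:
    # Unknown providers (and ModelEngine, whose models carry their own
    # base_url) fall back to an empty string, matching the else branch above.
    return PROVIDER_BASE_URLS.get(provider, "")
```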
11 changes: 10 additions & 1 deletion backend/services/model_provider_service.py
@@ -11,6 +11,8 @@
 from services.model_health_service import embedding_dimension_check
 from services.providers.base import AbstractModelProvider
 from services.providers.silicon_provider import SiliconModelProvider
+from services.providers.tokenpony_provider import TokenPonyModelProvider
+from services.providers.dashscope_provider import DashScopeModelProvider
 from services.providers.modelengine_provider import ModelEngineProvider, get_model_engine_raw_url, MODEL_ENGINE_NORTH_PREFIX
 from utils.model_name_utils import split_repo_name, add_repo_to_name
 
@@ -40,6 +42,12 @@ async def get_provider_models(model_data: dict) -> List[dict]:
     elif model_data["provider"] == ProviderEnum.MODELENGINE.value:
         provider = ModelEngineProvider()
         model_list = await provider.get_models(model_data)
+    elif model_data["provider"] == ProviderEnum.DASHSCOPE.value:
+        provider = DashScopeModelProvider()
+        model_list = await provider.get_models(model_data)
+    elif model_data["provider"] == ProviderEnum.TOKENPONY.value:
+        provider = TokenPonyModelProvider()
+        model_list = await provider.get_models(model_data)
 
     return model_list
 
@@ -117,7 +125,8 @@ async def prepare_model_dict(provider: str, model: dict, model_url: str, model_a
     # dimension by performing a real connectivity check.
     if model["model_type"] in ["embedding", "multi_embedding"]:
         if provider != ProviderEnum.MODELENGINE.value:
-            model_dict["base_url"] = f"{model_url}embeddings"
+            # Ensure a proper slash between the base URL and the endpoint
+            model_dict["base_url"] = f"{model_url.rstrip('/')}/embeddings"
         else:
             # For ModelEngine embedding models, append the embeddings path
             model_dict["base_url"] = f"{model_url.rstrip('/')}/{MODEL_ENGINE_NORTH_PREFIX}/embeddings"
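The embeddings-URL change in prepare_model_dict guards against a missing trailing slash. A quick standalone check of the new behavior (example.com is a placeholder domain):

```python
# Old form: f"{model_url}embeddings" silently yielded ".../v1embeddings"
# whenever the stored base URL lacked a trailing slash.
for base in ("https://api.example.com/v1", "https://api.example.com/v1/"):
    assert f"{base.rstrip('/')}/embeddings" == "https://api.example.com/v1/embeddings"
```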
132 changes: 132 additions & 0 deletions backend/services/providers/dashscope_provider.py
@@ -0,0 +1,132 @@
import asyncio
from typing import Dict, List

import httpx

from consts.const import DEFAULT_LLM_MAX_TOKENS
from consts.provider import DASHSCOPE_GET_URL
from services.providers.base import AbstractModelProvider, _classify_provider_error


class DashScopeModelProvider(AbstractModelProvider):
    """Concrete implementation for the DashScope (Aliyun) provider."""

    async def get_models(self, provider_config: Dict) -> List[Dict]:
        """
        Fetch models from the DashScope API, categorize them, and return
        the requested model type.

        Args:
            provider_config: Configuration dict containing model_type and api_key

        Returns:
            List of models with canonical fields. Returns an error dict if the API call fails.
        """
        try:
            target_model_type: str = provider_config["model_type"]
            model_api_key: str = provider_config["api_key"]

            headers = {"Authorization": f"Bearer {model_api_key}"}
            base_url = DASHSCOPE_GET_URL

            all_models: List[Dict] = []
            current_page = 1

            # Fetch all models with pagination asynchronously
            async with httpx.AsyncClient(verify=False) as client:
                while True:
                    params = {"page_size": 100, "page_no": current_page}
                    response = await client.get(base_url, headers=headers, params=params)
                    if response.status_code == 429:
                        # Rate limited: back off briefly and retry the same page.
                        # Note: this retries indefinitely until the request succeeds.
                        await asyncio.sleep(2)
                        continue
                    response.raise_for_status()

                    data = response.json()
                    models = data.get("output", {}).get("models", [])

                    # Stop if there are no more models on the current page
                    if not models:
                        break

                    all_models.extend(models)
                    if len(models) < 100:
                        break
                    current_page += 1
                    await asyncio.sleep(0.5)

            # Initialize containers for the 6 main categories
            categorized_models = {
                "chat": [],       # Maps to "llm"
                "vlm": [],        # Maps to "vlm"
                "embedding": [],  # Maps to "embedding" / "multi_embedding"
                "reranker": [],   # Maps to "reranker"
                "tts": [],        # Maps to "tts"
                "stt": []         # Maps to "stt"
            }

            # Classify models and inject canonical fields expected downstream
            for model_obj in all_models:
                # Extract key fields for classification (lowercased for robustness)
                m_id = model_obj.get('model', '').lower()
                desc = model_obj.get('description', '')
                metadata = model_obj.get('inference_metadata', {})
                req_mod = metadata.get('request_modality', [])
                res_mod = metadata.get('response_modality', [])
                model_obj.setdefault("object", "model")
                model_obj.setdefault("owned_by", "dashscope")
                cleaned_model = {
                    "id": m_id,
                    "object": model_obj.get("object"),
                    "created": 0,
                    "owned_by": model_obj.get("owned_by"),
                    "model_tag": "",
                    "model_type": "",
                    "max_tokens": DEFAULT_LLM_MAX_TOKENS
                }
                # 1. Embedding ("向量" means "vector/embedding" in the Chinese description)
                if 'embedding' in m_id or '向量' in desc:
                    cleaned_model.update({"model_tag": "embedding", "model_type": "embedding"})
                    categorized_models['embedding'].append(cleaned_model)
                    continue

                # 2. Reranker ("重排序" means "rerank")
                if 'rerank' in m_id or '重排序' in desc:
                    cleaned_model.update({"model_tag": "reranker", "model_type": "reranker"})
                    categorized_models['reranker'].append(cleaned_model)
                    continue

                # 3. STT
                if 'Audio' in req_mod and 'Text' in res_mod:
                    cleaned_model.update({"model_tag": "stt", "model_type": "stt"})
                    categorized_models['stt'].append(cleaned_model)
                    continue

                # 4. TTS
                if 'Audio' in res_mod and 'Video' not in res_mod:
                    cleaned_model.update({"model_tag": "tts", "model_type": "tts"})
                    categorized_models['tts'].append(cleaned_model)
                    continue

                # 5. VLM ("视觉" means "vision")
                vision_mods = {'Image', 'Video'}
                if (set(req_mod) & vision_mods) or (set(res_mod) & vision_mods) or '视觉' in desc:
                    cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
                    categorized_models['vlm'].append(cleaned_model)
                    continue

                # 6. Chat / LLM
                if 'Text' in req_mod or 'Text' in res_mod:
                    cleaned_model.update({"model_tag": "chat", "model_type": "llm"})
                    categorized_models['chat'].append(cleaned_model)

            # Return the specific list based on the requested target_model_type
            if target_model_type == "llm":
                return categorized_models["chat"]
            elif target_model_type in ("embedding", "multi_embedding"):
                return categorized_models["embedding"]
            elif target_model_type in categorized_models:
                return categorized_models[target_model_type]
            else:
                return []
        except Exception as e:
            # httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, etc.
            # all funnel into a classified provider error
            return _classify_provider_error("DashScope", exception=e)

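A minimal usage sketch for the new provider (the API key is a placeholder; returned dicts follow the cleaned_model shape built above):

```python
import asyncio

from services.providers.dashscope_provider import DashScopeModelProvider


async def main() -> None:
    provider = DashScopeModelProvider()
    models = await provider.get_models({
        "model_type": "embedding",  # or "llm", "vlm", "reranker", "tts", "stt"
        "api_key": "sk-...",        # placeholder
    })
    for m in models:
        print(m["id"], m["model_type"], m["max_tokens"])


asyncio.run(main())
```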
112 changes: 112 additions & 0 deletions backend/services/providers/tokenpony_provider.py
@@ -0,0 +1,112 @@
import ssl
from typing import Dict, List

import httpx

from consts.const import DEFAULT_LLM_MAX_TOKENS
from consts.provider import TOKENPONY_GET_URL
from services.providers.base import AbstractModelProvider, _classify_provider_error


class TokenPonyModelProvider(AbstractModelProvider):
    """Concrete implementation for the TokenPony provider."""

    async def get_models(self, provider_config: Dict) -> List[Dict]:
        """
        Fetch models from the TokenPony API, categorize them based on modality/ID,
        and return the requested model type.

        Args:
            provider_config: Configuration dict containing model_type and api_key

        Returns:
            List of models with canonical fields. Returns an error dict if the API call fails.
        """
        try:
            target_model_type: str = provider_config["model_type"]
            model_api_key: str = provider_config["api_key"]

            headers = {"Authorization": f"Bearer {model_api_key}"}
            url = TOKENPONY_GET_URL

            # Relaxed TLS settings for the TokenPony endpoint
            ssl_context = ssl.create_default_context()
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            ssl_context.set_ciphers("DEFAULT@SECLEVEL=1")

            # Pass the context to the client; without verify=ssl_context the
            # custom TLS settings above would silently go unused
            async with httpx.AsyncClient(http2=True, verify=ssl_context) as client:
                response = await client.get(url, headers=headers)
                response.raise_for_status()
                # The OpenAI-standard response puts the model list inside the "data" array
                all_models: List[Dict] = response.json().get("data", [])

            # Initialize containers for the 6 main categories
            categorized_models = {
                "chat": [],       # Maps to "llm"
                "vlm": [],        # Maps to "vlm"
                "embedding": [],  # Maps to "embedding" / "multi_embedding"
                "reranker": [],   # Maps to "reranker"
                "tts": [],        # Maps to "tts"
                "stt": []         # Maps to "stt"
            }

            # Classify models and inject canonical fields expected downstream
            for model_obj in all_models:
                m_id = model_obj['id'].lower()
                model_obj.setdefault("object", "model")
                model_obj.setdefault("owned_by", "tokenpony")
                cleaned_model = {
                    "id": m_id,
                    "object": model_obj.get("object"),
                    "created": 0,
                    "owned_by": model_obj.get("owned_by"),
                    "model_tag": "",
                    "model_type": "",
                    "max_tokens": DEFAULT_LLM_MAX_TOKENS
                }
                # 1. Reranker
                if 'rerank' in m_id:
                    cleaned_model.update({"model_tag": "reranker", "model_type": "reranker"})
                    categorized_models['reranker'].append(cleaned_model)
                # 2. Embedding
                elif 'embedding' in m_id or m_id.startswith('bge-'):
                    cleaned_model.update({"model_tag": "embedding", "model_type": "embedding"})
                    categorized_models['embedding'].append(cleaned_model)
                # 3. STT (speech-to-text / audio understanding)
                elif 'stt' in m_id:
                    cleaned_model.update({"model_tag": "stt", "model_type": "stt"})
                    categorized_models['stt'].append(cleaned_model)
                # 4. TTS (text-to-speech)
                elif 'tts' in m_id:
                    cleaned_model.update({"model_tag": "tts", "model_type": "tts"})
                    categorized_models['tts'].append(cleaned_model)
                # 5. VLM (vision language model)
                elif any(keyword in m_id for keyword in ['-vl', 'vl-', 'ocr', 'vision']):
                    cleaned_model.update({"model_tag": "chat", "model_type": "vlm"})
                    categorized_models['vlm'].append(cleaned_model)
                # 6. Chat / LLM fallback: catches standard OpenAI-style models
                # whose IDs carry no modality hints
                else:
                    cleaned_model.update({"model_tag": "chat", "model_type": "llm"})
                    categorized_models['chat'].append(cleaned_model)

            # Return the specific list based on the requested target_model_type
            if target_model_type == "llm":
                return categorized_models["chat"]
            elif target_model_type in ("embedding", "multi_embedding"):
                return categorized_models["embedding"]
            elif target_model_type in categorized_models:
                return categorized_models[target_model_type]
            else:
                return []

        except Exception as e:
            # httpx.HTTPStatusError, httpx.ConnectTimeout, httpx.ConnectError, etc.
            # all funnel into a classified provider error
            return _classify_provider_error("TokenPony", exception=e)
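A sketch of how the PR's test files might exercise the classification logic without network access. This assumes pytest-asyncio and respx are available, which this diff view does not confirm:

```python
import httpx
import pytest
import respx

from consts.provider import TOKENPONY_GET_URL
from services.providers.tokenpony_provider import TokenPonyModelProvider


@pytest.mark.asyncio
@respx.mock
async def test_tokenpony_embedding_classification():
    # Mock the OpenAI-style /models response
    respx.get(TOKENPONY_GET_URL).mock(
        return_value=httpx.Response(200, json={"data": [
            {"id": "bge-m3"},     # embedding via the "bge-" prefix rule
            {"id": "qwen-plus"},  # falls through to chat/llm
        ]})
    )
    models = await TokenPonyModelProvider().get_models(
        {"model_type": "embedding", "api_key": "test-key"}
    )
    assert [m["id"] for m in models] == ["bge-m3"]
```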
ModelAddDialog component (frontend; file path and change counts not captured in this view)
@@ -16,6 +16,8 @@ import { modelService } from "@/services/modelService";
 import { ModelType, SingleModelConfig } from "@/types/modelConfig";
 import { MODEL_TYPES, PROVIDER_LINKS } from "@/const/modelConfig";
 import { useSiliconModelList } from "@/hooks/model/useSiliconModelList";
+import { useDashscopeModelList } from "@/hooks/model/useDashscopeModelList";
+import { useTokenPonyModelList } from "@/hooks/model/useTokenponyModelList";
 import log from "@/lib/logger";
 import {
   ModelChunkSizeSlider,
@@ -248,15 +250,41 @@ export const ModelAddDialog = ({
   const [modelMaxTokens, setModelMaxTokens] = useState("4096");
 
   // Model-list hooks for each provider; the active one is selected below
-  const { getModelList, getProviderSelectedModalList } = useSiliconModelList({
+  const siliconHook = useSiliconModelList({
     form,
     setModelList,
     setSelectedModelIds,
     setShowModelList,
     setLoadingModelList,
     tenantId,
   });
+
+  const dashscopeHook = useDashscopeModelList({
+    form,
+    setModelList,
+    setSelectedModelIds,
+    setShowModelList,
+    setLoadingModelList,
+    tenantId,
+  });
+  const tokenponyHook = useTokenPonyModelList({
+    form,
+    setModelList,
+    setSelectedModelIds,
+    setShowModelList,
+    setLoadingModelList,
+    tenantId,
+  });
+  let getModelList;
+  let getProviderSelectedModalList;
+
+  // Assign the implementations based on the selected provider
+  if (form.provider === "silicon") {
+    ({ getModelList, getProviderSelectedModalList } = siliconHook);
+  } else if (form.provider === "dashscope") {
+    ({ getModelList, getProviderSelectedModalList } = dashscopeHook);
+  } else if (form.provider === "tokenpony") {
+    ({ getModelList, getProviderSelectedModalList } = tokenponyHook);
+  }
   // Reset form to default state
   const resetForm = useCallback(() => {
     setForm(DEFAULT_FORM_STATE);
@@ -794,6 +822,8 @@
             {t("model.provider.modelengine")}
           </Option>
           <Option value="silicon">{t("model.provider.silicon")}</Option>
+          <Option value="dashscope">{t("model.provider.dashscope")}</Option>
+          <Option value="tokenpony">{t("model.provider.tokenpony")}</Option>
         </Select>
         {/* ModelEngine URL input (only when provider is ModelEngine) */}
         {form.provider === "modelengine" && (