From 8f6ac25f8126c2c1e56475de93aafc336a5d84ed Mon Sep 17 00:00:00 2001 From: junjun Date: Thu, 9 Oct 2025 16:09:55 +0800 Subject: [PATCH] feat: Vector retrieval matches tables --- backend/apps/datasource/crud/datasource.py | 2 +- backend/apps/datasource/crud/table.py | 36 ++++++++++++++++++---- backend/common/utils/embedding_threads.py | 10 ------ backend/main.py | 4 +-- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/backend/apps/datasource/crud/datasource.py b/backend/apps/datasource/crud/datasource.py index f9339bdc4..696513b17 100644 --- a/backend/apps/datasource/crud/datasource.py +++ b/backend/apps/datasource/crud/datasource.py @@ -15,7 +15,7 @@ from apps.db.engine import get_engine_config, get_engine_conn from common.core.config import settings from common.core.deps import SessionDep, CurrentUser, Trans -from common.utils.embedding_threads import run_save_table_embeddings +from apps.datasource.crud.table import run_save_table_embeddings from common.utils.utils import deepcopy_ignore_extra from .table import get_tables_by_ds_id from ..crud.field import delete_field_by_ds_id, update_field diff --git a/backend/apps/datasource/crud/table.py b/backend/apps/datasource/crud/table.py index 4ac3a5714..f62ac729c 100644 --- a/backend/apps/datasource/crud/table.py +++ b/backend/apps/datasource/crud/table.py @@ -1,9 +1,11 @@ import json import time import traceback +from concurrent.futures import ThreadPoolExecutor from typing import List from sqlalchemy import and_, select, update +from sqlalchemy.orm import sessionmaker from sqlalchemy.orm.session import Session from apps.ai_model.embedding import EmbeddingModelCache @@ -12,6 +14,13 @@ from common.utils.utils import SQLBotLogUtil from ..models.datasource import CoreTable, CoreField +executor = ThreadPoolExecutor(max_workers=200) + +from common.core.db import engine + +session_maker = sessionmaker(bind=engine) +session = session_maker() + def delete_table_by_ds_id(session: SessionDep, id: int): session.query(CoreTable).filter(CoreTable.ds_id == id).delete(synchronize_session=False) @@ -32,14 +41,18 @@ def update_table(session: SessionDep, item: CoreTable): def run_fill_empty_table_embedding(session: Session): - if not settings.EMBEDDING_ENABLED: - return + try: + if not settings.EMBEDDING_ENABLED: + return - SQLBotLogUtil.info('get tables') - stmt = select(CoreTable.id).where(and_(CoreTable.embedding.is_(None))) - results = session.execute(stmt).scalars().all() + SQLBotLogUtil.info('get tables') + stmt = select(CoreTable.id).where(and_(CoreTable.embedding.is_(None))) + results = session.execute(stmt).scalars().all() + SQLBotLogUtil.info('result:' + str(len(results))) - save_table_embedding(session, results) + save_table_embedding(session, results) + except Exception: + traceback.print_exc() def save_table_embedding(session: Session, ids: List[int]): @@ -89,3 +102,14 @@ def save_table_embedding(session: Session, ids: List[int]): SQLBotLogUtil.info('table embedding finished in:' + str(end_time - start_time) + 'seconds') except Exception: traceback.print_exc() + + +def run_save_table_embeddings(ids: List[int]): + executor.submit(save_table_embedding, session, ids) + + +def fill_empty_table_embeddings(): + try: + executor.submit(run_fill_empty_table_embedding, session) + except Exception: + traceback.print_exc() diff --git a/backend/common/utils/embedding_threads.py b/backend/common/utils/embedding_threads.py index 182c525ec..a38b66f0d 100644 --- a/backend/common/utils/embedding_threads.py +++ b/backend/common/utils/embedding_threads.py @@ -31,13 +31,3 @@ def run_save_data_training_embeddings(ids: List[int]): def fill_empty_data_training_embeddings(): from apps.data_training.curd.data_training import run_fill_empty_embeddings executor.submit(run_fill_empty_embeddings, session) - - -def run_save_table_embeddings(ids: List[int]): - from apps.datasource.crud.table import save_table_embedding - executor.submit(save_table_embedding, session, ids) - - -def fill_empty_table_embeddings(): - from apps.datasource.crud.table import run_fill_empty_table_embedding - executor.submit(run_fill_empty_table_embedding, session) diff --git a/backend/main.py b/backend/main.py index 3486df562..fb14b53f4 100644 --- a/backend/main.py +++ b/backend/main.py @@ -12,14 +12,14 @@ from alembic import command from apps.api import api_router +from apps.datasource.crud.table import fill_empty_table_embeddings from apps.system.crud.aimodel_manage import async_model_info from apps.system.crud.assistant import init_dynamic_cors from apps.system.middleware.auth import TokenMiddleware from common.core.config import settings from common.core.response_middleware import ResponseMiddleware, exception_handler from common.core.sqlbot_cache import init_sqlbot_cache -from common.utils.embedding_threads import fill_empty_terminology_embeddings, fill_empty_data_training_embeddings, \ - fill_empty_table_embeddings +from common.utils.embedding_threads import fill_empty_terminology_embeddings, fill_empty_data_training_embeddings from common.utils.utils import SQLBotLogUtil