Skip to content

Commit 144c67a

Browse files
committed
feat: Vector retrieval matches datasource
1 parent d826ad3 commit 144c67a

File tree

1 file changed

+46
-53
lines changed

1 file changed

+46
-53
lines changed

backend/apps/chat/task/llm.py

Lines changed: 46 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232
from apps.data_training.curd.data_training import get_training_template
3333
from apps.datasource.crud.datasource import get_table_schema
3434
from apps.datasource.crud.permission import get_row_permission_filters, is_normal_user
35-
from apps.datasource.embedding.ds_embedding import get_ds_embedding
3635
from apps.datasource.models.datasource import CoreDatasource
3736
from apps.db.db import exec_sql, get_version, check_connection
3837
from apps.system.crud.assistant import AssistantOutDs, AssistantOutDsFactory, get_assistant_ds
@@ -426,64 +425,58 @@ def select_datasource(self):
426425
full_thinking_text = ''
427426
full_text = ''
428427

429-
ds = None
430428
if not ignore_auto_select:
431-
if settings.EMBEDDING_ENABLED:
432-
ds = get_ds_embedding(self.session, self.current_user, _ds_list, self.chat_question.question)
433-
yield {'content': '{"id":' + ds.id + '}'}
434-
else:
435-
_ds_list_dict = []
436-
for _ds in _ds_list:
437-
_ds_list_dict.append(_ds)
438-
datasource_msg.append(
439-
HumanMessage(self.chat_question.datasource_user_question(orjson.dumps(_ds_list_dict).decode())))
440-
441-
self.current_logs[OperationEnum.CHOOSE_DATASOURCE] = start_log(session=self.session,
442-
ai_modal_id=self.chat_question.ai_modal_id,
443-
ai_modal_name=self.chat_question.ai_modal_name,
444-
operate=OperationEnum.CHOOSE_DATASOURCE,
445-
record_id=self.record.id,
446-
full_message=[{'type': msg.type,
447-
'content': msg.content}
448-
for
449-
msg in datasource_msg])
450-
451-
token_usage = {}
452-
res = self.llm.stream(datasource_msg)
453-
for chunk in res:
454-
SQLBotLogUtil.info(chunk)
429+
_ds_list_dict = []
430+
for _ds in _ds_list:
431+
_ds_list_dict.append(_ds)
432+
datasource_msg.append(
433+
HumanMessage(self.chat_question.datasource_user_question(orjson.dumps(_ds_list_dict).decode())))
434+
435+
self.current_logs[OperationEnum.CHOOSE_DATASOURCE] = start_log(session=self.session,
436+
ai_modal_id=self.chat_question.ai_modal_id,
437+
ai_modal_name=self.chat_question.ai_modal_name,
438+
operate=OperationEnum.CHOOSE_DATASOURCE,
439+
record_id=self.record.id,
440+
full_message=[{'type': msg.type,
441+
'content': msg.content}
442+
for
443+
msg in datasource_msg])
444+
445+
token_usage = {}
446+
res = self.llm.stream(datasource_msg)
447+
for chunk in res:
448+
SQLBotLogUtil.info(chunk)
449+
reasoning_content_chunk = ''
450+
if 'reasoning_content' in chunk.additional_kwargs:
451+
reasoning_content_chunk = chunk.additional_kwargs.get('reasoning_content', '')
452+
# else:
453+
# reasoning_content_chunk = chunk.get('reasoning_content')
454+
if reasoning_content_chunk is None:
455455
reasoning_content_chunk = ''
456-
if 'reasoning_content' in chunk.additional_kwargs:
457-
reasoning_content_chunk = chunk.additional_kwargs.get('reasoning_content', '')
458-
# else:
459-
# reasoning_content_chunk = chunk.get('reasoning_content')
460-
if reasoning_content_chunk is None:
461-
reasoning_content_chunk = ''
462-
full_thinking_text += reasoning_content_chunk
463-
464-
full_text += chunk.content
465-
yield {'content': chunk.content, 'reasoning_content': reasoning_content_chunk}
466-
get_token_usage(chunk, token_usage)
467-
datasource_msg.append(AIMessage(full_text))
468-
469-
self.current_logs[OperationEnum.CHOOSE_DATASOURCE] = end_log(session=self.session,
470-
log=self.current_logs[
471-
OperationEnum.CHOOSE_DATASOURCE],
472-
full_message=[
473-
{'type': msg.type,
474-
'content': msg.content}
475-
for msg in datasource_msg],
476-
reasoning_content=full_thinking_text,
477-
token_usage=token_usage)
478-
479-
json_str = extract_nested_json(full_text)
480-
ds = orjson.loads(json_str)
456+
full_thinking_text += reasoning_content_chunk
457+
458+
full_text += chunk.content
459+
yield {'content': chunk.content, 'reasoning_content': reasoning_content_chunk}
460+
get_token_usage(chunk, token_usage)
461+
datasource_msg.append(AIMessage(full_text))
462+
463+
self.current_logs[OperationEnum.CHOOSE_DATASOURCE] = end_log(session=self.session,
464+
log=self.current_logs[
465+
OperationEnum.CHOOSE_DATASOURCE],
466+
full_message=[
467+
{'type': msg.type,
468+
'content': msg.content}
469+
for msg in datasource_msg],
470+
reasoning_content=full_thinking_text,
471+
token_usage=token_usage)
472+
473+
json_str = extract_nested_json(full_text)
481474

482475
_error: Exception | None = None
483476
_datasource: int | None = None
484477
_engine_type: str | None = None
485478
try:
486-
data: dict = _ds_list[0] if ignore_auto_select else ds
479+
data: dict = _ds_list[0] if ignore_auto_select else orjson.loads(json_str)
487480

488481
if data.get('id') and data.get('id') != 0:
489482
_datasource = data['id']
@@ -522,7 +515,7 @@ def select_datasource(self):
522515
except Exception as e:
523516
_error = e
524517

525-
if not ignore_auto_select and not settings.EMBEDDING_ENABLED:
518+
if not ignore_auto_select:
526519
self.record = save_select_datasource_answer(session=self.session, record_id=self.record.id,
527520
answer=orjson.dumps({'content': full_text}).decode(),
528521
datasource=_datasource,

0 commit comments

Comments
 (0)