Skip to content

Commit ae6c864

Browse files
committed
Merge 'feature/add_knowledge_v2' into 'main'
增加新版知识库SDK See merge request: !26
2 parents 859fea2 + 0d3ea3d commit ae6c864

16 files changed

Lines changed: 1661 additions & 0 deletions

File tree

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import os
2+
from vikingdb.knowledge import VikingKnowledge
3+
from vikingdb.auth import IAM
4+
5+
6+
def init_client():
    """Build a VikingKnowledge client authenticated with IAM credentials.

    The access key and secret key are read from the ``VOLC_AK`` and
    ``VOLC_SK`` environment variables. An unset variable is passed through
    to ``IAM`` as ``None``.
    """
    credentials = IAM(ak=os.getenv("VOLC_AK"), sk=os.getenv("VOLC_SK"))
    return VikingKnowledge(auth=credentials)
11+
12+
13+
def init_collection(client: VikingKnowledge):
    """Return a collection handle from *client* using environment settings.

    Reads ``VIKING_COLLECTION_RID`` (passed as-is, ``None`` when unset),
    ``VIKING_COLLECTION_NAME`` (``"financial_reports"`` when unset or empty)
    and ``VIKING_PROJECT`` (``"default"`` when unset or empty).
    """
    settings = {
        "resource_id": os.getenv("VIKING_COLLECTION_RID"),
        "collection_name": os.getenv("VIKING_COLLECTION_NAME") or "financial_reports",
        "project_name": os.getenv("VIKING_PROJECT") or "default",
    }
    return client.collection(**settings)
18+
19+
20+
if __name__ == "__main__":
    # Smoke test: build the client and the collection handle, then report
    # the class name of each object.
    client = init_client()
    collection = init_collection(client)
    for label, obj in (("client:", client), ("collection:", collection)):
        print(label, type(obj).__name__)

examples/knowledge/02_doc_crud.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
import json
3+
import time
4+
from typing import List
5+
from vikingdb.knowledge import VikingKnowledge
6+
from vikingdb.auth import IAM
7+
from vikingdb.knowledge import AddDocV2Request, ListDocsRequest, MetaItem, DedupOptions
8+
9+
10+
def init_client():
    """Create a VikingKnowledge client using IAM credentials from the environment.

    ``VOLC_AK`` and ``VOLC_SK`` supply the access key and secret key; each
    falls back to an empty string when the variable is unset or empty.
    """
    access_key = os.getenv("VOLC_AK") or ""
    secret_key = os.getenv("VOLC_SK") or ""
    return VikingKnowledge(auth=IAM(ak=access_key, sk=secret_key))
15+
16+
17+
def init_collection(client: VikingKnowledge):
    """Open the collection described by the environment on *client*.

    ``VIKING_COLLECTION_RID`` is forwarded unchanged (``None`` when unset).
    ``VIKING_COLLECTION_NAME`` defaults to ``"financial_reports"`` and
    ``VIKING_PROJECT`` to ``"default"`` when unset or empty.
    """
    rid = os.getenv("VIKING_COLLECTION_RID")
    name = os.getenv("VIKING_COLLECTION_NAME")
    if not name:
        name = "financial_reports"
    project = os.getenv("VIKING_PROJECT")
    if not project:
        project = "default"
    return client.collection(
        resource_id=rid,
        collection_name=name,
        project_name=project,
    )
22+
23+
24+
def to_json_env(name: str):
    """Parse the environment variable *name* as JSON.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The decoded JSON value, or ``None`` when the variable is unset,
        empty, or does not contain valid JSON.
    """
    raw = os.getenv(name)
    if not raw:
        return None
    try:
        return json.loads(raw)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError. Malformed input is
        # treated as "not configured"; unrelated errors are no longer hidden.
        return None
32+
33+
def add_doc_by_url(kc, *, doc_id: str, doc_name: str, doc_type: str, url: str, tag_list: List[MetaItem]):
    """Add a document to the collection, sourcing its content from *url*.

    Args:
        kc: Collection handle exposing ``add_doc_v2``.
        doc_id: Identifier to assign to the document.
        doc_name: Display name of the document.
        doc_type: Document type string (the caller in this file uses ``"pdf"``).
        url: Location of the document; sent as the request's ``uri``.
        tag_list: Metadata items attached to the document.

    Returns:
        The value returned by ``kc.add_doc_v2``; it is also printed.
    """
    response = kc.add_doc_v2(
        AddDocV2Request(
            doc_id=doc_id,
            doc_name=doc_name,
            doc_type=doc_type,
            uri=url,
            tag_list=tag_list,
        )
    )
    print("add_doc:", response)
    return response
44+
45+
def add_doc_by_tos(kc, *, doc_id: str, doc_name: str, doc_type: str, tos_path: str, tag_list: List[MetaItem]):
    """Add a document to the collection, sourcing its content from *tos_path*.

    Args:
        kc: Collection handle exposing ``add_doc_v2``.
        doc_id: Identifier to assign to the document.
        doc_name: Display name of the document.
        doc_type: Document type string.
        tos_path: Storage path of the document; sent as the request's ``uri``.
        tag_list: Metadata items attached to the document.

    Returns:
        The value returned by ``kc.add_doc_v2``; it is also printed.
    """
    request_fields = {
        "doc_id": doc_id,
        "doc_name": doc_name,
        "doc_type": doc_type,
        "uri": tos_path,
        "tag_list": tag_list,
    }
    outcome = kc.add_doc_v2(AddDocV2Request(**request_fields))
    print("add_doc:", outcome)
    return outcome
56+
57+
def run_doc_crud():
    """Walk through the document lifecycle against the configured collection.

    Steps, in order: add a PDF by URL, fetch it, update its metadata, wait,
    rename it, and list documents. Each step prints its result. The final
    delete step is left disabled.
    """
    kc = init_collection(init_client())

    doc_id = "google-report-2025-q1"
    doc_name = "Google 2025 Q1 Financial Report"
    tags = [
        MetaItem(field_name="category", field_type="string", field_value="financial_report"),
        MetaItem(field_name="quarter", field_type="string", field_value="Q1"),
        MetaItem(field_name="year", field_type="int64", field_value=2025),
    ]

    # Create the document from a public URL.
    add_doc_by_url(
        kc,
        doc_id=doc_id,
        doc_name=doc_name,
        doc_type="pdf",
        url="https://pdf.dfcfw.com/pdf/H3_AP202504281663850212_1.pdf",
        tag_list=tags,
    )

    # Read it back.
    fetched = kc.get_doc(doc_id, return_token_usage=True)
    print("get_doc:", fetched.model_dump(by_alias=True))

    # Extend the same tag list with one more item and push it as the new metadata.
    tags.append(MetaItem(field_name="updated_at", field_type="int64", field_value=1714560000))
    meta_result = kc.update_doc_meta(doc_id, tags)
    print("update_doc_meta:", meta_result)

    # Pause before renaming — presumably to let the service finish processing
    # the newly added document; TODO confirm the required wait.
    time.sleep(30)

    rename_result = kc.update_doc(doc_id, f"{doc_name}-updated")
    print("update_doc:", rename_result)

    listing = kc.list_docs(ListDocsRequest(offset=0, limit=10, return_token_usage=True))
    print("list_docs:", listing.result.model_dump(by_alias=True))

    # Deletion is intentionally disabled; uncomment to remove the document.
    # del_res = kc.delete_doc(doc_id)
    # print("delete_doc:", del_res)
97+
98+
99+
if __name__ == "__main__":
    # Script entry point: run the document CRUD walkthrough.
    run_doc_crud()
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import os
2+
import json
3+
from vikingdb.knowledge import VikingKnowledge
4+
from vikingdb.auth import IAM
5+
from vikingdb.knowledge import (
6+
AddPointRequest,
7+
UpdatePointRequest,
8+
ListPointsRequest,
9+
DeletePointRequest,
10+
)
11+
12+
13+
def init_client():
    """Return a VikingKnowledge client configured with IAM authentication.

    Credentials come from the ``VOLC_AK`` and ``VOLC_SK`` environment
    variables; an unset or empty variable becomes an empty string.
    """
    auth = IAM(
        ak=os.getenv("VOLC_AK") or "",
        sk=os.getenv("VOLC_SK") or "",
    )
    return VikingKnowledge(auth=auth)
18+
19+
20+
def init_collection(client: VikingKnowledge):
    """Resolve the target collection on *client* from environment variables.

    ``VIKING_COLLECTION_RID`` is passed through (``None`` when unset);
    ``VIKING_COLLECTION_NAME`` and ``VIKING_PROJECT`` fall back to
    ``"financial_reports"`` and ``"default"`` when unset or empty.
    """
    env = os.getenv
    return client.collection(
        resource_id=env("VIKING_COLLECTION_RID"),
        collection_name=env("VIKING_COLLECTION_NAME") or "financial_reports",
        project_name=env("VIKING_PROJECT") or "default",
    )
25+
26+
27+
def to_json_env(name: str):
    """Read the environment variable *name* and decode it as JSON.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The decoded JSON value, or ``None`` when the variable is unset,
        empty, or is not valid JSON.
    """
    text = os.getenv(name)
    if not text:
        return None
    try:
        decoded = json.loads(text)
    except ValueError:
        # Only decoding failures are tolerated (json.JSONDecodeError is a
        # ValueError); any other exception indicates a real bug and propagates.
        return None
    return decoded
35+
36+
37+
def run_point_crud():
    """Walk through the point (chunk) lifecycle for one document.

    Steps, in order: add a text point to document ``google-report-2025-q1``,
    fetch it, update its content and title, update its fields, list points,
    and delete the point. Each step prints its result.
    """
    kc = init_collection(init_client())

    base_title = "Revenue Highlights"
    base_content = "Revenue grew 12% YoY to $3.4B."

    # Create the point with its initial structured fields.
    created = kc.add_point(
        AddPointRequest(
            doc_id="google-report-2025-q1",
            chunk_type="text",
            chunk_title=base_title,
            content=base_content,
            question=None,
            fields=[
                {"field_name": "topic", "field_type": "string", "field_value": "revenue"},
                {"field_name": "year", "field_type": "int64", "field_value": 2025},
                {"field_name": "quarter", "field_type": "string", "field_value": "Q1"},
            ],
        )
    )
    print("add_point:", created)

    # Every later call addresses the point through the id from the add response.
    point_id = created.result.point_id

    fetched = kc.get_point(point_id, get_attachment_link=True)
    print("get_point:", fetched.model_dump(by_alias=True))

    # First update: text content and title only.
    content_update = UpdatePointRequest(
        content=f"{base_content} Updated.",
        chunk_title=f"{base_title} (Updated)",
    )
    print("update_point_content:", kc.update_point(point_id, content_update))

    # Second update: structured fields only.
    fields_update = UpdatePointRequest(
        fields=[
            {"field_name": "topic", "field_type": "string", "field_value": "revenue"},
            {"field_name": "revised", "field_type": "bool", "field_value": True},
        ]
    )
    print("update_point_fields:", kc.update_point(point_id, fields_update))

    listing = kc.list_points(ListPointsRequest(offset=0, limit=10, get_attachment_link=True))
    print("list_points:", listing.model_dump(by_alias=True))

    # Remove the point created above.
    removed = kc.delete_point(DeletePointRequest(point_id=point_id))
    print("delete_point:", removed)
91+
92+
93+
if __name__ == "__main__":
    # Script entry point: run the point CRUD walkthrough.
    run_point_crud()

examples/knowledge/04_search.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import os
2+
import json
3+
from vikingdb.knowledge import VikingKnowledge, RerankDataItem
4+
from vikingdb.auth import IAM, APIKey
5+
from vikingdb.knowledge.models.search import (
6+
SearchCollectionRequest,
7+
SearchKnowledgeRequest,
8+
)
9+
from vikingdb.knowledge.models.chat import ChatMessage, ChatCompletionRequest
10+
from vikingdb.knowledge.models.service_chat import ServiceChatRequest
11+
from vikingdb.knowledge.exceptions import VikingKnowledgeException
12+
13+
14+
def init_client():
    """Build a VikingKnowledge client that authenticates with IAM keys.

    ``VOLC_AK`` and ``VOLC_SK`` are read from the environment and handed to
    ``IAM`` unchanged, so an unset variable is passed as ``None``.
    """
    iam_auth = IAM(ak=os.getenv("VOLC_AK"), sk=os.getenv("VOLC_SK"))
    return VikingKnowledge(auth=iam_auth)
19+
20+
def init_client_by_apikey():
    """Build a VikingKnowledge client that authenticates with an API key.

    The key is read from ``VIKING_SERVICE_API_KEY`` and passed to ``APIKey``
    unchanged (``None`` when the variable is unset).
    """
    key_auth = APIKey(api_key=os.getenv("VIKING_SERVICE_API_KEY"))
    return VikingKnowledge(auth=key_auth)
24+
25+
26+
def init_collection(client: VikingKnowledge):
    """Return the collection handle selected by environment variables.

    Uses ``VIKING_COLLECTION_RID`` as the resource id (``None`` when unset),
    ``VIKING_COLLECTION_NAME`` or ``"financial_reports"`` as the collection
    name, and ``VIKING_PROJECT`` or ``"default"`` as the project name.
    """
    target = dict(
        resource_id=os.getenv("VIKING_COLLECTION_RID"),
        collection_name=os.getenv("VIKING_COLLECTION_NAME") or "financial_reports",
        project_name=os.getenv("VIKING_PROJECT") or "default",
    )
    return client.collection(**target)
31+
32+
33+
def run_search_collection():
    """Run a ``search_collection`` query and print the response.

    Builds an IAM-authenticated client, opens the configured collection and
    searches for a fixed query with reranking switched off. A
    ``VikingKnowledgeException`` is caught and printed instead of raised.
    """
    kc = init_collection(init_client())
    request = SearchCollectionRequest(
        query="2025 Q1 revenue growth",
        limit=10,
        dense_weight=0.5,
        rerank_switch=False,
        retrieve_count=25,
        endpoint_id=None,
        rerank_model="Doubao-pro-4k-rerank",
        rerank_only_chunk=False,
        query_param=None,
    )
    try:
        response = kc.search_collection(request)
        print("search_collection:", response.model_dump(by_alias=True))
    except VikingKnowledgeException as err:
        print("search_collection_error:", err)
53+
54+
55+
def run_search_knowledge():
    """Run a ``search_knowledge`` query and print the response.

    Uses an IAM-authenticated client and the configured collection. The
    optional image query, pre/post-processing and query parameters are all
    passed explicitly as ``None``. A ``VikingKnowledgeException`` is caught
    and printed instead of raised.
    """
    kc = init_collection(init_client())
    request = SearchKnowledgeRequest(
        query="2025 Q1 revenue growth",
        image_query=None,
        pre_processing=None,
        post_processing=None,
        query_param=None,
        limit=10,
        dense_weight=0.5,
    )
    try:
        response = kc.search_knowledge(request)
        print("search_knowledge:", response.model_dump(by_alias=True))
    except VikingKnowledgeException as err:
        print("search_knowledge_error:", err)
73+
74+
75+
def run_chat_completion():
    """Send a non-streaming chat completion request and print the response.

    The conversation is a system prompt plus one user message whose content
    is a list with a single text part. The chat API key is read from
    ``VIKING_CHAT_API_KEY``. A ``VikingKnowledgeException`` is caught and
    printed instead of raised.
    """
    client = init_client()
    system_msg = ChatMessage(role="system", content="你是一位在线客服,根据<context>中的财报信息回答用户问题")
    user_msg = ChatMessage(role="user", content=[{"type": "text", "text": "总结下 2025 Q1 收入表现"}])
    request = ChatCompletionRequest(
        model="Doubao-1-5-pro-32k",
        messages=[system_msg, user_msg],
        thinking=None,
        max_tokens=4096,
        temperature=0.1,
        return_token_usage=True,
        api_key=os.getenv("VIKING_CHAT_API_KEY"),
        stream=False,
    )
    try:
        response = client.chat_completion(request)
        print("chat_completion:", response.model_dump(by_alias=True))
    except VikingKnowledgeException as err:
        print("chat_completion_error:", err)
96+
97+
98+
def run_service_chat():
    """Send a non-streaming service chat request and print the response.

    Uses an API-key-authenticated client and the service resource id from
    ``VIKING_SERVICE_RID``. The call is made with ``timeout=120``. A
    ``VikingKnowledgeException`` is caught and printed instead of raised.
    """
    client = init_client_by_apikey()
    request = ServiceChatRequest(
        service_resource_id=os.getenv("VIKING_SERVICE_RID"),
        messages=[ChatMessage(role="user", content="列举 2025 Q1 财报里的三项亮点")],
        query_param=None,
        stream=False,
    )
    try:
        response = client.service_chat(request, timeout=120)
        print("service_chat:", response.model_dump(by_alias=True))
    except VikingKnowledgeException as err:
        print("service_chat_error:", err)
114+
115+
116+
def run_rerank_ops():
    """Rerank two sample passages against a fixed query and print the result.

    The optional rerank instruction is read from
    ``VIKING_RERANK_INSTRUCTION``. A ``VikingKnowledgeException`` is caught
    and printed instead of raised.
    """
    client = init_client()
    query = "2025 Q1 revenue growth"
    # (content, title) pairs; each becomes one rerank candidate for the query.
    passages = (
        ("Revenue grew 12% YoY to $3.4B.", "Revenue"),
        ("Operating margin improved by 1.5pp to 17%.", "Margin"),
    )
    candidates = [
        RerankDataItem(query=query, content=text, title=heading)
        for text, heading in passages
    ]
    try:
        ranked = client.rerank(
            datas=candidates,
            rerank_model="m3-v2-rerank",
            rerank_instruction=os.getenv("VIKING_RERANK_INSTRUCTION"),
            endpoint_id=None,
        )
        print("rerank:", ranked)
    except VikingKnowledgeException as err:
        print("rerank_error:", err)
133+
134+
135+
if __name__ == "__main__":
    # Run every example in sequence: the two searches, chat completion,
    # service chat, and rerank.
    for example in (
        run_search_collection,
        run_search_knowledge,
        run_chat_completion,
        run_service_chat,
        run_rerank_ops,
    ):
        example()

0 commit comments

Comments
 (0)