# /home/wangxinxin/my_graphrag/settings.yaml
### This config file contains required core defaults that must be set, along with a handful of common optional settings.
### For a full list of available settings, see https://microsoft.github.io/graphrag/config/yaml/
### LLM settings ###
## There are a number of settings to tune the threading and token limits for LLM calls - check the docs.
models:
  default_chat_model:
    type: chat
    model_provider: openai
    auth_type: api_key
    api_key: "sk-dummy"
    model: sensenova
    # Points to the Bridge Server (LLM)
    api_base: http://localhost:8900/v1
    # Critical: set to false so GraphRAG accepts the plain text returned by the Bridge Server
    model_supports_json: false
    # Recommended: keep concurrency low for local models
    concurrent_requests: 4
    async_mode: threaded
    retry_strategy: exponential_backoff
    max_retries: 3
    # Must be null to disable client-side rate limiting against the local server
    tokens_per_minute: null
    requests_per_minute: null
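    # Quick sanity check for the endpoint above. A sketch only: it assumes the
    # Bridge Server implements the standard OpenAI-compatible /chat/completions
    # route implied by api_base:
    #   curl http://localhost:8900/v1/chat/completions \
    #     -H "Content-Type: application/json" \
    #     -H "Authorization: Bearer sk-dummy" \
    #     -d '{"model": "sensenova", "messages": [{"role": "user", "content": "ping"}]}'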
  default_embedding_model:
    type: embedding
    model_provider: openai
    auth_type: api_key
    api_key: "sk-dummy"
    model: bge-m3
    # Points to fix_tei_proxy
    api_base: http://localhost:8102/v1
    concurrent_requests: 20
    async_mode: threaded
    retry_strategy: exponential_backoff
    max_retries: 3
    # Must be null to disable client-side rate limiting against the local proxy
    tokens_per_minute: null
    requests_per_minute: null
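    # Same idea for the embedding side, assuming fix_tei_proxy exposes the
    # OpenAI-compatible /embeddings route:
    #   curl http://localhost:8102/v1/embeddings \
    #     -H "Content-Type: application/json" \
    #     -H "Authorization: Bearer sk-dummy" \
    #     -d '{"model": "bge-m3", "input": "ping"}'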

### Input settings ###

input:
  storage:
    type: file # or blob
    base_dir: "input"
  file_type: text # [csv, text, json]

chunks:
  # BGE-M3 supports 8k tokens; 1200 is a good balance point
  size: 1200
  overlap: 100
  group_by_columns: [id]
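  # Rough sizing: with size 1200 and overlap 100 the effective stride is
  # 1100 tokens, so a 12,000-token document yields about
  # ceil((12000 - 1200) / 1100) + 1 = 11 chunks.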

### Output/storage settings ###
## If blob storage is specified in the following four sections,
## connection_string and container_name must be provided

output:
  type: file # [file, blob, cosmosdb]
  base_dir: "output"

cache:
  type: file # [file, blob, cosmosdb]
  base_dir: "cache"

reporting:
  type: file # [file, blob]
  base_dir: "logs"

vector_store:
  default_vector_store:
    type: lancedb
    db_uri: output/lancedb
    container_name: default
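    # To inspect what ended up in the store after indexing (assumes the
    # lancedb Python package is available in the environment):
    #   python -c "import lancedb; print(lancedb.connect('output/lancedb').table_names())"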

### Workflow settings ###

embed_text:
  model_id: default_embedding_model
  vector_store_id: default_vector_store

extract_graph:
  model_id: default_chat_model
  prompt: "prompts/extract_graph.txt"
  entity_types: [organization, person, geo, event]
  max_gleanings: 1

summarize_descriptions:
  model_id: default_chat_model
  prompt: "prompts/summarize_descriptions.txt"
  max_length: 500

extract_graph_nlp:
  text_analyzer:
    extractor_type: regex_english # [regex_english, syntactic_parser, cfg]
  async_mode: threaded # or asyncio

cluster_graph:
  max_cluster_size: 10

extract_claims:
  enabled: false
  model_id: default_chat_model
  prompt: "prompts/extract_claims.txt"
  description: "Any claims or facts that could be relevant to information discovery."
  max_gleanings: 1

community_reports:
  model_id: default_chat_model
  graph_prompt: "prompts/community_report_graph.txt"
  text_prompt: "prompts/community_report_text.txt"
  max_length: 2000
  # Leverage the 64K window to significantly increase input length and generate more comprehensive reports
  max_input_length: 32000
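  # For scale: the stock GraphRAG template ships max_input_length: 8000, so
  # 32000 roughly quadruples the evidence behind each report while leaving
  # headroom in a 64K window for the prompt and the 2000-token report itself.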

embed_graph:
  enabled: false # if true, will generate node2vec embeddings for nodes

umap:
  enabled: false # if true, will generate UMAP embeddings for nodes (embed_graph must also be enabled)

snapshots:
  graphml: false
  embeddings: false
### Query settings ###
## The prompt locations are required here, but each search method has a number of optional knobs that can be tuned.
## See the config docs: https://microsoft.github.io/graphrag/config/yaml/#query
local_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/local_search_system_prompt.txt"
  # =========== Optimization for 64K models ===========
  # Allow up to a 48k context (about 75% of the window)
  max_context_tokens: 48000
  # Reserve 2000 tokens for answer generation
  llm_max_tokens: 2000
  # ===================================================
  # 1. Increase recall (default is 10; raised to 50)
  top_k_mapped_entities: 50
  # 2. Lower the similarity threshold (the default may be 0.2 or higher; BGE-M3 may need less).
  #    Forced to 0: pull in anything even slightly relevant and let the LLM decide.
  min_score: 0.0
  # 3. Raise the share of the context window given to raw text units
  text_unit_prop: 0.8
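  # Resulting budget split: 0.8 * 48000 = 38,400 tokens for raw text units,
  # leaving ~9,600 for entity, relationship, and community context.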
global_search:
  chat_model_id: default_chat_model
  map_prompt: "prompts/global_search_map_system_prompt.txt"
  reduce_prompt: "prompts/global_search_reduce_system_prompt.txt"
  knowledge_prompt: "prompts/global_search_knowledge_system_prompt.txt"
  # =========== Optimization for 64K models ===========
  max_context_tokens: 48000
  llm_max_tokens: 2000
  # Allow processing more data
  data_max_tokens: 48000
  map_max_length: 1000
  reduce_max_length: 2000
  dynamic_search_threshold: 1
  dynamic_search_keep_parent: false
  dynamic_search_num_repeats: 1
  dynamic_search_use_summary: false
  dynamic_search_max_level: 2
  # ===================================================
drift_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/drift_search_system_prompt.txt"
  reduce_prompt: "prompts/drift_search_reduce_prompt.txt"
  # =========== Optimization for 64K models ===========
  data_max_tokens: 48000
  # ===================================================
  reduce_max_tokens: 2000
  reduce_temperature: 0
  reduce_max_completion_tokens: null
  concurrency: 32
  drift_k_followups: 20
  primer_folds: 5
  primer_llm_max_tokens: 12000
  n_depth: 3
  local_search_text_unit_prop: 0.9
  local_search_community_prop: 0.1
  local_search_top_k_mapped_entities: 10
  local_search_top_k_relationships: 10
  local_search_max_data_tokens: 12000
  local_search_temperature: 0
  local_search_top_p: 1
  local_search_n: 1
  local_search_llm_max_gen_tokens: null
  local_search_llm_max_gen_completion_tokens: null
basic_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/basic_search_system_prompt.txt"
  k: 10
  # =========== Optimization for 64K models ===========
  max_context_tokens: 48000
  # ===================================================
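
### Usage ###
# Once the index is built (graphrag index --root .), the tuned search methods
# can be exercised from the GraphRAG CLI, for example:
#   graphrag query --root . --method local --query "your question here"
#   graphrag query --root . --method global --query "your question here"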