# /home/wangxinxin/my_graphrag/settings.yaml
### This config file contains required core defaults that must be set, along with a handful of common optional settings.
### For a full list of available settings, see https://microsoft.github.io/graphrag/config/yaml/
### LLM settings ###
## There are a number of settings to tune the threading and token limits for LLM calls - check the docs.
models:
  default_chat_model:
    type: chat
    model_provider: openai
    auth_type: api_key
    api_key: "sk-dummy"
    model: sensenova
    # Points to the Bridge Server (LLM)
    api_base: http://localhost:8900/v1
    # Critical: set to false so GraphRAG accepts the plain text returned by the Bridge Server
    model_supports_json: false
    # Recommended: keep concurrency low for local models
    concurrent_requests: 4
    async_mode: threaded
    retry_strategy: exponential_backoff
    max_retries: 3
    # Must be null to disable client-side rate limiting against the local server
    tokens_per_minute: null
    requests_per_minute: null
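    # Quick sanity check for the endpoint above. A sketch only: it assumes the
    # Bridge Server implements the standard OpenAI-compatible /chat/completions
    # route implied by api_base:
    #   curl http://localhost:8900/v1/chat/completions \
    #     -H "Content-Type: application/json" \
    #     -H "Authorization: Bearer sk-dummy" \
    #     -d '{"model": "sensenova", "messages": [{"role": "user", "content": "ping"}]}'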
  default_embedding_model:
    type: embedding
    model_provider: openai
    auth_type: api_key
    api_key: "sk-dummy"
    model: bge-m3
    # Points to fix_tei_proxy
    api_base: http://localhost:8102/v1
    concurrent_requests: 20
    async_mode: threaded
    retry_strategy: exponential_backoff
    max_retries: 3
    # Must be null to disable client-side rate limiting against the local proxy
    tokens_per_minute: null
    requests_per_minute: null
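    # Same idea for the embedding side, assuming fix_tei_proxy exposes the
    # OpenAI-compatible /embeddings route:
    #   curl http://localhost:8102/v1/embeddings \
    #     -H "Content-Type: application/json" \
    #     -H "Authorization: Bearer sk-dummy" \
    #     -d '{"model": "bge-m3", "input": "ping"}'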

### Input settings ###

input:
  storage:
    type: file # or blob
    base_dir: "input"
  file_type: text # [csv, text, json]

chunks:
  # BGE-M3 supports 8k tokens; 1200 is a good balance point
  size: 1200
  overlap: 100
  group_by_columns: [id]
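  # Rough sizing: with size 1200 and overlap 100 the effective stride is
  # 1100 tokens, so a 12,000-token document yields about
  # ceil((12000 - 1200) / 1100) + 1 = 11 chunks.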

### Output/storage settings ###
## If blob storage is specified in the following four sections,
## connection_string and container_name must be provided

output:
  type: file # [file, blob, cosmosdb]
  base_dir: "output"

cache:
  type: file # [file, blob, cosmosdb]
  base_dir: "cache"

reporting:
  type: file # [file, blob]
  base_dir: "logs"

vector_store:
  default_vector_store:
    type: lancedb
    db_uri: output/lancedb
    container_name: default
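    # To inspect what ended up in the store after indexing (assumes the
    # lancedb Python package is available in the environment):
    #   python -c "import lancedb; print(lancedb.connect('output/lancedb').table_names())"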

### Workflow settings ###

embed_text:
  model_id: default_embedding_model
  vector_store_id: default_vector_store

extract_graph:
  model_id: default_chat_model
  prompt: "prompts/extract_graph.txt"
  entity_types: [organization, person, geo, event]
  max_gleanings: 1

summarize_descriptions:
  model_id: default_chat_model
  prompt: "prompts/summarize_descriptions.txt"
  max_length: 500

extract_graph_nlp:
  text_analyzer:
    extractor_type: regex_english # [regex_english, syntactic_parser, cfg]
  async_mode: threaded # or asyncio

cluster_graph:
  max_cluster_size: 10

extract_claims:
  enabled: false
  model_id: default_chat_model
  prompt: "prompts/extract_claims.txt"
  description: "Any claims or facts that could be relevant to information discovery."
  max_gleanings: 1

community_reports:
  model_id: default_chat_model
  graph_prompt: "prompts/community_report_graph.txt"
  text_prompt: "prompts/community_report_text.txt"
  max_length: 2000
  # Leverage the 64K window to significantly increase input length and generate more comprehensive reports
  max_input_length: 32000
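  # For scale: the stock GraphRAG template ships max_input_length: 8000, so
  # 32000 roughly quadruples the evidence behind each report while leaving
  # headroom in a 64K window for the prompt and the 2000-token report itself.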

embed_graph:
  enabled: false # if true, will generate node2vec embeddings for nodes

umap:
  enabled: false # if true, will generate UMAP embeddings for nodes (embed_graph must also be enabled)

snapshots:
  graphml: false
  embeddings: false
### Query settings ###
## The prompt locations are required here, but each search method has a number of optional knobs that can be tuned.
## See the config docs: https://microsoft.github.io/graphrag/config/yaml/#query
local_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/local_search_system_prompt.txt"
  # =========== Optimization for 64K models ===========
  # Allow up to a 48k context (about 75% of the window)
  max_context_tokens: 48000
  # Reserve 2000 tokens for answer generation
  llm_max_tokens: 2000
  # ===================================================
  # 1. Increase recall (default is 10; raised to 50)
  top_k_mapped_entities: 50
  # 2. Lower the similarity threshold (the default may be 0.2 or higher; BGE-M3 may need less).
  #    Forced to 0: pull in anything even slightly relevant and let the LLM decide.
  min_score: 0.0
  # 3. Raise the share of the context window given to raw text units
  text_unit_prop: 0.8
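  # Resulting budget split: 0.8 * 48000 = 38,400 tokens for raw text units,
  # leaving ~9,600 for entity, relationship, and community context.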
global_search:
  chat_model_id: default_chat_model
  map_prompt: "prompts/global_search_map_system_prompt.txt"
  reduce_prompt: "prompts/global_search_reduce_system_prompt.txt"
  knowledge_prompt: "prompts/global_search_knowledge_system_prompt.txt"
  # =========== Optimization for 64K models ===========
  max_context_tokens: 48000
  llm_max_tokens: 2000
  # Allow processing more data
  data_max_tokens: 48000
  map_max_length: 1000
  reduce_max_length: 2000
  dynamic_search_threshold: 1
  dynamic_search_keep_parent: false
  dynamic_search_num_repeats: 1
  dynamic_search_use_summary: false
  dynamic_search_max_level: 2
  # ===================================================
drift_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/drift_search_system_prompt.txt"
  reduce_prompt: "prompts/drift_search_reduce_prompt.txt"
  # =========== Optimization for 64K models ===========
  data_max_tokens: 48000
  # ===================================================
  reduce_max_tokens: 2000
  reduce_temperature: 0
  reduce_max_completion_tokens: null
  concurrency: 32
  drift_k_followups: 20
  primer_folds: 5
  primer_llm_max_tokens: 12000
  n_depth: 3
  local_search_text_unit_prop: 0.9
  local_search_community_prop: 0.1
  local_search_top_k_mapped_entities: 10
  local_search_top_k_relationships: 10
  local_search_max_data_tokens: 12000
  local_search_temperature: 0
  local_search_top_p: 1
  local_search_n: 1
  local_search_llm_max_gen_tokens: null
  local_search_llm_max_gen_completion_tokens: null
basic_search:
  chat_model_id: default_chat_model
  embedding_model_id: default_embedding_model
  prompt: "prompts/basic_search_system_prompt.txt"
  k: 10
  # =========== Optimization for 64K models ===========
  max_context_tokens: 48000
  # ===================================================
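
### Usage ###
# Once the index is built (graphrag index --root .), the tuned search methods
# can be exercised from the GraphRAG CLI, for example:
#   graphrag query --root . --method local --query "your question here"
#   graphrag query --root . --method global --query "your question here"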