Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/minor-20250916182815141332.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "minor",
"description": "Add config for NLP async mode."
}
1 change: 1 addition & 0 deletions graphrag/config/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ class ExtractGraphNLPDefaults:
normalize_edge_weights: bool = True
text_analyzer: TextAnalyzerDefaults = field(default_factory=TextAnalyzerDefaults)
concurrent_requests: int = 25
async_mode: AsyncType = AsyncType.Threaded


@dataclass
Expand Down
1 change: 1 addition & 0 deletions graphrag/config/init_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
extract_graph_nlp:
text_analyzer:
extractor_type: {graphrag_config_defaults.extract_graph_nlp.text_analyzer.extractor_type.value} # [regex_english, syntactic_parser, cfg]
async_mode: {graphrag_config_defaults.extract_graph_nlp.async_mode.value} # or asyncio

cluster_graph:
max_cluster_size: {graphrag_config_defaults.cluster_graph.max_cluster_size}
Expand Down
6 changes: 5 additions & 1 deletion graphrag/config/models/extract_graph_nlp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pydantic import BaseModel, Field

from graphrag.config.defaults import graphrag_config_defaults
from graphrag.config.enums import NounPhraseExtractorType
from graphrag.config.enums import AsyncType, NounPhraseExtractorType


class TextAnalyzerConfig(BaseModel):
Expand Down Expand Up @@ -68,3 +68,7 @@ class ExtractGraphNLPConfig(BaseModel):
description="The number of threads to use for the extraction process.",
default=graphrag_config_defaults.extract_graph_nlp.concurrent_requests,
)
async_mode: AsyncType = Field(
description="The async mode to use.",
default=graphrag_config_defaults.extract_graph_nlp.async_mode,
)
10 changes: 8 additions & 2 deletions graphrag/index/operations/build_noun_graph/build_noun_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ async def build_noun_graph(
text_analyzer: BaseNounPhraseExtractor,
normalize_edge_weights: bool,
num_threads: int = 4,
async_mode: AsyncType = AsyncType.Threaded,
cache: PipelineCache | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""Build a noun graph from text units."""
text_units = text_unit_df.loc[:, ["id", "text"]]
nodes_df = await _extract_nodes(
text_units, text_analyzer, num_threads=num_threads, cache=cache
text_units,
text_analyzer,
num_threads=num_threads,
async_mode=async_mode,
cache=cache,
)
edges_df = _extract_edges(nodes_df, normalize_edge_weights=normalize_edge_weights)
return (nodes_df, edges_df)
Expand All @@ -39,6 +44,7 @@ async def _extract_nodes(
text_unit_df: pd.DataFrame,
text_analyzer: BaseNounPhraseExtractor,
num_threads: int = 4,
async_mode: AsyncType = AsyncType.Threaded,
cache: PipelineCache | None = None,
) -> pd.DataFrame:
"""
Expand All @@ -64,7 +70,7 @@ async def extract(row):
text_unit_df,
extract,
num_threads=num_threads,
async_type=AsyncType.Threaded,
async_type=async_mode,
progress_msg="extract noun phrases progress: ",
)

Expand Down
1 change: 1 addition & 0 deletions graphrag/index/workflows/extract_graph_nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ async def extract_graph_nlp(
text_analyzer=text_analyzer,
normalize_edge_weights=extraction_config.normalize_edge_weights,
num_threads=extraction_config.concurrent_requests,
async_mode=extraction_config.async_mode,
cache=cache,
)

Expand Down
Loading