Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/gh-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:

- name: Install dependencies
shell: bash
run: uv sync --extra dev
run: uv sync

- name: mkdocs build
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
shell: bash
run: |
uv sync --extra dev
uv sync
uv pip install gensim

- name: Check
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
shell: bash
run: |
uv sync --extra dev
uv sync
uv pip install gensim

- name: Build
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-notebook-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
shell: bash
run: |
uv sync --extra dev
uv sync
uv pip install gensim

- name: Notebook Test
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-smoke-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- name: Install dependencies
shell: bash
run: |
uv sync --extra dev
uv sync
uv pip install gensim

- name: Build
Expand Down
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20251006231459594285.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Housekeeping toward 2.7."
}
6 changes: 1 addition & 5 deletions DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@

## Install Dependencies
```shell
# (optional) create virtual environment
uv venv --python 3.10
source .venv/bin/activate

# install python dependencies
uv sync --extra dev
uv sync
```

## Execute the indexing engine
Expand Down
6 changes: 1 addition & 5 deletions docs/developing.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@
## Install Dependencies

```sh
# (optional) create virtual environment
uv venv --python 3.10
source .venv/bin/activate

# install python dependencies
uv sync --extra dev
uv sync
```

## Execute the Indexing Engine
Expand Down
2 changes: 2 additions & 0 deletions docs/examples_notebooks/api_overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Note: this cell is expected to fail on the deployed docs because PROJECT_DIRECTORY is not set to a real location.\n",
"# If you run this notebook locally, set PROJECT_DIRECTORY to a directory that contains your settings.yaml.\n",
"graphrag_config = load_config(Path(PROJECT_DIRECTORY))"
]
},
Expand Down
31 changes: 18 additions & 13 deletions docs/examples_notebooks/custom_vector_store.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"import numpy as np\n",
"import yaml\n",
"\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.data_model.types import TextEmbedder\n",
"\n",
"# GraphRAG vector store components\n",
Expand Down Expand Up @@ -147,14 +148,12 @@
" self.vectors: dict[str, np.ndarray] = {}\n",
" self.connected = False\n",
"\n",
" print(\n",
" f\"🚀 SimpleInMemoryVectorStore initialized for collection: {self.collection_name}\"\n",
" )\n",
" print(f\"🚀 SimpleInMemoryVectorStore initialized for index: {self.index_name}\")\n",
"\n",
" def connect(self, **kwargs: Any) -> None:\n",
" \"\"\"Connect to the vector storage (no-op for in-memory store).\"\"\"\n",
" self.connected = True\n",
" print(f\"✅ Connected to in-memory vector store: {self.collection_name}\")\n",
" print(f\"✅ Connected to in-memory vector store: {self.index_name}\")\n",
"\n",
" def load_documents(\n",
" self, documents: list[VectorStoreDocument], overwrite: bool = True\n",
Expand Down Expand Up @@ -250,7 +249,7 @@
" def get_stats(self) -> dict[str, Any]:\n",
" \"\"\"Get statistics about the vector store (custom method).\"\"\"\n",
" return {\n",
" \"collection_name\": self.collection_name,\n",
" \"index_name\": self.index_name,\n",
" \"document_count\": len(self.documents),\n",
" \"vector_count\": len(self.vectors),\n",
" \"connected\": self.connected,\n",
Expand Down Expand Up @@ -353,11 +352,11 @@
"outputs": [],
"source": [
"# Test creating vector store using the factory\n",
"vector_store_config = {\"collection_name\": \"test_collection\"}\n",
"schema = VectorStoreSchemaConfig(index_name=\"test_collection\")\n",
"\n",
"# Create vector store instance using factory\n",
"vector_store = VectorStoreFactory.create_vector_store(\n",
" CUSTOM_VECTOR_STORE_TYPE, vector_store_config\n",
" CUSTOM_VECTOR_STORE_TYPE, vector_store_schema_config=schema\n",
")\n",
"\n",
"print(f\"✅ Created vector store instance: {type(vector_store).__name__}\")\n",
Expand Down Expand Up @@ -486,9 +485,13 @@
" print(\"🚀 Simulating GraphRAG pipeline with custom vector store...\\n\")\n",
"\n",
" # 1. GraphRAG creates vector store using factory\n",
" config = {\"collection_name\": \"graphrag_entities\", \"similarity_threshold\": 0.3}\n",
" schema = VectorStoreSchemaConfig(index_name=\"graphrag_entities\")\n",
"\n",
" store = VectorStoreFactory.create_vector_store(CUSTOM_VECTOR_STORE_TYPE, config)\n",
" store = VectorStoreFactory.create_vector_store(\n",
" CUSTOM_VECTOR_STORE_TYPE,\n",
" vector_store_schema_config=schema,\n",
" similarity_threshold=0.3,\n",
" )\n",
" store.connect()\n",
"\n",
" print(\"✅ Step 1: Vector store created and connected\")\n",
Expand Down Expand Up @@ -549,7 +552,8 @@
" # Test 1: Basic functionality\n",
" print(\"Test 1: Basic functionality\")\n",
" store = VectorStoreFactory.create_vector_store(\n",
" CUSTOM_VECTOR_STORE_TYPE, {\"collection_name\": \"test\"}\n",
" CUSTOM_VECTOR_STORE_TYPE,\n",
" vector_store_schema_config=VectorStoreSchemaConfig(index_name=\"test\"),\n",
" )\n",
" store.connect()\n",
"\n",
Expand Down Expand Up @@ -597,7 +601,8 @@
" # Test 5: Error handling\n",
" print(\"\\nTest 5: Error handling\")\n",
" disconnected_store = VectorStoreFactory.create_vector_store(\n",
" CUSTOM_VECTOR_STORE_TYPE, {\"collection_name\": \"test2\"}\n",
" CUSTOM_VECTOR_STORE_TYPE,\n",
" vector_store_schema_config=VectorStoreSchemaConfig(index_name=\"test2\"),\n",
" )\n",
"\n",
" try:\n",
Expand Down Expand Up @@ -653,7 +658,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "graphrag-venv (3.10.18)",
"display_name": "graphrag",
"language": "python",
"name": "python3"
},
Expand All @@ -667,7 +672,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
"version": "3.12.10"
}
},
"nbformat": 4,
Expand Down
37 changes: 21 additions & 16 deletions docs/examples_notebooks/drift_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"import tiktoken\n",
"\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.drift_search_config import DRIFTSearchConfig\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
"from graphrag.config.models.vector_store_schema_config import VectorStoreSchemaConfig\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.query.indexer_adapters import (\n",
" read_indexer_entities,\n",
Expand All @@ -37,6 +37,7 @@
" DRIFTSearchContextBuilder,\n",
")\n",
"from graphrag.query.structured_search.drift_search.search import DRIFTSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"from graphrag.vector_stores.lancedb import LanceDBVectorStore\n",
"\n",
"INPUT_DIR = \"./inputs/operation dulce\"\n",
Expand All @@ -62,12 +63,16 @@
"# load description embeddings to an in-memory lancedb vectorstore\n",
"# to connect to a remote db, specify url and port values.\n",
"description_embedding_store = LanceDBVectorStore(\n",
" collection_name=\"default-entity-description\",\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"default-entity-description\"\n",
" ),\n",
")\n",
"description_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
"full_content_embedding_store = LanceDBVectorStore(\n",
" collection_name=\"default-community-full_content\",\n",
" vector_store_schema_config=VectorStoreSchemaConfig(\n",
" index_name=\"default-community-full_content\"\n",
" )\n",
")\n",
"full_content_embedding_store.connect(db_uri=LANCEDB_URI)\n",
"\n",
Expand All @@ -94,33 +99,33 @@
"outputs": [],
"source": [
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n",
"embedding_model = os.environ[\"GRAPHRAG_EMBEDDING_MODEL\"]\n",
"\n",
"chat_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.OpenAIChat,\n",
" model=llm_model,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"chat_model = ModelManager().get_or_create_chat_model(\n",
" name=\"local_search\",\n",
" model_type=ModelType.OpenAIChat,\n",
" model_type=ModelType.Chat,\n",
" config=chat_config,\n",
")\n",
"\n",
"token_encoder = tiktoken.encoding_for_model(llm_model)\n",
"tokenizer = get_tokenizer(chat_config)\n",
"\n",
"embedding_config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.OpenAIEmbedding,\n",
" model=embedding_model,\n",
" type=ModelType.Embedding,\n",
" model_provider=\"openai\",\n",
" model=\"text-embedding-3-small\",\n",
" max_retries=20,\n",
")\n",
"\n",
"text_embedder = ModelManager().get_or_create_embedding_model(\n",
" name=\"local_search_embedding\",\n",
" model_type=ModelType.OpenAIEmbedding,\n",
" model_type=ModelType.Embedding,\n",
" config=embedding_config,\n",
")"
]
Expand Down Expand Up @@ -173,12 +178,12 @@
" reports=reports,\n",
" entity_text_embeddings=description_embedding_store,\n",
" text_units=text_units,\n",
" token_encoder=token_encoder,\n",
" tokenizer=tokenizer,\n",
" config=drift_params,\n",
")\n",
"\n",
"search = DRIFTSearch(\n",
" model=chat_model, context_builder=context_builder, token_encoder=token_encoder\n",
" model=chat_model, context_builder=context_builder, tokenizer=tokenizer\n",
")"
]
},
Expand Down Expand Up @@ -212,7 +217,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "graphrag",
"language": "python",
"name": "python3"
},
Expand All @@ -226,7 +231,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.10"
}
},
"nbformat": 4,
Expand Down
22 changes: 11 additions & 11 deletions docs/examples_notebooks/global_search.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"import os\n",
"\n",
"import pandas as pd\n",
"import tiktoken\n",
"\n",
"from graphrag.config.enums import ModelType\n",
"from graphrag.config.models.language_model_config import LanguageModelConfig\n",
Expand All @@ -32,7 +31,8 @@
"from graphrag.query.structured_search.global_search.community_context import (\n",
" GlobalCommunityContext,\n",
")\n",
"from graphrag.query.structured_search.global_search.search import GlobalSearch"
"from graphrag.query.structured_search.global_search.search import GlobalSearch\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer"
]
},
{
Expand All @@ -58,21 +58,21 @@
"outputs": [],
"source": [
"api_key = os.environ[\"GRAPHRAG_API_KEY\"]\n",
"llm_model = os.environ[\"GRAPHRAG_LLM_MODEL\"]\n",
"\n",
"config = LanguageModelConfig(\n",
" api_key=api_key,\n",
" type=ModelType.OpenAIChat,\n",
" model=llm_model,\n",
" type=ModelType.Chat,\n",
" model_provider=\"openai\",\n",
" model=\"gpt-4.1\",\n",
" max_retries=20,\n",
")\n",
"model = ModelManager().get_or_create_chat_model(\n",
" name=\"global_search\",\n",
" model_type=ModelType.OpenAIChat,\n",
" model_type=ModelType.Chat,\n",
" config=config,\n",
")\n",
"\n",
"token_encoder = tiktoken.encoding_for_model(llm_model)"
"tokenizer = get_tokenizer(config)"
]
},
{
Expand Down Expand Up @@ -142,7 +142,7 @@
" community_reports=reports,\n",
" communities=communities,\n",
" entities=entities, # default to None if you don't want to use community weights for ranking\n",
" token_encoder=token_encoder,\n",
" tokenizer=tokenizer,\n",
")"
]
},
Expand Down Expand Up @@ -193,7 +193,7 @@
"search_engine = GlobalSearch(\n",
" model=model,\n",
" context_builder=context_builder,\n",
" token_encoder=token_encoder,\n",
" tokenizer=tokenizer,\n",
" max_data_tokens=12_000, # change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000)\n",
" map_llm_params=map_llm_params,\n",
" reduce_llm_params=reduce_llm_params,\n",
Expand Down Expand Up @@ -241,7 +241,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "graphrag",
"language": "python",
"name": "python3"
},
Expand All @@ -255,7 +255,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.10"
}
},
"nbformat": 4,
Expand Down
Loading
Loading