Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f4cc018
embedding api improvements
ricopinazo Aug 28, 2025
8bf8e8c
disable embeddings by default
ricopinazo Aug 28, 2025
6fa9962
fix compilation error for enable_embeddings
ricopinazo Aug 28, 2025
3da0307
fix compilation error in main.rs
ricopinazo Aug 28, 2025
06640c0
sort milvus integration
ricopinazo Sep 5, 2025
2b5b5f0
lancedb vector storage implementation and rename score to distance
ricopinazo Sep 12, 2025
f906e94
like this almost compiles but not yet
ricopinazo Sep 16, 2025
976ff92
force lancedb to work with other version of chrono but still polars-a…
ricopinazo Sep 17, 2025
8e1975a
this seems to compile with all features but storage
ricopinazo Sep 17, 2025
e942d0e
put pometry-storage back into place
ricopinazo Sep 17, 2025
5eeb703
sort new multi-embedding vector cache
ricopinazo Sep 17, 2025
f637544
still some rust tests failing
ricopinazo Sep 19, 2025
8b999da
remove outdated comment
ricopinazo Sep 19, 2025
98f2555
all rust tests now passing with the new custom openai server
ricopinazo Sep 24, 2025
b1c66c7
some compilation errors caused by the server future not being Sync
ricopinazo Sep 26, 2025
9952530
fixing some python tests
ricopinazo Sep 29, 2025
d7fe667
wip
ricopinazo Oct 10, 2025
f0af2a9
trying to avoid the drop of the tempdir but still not working
ricopinazo Oct 17, 2025
78e7701
fix compilation error
ricopinazo Oct 17, 2025
3701786
fix bug caused by a temp dir being dropped too soon
ricopinazo Oct 23, 2025
705d588
fix python tests
ricopinazo Oct 24, 2025
ed89cea
Merge branch 'master' into embedding-api
ricopinazo Oct 24, 2025
f4ba9cc
fix dependency conflicts
ricopinazo Oct 24, 2025
857c5ab
format
ricopinazo Oct 24, 2025
656b314
fix rust test
ricopinazo Oct 27, 2025
517c322
change rust version
ricopinazo Oct 27, 2025
df51a17
started implementing context manager for PyEmbeddingServer
ricopinazo Nov 21, 2025
ffa9b50
context manager for embedding server
ricopinazo Nov 27, 2025
67ec661
all graphql vector tests are passing now
ricopinazo Dec 5, 2025
8fca6f8
re-indexing, graphql vectorise, and minor fixes
ricopinazo Dec 17, 2025
4246c4d
big cleanup
ricopinazo Dec 18, 2025
df5fe85
Merge branch 'master' into embedding-api
ricopinazo Dec 18, 2025
8080ea2
update Cargo.lock
ricopinazo Dec 18, 2025
e2aa20e
handle all unwraps in lancedb.rs
ricopinazo Dec 19, 2025
55afcc2
fix cache tests
ricopinazo Dec 19, 2025
1c8dd0b
fix benchmark compilation error
ricopinazo Dec 19, 2025
066aabd
fix python graphql vector tests
ricopinazo Dec 19, 2025
d802890
fix more graphql tests
ricopinazo Dec 19, 2025
7aa1b44
this should fix all the compilation errors on the tests
ricopinazo Dec 19, 2025
d8323bc
make vector cache lazy
ricopinazo Dec 22, 2025
a4e4d09
fix rust doc tests
ricopinazo Dec 22, 2025
c001d68
fix compilation error on a test and expose reindex api
ricopinazo Dec 22, 2025
7b3420c
handle python embedding errors
ricopinazo Dec 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,832 changes: 2,991 additions & 841 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ readme = "README.md"
homepage = "https://github.com/Raphtory/raphtory/"
keywords = ["graph", "temporal-graph", "temporal"]
authors = ["Pometry"]
rust-version = "1.86.0"
rust-version = "1.88.0"
edition = "2021"

# debug symbols are using a lot of resources
Expand Down Expand Up @@ -87,7 +87,7 @@ parking_lot = { version = "0.12.1", features = [
"send_guard",
] }
ordered-float = "4.2.0"
chrono = { version = "0.4.42", features = ["serde"] }
chrono = { version = "0.4.41", features = ["serde"] }
tempfile = "3.10.0"
futures-util = "0.3.30"
thiserror = "2.0.0"
Expand Down Expand Up @@ -153,7 +153,7 @@ pest_derive = "2.7.8"
minijinja = "2.2.0"
minijinja-contrib = { version = "2.2.0", features = ["datetime"] }
datafusion = { version = "50.0.0" }
arroy = "0.6.1"
lancedb = "0.22.2" # this is the latest and asks for chrono 0.4.41
heed = "0.22.0"
sqlparser = "0.58.0"
futures = "0.3"
Expand All @@ -172,4 +172,3 @@ strsim = { version = "0.11.1" }
uuid = { version = "1.16.0", features = ["v4"] }
strum = "0.27.2"
strum_macros = "0.27.2"

26 changes: 26 additions & 0 deletions milvus/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: "3.9"

services:
milvus:
image: milvusdb/milvus:v2.6.1
container_name: milvus
command: ["milvus", "run", "standalone"]
security_opt:
- seccomp:unconfined
environment:
ETCD_USE_EMBED: "true"
COMMON_STORAGETYPE: "local"
DEPLOY_MODE: "STANDALONE"
ports:
- "9091:9091"
- "19530:19530"

attu:
image: zilliz/attu:v2.6
container_name: attu
environment:
MILVUS_URL: "http://milvus:19530"
ports:
- "8000:3000"
depends_on:
- milvus
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import tempfile
from raphtory.graphql import GraphServer, RaphtoryClient
from raphtory import Graph
from raphtory.vectors import OpenAIEmbeddings, embedding_server


def embedding(texts):
return [[text.count("a"), text.count("b")] for text in texts]


def test_embedding():
result = embedding(texts=["aaa", "b", "ab", "ba"])
assert result == [[3, 0], [0, 1], [1, 1], [1, 1]]

@embedding_server(address="0.0.0.0:7340")
def embeddings(text: str):
return [text.count("a"), text.count("b")]

def setup_graph(g):
g.add_node(1, "aab")
Expand Down Expand Up @@ -58,51 +54,61 @@ def assert_correct_documents(client):
}


def setup_server(work_dir):
server = GraphServer(work_dir)
server = server.set_embeddings(
cache="/tmp/graph-cache",
embedding=embedding,
nodes="{{ name }}",
edges=False,
)
return server


def test_new_graph():
print("test_new_graph")
work_dir = tempfile.TemporaryDirectory()
server = setup_server(work_dir.name)
with server.start():
client = RaphtoryClient("http://localhost:1736")
client.new_graph("abb", "EVENT")
rg = client.remote_graph("abb")
setup_graph(rg)
assert_correct_documents(client)
server = GraphServer(work_dir.name)
with embeddings.start():
with server.start():
client = RaphtoryClient("http://localhost:1736")
client.new_graph("abb", "EVENT")
rg = client.remote_graph("abb")
setup_graph(rg)
client.query("""
{
vectoriseGraph(path: "abb", model: { openAI: { model: "whatever", apiBase: "http://localhost:7340" } }, nodes: { custom: "{{ name }}" }, edges: { enabled: false })
}
""")
assert_correct_documents(client)


def test_upload_graph():
print("test_upload_graph")
work_dir = tempfile.mkdtemp()
temp_dir = tempfile.mkdtemp()
server = setup_server(work_dir)
with server.start():
client = RaphtoryClient("http://localhost:1736")
g = Graph()
setup_graph(g)
g_path = temp_dir + "/abb"
g.save_to_zip(g_path)
client.upload_graph(path="abb", file_path=g_path, overwrite=True)
assert_correct_documents(client)
work_dir = tempfile.TemporaryDirectory()
temp_dir = tempfile.TemporaryDirectory()
server = GraphServer(work_dir.name)
with embeddings.start():
with server.start():
client = RaphtoryClient("http://localhost:1736")
g = Graph()
setup_graph(g)
g_path = temp_dir.name + "/abb"
g.save_to_zip(g_path)
client.upload_graph(path="abb", file_path=g_path, overwrite=True)
client.query("""
{
vectoriseGraph(path: "abb", model: { openAI: { model: "whatever", apiBase: "http://localhost:7340" } }, nodes: { custom: "{{ name }}" }, edges: { enabled: false })
}
""")
assert_correct_documents(client)

GRAPH_NAME = "abb"

def test_include_graph():
work_dir = tempfile.mkdtemp()
g_path = work_dir + "/abb"
work_dir = tempfile.TemporaryDirectory()
g_path = work_dir.name + "/" + GRAPH_NAME
g = Graph()
setup_graph(g)
g.save_to_file(g_path)
server = setup_server(work_dir)
with server.start():
client = RaphtoryClient("http://localhost:1736")
assert_correct_documents(client)
server = GraphServer(work_dir.name)
with embeddings.start():
embedding_client = OpenAIEmbeddings(api_base="http://localhost:7340")
server.vectorise_graph(
name=GRAPH_NAME,
embeddings=embedding_client,
nodes="{{ name }}",
edges=False
)
with server.start():
client = RaphtoryClient("http://localhost:1736")
assert_correct_documents(client)
82 changes: 56 additions & 26 deletions python/tests/test_base_install/test_vectors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import pytest
import requests
from raphtory import Graph
from raphtory.vectors import VectorisedGraph
from raphtory.vectors import VectorisedGraph, OpenAIEmbeddings, embedding_server

embedding_map = {
"raphtory": [1.0, 0.0, 0.0], # this is now needed,
"node1": [1.0, 0.0, 0.0],
"node2": [0.0, 1.0, 0.0],
"node3": [0.0, 0.0, 1.0],
Expand All @@ -11,20 +14,32 @@
"edge3": [0.0, 1.0, 1.0],
}


def single_embedding(text: str):
try:
@pytest.fixture(autouse=True)
def test_server():
@embedding_server(address="0.0.0.0:7340") # TODO: ask only for PORT!!!
def custom_embeddings(text: str):
return embedding_map[text]
except:
raise Exception(f"unexpected document content: {text}")

with custom_embeddings.start():
yield

def test_failing_python_embeddings():
@embedding_server(address="0.0.0.0:7342")
def failing_embeddings(text: str):
assert(False)

with failing_embeddings.start():
headers = { "Content-Type": "application/json" }
payload = { "model": "whatever", "input": ["Hello world"] }
response = requests.post("http://localhost:7342/embeddings", headers=headers, json=payload)
assert(response.status_code == 500)
response = requests.post("http://localhost:7342/embeddings", headers=headers, json=payload)
assert(response.status_code == 500)

def embedding(texts):
return [single_embedding(text) for text in texts]


def floats_are_equals(float1: float, float2: float) -> bool:
return float1 + 0.001 > float2 and float1 - 0.001 < float2
return float1 + 0.00001 > float2 and float1 - 0.01 < float2


# the graph generated by this function looks like this:
Expand All @@ -48,26 +63,34 @@ def create_graph() -> VectorisedGraph:
g.add_edge(3, "node1", "node3", {"name": "edge2"})
g.add_edge(4, "node3", "node4", {"name": "edge3"})

vg = g.vectorise(embedding, nodes="{{ name }}", edges="{{ properties.name }}")
embeddings = OpenAIEmbeddings(api_base="http://localhost:7340")
vg = g.vectorise(embeddings, nodes="{{ name }}", edges="{{ properties.name }}")

return vg

def test_embedding_sever_context_manager():
@embedding_server(address="0.0.0.0:7341")
def constant(text: str):
return [1.0]

with constant.start():
headers = { "Content-Type": "application/json" }
data = {
# "model": "whatever",
"input": ["The text to vectorise"]
}
response = requests.post("http://localhost:7341/embeddings", headers=headers, json=data)
response.raise_for_status()
result = response.json()
vector = result['data'][0]['embedding']
assert vector == [1.0]


def test_selection():
vg = create_graph()

################################
selection = vg.empty_selection()
nodes_to_select = ["node1", "node2"]
edges_to_select = [("node1", "node2"), ("node1", "node3")]
selection = vg.empty_selection()
selection.add_nodes(nodes_to_select)
selection.add_edges(edges_to_select)
nodes = selection.nodes()
###########################

assert len(vg.empty_selection().get_documents()) == 0
assert len(vg.empty_selection().get_documents_with_scores()) == 0
assert len(vg.empty_selection().get_documents_with_distances()) == 0

nodes_to_select = ["node1", "node2"]
edges_to_select = [("node1", "node2"), ("node1", "node3")]
Expand All @@ -77,7 +100,9 @@ def test_selection():
nodes = selection.nodes()
node_names_returned = [node.name for node in nodes]
assert node_names_returned == nodes_to_select
print("before get documents")
docs = [doc.content for doc in selection.get_documents()]
print("after get documents")
assert docs == ["node1", "node2"]

selection = vg.empty_selection()
Expand Down Expand Up @@ -113,8 +138,8 @@ def test_search():
assert edge_names_returned == [("node1", "node2")]
# TODO: same for edges ?

[(doc1, score1)] = vg.entities_by_similarity("node1", 1).get_documents_with_scores()
assert floats_are_equals(score1, 1.0)
[(doc1, distance1)] = vg.entities_by_similarity("node1", 1).get_documents_with_distances()
assert floats_are_equals(distance1, 0.0)
assert (doc1.entity.name, doc1.content) == ("node1", "node1")

# chained search
Expand Down Expand Up @@ -205,16 +230,19 @@ def test_filtering_by_entity_type():
assert contents == ["edge1", "edge2", "edge3"]


def constant_embedding(texts):
return [[1.0, 0.0, 0.0] for text in texts]

@embedding_server(address="0.0.0.0:7341")
def constant_embedding(_text):
return [1.0, 0.0, 0.0]

def test_default_template():
g = Graph()
g.add_node(1, "node1")
g.add_edge(2, "node1", "node1")

vg = g.vectorise(constant_embedding)
running = constant_embedding.start()

vg = g.vectorise(OpenAIEmbeddings(api_base="http://localhost:7341"))

node_docs = vg.nodes_by_similarity(query="whatever", limit=10).get_documents()
assert len(node_docs) == 1
Expand All @@ -226,3 +254,5 @@ def test_default_template():
edge_docs[0].content
== "There is an edge from node1 to node1 with events at:\n- Jan 1 1970 00:00\n"
)

running.stop()
19 changes: 15 additions & 4 deletions raphtory-benchmark/src/common/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ use rand::{rngs::StdRng, Rng, SeedableRng};
use raphtory::{
prelude::{AdditionOps, Graph, NO_PROPS},
vectors::{
cache::VectorCache, embeddings::EmbeddingResult, template::DocumentTemplate,
vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, Embedding,
cache::VectorCache, embeddings::EmbeddingResult, storage::OpenAIEmbeddings,
template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph,
Embedding,
},
};
use tokio::runtime::Runtime;
Expand Down Expand Up @@ -35,12 +36,22 @@ pub fn create_graph_for_vector_bench(size: usize) -> Graph {
}

pub async fn vectorise_graph_for_bench_async(graph: Graph) -> VectorisedGraph<Graph> {
let cache = VectorCache::in_memory(embedding_model);
let cache = VectorCache::in_memory();
let model = cache
.openai(OpenAIEmbeddings {
model: "whatever".to_owned(),
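api_base: Some("localhost://1783".to_owned()), // TODO: also run an embedding server in the background so that this works. NOTE(review): "localhost://1783" looks like a malformed URL; presumably "http://localhost:1783" is intended (confirm)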
api_key_env: None,
project_id: None,
org_id: None,
})
.await
.unwrap();
let template = DocumentTemplate {
node_template: Some("{{name}}".to_owned()),
edge_template: None,
};
graph.vectorise(cache, template, None, true).await.unwrap()
graph.vectorise(model, template, None, true).await.unwrap()
}

// TODO: remove this version
Expand Down
17 changes: 17 additions & 0 deletions raphtory-graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -2546,6 +2546,12 @@ type QueryRoot {
"""
updateGraph(path: String!): MutableGraph!
"""
Vectorise the graph stored at the given path, using the given node and edge templates

Returns:: Boolean
"""
vectoriseGraph(path: String!, model: String, apiBase: String, nodes: Template, edges: Template): Boolean!
"""
Create vectorised graph in the format used for queries

Returns:: GqlVectorisedGraph
Expand Down Expand Up @@ -2612,6 +2618,17 @@ enum SortByTime {
EARLIEST
}

input Template @oneOf {
"""
The default template.
"""
enabled: Boolean
"""
A custom template.
"""
custom: String
}

type TemporalProperties {
"""
Get property value matching the specified key.
Expand Down
Loading
Loading