From 1819cd0208d8c63a1cd208da591665fdaf5a2789 Mon Sep 17 00:00:00 2001 From: beinan Date: Wed, 11 Feb 2026 19:53:48 +0000 Subject: [PATCH] feat(graph): split catalog crate --- Cargo.lock | 14 ++- Cargo.toml | 1 + crates/lance-graph-catalog/Cargo.toml | 21 ++++ crates/lance-graph-catalog/README.md | 3 + crates/lance-graph-catalog/src/lib.rs | 10 ++ .../src/namespace/directory.rs | 109 ++++++++++++++++++ .../lance-graph-catalog/src/namespace/mod.rs | 3 + .../lance-graph-catalog/src/source_catalog.rs | 101 ++++++++++++++++ crates/lance-graph/Cargo.toml | 2 +- crates/lance-graph/README.md | 7 +- crates/lance-graph/src/namespace/directory.rs | 108 +---------------- crates/lance-graph/src/source_catalog.rs | 99 +--------------- 12 files changed, 270 insertions(+), 208 deletions(-) create mode 100644 crates/lance-graph-catalog/Cargo.toml create mode 100644 crates/lance-graph-catalog/README.md create mode 100644 crates/lance-graph-catalog/src/lib.rs create mode 100644 crates/lance-graph-catalog/src/namespace/directory.rs create mode 100644 crates/lance-graph-catalog/src/namespace/mod.rs create mode 100644 crates/lance-graph-catalog/src/source_catalog.rs diff --git a/Cargo.lock b/Cargo.lock index edd6e7d..383436b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3572,7 +3572,6 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", - "async-trait", "criterion", "datafusion", "datafusion-common", @@ -3582,6 +3581,7 @@ dependencies = [ "futures", "lance", "lance-arrow", + "lance-graph-catalog", "lance-index", "lance-linalg", "lance-namespace", @@ -3593,6 +3593,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "lance-graph-catalog" +version = "0.5.2" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion", + "lance-namespace", + "snafu", + "tokio", +] + [[package]] name = "lance-index" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index e188a70..9001ce1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ 
"crates/lance-graph", + "crates/lance-graph-catalog", ] exclude = [ "python", diff --git a/crates/lance-graph-catalog/Cargo.toml b/crates/lance-graph-catalog/Cargo.toml new file mode 100644 index 0000000..8ee3e35 --- /dev/null +++ b/crates/lance-graph-catalog/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "lance-graph-catalog" +version = "0.5.2" +edition = "2021" +license = "Apache-2.0" +authors = ["Lance Devs "] +repository = "https://github.com/lancedb/lance-graph" +readme = "README.md" +description = "Catalog and namespace utilities for Lance graph" +keywords = ["lance", "graph", "catalog", "namespace"] +categories = ["database", "data-structures", "science"] + +[dependencies] +arrow-schema = "56.2" +async-trait = "0.1" +datafusion = { version = "50.3", default-features = false } +lance-namespace = "1.0.1" +snafu = "0.8" + +[dev-dependencies] +tokio = { version = "1.37", features = ["macros", "rt-multi-thread"] } diff --git a/crates/lance-graph-catalog/README.md b/crates/lance-graph-catalog/README.md new file mode 100644 index 0000000..ce8ff4b --- /dev/null +++ b/crates/lance-graph-catalog/README.md @@ -0,0 +1,3 @@ +# Lance Graph Catalog + +Catalog and namespace utilities shared by the Lance graph query engine. diff --git a/crates/lance-graph-catalog/src/lib.rs b/crates/lance-graph-catalog/src/lib.rs new file mode 100644 index 0000000..b43457f --- /dev/null +++ b/crates/lance-graph-catalog/src/lib.rs @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Catalog and namespace utilities for Lance Graph. 
+ +pub mod namespace; +pub mod source_catalog; + +pub use namespace::DirNamespace; +pub use source_catalog::{GraphSourceCatalog, InMemoryCatalog, SimpleTableSource}; diff --git a/crates/lance-graph-catalog/src/namespace/directory.rs b/crates/lance-graph-catalog/src/namespace/directory.rs new file mode 100644 index 0000000..541a9b2 --- /dev/null +++ b/crates/lance-graph-catalog/src/namespace/directory.rs @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: Apache-2.0 + +use async_trait::async_trait; +use lance_namespace::models::{DescribeTableRequest, DescribeTableResponse}; +use lance_namespace::{Error as NamespaceError, LanceNamespace, Result}; +use snafu::location; + +/// A namespace that resolves table names relative to a base directory or URI. +#[derive(Debug, Clone)] +pub struct DirNamespace { + base_uri: String, +} + +impl DirNamespace { + /// Create a new directory-backed namespace rooted at `base_uri`. + /// + /// The URI is normalized so that it does not end with a trailing slash. + pub fn new(base_uri: impl Into<String>) -> Self { + let uri = base_uri.into(); + let clean_uri = uri.trim_end_matches('/').to_string(); + Self { + base_uri: clean_uri, + } + } + + /// Return the normalized base URI. 
+ pub fn base_uri(&self) -> &str { + &self.base_uri + } +} + +#[async_trait] +impl LanceNamespace for DirNamespace { + fn namespace_id(&self) -> String { + format!("DirNamespace {{ base_uri: '{}' }}", self.base_uri) + } + + async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> { + let id = request.id.ok_or_else(|| { + NamespaceError::invalid_input( + "DirNamespace requires the table identifier to be provided", + location!(), + ) + })?; + + if id.len() != 1 { + return Err(NamespaceError::invalid_input( + format!( + "DirNamespace expects identifiers with a single component, got {:?}", + id + ), + location!(), + )); + } + + let table_name = &id[0]; + let location = format!("{}/{}.lance", self.base_uri, table_name); + + let mut response = DescribeTableResponse::new(); + response.location = Some(location); + response.storage_options = None; + Ok(response) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn describe_table_returns_clean_location() { + let namespace = DirNamespace::new("s3://bucket/path/"); + let mut request = DescribeTableRequest::new(); + request.id = Some(vec!["users".to_string()]); + + let response = namespace.describe_table(request).await.unwrap(); + assert_eq!( + response.location.as_deref(), + Some("s3://bucket/path/users.lance") + ); + } + + #[tokio::test] + async fn describe_table_rejects_missing_identifier() { + let namespace = DirNamespace::new("file:///tmp"); + let request = DescribeTableRequest::new(); + + let err = namespace.describe_table(request).await.unwrap_err(); + assert!( + err.to_string() + .contains("DirNamespace requires the table identifier"), + "unexpected error: {err}" + ); + } + + #[tokio::test] + async fn describe_table_rejects_multi_component_identifier() { + let namespace = DirNamespace::new("memory://namespace"); + let mut request = DescribeTableRequest::new(); + request.id = Some(vec!["foo".into(), "bar".into()]); + + let err = 
namespace.describe_table(request).await.unwrap_err(); + assert!( + err.to_string() + .contains("expects identifiers with a single component"), + "unexpected error: {err}" + ); + } +} diff --git a/crates/lance-graph-catalog/src/namespace/mod.rs b/crates/lance-graph-catalog/src/namespace/mod.rs new file mode 100644 index 0000000..fe6c89c --- /dev/null +++ b/crates/lance-graph-catalog/src/namespace/mod.rs @@ -0,0 +1,3 @@ +pub mod directory; + +pub use directory::DirNamespace; diff --git a/crates/lance-graph-catalog/src/source_catalog.rs b/crates/lance-graph-catalog/src/source_catalog.rs new file mode 100644 index 0000000..a122c87 --- /dev/null +++ b/crates/lance-graph-catalog/src/source_catalog.rs @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! Context-free source catalog for DataFusion logical planning. + +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +use arrow_schema::{Schema, SchemaRef}; +use datafusion::logical_expr::TableSource; + +/// A minimal catalog to resolve node labels and relationship types to logical table sources. +pub trait GraphSourceCatalog: Send + Sync { + fn node_source(&self, label: &str) -> Option<Arc<dyn TableSource>>; + fn relationship_source(&self, rel_type: &str) -> Option<Arc<dyn TableSource>>; +} + +/// A simple in-memory catalog useful for tests and bootstrap wiring. 
+pub struct InMemoryCatalog { + node_sources: HashMap<String, Arc<dyn TableSource>>, + rel_sources: HashMap<String, Arc<dyn TableSource>>, +} + +impl InMemoryCatalog { + pub fn new() -> Self { + Self { + node_sources: HashMap::new(), + rel_sources: HashMap::new(), + } + } + + pub fn with_node_source( + mut self, + label: impl Into<String>, + source: Arc<dyn TableSource>, + ) -> Self { + // Normalize key to lowercase for case-insensitive lookup + self.node_sources + .insert(label.into().to_lowercase(), source); + self + } + + pub fn with_relationship_source( + mut self, + rel_type: impl Into<String>, + source: Arc<dyn TableSource>, + ) -> Self { + // Normalize key to lowercase for case-insensitive lookup + self.rel_sources + .insert(rel_type.into().to_lowercase(), source); + self + } +} + +impl Default for InMemoryCatalog { + fn default() -> Self { + Self::new() + } +} + +impl GraphSourceCatalog for InMemoryCatalog { + /// Get node source with case-insensitive label lookup + /// + /// Note: Keys are stored as lowercase, so this is an O(1) operation. + fn node_source(&self, label: &str) -> Option<Arc<dyn TableSource>> { + self.node_sources.get(&label.to_lowercase()).cloned() + } + + /// Get relationship source with case-insensitive type lookup + /// + /// Note: Keys are stored as lowercase, so this is an O(1) operation. + fn relationship_source(&self, rel_type: &str) -> Option<Arc<dyn TableSource>> { + self.rel_sources.get(&rel_type.to_lowercase()).cloned() + } +} + +/// A trivial logical table source with a fixed schema. 
+pub struct SimpleTableSource { + schema: SchemaRef, +} + +impl SimpleTableSource { + pub fn new(schema: SchemaRef) -> Self { + Self { schema } + } + pub fn empty() -> Self { + Self { + schema: Arc::new(Schema::empty()), + } + } +} + +impl TableSource for SimpleTableSource { + fn as_any(&self) -> &dyn Any { + self + } + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/crates/lance-graph/Cargo.toml b/crates/lance-graph/Cargo.toml index 4956594..ffdf7d3 100644 --- a/crates/lance-graph/Cargo.toml +++ b/crates/lance-graph/Cargo.toml @@ -28,7 +28,7 @@ datafusion-expr = "50.3" datafusion-sql = "50.3" datafusion-functions-aggregate = "50.3" futures = "0.3" -async-trait = "0.1" +lance-graph-catalog = { path = "../lance-graph-catalog", version = "0.5.2" } lance = "1.0.0" lance-linalg = "1.0.0" lance-namespace = "1.0.1" diff --git a/crates/lance-graph/README.md b/crates/lance-graph/README.md index 43c94b6..89ab1f0 100644 --- a/crates/lance-graph/README.md +++ b/crates/lance-graph/README.md @@ -120,7 +120,12 @@ Basic aggregations like `COUNT` are supported. Optional matches and subqueries a - `config` – Graph configuration types and builders. - `query` – High level `CypherQuery` API and runtime. - `error` – `GraphError` and result helpers. -- `source_catalog` – Helpers for looking up table metadata. +- `namespace` – Namespace helpers (re-exported from `lance-graph-catalog`). +- `source_catalog` – Catalog helpers for looking up table metadata (re-exported from `lance-graph-catalog`). + +`lance-graph` re-exports the catalog and namespace types from the `lance-graph-catalog` crate for +API compatibility. You can depend on `lance-graph-catalog` directly if you only need catalog or +namespace utilities. 
## Error Handling diff --git a/crates/lance-graph/src/namespace/directory.rs b/crates/lance-graph/src/namespace/directory.rs index 541a9b2..a094a56 100644 --- a/crates/lance-graph/src/namespace/directory.rs +++ b/crates/lance-graph/src/namespace/directory.rs @@ -1,109 +1,3 @@ // SPDX-License-Identifier: Apache-2.0 -use async_trait::async_trait; -use lance_namespace::models::{DescribeTableRequest, DescribeTableResponse}; -use lance_namespace::{Error as NamespaceError, LanceNamespace, Result}; -use snafu::location; - -/// A namespace that resolves table names relative to a base directory or URI. -#[derive(Debug, Clone)] -pub struct DirNamespace { - base_uri: String, -} - -impl DirNamespace { - /// Create a new directory-backed namespace rooted at `base_uri`. - /// - /// The URI is normalized so that it does not end with a trailing slash. - pub fn new(base_uri: impl Into<String>) -> Self { - let uri = base_uri.into(); - let clean_uri = uri.trim_end_matches('/').to_string(); - Self { - base_uri: clean_uri, - } - } - - /// Return the normalized base URI. 
- pub fn base_uri(&self) -> &str { - &self.base_uri - } -} - -#[async_trait] -impl LanceNamespace for DirNamespace { - fn namespace_id(&self) -> String { - format!("DirNamespace {{ base_uri: '{}' }}", self.base_uri) - } - - async fn describe_table(&self, request: DescribeTableRequest) -> Result<DescribeTableResponse> { - let id = request.id.ok_or_else(|| { - NamespaceError::invalid_input( - "DirNamespace requires the table identifier to be provided", - location!(), - ) - })?; - - if id.len() != 1 { - return Err(NamespaceError::invalid_input( - format!( - "DirNamespace expects identifiers with a single component, got {:?}", - id - ), - location!(), - )); - } - - let table_name = &id[0]; - let location = format!("{}/{}.lance", self.base_uri, table_name); - - let mut response = DescribeTableResponse::new(); - response.location = Some(location); - response.storage_options = None; - Ok(response) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn describe_table_returns_clean_location() { - let namespace = DirNamespace::new("s3://bucket/path/"); - let mut request = DescribeTableRequest::new(); - request.id = Some(vec!["users".to_string()]); - - let response = namespace.describe_table(request).await.unwrap(); - assert_eq!( - response.location.as_deref(), - Some("s3://bucket/path/users.lance") - ); - } - - #[tokio::test] - async fn describe_table_rejects_missing_identifier() { - let namespace = DirNamespace::new("file:///tmp"); - let request = DescribeTableRequest::new(); - - let err = namespace.describe_table(request).await.unwrap_err(); - assert!( - err.to_string() - .contains("DirNamespace requires the table identifier"), - "unexpected error: {err}" - ); - } - - #[tokio::test] - async fn describe_table_rejects_multi_component_identifier() { - let namespace = DirNamespace::new("memory://namespace"); - let mut request = DescribeTableRequest::new(); - request.id = Some(vec!["foo".into(), "bar".into()]); - - let err = 
namespace.describe_table(request).await.unwrap_err(); - assert!( - err.to_string() - .contains("expects identifiers with a single component"), - "unexpected error: {err}" - ); - } -} +pub use lance_graph_catalog::DirNamespace; diff --git a/crates/lance-graph/src/source_catalog.rs b/crates/lance-graph/src/source_catalog.rs index a122c87..bf53303 100644 --- a/crates/lance-graph/src/source_catalog.rs +++ b/crates/lance-graph/src/source_catalog.rs @@ -1,101 +1,4 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -//! Context-free source catalog for DataFusion logical planning. - -use std::any::Any; -use std::collections::HashMap; -use std::sync::Arc; - -use arrow_schema::{Schema, SchemaRef}; -use datafusion::logical_expr::TableSource; - -/// A minimal catalog to resolve node labels and relationship types to logical table sources. -pub trait GraphSourceCatalog: Send + Sync { - fn node_source(&self, label: &str) -> Option<Arc<dyn TableSource>>; - fn relationship_source(&self, rel_type: &str) -> Option<Arc<dyn TableSource>>; -} - -/// A simple in-memory catalog useful for tests and bootstrap wiring. 
-pub struct InMemoryCatalog { - node_sources: HashMap<String, Arc<dyn TableSource>>, - rel_sources: HashMap<String, Arc<dyn TableSource>>, -} - -impl InMemoryCatalog { - pub fn new() -> Self { - Self { - node_sources: HashMap::new(), - rel_sources: HashMap::new(), - } - } - - pub fn with_node_source( - mut self, - label: impl Into<String>, - source: Arc<dyn TableSource>, - ) -> Self { - // Normalize key to lowercase for case-insensitive lookup - self.node_sources - .insert(label.into().to_lowercase(), source); - self - } - - pub fn with_relationship_source( - mut self, - rel_type: impl Into<String>, - source: Arc<dyn TableSource>, - ) -> Self { - // Normalize key to lowercase for case-insensitive lookup - self.rel_sources - .insert(rel_type.into().to_lowercase(), source); - self - } -} - -impl Default for InMemoryCatalog { - fn default() -> Self { - Self::new() - } -} - -impl GraphSourceCatalog for InMemoryCatalog { - /// Get node source with case-insensitive label lookup - /// - /// Note: Keys are stored as lowercase, so this is an O(1) operation. - fn node_source(&self, label: &str) -> Option<Arc<dyn TableSource>> { - self.node_sources.get(&label.to_lowercase()).cloned() - } - - /// Get relationship source with case-insensitive type lookup - /// - /// Note: Keys are stored as lowercase, so this is an O(1) operation. - fn relationship_source(&self, rel_type: &str) -> Option<Arc<dyn TableSource>> { - self.rel_sources.get(&rel_type.to_lowercase()).cloned() - } -} - -/// A trivial logical table source with a fixed schema. -pub struct SimpleTableSource { - schema: SchemaRef, -} - -impl SimpleTableSource { - pub fn new(schema: SchemaRef) -> Self { - Self { schema } - } - pub fn empty() -> Self { - Self { - schema: Arc::new(Schema::empty()), - } - } -} - -impl TableSource for SimpleTableSource { - fn as_any(&self) -> &dyn Any { - self - } - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} +pub use lance_graph_catalog::{GraphSourceCatalog, InMemoryCatalog, SimpleTableSource};