From 30504d3e42c8b57e3ea0425f526dc4ecbb574471 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 19:03:54 +0200 Subject: [PATCH 1/6] =?UTF-8?q?build(deps):=20bump=20Lance=206.0.1=20?= =?UTF-8?q?=E2=86=92=207.0.0=20(object=5Fstore=200.13.2,=20roaring=200.11.?= =?UTF-8?q?4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arrow stays 58 and DataFusion stays 53 (no change). The main transitive bump is object_store 0.12.5 → 0.13.2 (Cargo.lock also picks up follow-on updates such as opendal 0.55.0 → 0.56.0, plus the roaring pin described below). 141 upstream commits reviewed; no fixes lost (the 6.0.x release-branch backports are all forward-ported into 7.0.0). - object_store 0.13 moved get/put/head/rename/delete behind a new ObjectStoreExt trait (list/list_with_delimiter/put_opts stay on the core trait). Add `use object_store::ObjectStoreExt` in storage.rs and db/manifest/namespace.rs; no call-site changes. Mirrors Lance's own migration in PR #6672. - roaring pinned to 0.11.4 (cargo update -p roaring --precise 0.11.4). Lance 7.0.0's UpdatedFragmentOffsets newtype (lance#6650) derives Eq over a HashMap containing RoaringBitmap, which requires RoaringBitmap: Eq (added in roaring 0.11.4); the loose `roaring = "0.11"` constraint otherwise resolves to 0.11.3 and lance itself fails to compile. - lance#6774: merge-insert INSERT rows now stamp _row_created_at_version with the commit version (was a fallback of 1). Flip the lance_version_columns assertion to `== v2` and correct the changes/mod.rs rationale comment. Production change-detection keys on _row_last_updated_at_version + ID membership, so its logic is unaffected. Refs lance#6650, lance#6774, lance#6672. 
--- Cargo.lock | 1669 ++++++++++++++--- Cargo.toml | 18 +- crates/omnigraph/src/changes/mod.rs | 12 +- crates/omnigraph/src/db/manifest/namespace.rs | 4 +- crates/omnigraph/src/storage.rs | 2 +- .../omnigraph/tests/lance_version_columns.rs | 16 +- 6 files changed, 1423 insertions(+), 298 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21403b0a..33dd652e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,9 +23,9 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -34,7 +34,7 @@ version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "const-random", "getrandom 0.3.4", "once_cell", @@ -137,6 +137,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" version = "0.5.1" @@ -708,11 +717,11 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "hmac", + "hmac 0.12.1", "http 0.2.12", "http 1.4.0", "percent-encoding", - "sha2", + "sha2 0.10.9", "time", "tracing", ] @@ -980,7 +989,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" dependencies = [ "addr2line", - "cfg-if", + "cfg-if 1.0.4", "libc", "miniz_oxide", "object", @@ -1071,7 +1080,7 @@ version = "0.10.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] @@ -1083,9 +1092,9 @@ dependencies = [ "arrayref", "arrayvec 0.7.6", "cc", - "cfg-if", + "cfg-if 1.0.4", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -1097,6 +1106,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" +dependencies = [ + "hybrid-array", +] + [[package]] name = "block-padding" version = "0.3.3" @@ -1266,6 +1284,12 @@ dependencies = [ "smol_str", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.4" @@ -1278,6 +1302,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + [[package]] name = "chrono" version = "0.4.44" @@ -1308,7 +1343,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "crypto-common", + "crypto-common 0.1.7", "inout", ] @@ -1361,6 +1396,12 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" + [[package]] name = "color-eyre" version = "0.6.5" @@ -1394,6 +1435,25 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "comfy-table" version = "7.2.2" @@ -1436,6 +1496,12 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "const-random" version = "0.1.18" @@ -1456,12 +1522,37 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f12cc9948ed9604230cdddc7c86e270f9401ccbe3c2e98a4378c5e7632212f" + +[[package]] +name = "const_panic" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" +dependencies = [ + "typewit", +] + [[package]] name = "constant_time_eq" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" +[[package]] +name = 
"core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1478,6 +1569,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "countio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9702aee5d1d744c01d82f6915644f950f898e014903385464c773b96fefdecb" +dependencies = [ + "futures-io", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1487,6 +1587,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32c" version = "0.6.8" @@ -1502,7 +1611,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -1574,6 +1683,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + [[package]] name = "csv" version = "1.4.0" @@ -1595,6 +1713,31 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = 
"ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "darling" version = "0.23.0" @@ -1635,7 +1778,7 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crossbeam-utils", "hashbrown 0.14.5", "lock_api", @@ -1681,7 +1824,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "rand 0.9.2", "regex", @@ -1712,7 +1855,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "tokio", ] @@ -1737,7 +1880,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", ] [[package]] @@ -1756,7 +1899,7 @@ dependencies = [ "itertools 0.14.0", "libc", "log", - "object_store 0.13.2", + "object_store", "paste", "sqlparser", "tokio", @@ -1797,7 +1940,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store 0.13.2", + "object_store", "rand 0.9.2", "tokio", "url", @@ -1823,7 +1966,7 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store 0.13.2", + "object_store", "tokio", ] @@ -1845,7 +1988,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "regex", "tokio", ] @@ -1868,7 +2011,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.13.2", + "object_store", "serde_json", "tokio", "tokio-stream", @@ -1896,7 +2039,7 @@ dependencies 
= [ "datafusion-physical-expr-common", "futures", "log", - "object_store 0.13.2", + "object_store", "parking_lot", "rand 0.9.2", "tempfile", @@ -1965,7 +2108,7 @@ dependencies = [ "num-traits", "rand 0.9.2", "regex", - "sha2", + "sha2 0.10.9", "unicode-segmentation", "uuid", ] @@ -2284,7 +2427,7 @@ version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ - "const-oid", + "const-oid 0.9.6", "pem-rfc7468", "zeroize", ] @@ -2311,12 +2454,24 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", "subtle", ] +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.1", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", +] + [[package]] name = "dirs" version = "6.0.0" @@ -2358,6 +2513,21 @@ dependencies = [ "const-random", ] +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dunce" version = "1.0.5" @@ -2403,7 +2573,7 @@ version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "cfg-if", + "cfg-if 
1.0.4", ] [[package]] @@ -2578,9 +2748,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83cf860f6a6bf0a6a60fdfe5a36c75121fad5ea4332d1d12deee3e65b6047727" +checksum = "bcd0ce0249ac12fd44fcde62d435c36d881952c2f0df4d1de24b45e1dbba5ddb" dependencies = [ "arrow-array", "rand 0.9.2", @@ -2689,6 +2859,15 @@ dependencies = [ "slab", ] +[[package]] +name = "gearhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8cf82cf76cd16485e56295a1377c775ce708c9f1a0be6b029076d60a245d213" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "generator" version = "0.8.8" @@ -2696,7 +2875,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "log", "rustversion", @@ -2720,10 +2899,10 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2733,7 +2912,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "js-sys", "libc", "r-efi 5.3.0", @@ -2747,11 +2926,14 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", + "js-sys", "libc", "r-efi 6.0.0", + "rand_core 0.10.1", "wasip2", "wasip3", + "wasm-bindgen", ] [[package]] @@ -2760,6 +2942,26 @@ version = 
"0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +[[package]] +name = "git-version" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad568aa3db0fcbc81f2f116137f263d7304f512a1209b35b85150d3ef88ad19" +dependencies = [ + "git-version-macro", +] + +[[package]] +name = "git-version-macro" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "glob" version = "0.3.3" @@ -2822,7 +3024,7 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "crunchy", "num-traits", "zerocopy", @@ -2866,6 +3068,12 @@ version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +[[package]] +name = "heapify" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0049b265b7f201ca9ab25475b22b47fe444060126a51abe00f77d986fc5cc52e" + [[package]] name = "heck" version = "0.5.0" @@ -2884,22 +3092,44 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hf-xet" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "430b33fa84f92796d4d263070b6c0d3ca219df7b9a0e1853ee431029b1612bcd" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "more-asserts", + "serde", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "uuid", + "xet-client", + "xet-core-structures", + "xet-data", + 
"xet-runtime", +] + [[package]] name = "hmac" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", ] [[package]] -name = "home" -version = "0.5.12" +name = "hmac" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" dependencies = [ - "windows-sys 0.61.2", + "digest 0.11.3", ] [[package]] @@ -2975,6 +3205,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" version = "0.14.32" @@ -3073,9 +3312,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2 0.6.2", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -3271,7 +3512,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d09b98f7eace8982db770e4408e7470b028ce513ac28fecdc6bf4c30fe92b62" dependencies = [ "bitflags", - "cfg-if", + "cfg-if 1.0.4", "libc", ] @@ -3329,10 +3570,12 @@ checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "jiff-tzdb-platform", + "js-sys", "log", "portable-atomic", "portable-atomic-util", "serde_core", + "wasm-bindgen", "windows-sys 0.61.2", ] @@ -3362,6 +3605,55 @@ dependencies = [ "jiff-tzdb", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if 1.0.4", + "combine", + "jni-macros", + "jni-sys", + "log", + "simd_cesu8", + "thiserror", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -3403,29 +3695,31 @@ dependencies = [ ] [[package]] -name = "jsonwebtoken" -version = "9.3.1" +name = "keccak" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653" dependencies = [ - "base64", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", + "cpufeatures 0.2.17", ] [[package]] -name = "keccak" -version = "0.1.6" +name = "konst" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653" +checksum = "f660d5f887e3562f9ab6f4a14988795b694099d66b4f5dedc02d197ba9becb1d" dependencies = [ - "cpufeatures", + "const_panic", + "konst_proc_macros", + "typewit", ] +[[package]] +name = "konst_proc_macros" +version = 
"0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e037a2e1d8d5fdbd49b16a4ea09d5d6401c1f29eca5ff29d03d3824dba16256a" + [[package]] name = "lalrpop" version = "0.22.2" @@ -3460,10 +3754,11 @@ dependencies = [ [[package]] name = "lance" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34e854994e84d043897f5ec9fb609221e9e69e3fd52996cd715d979fcd349f6" +checksum = "3944aca86f4c78f4da04af1c2bf33e664a2826b7af72972ad200d6b9de59019f" dependencies = [ + "arc-swap", "arrow", "arrow-arith", "arrow-array", @@ -3478,9 +3773,11 @@ dependencies = [ "async-trait", "async_cell", "aws-credential-types", + "bitpacking", "byteorder", "bytes", "chrono", + "crossbeam-queue", "crossbeam-skiplist", "dashmap", "datafusion", @@ -3507,13 +3804,14 @@ dependencies = [ "lance-tokenizer", "log", "moka", - "object_store 0.12.5", + "object_store", "permutation", "pin-project", "prost", "prost-build", "prost-types", "rand 0.9.2", + "rayon", "roaring", "semver", "serde", @@ -3529,13 +3827,12 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7827fe404358c27d120ee8ea8ef7b9415c2911d54072bec83dd689d750ae65da" +checksum = "253f4a0a70580c985b91e65e9ca6cad644825a4078de28d8efbacf3ffbd7ecdc" dependencies = [ "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-ord", @@ -3552,9 +3849,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cd0b31570d50fe13c7e4e36b03e1f1c99c3d8e5a34845b24b0665b51b40570d" +checksum = "80c4d12521b1945041dd515a56aa0854973138e7ac12111c92572e33e4ecb593" dependencies = [ "arrayref", "paste", @@ -3563,9 +3860,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "6.0.1" +version = "7.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b128c213c676cb8e03c62a68670642770825171e64097cc2da97cbb19fe35d29" +checksum = "13f84020da5a484e2f07dd1796e09785ed7cd889857ebc4cb77e32ef214ee594" dependencies = [ "arrow-array", "arrow-buffer", @@ -3573,7 +3870,6 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "chrono", "datafusion-common", "datafusion-sql", "deepsize", @@ -3582,10 +3878,9 @@ dependencies = [ "lance-arrow", "libc", "log", - "mock_instant", "moka", "num_cpus", - "object_store 0.12.5", + "object_store", "pin-project", "prost", "rand 0.9.2", @@ -3602,9 +3897,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e03b2de71cbcd09b10bf1a17c83cacbc0176ecd97203fb72b9e59d9b8f9a3743" +checksum = "7460597a66534a75987993d4dac5bc330586d99c5b79ae73367dbcbd4e29e576" dependencies = [ "arrow", "arrow-array", @@ -3628,16 +3923,15 @@ dependencies = [ "pin-project", "prost", "prost-build", - "snafu", "tokio", "tracing", ] [[package]] name = "lance-datagen" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fe7c7ea7fd397e495a1646fec360e46ee0cbd75718f1c0e887aad657c5f2944" +checksum = "046f5506ed2271cd941a050de7bf535dd3aedc291aadec836a63fa56c5926e3b" dependencies = [ "arrow", "arrow-array", @@ -3655,9 +3949,9 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe3f8070835b407d8db9ea8728386bc3207ba23c66a9c22d344e231ef12b77ca" +checksum = "7af54edf43dcf9d6a56cc636eb35d457e68373c6448dca3f0891b3325b4a24e6" dependencies = [ "arrow-arith", "arrow-array", @@ -3682,9 +3976,7 @@ dependencies = [ "num-traits", "prost", "prost-build", - "prost-types", "rand 0.9.2", - "snafu", "strum", "tokio", "tracing", @@ -3694,9 +3986,9 @@ dependencies = [ 
[[package]] name = "lance-file" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6dfcf654549330df3aef708cd7c12e170feecddd34d6c19dd005b4153213268" +checksum = "0772ae2d6207995dc1eb28aff9507f78e90b3362b58f311da001e9dc25f3d736" dependencies = [ "arrow-arith", "arrow-array", @@ -3717,21 +4009,21 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", - "snafu", "tokio", "tracing", ] [[package]] name = "lance-index" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb8ad0bd10efa2608634a2518b7dd501231e76c56a65fbd6519e23914cc425a" +checksum = "e71fbfb51096a903cb524fe0da716f5f15fbc4a6b6f84cd6dec21abf319c5e84" dependencies = [ + "arc-swap", "arrow", "arrow-arith", "arrow-array", @@ -3750,7 +4042,6 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "datafusion-sql", "deepsize", "dirs", "fst", @@ -3772,7 +4063,7 @@ dependencies = [ "log", "ndarray", "num-traits", - "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", @@ -3784,7 +4075,6 @@ dependencies = [ "serde", "serde_json", "smallvec", - "snafu", "tempfile", "tokio", "tracing", @@ -3794,9 +4084,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5314703fa8c8baed04193cc669da80ab42521c6319d3cc921a4a997690dcc0" +checksum = "bab8c98ef1b870b20541d27f3ca4efdf7c9f5c25214233be07d231ba88900219" dependencies = [ "arrow", "arrow-arith", @@ -3820,10 +4110,9 @@ dependencies = [ "lance-arrow", "lance-core", "lance-namespace", - "libc", "log", "moka", - "object_store 0.12.5", + "object_store", "object_store_opendal", "opendal", "path_abs", @@ -3831,7 +4120,6 @@ dependencies = [ "prost", "rand 0.9.2", "serde", - "snafu", "tempfile", "tokio", 
"tracing", @@ -3840,9 +4128,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51aa9b73279f505b2bec0f194c7a2390ca74ad3260131e631a7bef8d97d54b2e" +checksum = "6b4c51cad0ac780b02dc4da48528479e7693c03e8d05390510bbc69ca2a9a1f1" dependencies = [ "arrow-array", "arrow-buffer", @@ -3858,31 +4146,29 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cd01581f55ce45c49cbe494ee86c7ba7ca4ca3654690fd820941cd9105a46e" +checksum = "014e8332ca0615506342e0d3af608639864b68396973be14239f09c9f21f1fc2" dependencies = [ "arrow", "async-trait", "bytes", "lance-core", "lance-namespace-reqwest-client", - "serde", "snafu", ] [[package]] name = "lance-namespace-impls" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2cb89f3933060f01350ad05a5a3fbda952e8ba638799bf8ac4cd2368416ee46" +checksum = "e8d1231906a3cf92dd3dcda7d14a09c4835af6cd2bcd76dfd2481e87f20a282d" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "chrono", "futures", "lance", "lance-core", @@ -3892,10 +4178,9 @@ dependencies = [ "lance-namespace", "lance-table", "log", - "object_store 0.12.5", + "object_store", "rand 0.9.2", "serde_json", - "snafu", "tokio", "url", ] @@ -3906,7 +4191,7 @@ version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6369eee4682fb11edf538388b43c61ce288b8302fe89bb40944d7daa7faaae99" dependencies = [ - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_repr", @@ -3916,9 +4201,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db70650465a1af174b7dfe6948ec91a3d466ada12e11274eb66e51132173aa0" +checksum = 
"b16f1355904aea4ebb04ffc70c58c97901e10bde44452b4b021de4a1f329250d" dependencies = [ "arrow", "arrow-array", @@ -3936,7 +4221,7 @@ dependencies = [ "lance-file", "lance-io", "log", - "object_store 0.12.5", + "object_store", "prost", "prost-build", "prost-types", @@ -3955,9 +4240,9 @@ dependencies = [ [[package]] name = "lance-tokenizer" -version = "6.0.1" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb08ef9382c9d58036c323db2c19cc097e02d1d0d87714fc7176b5d3b36a31aa" +checksum = "b39b7f5ed9d0c0b716bf599b559d888267ed1dfe4c4e29d3648b51d2a28940cf" dependencies = [ "rust-stemmers", "serde", @@ -4140,7 +4425,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "generator", "scoped-tls", "tracing", @@ -4215,8 +4500,17 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ - "cfg-if", - "digest", + "cfg-if 1.0.4", + "digest 0.10.7", +] + +[[package]] +name = "mea" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2640d335e7273dacdcf51044026139b2e269c3bb0dfc3f8cb3496b85e3f6a42c" +dependencies = [ + "slab", ] [[package]] @@ -4231,7 +4525,7 @@ version = "7.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "miette-derive", "serde", "unicode-width 0.1.14", @@ -4281,16 +4575,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] -[[package]] -name = "mock_instant" 
-version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" - [[package]] name = "moka" version = "0.12.15" @@ -4311,6 +4599,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "more-asserts" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" + [[package]] name = "multimap" version = "0.10.1" @@ -4363,10 +4657,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] -name = "nu-ansi-term" -version = "0.50.3" +name = "ntapi" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" +dependencies = [ + "winapi", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ "windows-sys 0.61.2", ] @@ -4452,6 +4755,34 @@ dependencies = [ "libc", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.37.3" @@ -4463,16 +4794,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" dependencies = [ "async-trait", "base64", "bytes", "chrono", "form_urlencoded", - "futures", + "futures-channel", + "futures-core", + "futures-util", "http 1.4.0", "http-body-util", "httparse", @@ -4482,11 +4815,11 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", - "rand 0.9.2", - "reqwest", + "quick-xml 0.39.4", + "rand 0.10.1", + "reqwest 0.12.28", "ring", - "rustls-pemfile", + "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", @@ -4499,43 +4832,18 @@ dependencies = [ "web-time", ] -[[package]] -name = "object_store" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622acbc9100d3c10e2ee15804b0caa40e55c933d5aa53814cd520805b7958a49" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures-channel", - "futures-core", - "futures-util", - "http 1.4.0", - "humantime", - "itertools 0.14.0", - "parking_lot", - "percent-encoding", - "thiserror", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", -] - [[package]] name = "object_store_opendal" -version = "0.55.0" +version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" +checksum = "08298874eee5935c95bcaa393148834f9c53d904461ca15584a041d8a1c907c2" dependencies = [ "async-trait", "bytes", "chrono", "futures", - "object_store 0.12.5", + "mea", + "object_store", "opendal", "pin-project", 
"tokio", @@ -4568,7 +4876,7 @@ dependencies = [ "omnigraph-policy", "omnigraph-server", "predicates", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_yaml", @@ -4586,7 +4894,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "sha2", + "sha2 0.10.9", "tempfile", "thiserror", "time", @@ -4607,10 +4915,10 @@ dependencies = [ "arrow-select", "pest", "pest_derive", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "thiserror", "tokio", ] @@ -4639,16 +4947,16 @@ dependencies = [ "lance-namespace", "lance-namespace-impls", "lance-table", - "object_store 0.12.5", + "object_store", "omnigraph-compiler", "omnigraph-policy", "proptest", "regex", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serial_test", - "sha2", + "sha2 0.10.9", "tempfile", "thiserror", "time", @@ -4696,7 +5004,7 @@ dependencies = [ "serde_json", "serde_yaml", "serial_test", - "sha2", + "sha2 0.10.9", "subtle", "tempfile", "thiserror", @@ -4720,34 +5028,227 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + [[package]] name = "opendal" -version = "0.55.0" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236" +dependencies = [ + "ctor", + "opendal-core", + "opendal-layer-concurrent-limit", + "opendal-layer-logging", + "opendal-layer-retry", + "opendal-layer-timeout", + "opendal-service-azblob", + "opendal-service-azdls", + "opendal-service-gcs", + "opendal-service-hf", + "opendal-service-oss", + "opendal-service-s3", +] + +[[package]] +name = "opendal-core" +version = "0.56.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" +checksum = "1849dd2687e173e776d3af5fce1ba3ae47b9dd37a09d1c4deba850ef45fe00ca" dependencies = [ "anyhow", - "backon", "base64", "bytes", - "crc32c", "futures", - "getrandom 0.2.17", "http 1.4.0", "http-body 1.0.1", "jiff", "log", "md-5", + "mea", "percent-encoding", "quick-xml 0.38.4", - "reqsign", - "reqwest", + "reqsign-core", + "reqwest 0.13.4", "serde", "serde_json", - "sha2", "tokio", "url", "uuid", + "web-time", +] + +[[package]] +name = "opendal-layer-concurrent-limit" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048b1b29c503263bdd80a9afe46a68cd02ea9bd361185b1feab4b151078998e9" +dependencies = [ + "futures", + "http 1.4.0", + "mea", + "opendal-core", +] + +[[package]] +name = "opendal-layer-logging" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2645adc988b12eda106e2679ae529facfbbaa868ceb706f6f8125c6af15c47b" +dependencies = [ + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-retry" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eac134ffa4ddda6131a640a84a5315996424b9416c85052f8c64c1a33b70ad4" +dependencies = [ + "backon", + "log", + "opendal-core", +] + +[[package]] +name = "opendal-layer-timeout" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "619586ab7480c2e3009f6d18eabab18957bc094778fd130bcc38924970a90f4c" +dependencies = [ + "opendal-core", + "tokio", +] + +[[package]] +name = "opendal-service-azblob" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7452bf3ec61cfd81ac9ad9ada17825931e9e371d44a045c6bfab9596c0a2ac3b" +dependencies = [ + "base64", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 
0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "sha2 0.10.9", + "uuid", +] + +[[package]] +name = "opendal-service-azdls" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f9884c2d8cf8ba2bb077d79c877dac5863ba3bab9e2c9c1e41a2e0491404772" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "opendal-service-azure-common", + "quick-xml 0.38.4", + "reqsign-azure-storage", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-azure-common" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb0e45d6c8dcf66ce2da20e241bcb80e6e540e109a4ff20f318f6c9b4c54e0c" +dependencies = [ + "http 1.4.0", + "opendal-core", +] + +[[package]] +name = "opendal-service-gcs" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a49477a10163431896d106136117f5670717f9c9e49cf6f710528800c6633a" +dependencies = [ + "async-trait", + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "quick-xml 0.38.4", + "reqsign-core", + "reqsign-file-read-tokio", + "reqsign-google", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "opendal-service-hf" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2ab7a2a8a11dfe257ef4db5c0de798acbcd0d6429c37382dad2154bc06a388" +dependencies = [ + "bytes", + "hf-xet", + "http 1.4.0", + "log", + "opendal-core", + "percent-encoding", + "reqwest 0.13.4", + "serde", + "serde_json", +] + +[[package]] +name = "opendal-service-oss" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c8a917829ad06d21b639558532cb0101fe49b040d946d673a73018683fac05" +dependencies = [ + "bytes", + "http 1.4.0", + "log", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aliyun-oss", + 
"reqsign-core", + "reqsign-file-read-tokio", + "serde", +] + +[[package]] +name = "opendal-service-s3" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dadddeb9bb50b0d30927dd914c298c4ddca47e4c1cfa7674d311f0cf9b051c8" +dependencies = [ + "base64", + "bytes", + "crc32c", + "http 1.4.0", + "log", + "md-5", + "opendal-core", + "quick-xml 0.38.4", + "reqsign-aws-v4", + "reqsign-core", + "reqsign-file-read-tokio", + "serde", + "url", ] [[package]] @@ -4781,6 +5282,15 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +dependencies = [ + "memchr", +] + [[package]] name = "outref" version = "0.5.2" @@ -4815,7 +5325,7 @@ version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "libc", "redox_syscall", "smallvec", @@ -4846,8 +5356,8 @@ version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" dependencies = [ - "digest", - "hmac", + "digest 0.10.7", + "hmac 0.12.1", ] [[package]] @@ -4921,7 +5431,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", - "sha2", + "sha2 0.10.9", ] [[package]] @@ -5033,7 +5543,7 @@ dependencies = [ "der", "pbkdf2", "scrypt", - "sha2", + "sha2 0.10.9", "spki", ] @@ -5248,9 +5758,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quick-xml" -version = "0.37.5" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -5258,9 +5768,19 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.4" +version = "0.39.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" +checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quick-xml" +version = "0.40.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f" dependencies = [ "memchr", "serde", @@ -5292,6 +5812,7 @@ version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -5369,6 +5890,17 @@ dependencies = [ "rand_core 0.9.5", ] +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -5407,6 +5939,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + [[package]] name = "rand_distr" version = "0.5.1" @@ -5480,6 +6018,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redb" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4ba239c1c1693315d3cc0e601db3b3965543afbf48c41730fdca2f069f510f4a" +dependencies = [ + "libc", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -5556,84 +6103,220 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] -name = "reqsign" -version = "0.16.5" +name = "reqsign-aliyun-oss" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +checksum = "372266b4733756738eeb199a98188037d27a0989980e2600ae7ce1faf00a867d" dependencies = [ "anyhow", - "async-trait", - "base64", - "chrono", "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", "http 1.4.0", - "jsonwebtoken", "log", - "once_cell", "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rsa", + "reqsign-core", "rust-ini", "serde", "serde_json", - "sha1", - "sha2", - "tokio", ] [[package]] -name = "reqwest" -version = "0.12.28" +name = "reqsign-aws-v4" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +checksum = "7b75624bd8a466e37ddc0a7b6c33ac859a85347c153a916e1dd9d0b68338f74a" dependencies = [ - "base64", + "anyhow", "bytes", - "encoding_rs", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.4.13", + "form_urlencoded", + "hex", "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "hyper 1.8.1", - "hyper-rustls 0.27.7", - "hyper-util", - "js-sys", "log", - "mime", - "mime_guess", "percent-encoding", - "pin-project-lite", - "quinn", - "rustls 0.23.36", - "rustls-native-certs", - "rustls-pki-types", + "quick-xml 0.40.1", + "reqsign-core", + "rust-ini", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", - "tokio", - "tokio-rustls 0.26.4", - "tokio-util", + "sha1", +] + +[[package]] +name = "reqsign-azure-storage" 
+version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b96928e73ad984de1d99e382749d09e5dab7dd707b767974f7e40aa926b82f" +dependencies = [ + "anyhow", + "base64", + "bytes", + "form_urlencoded", + "http 1.4.0", + "log", + "pem", + "percent-encoding", + "reqsign-core", + "rsa", + "serde", + "serde_json", + "sha1", +] + +[[package]] +name = "reqsign-core" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5fa5cb48808693614d1701fcd3db0b30fa292e0f18e122ae068b6d32eaeed3f" +dependencies = [ + "anyhow", + "base64", + "bytes", + "form_urlencoded", + "futures", + "hex", + "hmac 0.13.0", + "http 1.4.0", + "jiff", + "log", + "percent-encoding", + "rsa", + "serde", + "serde_json", + "sha1", + "sha2 0.11.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "reqsign-file-read-tokio" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a4b6f3a3fd29ffcc99a90aec585a65217783badfd73acddf847b63ae683bda9" +dependencies = [ + "anyhow", + "reqsign-core", + "tokio", +] + +[[package]] +name = "reqsign-google" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb215d0876a18b6bd9cdd380b589e5292aaa638ca15266de794b1122d898b6b2" +dependencies = [ + "form_urlencoded", + "http 1.4.0", + "log", + "percent-encoding", + "reqsign-aws-v4", + "reqsign-core", + "rsa", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "mime", + "mime_guess", + "percent-encoding", + 
"pin-project-lite", + "quinn", + "rustls 0.23.36", + "rustls-native-certs", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", "tower", "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", + "wasm-streams 0.4.2", "web-sys", "webpki-roots", ] +[[package]] +name = "reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.36", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams 0.5.0", + "web-sys", +] + +[[package]] +name = "reqwest-middleware" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bc3f1384cffa4f274dad2d4ddd73aed32fed8f786d96c6be8aa4e5fd3c3b58" +dependencies = [ + "anyhow", + "async-trait", + "http 1.4.0", + "reqwest 0.13.4", + "thiserror", + "tower-service", +] + [[package]] name = "ring" version = "0.17.14" @@ -5641,7 +6324,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "getrandom 0.2.17", "libc", "untrusted", @@ -5650,9 +6333,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" dependencies = [ "bytemuck", "byteorder", @@ -5664,15 +6347,15 @@ version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ - "const-oid", - "digest", + "const-oid 0.9.6", + "digest 0.10.7", "num-bigint-dig", "num-integer", "num-traits", "pkcs1", "pkcs8", "rand_core 0.6.4", - "sha2", + "sha2 0.10.9", "signature", "spki", "subtle", @@ -5685,7 +6368,7 @@ version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "ordered-multimap", ] @@ -5778,15 +6461,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -5797,6 +6471,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls 0.23.36", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki 0.103.9", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -5843,6 +6544,12 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safe-transmute" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3944826ff8fa8093089aba3acb4ef44b9446a99a16f3bf4e74af3f77d340ab7d" + [[package]] name = "salsa20" version = "0.10.2" @@ -5923,7 +6630,7 @@ checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" dependencies = [ "pbkdf2", "salsa20", - "sha2", + "sha2 0.10.9", ] [[package]] @@ -5949,7 +6656,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -6127,13 +6834,13 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -6142,9 +6849,30 @@ version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "cfg-if 1.0.4", + "cpufeatures 0.2.17", + "digest 0.10.7", + "sha2-asm", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + +[[package]] +name = "sha2-asm" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" +dependencies = [ + "cc", ] [[package]] @@ -6153,7 +6881,7 @@ version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" dependencies = [ - "digest", + "digest 0.10.7", "keccak", ] @@ -6166,6 +6894,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shellexpand" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" +dependencies = [ + "bstr", + "dirs", + "os_str_bytes", +] + [[package]] name = "shlex" version = "1.3.0" @@ -6188,7 +6927,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ - "digest", + "digest 0.10.7", "rand_core 0.6.4", ] @@ -6199,22 +6938,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] -name = "simdutf8" -version = "0.1.5" +name = "simd_cesu8" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] [[package]] -name = "simple_asn1" -version = "0.6.4" +name = "simdutf8" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror", - "time", -] +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" @@ -6335,12 +7072,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", - "cfg-if", + "cfg-if 1.0.4", "libc", "psm", "windows-sys 0.59.0", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "std_prelude" version = "0.2.12" @@ -6399,6 +7152,12 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "symlink" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7973cce6668464ea31f176d85b13c7ab3bba2cb3b77a2ed26abd7801688010a" + [[package]] name = "syn" version = "1.0.109" @@ -6441,6 +7200,41 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sysinfo" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ab6a2f8bfe508deb3c6406578252e491d299cbbf3bc0529ecc3313aee4a52f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows", +] + +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -6516,7 +7310,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", ] [[package]] @@ -6612,6 +7406,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-retry" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a129d95275ebf4c493ec53bf0f8cd95f5ac161bc4f381700809a54f595d4470" +dependencies = [ + "pin-project-lite", + "rand 0.10.1", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -6721,6 +7526,19 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "050686193eb999b4bb3bc2acfa891a13da00f79734704c4b8b4ef1a10b368a3c" +dependencies = [ + "crossbeam-channel", + "symlink", + "thiserror", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -6763,6 +7581,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" @@ -6773,12 +7601,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + 
"serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -6804,9 +7635,15 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + +[[package]] +name = "typewit" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214ca0b2191785cbc06209b9ca1861e048e39b5ba33574b3cedd58363d5bb5f6" [[package]] name = "ucd-trie" @@ -7027,6 +7864,15 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -7045,13 +7891,22 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] + [[package]] name = "wasm-bindgen" version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "once_cell", "rustversion", "wasm-bindgen-macro", @@ -7064,7 +7919,7 @@ version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ - "cfg-if", + "cfg-if 1.0.4", "futures-util", "js-sys", "once_cell", @@ -7139,6 +7994,19 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -7171,6 +8039,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "1.0.6" @@ -7180,6 +8057,35 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whoami" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" +dependencies = [ + "libc", + "libredox", + "objc2-system-configuration", + "wasite", + "web-sys", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -7189,6 +8095,33 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -7202,6 +8135,17 @@ dependencies = [ "windows-strings", ] +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -7230,6 +8174,27 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -7317,6 +8282,15 @@ dependencies = [ "windows_x86_64_msvc 
0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -7516,6 +8490,153 @@ dependencies = [ "tap", ] +[[package]] +name = "xet-client" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e1e496dcbe6a09017acdfaf48e1a646735e7ff5b2a49e2c7e081cca77a59bc8" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "bytes", + "clap", + "crc32fast", + "futures", + "http 1.4.0", + "hyper 1.8.1", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "redb", + "reqwest 0.13.4", + "reqwest-middleware", + "serde", + "serde_json", + "serde_repr", + "statrs", + "tempfile", + "thiserror", + "tokio", + "tokio-retry", + "tracing", + "tracing-subscriber", + "url", + "urlencoding", + "web-time", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-core-structures" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb838aa8eb67d730af301584cf003caad407487606058292a6750711b603fbee" +dependencies = [ + "async-trait", + "base64", + "blake3", + "bytemuck", + "bytes", + "clap", + "countio", + "csv", + "futures", + "futures-util", + "getrandom 0.4.2", + "heapify", + "itertools 0.14.0", + "lazy_static", + "lz4_flex", + "more-asserts", + "rand 0.10.1", + "regex", + "safe-transmute", + "serde", + "static_assertions", + "tempfile", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "uuid", + "web-time", + "xet-runtime", +] + +[[package]] +name = "xet-data" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fd409bef621411a9d9013798540bb8036cb2678f03ab39af89a5e88034ed8c" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "clap", + "gearhash", + 
"http 1.4.0", + "itertools 0.14.0", + "lazy_static", + "more-asserts", + "rand 0.10.1", + "serde", + "serde_json", + "sha2 0.10.9", + "tempfile", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "url", + "uuid", + "walkdir", + "xet-client", + "xet-core-structures", + "xet-runtime", +] + +[[package]] +name = "xet-runtime" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15d8f121c33866f7648b737abe70d0e2dd9c0af4ffdd7219207531d0283aa63d" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "chrono", + "colored", + "const-str", + "ctor", + "dirs", + "futures", + "git-version", + "humantime", + "konst", + "lazy_static", + "libc", + "more-asserts", + "oneshot", + "pin-project", + "rand 0.10.1", + "reqwest 0.13.4", + "serde", + "serde_json", + "shellexpand", + "sysinfo", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "tracing-appender", + "tracing-subscriber", + "whoami", + "winapi", +] + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index 56cdde54..c442242c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,14 +31,14 @@ datafusion-common = "53" datafusion-expr = "53" datafusion-functions-aggregate = "53" -lance = { version = "6.0.1", default-features = false, features = ["aws"] } -lance-datafusion = "6.0.1" -lance-file = "6.0.1" -lance-index = "6.0.1" -lance-linalg = "6.0.1" -lance-namespace = "6.0.1" -lance-namespace-impls = "6.0.1" -lance-table = "6.0.1" +lance = { version = "7.0.0", default-features = false, features = ["aws"] } +lance-datafusion = "7.0.0" +lance-file = "7.0.0" +lance-index = "7.0.0" +lance-linalg = "7.0.0" +lance-namespace = "7.0.0" +lance-namespace-impls = "7.0.0" +lance-table = "7.0.0" ulid = "1" futures = "0.3" @@ -64,7 +64,7 @@ base64 = "0.22" ariadne = "0.4" regex = "1" reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } -object_store = { version = "0.12.5", default-features = false, features = 
["aws", "fs"] } +object_store = { version = "0.13.2", default-features = false, features = ["aws", "fs"] } fail = "0.5" time = { version = "0.3", features = ["formatting"] } axum = { version = "0.8", features = ["json", "macros"] } diff --git a/crates/omnigraph/src/changes/mod.rs b/crates/omnigraph/src/changes/mod.rs index d4a3fe7a..2e9bc029 100644 --- a/crates/omnigraph/src/changes/mod.rs +++ b/crates/omnigraph/src/changes/mod.rs @@ -248,12 +248,12 @@ async fn diff_table_same_lineage( // Inserts + Updates: use _row_last_updated_at_version to find all rows // touched since Vf, then classify by checking whether the ID existed at Vf. // - // Why not _row_created_at_version for inserts: Lance's merge_insert stamps - // new rows with _row_created_at_version = dataset_creation_version (v1), - // not the merge_insert commit version. This makes _row_created_at_version - // unreliable for detecting inserts from merge_insert writes. Using - // _row_last_updated_at_version catches all touched rows regardless of - // write mode, and ID-set membership distinguishes inserts from updates. + // We key on _row_last_updated_at_version because one scan over it catches + // every row touched in the window — inserts and updates alike — regardless + // of write mode, and ID-set membership at Vf then distinguishes inserts from + // updates. (lance#6774 made merge_insert stamp new rows' _row_created_at_version + // with the commit version, so created_at became reliable too; last_updated + // stays the right key since it also covers updates.) 
if wants_inserts || wants_updates { let filter_sql = format!( "_row_last_updated_at_version > {} AND _row_last_updated_at_version <= {}", diff --git a/crates/omnigraph/src/db/manifest/namespace.rs b/crates/omnigraph/src/db/manifest/namespace.rs index 80d206f4..5e907ba8 100644 --- a/crates/omnigraph/src/db/manifest/namespace.rs +++ b/crates/omnigraph/src/db/manifest/namespace.rs @@ -10,7 +10,9 @@ use lance_namespace::models::{ }; use lance_namespace::{Error as LanceNamespaceError, LanceNamespace, NamespaceError}; use lance_table::io::commit::ManifestNamingScheme; -use object_store::{Error as ObjectStoreError, ObjectStore as _, PutMode, PutOptions, path::Path}; +use object_store::{ + Error as ObjectStoreError, ObjectStore as _, ObjectStoreExt, PutMode, PutOptions, path::Path, +}; use crate::error::{OmniError, Result}; diff --git a/crates/omnigraph/src/storage.rs b/crates/omnigraph/src/storage.rs index 1f96b39f..357f9906 100644 --- a/crates/omnigraph/src/storage.rs +++ b/crates/omnigraph/src/storage.rs @@ -9,7 +9,7 @@ use object_store::aws::AmazonS3Builder; use object_store::local::LocalFileSystem; use object_store::memory::InMemory; use object_store::path::Path as ObjectPath; -use object_store::{DynObjectStore, ObjectStore, PutMode, PutPayload}; +use object_store::{DynObjectStore, ObjectStore, ObjectStoreExt, PutMode, PutPayload}; use url::Url; use crate::error::{OmniError, Result}; diff --git a/crates/omnigraph/tests/lance_version_columns.rs b/crates/omnigraph/tests/lance_version_columns.rs index b9367b9c..4b734587 100644 --- a/crates/omnigraph/tests/lance_version_columns.rs +++ b/crates/omnigraph/tests/lance_version_columns.rs @@ -191,14 +191,16 @@ async fn lance_merge_insert_new_row_stamps_created_at_version() { let eve = rows.iter().find(|r| r.0 == "eve").unwrap(); eprintln!("Eve: created_at_version={}, v1={}, v2={}", eve.2, v1, v2); - // Lance behavior (as of 3.0.1): merge_insert stamps new rows with - // _row_created_at_version = dataset_creation_version (v1), 
NOT the - // merge_insert commit version (v2). This is why Omnigraph's change - // detection uses _row_last_updated_at_version + ID set membership - // to classify inserts vs updates, not _row_created_at_version alone. + // Lance behavior (7.0.0, lance#6774): merge_insert stamps new INSERT + // rows with _row_created_at_version = the commit version (v2). Earlier + // Lance used a fallback of the dataset creation version; #6774 changed + // it so created_at reflects when the row actually entered the dataset. + // Omnigraph's change detection keys on _row_last_updated_at_version + ID + // set membership (see changes/mod.rs), so this stamping change leaves + // insert-vs-update classification unaffected. assert_eq!( - eve.2, v1, - "Lance merge_insert stamps new rows with created_at = dataset creation version, not commit version" + eve.2, v2, + "Lance merge_insert stamps new rows with created_at = commit version (lance#6774)" ); assert_eq!( eve.3, v2, From f84efc49f6bf441fb070f3e18a6e22cd3a277ba7 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 19:04:07 +0200 Subject: [PATCH 2/6] fix(storage): pin WriteParams::auto_cleanup = None (lance#6755 default flip) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lance#6755 flipped the WriteParams::auto_cleanup default from on (a full cleanup pass every 20th commit) to None. On 6.0.1 the on-by-default hook could silently GC versions that __manifest pins for snapshots/time-travel. OmniGraph owns cleanup explicitly (optimize.rs::cleanup_all_tables) and never set auto_cleanup, so it was relying on a default that is both wrong for our snapshot model and now changed upstream. Pin auto_cleanup: None explicitly at all 11 production WriteParams sites (table_store ×6, commit_graph ×2, recovery_audit ×1, manifest/graph ×2 — the __manifest + sub-table Create paths). 
Removes the dependency on a default-flag value and locks in the snapshot-safe behavior regardless of future upstream re-flips. Refs lance#6755. --- crates/omnigraph/src/db/commit_graph.rs | 2 ++ crates/omnigraph/src/db/manifest/graph.rs | 2 ++ crates/omnigraph/src/db/recovery_audit.rs | 1 + crates/omnigraph/src/table_store.rs | 6 ++++++ 4 files changed, 11 insertions(+) diff --git a/crates/omnigraph/src/db/commit_graph.rs b/crates/omnigraph/src/db/commit_graph.rs index 9531a64d..6bebe018 100644 --- a/crates/omnigraph/src/db/commit_graph.rs +++ b/crates/omnigraph/src/db/commit_graph.rs @@ -57,6 +57,7 @@ impl CommitGraph { mode: WriteMode::Create, enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), + auto_cleanup: None, ..Default::default() }; let dataset = Dataset::write(reader, &uri as &str, Some(params)) @@ -430,6 +431,7 @@ async fn create_commit_actor_dataset(root_uri: &str) -> Result { mode: WriteMode::Create, enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), + auto_cleanup: None, ..Default::default() }; match Dataset::write(reader, &uri as &str, Some(params)).await { diff --git a/crates/omnigraph/src/db/manifest/graph.rs b/crates/omnigraph/src/db/manifest/graph.rs index 6c414aa7..a805b968 100644 --- a/crates/omnigraph/src/db/manifest/graph.rs +++ b/crates/omnigraph/src/db/manifest/graph.rs @@ -31,6 +31,7 @@ pub(super) async fn init_manifest_graph( mode: WriteMode::Create, enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), + auto_cleanup: None, ..Default::default() }; let manifest_path = manifest_uri(root); @@ -127,6 +128,7 @@ async fn create_empty_dataset(uri: &str, schema: &SchemaRef) -> Result enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; Dataset::write(reader, uri, Some(params)) diff --git a/crates/omnigraph/src/db/recovery_audit.rs 
b/crates/omnigraph/src/db/recovery_audit.rs index 2aab6bcf..5d62f581 100644 --- a/crates/omnigraph/src/db/recovery_audit.rs +++ b/crates/omnigraph/src/db/recovery_audit.rs @@ -189,6 +189,7 @@ async fn create_recoveries_dataset(root_uri: &str) -> Result { mode: WriteMode::Create, enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), + auto_cleanup: None, ..Default::default() }; match Dataset::write(reader, &uri as &str, Some(params)).await { diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index 65123c00..3125f8e8 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -745,6 +745,7 @@ impl TableStore { let params = WriteParams { mode: WriteMode::Append, allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; ds.append(reader, Some(params)) @@ -764,6 +765,7 @@ impl TableStore { let params = WriteParams { mode: WriteMode::Append, allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; ds.append(reader, Some(params)) @@ -777,6 +779,7 @@ impl TableStore { enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; Dataset::write(reader, dataset_uri, Some(params)) @@ -867,6 +870,7 @@ impl TableStore { let params = WriteParams { mode: WriteMode::Append, allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; let transaction = InsertBuilder::new(Arc::new(ds.clone())) @@ -1087,6 +1091,7 @@ impl TableStore { mode: WriteMode::Overwrite, enable_stable_row_ids: true, allow_external_blob_outside_bases: true, + auto_cleanup: None, ..Default::default() }; let transaction = InsertBuilder::new(Arc::new(ds.clone())) @@ -1503,6 +1508,7 @@ impl TableStore { enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, + 
auto_cleanup: None, ..Default::default() }; Dataset::write(reader, dataset_uri, Some(params)) From 4ed2e83032ccf94f66a3a71a810c0d68a6addbed Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 19:04:08 +0200 Subject: [PATCH 3/6] test(lance): pin BTREE range-boundary correctness (lance#6796) lance#6796 (issue #6792) fixed a BTREE scalar-index range-query bound inclusiveness bug: `x <= hi AND x > lo` returned the wrong boundary row. Add lance_surface_guards.rs::btree_range_query_boundary_is_correct, which reproduces the exact #6792 shape (5 rows + an explicit BTREE drives the index path even on tiny data) and pins the corrected inclusive-<= / exclusive-> semantics. It turns red if a future Lance regression reintroduces the bug. OmniGraph today builds BTREE only on string @key columns and queries them by equality/IN, so its current patterns do not hit this; the guard protects any future BTREE-range path (BTREE-on-properties, range-on-key). Refs lance#6796. --- .../omnigraph/tests/lance_surface_guards.rs | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs index 370f9e76..54fd7f7e 100644 --- a/crates/omnigraph/tests/lance_surface_guards.rs +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -541,3 +541,80 @@ async fn fragment_deletion_metadata_is_available() { per-fragment deletions and would need to read the deletion vector.", ); } + +// --- Guard 14: BTREE scalar-index range-boundary correctness (lance#6796) ---- +// +// lance#6796 (issue #6792) fixed a BTREE range-query bound-inclusiveness bug: +// `price <= 10 AND price > 5` returned the wrong boundary row (5.0 instead of +// 10.0). OmniGraph today builds BTREE only on string `@key` columns queried by +// equality/IN, so its current paths do not hit it; this guards future range paths. 
This reproduces the exact #6792 +// shape (5 rows + an explicit BTREE drives the index path even on tiny data, +// per the upstream repro) and pins the corrected inclusive-`<=` / exclusive-`>` +// semantics. It turns red if a future Lance regression reintroduces the bug. +#[tokio::test] +async fn btree_range_query_boundary_is_correct() { + use arrow_array::Float64Array; + use futures::TryStreamExt; + + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().join("guard14.lance"); + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("price", DataType::Float64, false), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])), + Arc::new(Float64Array::from(vec![1.0, 5.0, 10.0, 15.0, 20.0])), + ], + ) + .unwrap(); + let reader = RecordBatchIterator::new(vec![Ok(batch)], schema); + let params = WriteParams { + mode: WriteMode::Create, + enable_stable_row_ids: true, + data_storage_version: Some(LanceFileVersion::V2_2), + ..Default::default() + }; + let mut ds = Dataset::write(reader, uri.to_str().unwrap(), Some(params)) + .await + .unwrap(); + + // Build the BTREE on the numeric column so the range filter resolves through + // the scalar index (the path lance#6796 fixed). 
+ ds.create_index_builder(&["price"], IndexType::BTree, &ScalarIndexParams::default()) + .replace(true) + .await + .unwrap(); + + let mut scanner = ds.scan(); + scanner.filter("price <= 10.0 AND price > 5.0").unwrap(); + let batches: Vec<RecordBatch> = scanner + .try_into_stream() + .await + .unwrap() + .try_collect() + .await + .unwrap(); + let mut got: Vec<f64> = Vec::new(); + for b in &batches { + let col = b + .column_by_name("price") + .unwrap() + .as_any() + .downcast_ref::<Float64Array>() + .unwrap(); + for i in 0..col.len() { + got.push(col.value(i)); + } + } + got.sort_by(|a, b| a.partial_cmp(b).unwrap()); + assert_eq!( + got, + vec![10.0], + "BTREE range `price <= 10 AND price > 5` must return exactly [10.0] \ + (lance#6796 / issue #6792 boundary fix); got {got:?}. If this regressed, \ + Lance reintroduced the range-bound inclusiveness bug.", + ); +} From 29d61d47a5dfdac946d9453727d9b2fb0259ec6b Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 19:04:16 +0200 Subject: [PATCH 4/6] docs(dev): align Lance docs + invariants to 7.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docs/dev/lance.md: new 2026-06-14 alignment stanza for the 6.0.1 → 7.0.0 bump (object_store ObjectStoreExt move, roaring 0.11.4, #6774/#6796/#6755 behavior, #6658 shipped → MR-A unblocked but separate, #6666 + blob compaction still open); prior 6.0.1 stanza demoted to historical. - AGENTS.md: storage substrate 6.x → 7.x (line + architecture diagram). - docs/dev/invariants.md: deletes/vector known gap updated — the staged two-phase delete API (lance#6658) now exists and MR-A is unblocked, but delete_where stays inline and D2 stays in place until the migration lands; create_vector_index still gated on lance#6666. 
--- AGENTS.md | 4 ++-- docs/dev/invariants.md | 17 +++++++++++------ docs/dev/lance.md | 19 ++++++++++++++++--- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 065e28aa..92447fdf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,7 @@ Tools that support `@`-imports (Claude Code) auto-include all three files via th **Version surveyed:** 0.7.0 **Workspace crates:** `omnigraph-compiler`, `omnigraph` (engine), `omnigraph-policy`, `omnigraph-api-types` (shared HTTP wire DTOs), `omnigraph-cluster`, `omnigraph-cli`, `omnigraph-server` -**Storage substrate:** Lance 6.x (columnar, versioned, branchable) +**Storage substrate:** Lance 7.x (columnar, versioned, branchable) **License:** MIT **Toolchain:** Rust stable, edition 2024 @@ -53,7 +53,7 @@ CLI (omnigraph) HTTP Server (omnigraph-server, Axum) omnigraph (engine) ── ManifestCoordinator, CommitGraph, RunRegistry, GraphIndex (CSR/CSC), exec │ ▼ - Lance 6.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes + Lance 7.x ── columnar Arrow, fragments, per-dataset versions/branches, indexes │ ▼ Object store (file / s3 / RustFS / MinIO / S3-compat) diff --git a/docs/dev/invariants.md b/docs/dev/invariants.md index a0bcc6de..4b6c24cb 100644 --- a/docs/dev/invariants.md +++ b/docs/dev/invariants.md @@ -132,13 +132,18 @@ them explicit. new writer cannot couple a write with a HEAD advance through the default surface. The dead legacy methods (`append_batch` on the trait, `merge_insert_batch{,es}`, `create_{btree,inverted}_index`) were removed. The - remaining residuals are `delete_where` (gated on MR-A — Lance v7.x bump) - and `create_vector_index` (gated on Lance #6666); see - [lance.md](lance.md) and [writes.md](writes.md). New write paths should use - the staged shape unless a documented Lance blocker applies. + remaining residuals are `delete_where` and `create_vector_index`. 
The Lance + 6.0.1 → 7.0.0 bump landed, so the staged two-phase delete API + (`DeleteBuilder::execute_uncommitted`, Lance #6658) is now available and MR-A + is unblocked — but the migration itself is still pending, so `delete_where` + stays inline for now. `create_vector_index` remains gated on Lance #6666 + (still open). See [lance.md](lance.md) and [writes.md](writes.md). New write + paths should use the staged shape unless a documented Lance blocker applies. - **Deletes and vector indexes:** `delete_where` and vector index creation still - advance Lance HEAD inline because the required public Lance APIs are missing. - Keep D2 and recovery coverage in place until those residuals are removed. + advance Lance HEAD inline. The public delete two-phase API now exists (Lance + #6658 shipped in 7.0.0), so the delete residual is unblocked pending the MR-A + migration; vector index creation is still blocked (Lance #6666 open). Keep D2 + and recovery coverage in place until those residuals are removed. - **Blob-column compaction:** Lance `compact_files` mis-decodes blob-v2 columns under its forced `BlobHandling::AllBinary` read ("more fields in the schema than provided column indices"), so `optimize` skips any table with a `Blob` diff --git a/docs/dev/lance.md b/docs/dev/lance.md index a4e311f1..d435aff0 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -156,7 +156,22 @@ If a future need pulls one of these into scope, add a row to the matching domain When Lance ships a major release that changes any of the above (file format bump, new index type, transaction semantics change, new branching primitive), refresh this index in the same change as the omnigraph upgrade. Stale Lance pointers are worse than no pointers. -### Last alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1) +### Last alignment audit: 2026-06-14 (Lance 7.0.0 upstream; omnigraph pinned at 7.0.0) + +Migration from Lance 6.0.1 → 7.0.0 landed in this cycle. 
**Arrow stayed 58, DataFusion stayed 53** (no change) — the only transitive bump is `object_store` 0.12.5 → 0.13.2. 141 upstream commits reviewed (6.0.1 → 7.0.0); no fixes lost (the 6.0.x release-branch backports are all forward-ported into 7.0.0). Behavior-affecting findings: + +- **object_store 0.13 moved convenience methods behind a new `ObjectStoreExt` trait** (`get`/`put`/`head`/`rename`/`delete`; `list`/`list_with_delimiter`/`put_opts` stay on the core `ObjectStore` trait). Fix = add `use object_store::ObjectStoreExt;` to `storage.rs` and `db/manifest/namespace.rs`; no call-site changes. Mirrors Lance's own migration in PR #6672. The local-FS `PutMode::Update` gap is unchanged (still unimplemented upstream), so `storage.rs::write_text_if_match`'s local content-token emulation stays. +- **`roaring` must be pinned to 0.11.4** (`cargo update -p roaring --precise 0.11.4`). Lance 7.0.0's `UpdatedFragmentOffsets` newtype (PR #6650) derives `Eq` over `HashMap`, which needs `RoaringBitmap: Eq` — added only in roaring 0.11.4 (roaring-rs PR #341). Lance's loose `roaring = "0.11"` constraint otherwise resolves the broken 0.11.3 and **lance itself fails to compile** (`RoaringBitmap: Eq is not satisfied`). roaring is transitive (no direct workspace dep); the pin lives only in `Cargo.lock`. +- **`_row_created_at_version` for merge-insert INSERT rows now = the commit version** (PR #6774; was a fallback of 1 / dataset-creation version). Flipped `lance_version_columns.rs::lance_merge_insert_new_row_stamps_created_at_version` to assert `== v2`. Production change-detection keys on `_row_last_updated_at_version` + ID-set membership, so classification logic is unaffected (the `changes/mod.rs` rationale comment was corrected). +- **BTREE range-query bound inclusiveness fixed** (PR #6796, issue #6792): `x <= hi AND x > lo` returned the wrong boundary row on 6.0.1. 
omnigraph today builds BTREE only on string `@key` columns (`id`/`src`/`dst`) and queries them by equality/IN, not range, so its *current* query patterns almost certainly never hit this bug — but the corrected boundary semantics are a contract we rely on the moment a BTREE-range path appears (BTREE-on-properties via the index-type tickets, or a range-on-key query). Pinned by `lance_surface_guards.rs::btree_range_query_boundary_is_correct` (reproduces #6792's 5-row + BTREE shape). +- **`WriteParams::auto_cleanup` default flipped from on (every-20-commits) to `None`** (PR #6755). On 6.0.1 the on-by-default hook could GC versions the `__manifest` pins for snapshots/time-travel. omnigraph owns cleanup explicitly (`optimize.rs::cleanup_all_tables`) and now pins `auto_cleanup: None` at all 11 production `WriteParams` sites — no dependency on the upstream default, and the snapshot-safe behavior locked in regardless of future re-flips. +- **Lance #6658 SHIPPED in 7.0.0** (`DeleteBuilder::execute_uncommitted`, exposed via PR #6781) → MR-A (migrate `delete_where` to the staged two-phase API, retire the parse-time D2 rule) is now **unblocked**, tracked separately (dev-graph `iss-950`). The bump itself keeps `delete_where` inline; the `_compile_delete_result_field_shape` guard is left untouched until MR-A. +- **Still NOT fixed in 7.0.0:** vector-index two-phase (Lance #6666 open) — `create_vector_index` inline residual retained; blob-column compaction — `compact_files_still_fails_on_blob_columns` guard still red on a fix, `optimize` still skips blob tables behind `LANCE_SUPPORTS_BLOB_COMPACTION`. +- **No Lance-API surface omnigraph uses changed 6.0.1 → 7.0.0** (verified by a clean engine build; the only compile break was object_store). 
`CleanupPolicy`, `WriteParams` (apart from the `auto_cleanup` default), `CompactionOptions`, the namespace models (resolved via `lance-namespace-reqwest-client` 0.7.7, unchanged across the bump), `Operation`, `ManifestLocation`, and `MergeInsertBuilder` shapes are all stable. + +Bump this date stanza on the next alignment pass. + +### Prior alignment audit: 2026-05-22 (Lance 6.0.1 upstream; omnigraph pinned at 6.0.1) Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, Arrow 57 → 58, lance-tokenizer 6.0.1 added, tantivy* removed). Direct 4 → 6 jump; v5.x was not used as an intermediate (rationale in `~/.claude/plans/shimmering-percolating-duckling.md`). Behavior-affecting findings: @@ -179,5 +194,3 @@ Migration from Lance 4.0.0 → 6.0.1 landed in this cycle (DataFusion 52 → 53, - **Lance blob-v2 `compact_files` bug** (no public issue found as of 2026-06): `compact_files` disables binary-copy for blob datasets and forces `BlobHandling::AllBinary` on the read side; the v2.1+ structural decoder then mis-counts column infos for the blob-v2 struct and fails with `Invalid user input: there were more fields in the schema than provided column indices / infos` (`lance-encoding/src/decoder.rs::ColumnInfoIter::expect_next`). This fails even a pristine uniform-V2_2 multi-fragment blob table; vector/list/scalar/ragged columns and mixed file versions all compact fine. Reads/queries use descriptor handling (`BlobHandling::default()`) and are unaffected. `optimize` skips blob-bearing tables behind `LANCE_SUPPORTS_BLOB_COMPACTION = false` (`db/omnigraph/optimize.rs`), reporting `SkipReason::BlobColumnsUnsupportedByLance`. Pinned by `lance_surface_guards.rs::compact_files_still_fails_on_blob_columns`, which turns red when the bug is fixed → flip the gate, remove the skip branch + the `maintenance.rs::optimize_skips_blob_table_and_reports_skip` skip assertions. 
Surface guards added: `crates/omnigraph/tests/lance_surface_guards.rs` (10 named guards; 5 runtime + 5 compile-only). Future Lance bumps re-run this file first as the smoke check. Two additional guards from the original plan deferred to follow-up (`manifest_cas_returns_row_level_contention_variant` needs full publisher-race harness; `table_version_metadata_byte_compatible_with_v4` needs `pub(crate)` reach extension). - -Bump this date stanza on the next alignment pass. From 90a202871b680bf2b41debfae3a83b93d33c49bc Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 19:57:50 +0200 Subject: [PATCH 5/6] fix(storage): skip Lance auto-cleanup on commit paths for legacy datasets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR #229 review (Codex P1). `WriteParams::auto_cleanup` is create-time config with no effect on existing datasets (Lance write.rs docs), so the previous `auto_cleanup: None` change alone did NOT protect graphs created before the v7 bump: 6.0.1 defaulted auto_cleanup ON, leaving `lance.auto_cleanup.*` config on those datasets, and Lance's per-commit hook (io/commit.rs: `if !commit_config.skip_auto_cleanup`) fires off that stored config — so omnigraph's own writes would GC versions the __manifest pins for snapshots/time-travel. Skip the hook on every commit path, covering new and legacy datasets alike: - commit_staged: CommitBuilder::with_skip_auto_cleanup(true) — the staged data path. - __manifest publisher: MergeInsertBuilder::skip_auto_cleanup(true). - all 11 WriteParams: skip_auto_cleanup: true (direct Dataset::write/append paths; auto_cleanup: None retained so new datasets store no cleanup config at all). Tests: - lance_surface_guards::skip_auto_cleanup_suppresses_version_gc — substrate: negative control (config GCs v1 without skip) + with-skip survival. 
- staged_writes::commit_staged_skips_auto_cleanup_so_pinned_versions_survive — omnigraph usage: commit_staged on a legacy-config dataset preserves the pinned create version. Refs lance#6755. --- crates/omnigraph/src/db/commit_graph.rs | 2 + crates/omnigraph/src/db/manifest/graph.rs | 2 + crates/omnigraph/src/db/manifest/publisher.rs | 6 + crates/omnigraph/src/db/recovery_audit.rs | 1 + crates/omnigraph/src/table_store.rs | 15 +++ .../omnigraph/tests/lance_surface_guards.rs | 105 +++++++++++++++++- crates/omnigraph/tests/staged_writes.rs | 51 +++++++++ docs/dev/lance.md | 2 +- 8 files changed, 182 insertions(+), 2 deletions(-) diff --git a/crates/omnigraph/src/db/commit_graph.rs b/crates/omnigraph/src/db/commit_graph.rs index 6bebe018..3d90e54a 100644 --- a/crates/omnigraph/src/db/commit_graph.rs +++ b/crates/omnigraph/src/db/commit_graph.rs @@ -58,6 +58,7 @@ impl CommitGraph { enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; let dataset = Dataset::write(reader, &uri as &str, Some(params)) @@ -432,6 +433,7 @@ async fn create_commit_actor_dataset(root_uri: &str) -> Result { enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; match Dataset::write(reader, &uri as &str, Some(params)).await { diff --git a/crates/omnigraph/src/db/manifest/graph.rs b/crates/omnigraph/src/db/manifest/graph.rs index a805b968..da2c641d 100644 --- a/crates/omnigraph/src/db/manifest/graph.rs +++ b/crates/omnigraph/src/db/manifest/graph.rs @@ -32,6 +32,7 @@ pub(super) async fn init_manifest_graph( enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; let manifest_path = manifest_uri(root); @@ -129,6 +130,7 @@ async fn create_empty_dataset(uri: &str, schema: &SchemaRef) -> Result data_storage_version: 
Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; Dataset::write(reader, uri, Some(params)) diff --git a/crates/omnigraph/src/db/manifest/publisher.rs b/crates/omnigraph/src/db/manifest/publisher.rs index d13dd082..288f4be2 100644 --- a/crates/omnigraph/src/db/manifest/publisher.rs +++ b/crates/omnigraph/src/db/manifest/publisher.rs @@ -381,6 +381,12 @@ impl GraphNamespacePublisher { // the publisher loop above, where each attempt re-runs the pre-check. merge_builder.conflict_retries(0); merge_builder.use_index(false); + // Skip Lance's auto-cleanup hook: `__manifest` versions are the snapshot + // / time-travel authority and must never be GC'd by Lance's per-commit + // hook. A `__manifest` created before the v7 bump (6.0.1 defaulted + // auto_cleanup ON) still carries the stored config, so this skip is + // load-bearing on upgraded graphs, not just defensive. + merge_builder.skip_auto_cleanup(true); let (new_dataset, _stats) = merge_builder .try_build() .map_err(|e| OmniError::Lance(e.to_string()))? 
diff --git a/crates/omnigraph/src/db/recovery_audit.rs b/crates/omnigraph/src/db/recovery_audit.rs index 5d62f581..05d84b8f 100644 --- a/crates/omnigraph/src/db/recovery_audit.rs +++ b/crates/omnigraph/src/db/recovery_audit.rs @@ -190,6 +190,7 @@ async fn create_recoveries_dataset(root_uri: &str) -> Result { enable_stable_row_ids: true, data_storage_version: Some(LanceFileVersion::V2_2), auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; match Dataset::write(reader, &uri as &str, Some(params)).await { diff --git a/crates/omnigraph/src/table_store.rs b/crates/omnigraph/src/table_store.rs index d66db963..b458aec5 100644 --- a/crates/omnigraph/src/table_store.rs +++ b/crates/omnigraph/src/table_store.rs @@ -776,6 +776,7 @@ impl TableStore { mode: WriteMode::Append, allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; ds.append(reader, Some(params)) @@ -796,6 +797,7 @@ impl TableStore { mode: WriteMode::Append, allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; ds.append(reader, Some(params)) @@ -810,6 +812,7 @@ impl TableStore { data_storage_version: Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; Dataset::write(reader, dataset_uri, Some(params)) @@ -901,6 +904,7 @@ impl TableStore { mode: WriteMode::Append, allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; let transaction = InsertBuilder::new(Arc::new(ds.clone())) @@ -1074,7 +1078,16 @@ impl TableStore { ds: Arc, transaction: Transaction, ) -> Result { + // Skip Lance's auto-cleanup hook on every commit. 
OmniGraph owns version + // GC explicitly (optimize.rs::cleanup_all_tables); Lance's hook fires off + // the *dataset's stored* `lance.auto_cleanup.*` config, which graphs + // created before the v7 bump (6.0.1 defaulted auto_cleanup ON) still + // carry — so `WriteParams::auto_cleanup = None` alone does NOT stop it on + // upgraded graphs. Skipping here covers the staged write path (the main + // data path) for new and legacy datasets alike, preventing Lance from + // GC'ing versions the __manifest still pins for snapshots/time-travel. CommitBuilder::new(ds) + .with_skip_auto_cleanup(true) .execute(transaction) .await .map_err(|e| OmniError::Lance(e.to_string())) @@ -1122,6 +1135,7 @@ impl TableStore { enable_stable_row_ids: true, allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; let transaction = InsertBuilder::new(Arc::new(ds.clone())) @@ -1539,6 +1553,7 @@ impl TableStore { data_storage_version: Some(LanceFileVersion::V2_2), allow_external_blob_outside_bases: true, auto_cleanup: None, + skip_auto_cleanup: true, ..Default::default() }; Dataset::write(reader, dataset_uri, Some(params)) diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs index 07db5eda..d4731420 100644 --- a/crates/omnigraph/tests/lance_surface_guards.rs +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -32,7 +32,10 @@ use lance::dataset::builder::DatasetBuilder; use lance::dataset::optimize::{CompactionOptions, compact_files}; use lance::dataset::transaction::Operation; use lance::dataset::write::delete::DeleteResult; -use lance::dataset::{MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, WriteParams}; +use lance::dataset::{ + CommitBuilder, InsertBuilder, MergeInsertBuilder, WhenMatched, WhenNotMatched, WriteMode, + WriteParams, +}; use lance::index::DatasetIndexExt; use lance_file::version::LanceFileVersion; use lance_index::IndexType; @@ -815,3 +818,103 @@ 
async fn btree_range_query_boundary_is_correct() { Lance reintroduced the range-bound inclusiveness bug.", ); } + +// --- Guard 18: skip_auto_cleanup suppresses version GC (lance#6755 / PR #229) -- +// +// After the v7 bump, OmniGraph relies on `CommitBuilder::with_skip_auto_cleanup` +// (`commit_staged`) and `MergeInsertBuilder::skip_auto_cleanup` (the `__manifest` +// publisher) to stop Lance's per-commit auto-cleanup hook from GC'ing versions +// the `__manifest` pins for snapshots/time-travel. This is load-bearing for +// graphs created BEFORE the bump: 6.0.1 defaulted `WriteParams::auto_cleanup` ON, +// so those datasets carry `lance.auto_cleanup.*` config that `auto_cleanup = None` +// on new writes cannot retroactively clear — only the per-commit skip stops it. +// +// Pins both halves: WITHOUT the skip the aggressive config GCs v1; WITH the skip +// (the exact call `commit_staged` makes) v1 survives. +#[tokio::test] +async fn skip_auto_cleanup_suppresses_version_gc() { + use std::collections::HashMap; + + // The cleanup config 6.0.1 stored by default, made aggressive: fire on every + // commit, delete anything older than now. + async fn set_legacy_cleanup(ds: &mut Dataset) { + let mut cfg = HashMap::new(); + cfg.insert("lance.auto_cleanup.interval".to_string(), "1".to_string()); + cfg.insert("lance.auto_cleanup.older_than".to_string(), "0ms".to_string()); + ds.update_config(cfg).await.unwrap(); + } + fn row(i: i32) -> (Arc, RecordBatch) { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Utf8, false), + Field::new("value", DataType::Int32, false), + ])); + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(vec![format!("k{i}")])), + Arc::new(Int32Array::from(vec![i])), + ], + ) + .unwrap(); + (schema, batch) + } + + // Negative control: WITHOUT skip, the legacy config GCs the pinned v1. 
+ let ctrl = tempfile::tempdir().unwrap(); + let curi = ctrl.path().join("g18_ctrl.lance"); + let curi = curi.to_str().unwrap(); + let mut ds = fresh_dataset(curi).await; + let v1 = ds.version().version; + set_legacy_cleanup(&mut ds).await; + for i in 0..5 { + let (schema, batch) = row(i); + let reader = RecordBatchIterator::new(vec![Ok(batch)], schema); + ds.append( + reader, + Some(WriteParams { + mode: WriteMode::Append, + ..Default::default() + }), + ) + .await + .unwrap(); + } + assert!( + ds.checkout_version(v1).await.is_err(), + "negative control: without skip_auto_cleanup, the legacy auto_cleanup \ + config should have GC'd pinned v{v1}; if this fails the config is not \ + firing and the positive assertion below proves nothing." + ); + + // The guarantee: WITH the per-commit skip, v1 survives. Mirrors + // `TableStore::commit_staged` (InsertBuilder::execute_uncommitted + + // CommitBuilder::with_skip_auto_cleanup(true)). + let keep = tempfile::tempdir().unwrap(); + let kuri = keep.path().join("g18.lance"); + let kuri = kuri.to_str().unwrap(); + let mut ds = fresh_dataset(kuri).await; + let v1 = ds.version().version; + set_legacy_cleanup(&mut ds).await; + for i in 0..5 { + let (_schema, batch) = row(i); + let tx = InsertBuilder::new(Arc::new(ds.clone())) + .with_params(&WriteParams { + mode: WriteMode::Append, + ..Default::default() + }) + .execute_uncommitted(vec![batch]) + .await + .unwrap(); + ds = CommitBuilder::new(Arc::new(ds.clone())) + .with_skip_auto_cleanup(true) + .execute(tx) + .await + .unwrap(); + } + assert!( + ds.checkout_version(v1).await.is_ok(), + "v{v1} was GC'd despite CommitBuilder::with_skip_auto_cleanup(true) — the \ + commit_staged / publisher skip is the only thing protecting \ + __manifest-pinned versions on upgraded (pre-bump) graphs." 
+ ); +} diff --git a/crates/omnigraph/tests/staged_writes.rs b/crates/omnigraph/tests/staged_writes.rs index 3771ad42..cf0e04ce 100644 --- a/crates/omnigraph/tests/staged_writes.rs +++ b/crates/omnigraph/tests/staged_writes.rs @@ -1046,3 +1046,54 @@ async fn lance_restore_loses_to_concurrent_append_via_orphaning() { let v2_ids = collect_ids(&v2_batches); assert_eq!(v2_ids, vec!["alice".to_string(), "bob".to_string()]); } + +/// Regression for PR #229: `commit_staged` must skip Lance's per-commit +/// auto-cleanup hook. A graph created BEFORE the v7 bump (6.0.1 defaulted +/// `WriteParams::auto_cleanup` ON) carries `lance.auto_cleanup.*` config on its +/// datasets that `auto_cleanup = None` on new writes cannot retroactively clear; +/// Lance's hook fires off that *stored* config at commit time. Without the skip, +/// the engine's own writes would GC the versions `__manifest` pins for +/// snapshots/time-travel. (The substrate negative control — that the config +/// really does GC without the skip — lives in +/// `lance_surface_guards.rs::skip_auto_cleanup_suppresses_version_gc`.) +#[tokio::test] +async fn commit_staged_skips_auto_cleanup_so_pinned_versions_survive() { + use std::collections::HashMap; + + let dir = tempfile::tempdir().unwrap(); + let uri = format!("{}/people.lance", dir.path().to_str().unwrap()); + let store = TableStore::new(dir.path().to_str().unwrap()); + + let mut ds = TableStore::write_dataset(&uri, person_batch(&[("seed", Some(0))])) + .await + .unwrap(); + let v1 = ds.version().version; + + // Simulate a pre-bump dataset: aggressive legacy auto_cleanup config (fire on + // every commit, delete anything older than now). + let mut cfg = HashMap::new(); + cfg.insert("lance.auto_cleanup.interval".to_string(), "1".to_string()); + cfg.insert("lance.auto_cleanup.older_than".to_string(), "0ms".to_string()); + ds.update_config(cfg).await.unwrap(); + + // Several writes through the engine's staged commit path. 
+ for i in 0..5i32 { + let name = format!("p{i}"); + let staged = store + .stage_append(&ds, person_batch(&[(name.as_str(), Some(i))]), &[]) + .await + .unwrap(); + ds = store + .commit_staged(Arc::new(ds.clone()), staged.transaction) + .await + .unwrap(); + } + + // `commit_staged` sets `with_skip_auto_cleanup(true)`, so the legacy config + // must NOT have GC'd the `__manifest`-pinned create version. + assert!( + ds.checkout_version(v1).await.is_ok(), + "commit_staged must skip Lance auto-cleanup so a pre-bump graph's pinned \ + v{v1} survives; it was GC'd" + ); +} diff --git a/docs/dev/lance.md b/docs/dev/lance.md index 172c8afd..2ad1273b 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -164,7 +164,7 @@ Migration from Lance 6.0.1 → 7.0.0 landed in this cycle. **Arrow stayed 58, Da - **`roaring` must be pinned to 0.11.4** (`cargo update -p roaring --precise 0.11.4`). Lance 7.0.0's `UpdatedFragmentOffsets` newtype (PR #6650) derives `Eq` over `HashMap`, which needs `RoaringBitmap: Eq` — added only in roaring 0.11.4 (roaring-rs PR #341). Lance's loose `roaring = "0.11"` constraint otherwise resolves the broken 0.11.3 and **lance itself fails to compile** (`RoaringBitmap: Eq is not satisfied`). roaring is transitive (no direct workspace dep); the pin lives only in `Cargo.lock`. - **`_row_created_at_version` for merge-insert INSERT rows now = the commit version** (PR #6774; was a fallback of 1 / dataset-creation version). Flipped `lance_version_columns.rs::lance_merge_insert_new_row_stamps_created_at_version` to assert `== v2`. Production change-detection keys on `_row_last_updated_at_version` + ID-set membership, so classification logic is unaffected (the `changes/mod.rs` rationale comment was corrected). - **BTREE range-query bound inclusiveness fixed** (PR #6796, issue #6792): `x <= hi AND x > lo` returned the wrong boundary row on 6.0.1. 
omnigraph today builds BTREE only on string `@key` columns (`id`/`src`/`dst`) and queries them by equality/IN, not range, so its *current* query patterns almost certainly never hit this bug — but the corrected boundary semantics are a contract we rely on the moment a BTREE-range path appears (BTREE-on-properties via the index-type tickets, or a range-on-key query). Pinned by `lance_surface_guards.rs::btree_range_query_boundary_is_correct` (reproduces #6792's 5-row + BTREE shape). -- **`WriteParams::auto_cleanup` default flipped from on (every-20-commits) to `None`** (PR #6755). On 6.0.1 the on-by-default hook could GC versions the `__manifest` pins for snapshots/time-travel. omnigraph owns cleanup explicitly (`optimize.rs::cleanup_all_tables`) and now pins `auto_cleanup: None` at all 11 production `WriteParams` sites — no dependency on the upstream default, and the snapshot-safe behavior locked in regardless of future re-flips. +- **`WriteParams::auto_cleanup` default flipped from on (every-20-commits) to `None`** (PR #6755). On 6.0.1 the on-by-default hook could GC versions the `__manifest` pins for snapshots/time-travel. omnigraph owns cleanup explicitly (`optimize.rs::cleanup_all_tables`). Two parts to the fix, because `auto_cleanup` is **create-time config only and has no effect on existing datasets** (Lance `write.rs` docs): (1) `auto_cleanup: None` at all 11 `WriteParams` sites so *new* datasets store no cleanup config; (2) — the load-bearing half — `skip_auto_cleanup: true` on every commit path, because graphs created **before** the bump still carry the on-config in their datasets, and Lance's hook fires off the *dataset's stored* config at commit time (`io/commit.rs`: `if !commit_config.skip_auto_cleanup`). So the staged commit path (`commit_staged` → `CommitBuilder::with_skip_auto_cleanup(true)`), the `__manifest` publisher (`MergeInsertBuilder::skip_auto_cleanup(true)`), and the direct `WriteParams` paths all skip the hook. 
Without this, an upgraded graph would still auto-cleanup and delete `__manifest`-pinned versions. Pinned by `lance_surface_guards.rs::skip_auto_cleanup_suppresses_version_gc` (negative control + with-skip survival). - **Lance #6658 SHIPPED in 7.0.0** (`DeleteBuilder::execute_uncommitted`, exposed via PR #6781) → MR-A (migrate `delete_where` to the staged two-phase API, retire the parse-time D2 rule) is now **unblocked**, tracked separately (dev-graph `iss-950`). The bump itself keeps `delete_where` inline; the `_compile_delete_result_field_shape` guard is left untouched until MR-A. - **Still NOT fixed in 7.0.0:** vector-index two-phase (Lance #6666 open) — `create_vector_index` inline residual retained; blob-column compaction — `compact_files_still_fails_on_blob_columns` guard still red on a fix, `optimize` still skips blob tables behind `LANCE_SUPPORTS_BLOB_COMPACTION`. - **No Lance-API surface omnigraph uses changed 6.0.1 → 7.0.0** (verified by a clean engine build; the only compile break was object_store). `CleanupPolicy`, `WriteParams` (apart from the `auto_cleanup` default), `CompactionOptions`, the namespace models (resolved via `lance-namespace-reqwest-client` 0.7.7, unchanged across the bump), `Operation`, `ManifestLocation`, and `MergeInsertBuilder` shapes are all stable. From e2099fc3fe3ecb936e925a6fb2e5d804cba29149 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 14 Jun 2026 20:34:42 +0200 Subject: [PATCH 6/6] test(lance): assert created_at-preserved + updated_at-bumped on merge_insert UPDATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR #229 review follow-up. `lance_merge_insert_update_preserves_created_at_version` documented (in a comment) that a merge_insert UPDATE preserves created_at and bumps updated_at, but only asserted the value change — leaving the change-feed invariant unguarded. 
Add the two missing assertions: - bob created_at == v1 (preserved across UPDATE; what the test name promises; lance#6774 only changed INSERT-row stamping). - bob updated_at == v2 (bumped to the commit version) — the invariant OmniGraph's insert/update classification relies on (changes/mod.rs keys on _row_last_updated_at_version). A regression here would silently drop updates from the diff/change feed. --- crates/omnigraph/tests/lance_version_columns.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/omnigraph/tests/lance_version_columns.rs b/crates/omnigraph/tests/lance_version_columns.rs index 4b734587..fbe0cb48 100644 --- a/crates/omnigraph/tests/lance_version_columns.rs +++ b/crates/omnigraph/tests/lance_version_columns.rs @@ -260,11 +260,24 @@ async fn lance_merge_insert_update_preserves_created_at_version() { assert_eq!(alice.2, v1, "alice created_at should still be v1"); assert_eq!(alice.3, v1, "alice updated_at should still be v1"); - // Bob: updated via merge_insert - // created_at should be preserved (v1), updated_at should be bumped (v2) + // Bob: updated via merge_insert. eprintln!( "Bob: created_at={}, updated_at={}, v1={}, v2={}", bob.2, bob.3, v1, v2 ); assert_eq!(bob.1, 99, "bob's value should be updated to 99"); + // created_at is preserved across an UPDATE (lance#6774 only changed the + // INSERT-row stamping), which is what this test's name promises. + assert_eq!( + bob.2, v1, + "bob created_at must be preserved across a merge_insert UPDATE" + ); + // updated_at bumps to the commit version on UPDATE — the change-feed + // invariant OmniGraph's insert/update classification relies on + // (changes/mod.rs keys on _row_last_updated_at_version). If this regresses, + // the diff/change feed silently misses updates. + assert_eq!( + bob.3, v2, + "bob updated_at must bump to the commit version on a merge_insert UPDATE" + ); }