From 73e1231bb3cb861a561fdcbb00ca3ff7dd5c41db Mon Sep 17 00:00:00 2001 From: Louis-Vincent Date: Thu, 9 Apr 2026 13:04:09 -0400 Subject: [PATCH] refactor watcher module with proper Stream API + upgraded etcd client --- .gitignore | 3 +- Cargo.lock | 789 +++++++++++++++++++---------------- Cargo.toml | 24 +- compose.yaml | 12 +- src/channel.rs | 524 ++++++++++++++++++++++++ src/lease.rs | 2 +- src/lib.rs | 6 + src/lock.rs | 74 ++-- src/log.rs | 41 +- src/retry.rs | 73 ++-- src/sync/watch.rs | 2 +- src/tonic/discovery.rs | 416 +++++++++++++++++++ src/tonic/mod.rs | 3 + src/tonic/transport.rs | 689 +++++++++++++++++++++++++++++++ src/watcher.rs | 905 +++++++++++++++++++++++++++++++---------- tests/common.rs | 143 ++++++- tests/test_channel.rs | 643 +++++++++++++++++++++++++++++ tests/test_lock.rs | 2 +- tests/test_log.rs | 2 +- tests/test_watcher.rs | 269 ++++++++++++ 20 files changed, 3952 insertions(+), 670 deletions(-) create mode 100644 src/channel.rs create mode 100644 src/tonic/discovery.rs create mode 100644 src/tonic/mod.rs create mode 100644 src/tonic/transport.rs create mode 100644 tests/test_channel.rs create mode 100644 tests/test_watcher.rs diff --git a/.gitignore b/.gitignore index 1de5659..7be846b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -target \ No newline at end of file +target +.vscode \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 4e5d688..81a4f07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,35 +2,20 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" - [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "async-trait" @@ -40,7 +25,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -51,9 +36,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "axum" -version = "0.8.4" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", @@ -67,8 +52,7 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", - "rustversion", - "serde", + "serde_core", "sync_wrapper", "tower", "tower-layer", @@ -77,9 +61,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.2" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", @@ -88,27 +72,11 @@ dependencies = [ "http-body-util", "mime", "pin-project-lite", - "rustversion", "sync_wrapper", "tower-layer", "tower-service", ] -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link", -] - [[package]] name = "base64" version = "0.22.1" @@ -117,21 +85,41 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "dashmap" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] [[package]] name = "either" @@ -152,14 +140,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.1", + "windows-sys", ] [[package]] name = "etcd-client" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acfe553027cd07fc5fafa81a84f19a7a87eaffaccd2162b6db05e8d6ce98084" +checksum = "5ed900ba953ca6bf1fadb75e0c6b73d8463b9e2bb6bdb7b4573e8e7295852fbe" dependencies = [ "http", "prost", @@ -171,13 +159,14 @@ dependencies = [ "tonic-prost-build", "tower", "tower-service", + "visible", ] [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "fixedbitset" @@ -191,11 +180,17 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -208,9 +203,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -218,15 +213,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -235,38 +230,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -276,44 +271,50 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "r-efi 5.3.0", + "wasip2", ] [[package]] -name = "gimli" -version = "0.32.3" +name = "getrandom" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -330,9 +331,24 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "heck" @@ -342,12 +358,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -388,9 +403,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.7.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -403,7 +418,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -424,13 +438,12 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -444,24 +457,21 @@ dependencies = [ ] [[package]] -name = "indexmap" -version = "2.11.4" +name = "id-arena" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" -dependencies = [ - "equivalent", - "hashbrown", -] +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" [[package]] -name = "io-uring" -version = "0.7.10" +name = "indexmap" +version = "2.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff" dependencies = [ - "bitflags", - "cfg-if", - "libc", + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -475,9 +485,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "lazy_static" @@ -485,23 +495,38 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.176" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "matchers" @@ -520,9 +545,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mime" @@ -530,24 +555,15 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "miniz_oxide" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", -] - [[package]] name = "mio" -version = "1.0.4" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys", ] [[package]] @@ -558,27 +574,31 @@ checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] [[package]] -name = "object" -version = "0.37.3" +name = "once_cell" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] -name = "once_cell" -version = "1.21.3" +name = "parking_lot_core" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] [[package]] name = "percent-encoding" @@ -588,45 +608,40 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", + "hashbrown 0.15.5", "indexmap", ] [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "ppv-lite86" @@ -644,23 +659,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -668,15 +683,14 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", @@ -684,37 +698,37 @@ dependencies = [ "pulldown-cmark", "pulldown-cmark-to-cmark", "regex", - "syn", + "syn 2.0.117", "tempfile", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] [[package]] name = "pulldown-cmark" -version = "0.13.0" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" dependencies = [ "bitflags", "memchr", @@ -723,18 +737,18 @@ dependencies = [ [[package]] name = "pulldown-cmark-to-cmark" -version = "21.0.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b6a0769a491a08b31ea5c62494a8f144ee0987d86d670a8af4df1e1b7cde75" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -745,6 +759,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.8.5" @@ -763,7 +783,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -783,7 +803,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -792,23 +812,32 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "getrandom 0.3.3", + "bitflags", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -818,9 +847,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -829,26 +858,33 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "retry" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e211f878258887b3e65dd3c8ff9f530fe109f441a117ee0cdc27f341355032" +checksum = "1cab9bd343c737660e523ee69f788018f3db686d537d2fd0f99c9f747c1bda4f" dependencies = [ "rand 0.9.2", ] [[package]] name = "rust-etcd-utils" -version = "0.13.0" +version = "0.14.0" dependencies = [ "async-trait", + "bytes", + "dashmap", "etcd-client", "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "pin-project", "rand 0.8.5", "retry", "serde", @@ -857,46 +893,41 @@ dependencies = [ "tokio", "tokio-stream", "tonic", + "tower", "tracing", "tracing-subscriber", ] -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.1", + "windows-sys", ] [[package]] -name = "rustversion" -version = "1.0.22" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "ryu" -version = "1.0.20" +name = "semver" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "serde" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", "serde_derive", @@ -904,35 +935,35 @@ dependencies = [ [[package]] name = "serde_core" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.227" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -946,9 +977,9 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -958,19 +989,30 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "syn" -version = "2.0.106" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -985,35 +1027,35 @@ checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" [[package]] name = "tempfile" -version = "3.23.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.1", + "windows-sys", ] [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1027,38 +1069,35 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "pin-project-lite", - "slab", "socket2", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -1067,9 +1106,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -1080,9 +1119,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", @@ -1109,21 +1148,21 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +checksum = "1882ac3bf5ef12877d7ed57aad87e75154c11931c2ba7e6cde5e22d63522c734" dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" dependencies = [ "bytes", "prost", @@ -1132,25 +1171,25 @@ dependencies = [ [[package]] name = "tonic-prost-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +checksum = "f3144df636917574672e93d0f56d7edec49f90305749c668df5101751bb8f95a" dependencies = [ "prettyplease", "proc-macro2", "prost-build", "prost-types", "quote", - "syn", + "syn 2.0.117", "tempfile", "tonic-build", ] [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -1179,9 +1218,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -1190,20 +1229,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -1222,9 +1261,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "matchers", "nu-ansi-term", @@ -1246,15 +1285,21 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "valuable" @@ -1262,6 +1307,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "visible" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a044005fd5c0fc1ebd79c622e5606431c6b879a6a19acafb754be9926a2de73e" +dependencies = [ + "quote", + "syn 1.0.109", +] + [[package]] name = "want" version = "0.3.1" @@ -1277,143 +1332,189 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.46.0", ] [[package]] -name = "windows-link" -version = "0.2.0" +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "windows-targets", + "wit-bindgen 0.51.0", ] [[package]] -name = "windows-sys" -version = "0.59.0" +name = "wasm-encoder" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" dependencies = [ - "windows-targets", + "leb128fmt", + "wasmparser", ] [[package]] -name = "windows-sys" -version = "0.61.1" +name = "wasm-metadata" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ - "windows-link", + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", ] [[package]] -name = "windows-targets" -version = "0.52.6" +name = "wasmparser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" +name = "windows-link" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] [[package]] -name = "windows_i686_gnu" -version = "0.52.6" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] [[package]] -name = "windows_i686_msvc" -version = "0.52.6" +name = "wit-bindgen-core" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] [[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" +name = "wit-bindgen-rust" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] [[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" +name = "wit-bindgen-rust-macro" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] [[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" +name = "wit-component" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] [[package]] -name = "wit-bindgen" -version = "0.46.0" +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index a6b9e3f..1d74d6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "rust-etcd-utils" -version = "0.13.0" +version = "0.14.0" authors = [ - "Triton One", - "Louis-Vincent Boudreault" + "Louis-Vincent Boudreault", + "Triton One" ] publish = true edition = "2024" @@ -12,20 +12,32 @@ license = "AGPL-3.0" homepage = "https://triton.one" repository = "https://github.com/rpcpool/rust-etcd-utils" +[features] +default = [] +unstable = [] + [dependencies] async-trait = "^0.1.83" -etcd-client = "^0.17.0" +dashmap = "6.1.0" +etcd-client = { version = "^0.18.0", features = ["pub-response-field"] } futures = "^0.3.31" +http = "1.4.0" +pin-project = "1.1.11" rand = "^0.8.5" retry = "2" serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "2" -tokio = { version = "1", features = ["rt-multi-thread", "time", "sync"] } +tokio = { version = "1.43.1", features = ["rt-multi-thread", "time", "sync"] } tokio-stream = "^0.1.16" tonic = "^0.14.2" +tower = "0.5" tracing = "^0.1.40" [dev-dependencies] -tracing-subscriber = { version = "^0.3.1", features = ["ansi", "env-filter"] } \ No newline at end of file +tracing-subscriber = { version = "^0.3.1", features = ["ansi", "env-filter"] } +bytes = "1.11.1" +http-body-util = "0.1" +hyper = { version = "1", features = ["http2", "server"] } +hyper-util = { version = "0.1", features = ["server", "tokio", "http2"] } diff --git a/compose.yaml b/compose.yaml index 21c2322..032f53e 100644 --- a/compose.yaml +++ b/compose.yaml @@ -2,10 +2,18 @@ version: '3' services: etcd: - image: bitnamilegacy/etcd:3.5 + image: quay.io/coreos/etcd:v3.5.0 + container_name: etcd environment: - - ALLOW_NONE_AUTHENTICATION=yes + - ETCD_NAME=etcd1 + - ETCD_DATA_DIR=/etcd-data + - ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:2379 - ETCD_ADVERTISE_CLIENT_URLS=http://etcd:2379 + - ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 + - ETCD_INITIAL_ADVERTISE_PEER_URLS=http://etcd:2380 + - ETCD_INITIAL_CLUSTER=etcd1=http://etcd:2380 + - ETCD_INITIAL_CLUSTER_TOKEN=etcd-single + - ETCD_INITIAL_CLUSTER_STATE=new ports: - "2379:2379" - "2380:2380" \ No newline at end of file diff --git a/src/channel.rs b/src/channel.rs new file mode 100644 index 0000000..e82b24f --- /dev/null +++ b/src/channel.rs @@ -0,0 +1,524 @@ +use { + etcd_client::{BalancedChannelBuilder, Channel, Client, ConnectOptions, Error}, + std::{ + collections::{HashMap, HashSet}, + sync::{Arc, Mutex}, + time::Duration, + }, + tokio::{ + sync::mpsc, + time::{interval, timeout}, + }, + tonic::transport::{self, Endpoint}, + tower::{balance::p2c::Balance, buffer::Buffer, util::BoxCloneSyncService}, + tracing::{debug, info, warn}, +}; + +use crate::tonic::discovery::QuarantiningDiscover; + +type Uri = tonic::codegen::http::Uri; +type EndpointUpdater = mpsc::Sender>; +type EndpointChange = transport::channel::Change; + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct ReliableChannelStats { + pub quarantine_transitions: u64, + pub active_endpoints: usize, + pub quarantined_endpoints: usize, +} + +#[derive(Debug, Clone, Default)] +pub struct ReliableChannelStatsRegistry { + inner: Arc>, + endpoint_status: Arc>>, + call_counter: Arc>, +} + +impl ReliableChannelStatsRegistry { + pub fn snapshot(&self) -> ReliableChannelStats { + *self + .inner + .lock() + .expect("reliable channel stats lock poisoned") + } + + pub fn call_counts_snapshot(&self) -> HashMap { + self.call_counter + .iter() + .map(|entry| (entry.key().to_string(), *entry.value())) + .collect() + } + + pub fn get_call_count(&self, endpoint: &str) -> Option { + self.call_counter + .iter() + .find(|entry| entry.key().to_string().contains(endpoint)) + .map(|entry| *entry.value()) + } + + pub fn endpoint_status_snapshot(&self) -> HashMap { + self.endpoint_status + .lock() + .expect("endpoint status lock poisoned") + .clone() + } + + pub fn get_endpoint_status(&self, endpoint: &str) -> Option { + self.endpoint_status + .lock() + .expect("endpoint status lock poisoned") + .get(endpoint) + .copied() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EndpointStatus { + Active, + Quarantined, +} + +/// Reliable balanced channel builder with endpoint quarantine and periodic recovery probes. +/// +/// Behavior: +/// - Inserted endpoints are probed before entering the routable pool. +/// - Failing endpoints are quarantined and retried periodically. +/// - Error-reporting transport failures automatically remove endpoints from balancer routing. +#[derive(Debug, Clone)] +pub struct ReliableBalancedChannelBuilder { + pub probe_timeout: Duration, + pub quarantine_retry_interval: Duration, + stats_registry: ReliableChannelStatsRegistry, +} + +impl Default for ReliableBalancedChannelBuilder { + fn default() -> Self { + Self { + probe_timeout: Duration::from_secs(5), + quarantine_retry_interval: Duration::from_secs(15), + stats_registry: ReliableChannelStatsRegistry::default(), + } + } +} + +impl ReliableBalancedChannelBuilder { + pub fn stats_registry(&self) -> ReliableChannelStatsRegistry { + self.stats_registry.clone() + } +} + +impl BalancedChannelBuilder for ReliableBalancedChannelBuilder { + type Error = transport::Error; + + fn balanced_channel( + self, + buffer_size: usize, + ) -> Result<(Channel, EndpointUpdater), Self::Error> { + let balance_buffer_size = buffer_size.max(16); + let (discover_updater, discover_updates_rx) = mpsc::channel(balance_buffer_size); + let (user_updater, user_updates_rx) = mpsc::channel(buffer_size.max(16)); + let (spy_quarantine_tx, spy_quarantine_rx) = mpsc::unbounded_channel(); + + let discover = QuarantiningDiscover::new_with_quarantine_events_and_call_counter( + discover_updates_rx, + Some(spy_quarantine_tx), + Some(self.stats_registry.call_counter.clone()), + ); + let balanced = Balance::new(discover); + let buffered = Buffer::new(balanced, balance_buffer_size); + let custom = BoxCloneSyncService::new(buffered); + + tokio::spawn(run_endpoint_manager( + user_updates_rx, + discover_updater, + spy_quarantine_rx, + self.stats_registry.clone(), + self.probe_timeout, + self.quarantine_retry_interval, + )); + + Ok((Channel::Custom(custom), user_updater)) + } +} + +async fn run_endpoint_manager( + mut user_updates_rx: mpsc::Receiver, + discover_updater: EndpointUpdater, + mut spy_quarantine_rx: mpsc::UnboundedReceiver, + stats_registry: ReliableChannelStatsRegistry, + probe_timeout: Duration, + quarantine_retry_interval: Duration, +) { + let mut desired: HashMap = HashMap::new(); + let mut active: HashSet = HashSet::new(); + let mut quarantined: HashSet = HashSet::new(); + let mut tick = interval(quarantine_retry_interval); + let shared = EndpointManagerShared { + tonic_updater: &discover_updater, + stats_registry: &stats_registry, + probe_timeout, + }; + + loop { + tokio::select! { + maybe_change = user_updates_rx.recv() => { + let Some(change) = maybe_change else { + break; + }; + apply_user_change( + change, + &mut desired, + &mut active, + &mut quarantined, + &shared, + ).await; + } + maybe_quarantine = spy_quarantine_rx.recv() => { + let Some(uri) = maybe_quarantine else { + continue; + }; + apply_spy_quarantine( + uri, + &mut active, + &mut quarantined, + &shared, + ); + } + _ = tick.tick() => { + retry_quarantined( + &desired, + &mut active, + &mut quarantined, + &shared, + ).await; + } + } + } +} + +struct EndpointManagerShared<'a> { + tonic_updater: &'a EndpointUpdater, + stats_registry: &'a ReliableChannelStatsRegistry, + probe_timeout: Duration, +} + +async fn apply_user_change( + change: EndpointChange, + desired: &mut HashMap, + active: &mut HashSet, + quarantined: &mut HashSet, + shared: &EndpointManagerShared<'_>, +) { + match change { + EndpointChange::Insert(uri, endpoint) => { + desired.insert(uri.clone(), endpoint.clone()); + if endpoint_healthy(&endpoint, shared.probe_timeout).await { + quarantined.remove(&uri); + if active.insert(uri.clone()) { + set_endpoint_status(shared.stats_registry, &uri, EndpointStatus::Active); + let _ = shared + .tonic_updater + .send(EndpointChange::Insert(uri.clone(), endpoint)) + .await; + info!(endpoint = %uri, "endpoint added to active pool"); + } + } else { + if quarantined.insert(uri.clone()) { + increment_quarantine_transition(shared.stats_registry); + } + set_endpoint_status(shared.stats_registry, &uri, EndpointStatus::Quarantined); + if active.remove(&uri) { + let _ = shared + .tonic_updater + .send(EndpointChange::Remove(uri.clone())) + .await; + } + warn!(endpoint = %uri, "endpoint moved to quarantine"); + } + } + EndpointChange::Remove(uri) => { + desired.remove(&uri); + quarantined.remove(&uri); + remove_endpoint_status(shared.stats_registry, &uri); + if active.remove(&uri) { + let _ = shared + .tonic_updater + .send(EndpointChange::Remove(uri.clone())) + .await; + } + debug!(endpoint = %uri, "endpoint removed by caller"); + } + } + + set_current_counts(shared.stats_registry, active.len(), quarantined.len()); +} + +fn apply_spy_quarantine( + uri: Uri, + active: &mut HashSet, + quarantined: &mut HashSet, + shared: &EndpointManagerShared<'_>, +) { + if active.remove(&uri) { + if quarantined.insert(uri.clone()) { + increment_quarantine_transition(shared.stats_registry); + } + set_endpoint_status(shared.stats_registry, &uri, EndpointStatus::Quarantined); + warn!(endpoint = %uri, "endpoint quarantined from error-reporting transport error"); + } + + set_current_counts(shared.stats_registry, active.len(), quarantined.len()); +} + +async fn retry_quarantined( + desired: &HashMap, + active: &mut HashSet, + quarantined: &mut HashSet, + shared: &EndpointManagerShared<'_>, +) { + let quarantine_uris: Vec = quarantined.iter().cloned().collect(); + for uri in quarantine_uris { + let Some(endpoint) = desired.get(&uri) else { + quarantined.remove(&uri); + continue; + }; + + if endpoint_healthy(endpoint, shared.probe_timeout).await { + tracing::trace!("endpoint probe succeeded during quarantine retry"); + quarantined.remove(&uri); + if active.insert(uri.clone()) { + set_endpoint_status(shared.stats_registry, &uri, EndpointStatus::Active); + let _ = shared + .tonic_updater + .send(EndpointChange::Insert(uri.clone(), endpoint.clone())) + .await; + info!(endpoint = %uri, "endpoint recovered from quarantine"); + } + } + } + + set_current_counts(shared.stats_registry, active.len(), quarantined.len()); +} + +fn increment_quarantine_transition(stats_registry: &ReliableChannelStatsRegistry) { + let mut stats = stats_registry + .inner + .lock() + .expect("reliable channel stats lock poisoned"); + stats.quarantine_transitions = stats.quarantine_transitions.saturating_add(1); +} + +fn set_current_counts( + stats_registry: &ReliableChannelStatsRegistry, + active_count: usize, + quarantined_count: usize, +) { + let mut stats = stats_registry + .inner + .lock() + .expect("reliable channel stats lock poisoned"); + stats.active_endpoints = active_count; + stats.quarantined_endpoints = quarantined_count; +} + +fn set_endpoint_status( + stats_registry: &ReliableChannelStatsRegistry, + uri: &Uri, + status: EndpointStatus, +) { + stats_registry + .endpoint_status + .lock() + .expect("endpoint status lock poisoned") + .insert(uri.to_string(), status); +} + +fn remove_endpoint_status(stats_registry: &ReliableChannelStatsRegistry, uri: &Uri) { + stats_registry + .endpoint_status + .lock() + .expect("endpoint status lock poisoned") + .remove(&uri.to_string()); +} + +async fn endpoint_healthy(endpoint: &Endpoint, probe_timeout: Duration) -> bool { + match timeout(probe_timeout, endpoint.clone().connect()).await { + Ok(Ok(_)) => true, + Ok(Err(e)) => { + debug!(error = %e, "endpoint probe failed"); + false + } + Err(_) => false, + } +} + +/// Connect using the reliable balanced channel builder. +pub async fn connect_with_reliable_balanced_channel( + endpoints: S, + options: Option, +) -> Result +where + E: AsRef, + S: AsRef<[E]>, +{ + Client::connect_with_balanced_channel( + endpoints, + options, + ReliableBalancedChannelBuilder::default(), + ) + .await +} + +#[cfg(test)] +mod tests { + use super::*; + + fn endpoint(url: &'static str) -> Endpoint { + Endpoint::from_static(url) + } + + fn uri(url: &'static str) -> Uri { + url.parse().expect("valid uri") + } + + #[tokio::test] + async fn apply_spy_quarantine_is_noop_for_non_active_endpoint() { + let stats_registry = ReliableChannelStatsRegistry::default(); + let (tx, _rx) = mpsc::channel(4); + let shared = EndpointManagerShared { + tonic_updater: &tx, + stats_registry: &stats_registry, + probe_timeout: Duration::from_millis(10), + }; + let mut active = HashSet::new(); + let mut quarantined = HashSet::new(); + let target = uri("http://127.0.0.1:21001"); + + apply_spy_quarantine(target.clone(), &mut active, &mut quarantined, &shared); + + assert!(!quarantined.contains(&target)); + assert_eq!(stats_registry.snapshot().quarantine_transitions, 0); + assert!(stats_registry.endpoint_status_snapshot().is_empty()); + } + + #[tokio::test] + async fn apply_spy_quarantine_moves_active_to_quarantined_and_updates_stats() { + let stats_registry = ReliableChannelStatsRegistry::default(); + let (tx, _rx) = mpsc::channel(4); + let shared = EndpointManagerShared { + tonic_updater: &tx, + stats_registry: &stats_registry, + probe_timeout: Duration::from_millis(10), + }; + let mut active = HashSet::new(); + let mut quarantined = HashSet::new(); + let target = uri("http://127.0.0.1:21002"); + active.insert(target.clone()); + + apply_spy_quarantine(target.clone(), &mut active, &mut quarantined, &shared); + + assert!(!active.contains(&target)); + assert!(quarantined.contains(&target)); + assert_eq!(stats_registry.snapshot().quarantine_transitions, 1); + let status = stats_registry + .get_endpoint_status(target.to_string().as_str()) + .expect("status should exist"); + assert_eq!(status, EndpointStatus::Quarantined); + } + + #[tokio::test] + async fn apply_user_remove_clears_status_without_emitting_remove_for_inactive_endpoint() { + let stats_registry = ReliableChannelStatsRegistry::default(); + let (tx, mut rx) = mpsc::channel(4); + let shared = EndpointManagerShared { + tonic_updater: &tx, + stats_registry: &stats_registry, + probe_timeout: Duration::from_millis(10), + }; + let mut desired = HashMap::new(); + let mut active = HashSet::new(); + let mut quarantined = HashSet::new(); + let target = uri("http://127.0.0.1:21003"); + let target_str = target.to_string(); + + desired.insert(target.clone(), endpoint("http://127.0.0.1:21003")); + quarantined.insert(target.clone()); + set_endpoint_status(&stats_registry, &target, EndpointStatus::Quarantined); + + apply_user_change( + EndpointChange::Remove(target.clone()), + &mut desired, + &mut active, + &mut quarantined, + &shared, + ) + .await; + + assert!(!desired.contains_key(&target)); + assert!(!quarantined.contains(&target)); + assert_eq!( + stats_registry.get_endpoint_status(target_str.as_str()), + None + ); + assert!(rx.try_recv().is_err()); + } + + #[tokio::test] + async fn retry_quarantined_removes_orphaned_uris_not_in_desired() { + let stats_registry = ReliableChannelStatsRegistry::default(); + let (tx, mut rx) = mpsc::channel(4); + let shared = EndpointManagerShared { + tonic_updater: &tx, + stats_registry: &stats_registry, + probe_timeout: Duration::from_millis(10), + }; + let desired = HashMap::new(); + let mut active = HashSet::new(); + let mut quarantined = HashSet::new(); + let orphan = uri("http://127.0.0.1:21004"); + quarantined.insert(orphan.clone()); + + retry_quarantined(&desired, &mut active, &mut quarantined, &shared).await; + + assert!(!quarantined.contains(&orphan)); + assert!(rx.try_recv().is_err()); + let stats = stats_registry.snapshot(); + assert_eq!(stats.active_endpoints, 0); + assert_eq!(stats.quarantined_endpoints, 0); + } + + #[tokio::test] + async fn apply_user_insert_unhealthy_endpoint_quarantines_without_insert_event() { + let stats_registry = ReliableChannelStatsRegistry::default(); + let (tx, mut rx) = mpsc::channel(4); + let shared = EndpointManagerShared { + tonic_updater: &tx, + stats_registry: &stats_registry, + probe_timeout: Duration::from_millis(30), + }; + let mut desired = HashMap::new(); + let mut active = HashSet::new(); + let mut quarantined = HashSet::new(); + let target = uri("http://127.0.0.1:1"); + let ep = endpoint("http://127.0.0.1:1"); + + apply_user_change( + EndpointChange::Insert(target.clone(), ep), + &mut desired, + &mut active, + &mut quarantined, + &shared, + ) + .await; + + assert!(desired.contains_key(&target)); + assert!(!active.contains(&target)); + assert!(quarantined.contains(&target)); + assert!(rx.try_recv().is_err()); + assert_eq!(stats_registry.snapshot().quarantine_transitions, 1); + let status = stats_registry + .get_endpoint_status(target.to_string().as_str()) + .expect("status should exist"); + assert_eq!(status, EndpointStatus::Quarantined); + } +} diff --git a/src/lease.rs b/src/lease.rs index da8e095..463bb4f 100644 --- a/src/lease.rs +++ b/src/lease.rs @@ -260,7 +260,7 @@ impl ManagedLeaseFactoryRuntime { } } if let Err(e) = keeper.keep_alive().await { - error!("failed to keep alive lease {lease_id:?}, got {e:?}"); + warn!("failed to keep alive lease {lease_id:?}, got {e:?}"); break 'inner; } last_keep_alive = Instant::now(); diff --git a/src/lib.rs b/src/lib.rs index 987cc97..172d3d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,3 +96,9 @@ pub mod log; /// Utiltities for inter-task communication /// pub mod sync; + +#[cfg(feature = "unstable")] +pub mod channel; + +#[cfg(feature = "unstable")] +pub mod tonic; diff --git a/src/lock.rs b/src/lock.rs index 5139944..3fb7ccf 100644 --- a/src/lock.rs +++ b/src/lock.rs @@ -34,9 +34,9 @@ /// ``` use { super::{ + Revision, lease::{ManagedLease, ManagedLeaseFactory}, retry::retry_etcd_legacy, - Revision, }, crate::{ lease::{LeaseExpiredNotify, ManagedLeaseWeak}, @@ -45,7 +45,10 @@ use { }, core::fmt, etcd_client::{Compare, CompareOp, GetOptions, LockOptions, Txn, TxnOp, TxnResponse}, - futures::{future::join_all, FutureExt}, + futures::{ + FutureExt, StreamExt, + future::{BoxFuture, Shared, join_all}, + }, retry::delay::Fixed, std::{ future::Future, @@ -55,7 +58,7 @@ use { }, thiserror::Error, tokio::{ - sync::{broadcast, mpsc}, + sync::mpsc, task::{JoinError, JoinHandle}, }, tonic::Code, @@ -142,14 +145,14 @@ impl Future for LockManagerHandle { /// ``` /// pub struct ManagedLockRevokeNotify { - watch_lock_delete: broadcast::Receiver, + watch_lock_delete: Shared>, lease_expired_notify: LeaseExpiredNotify, } impl Clone for ManagedLockRevokeNotify { fn clone(&self) -> Self { Self { - watch_lock_delete: self.watch_lock_delete.resubscribe(), + watch_lock_delete: self.watch_lock_delete.clone(), lease_expired_notify: self.lease_expired_notify.clone(), } } @@ -159,14 +162,30 @@ impl ManagedLockRevokeNotify { /// /// Wait for the lock to be revoked. /// - pub async fn wait_for_revoke(mut self) { + pub async fn wait_for_revoke(self) { + let watch_lock_delete = self.watch_lock_delete; tokio::select! { _ = self.lease_expired_notify.recv() => {} - _ = self.watch_lock_delete.recv() => {} + _ = watch_lock_delete => {} } } } +fn make_revoke_callback( + etcd: etcd_client::Client, + lock_key: Vec, + revision: Revision, +) -> Shared> { + let mut watch_stream = etcd + .watch_client() + .watch_lock_key_change_stream(lock_key, revision); + async move { + let _ = watch_stream.next().await; + } + .boxed() + .shared() +} + /// /// Creates a lock manager to create "managed" locks. /// @@ -402,17 +421,14 @@ impl LockManager { } }; - let watch_lock_delete = self - .etcd - .watch_client() - .watch_lock_key_change(lock_key.clone(), revision); + let revoke_callback = make_revoke_callback(self.etcd.clone(), lock_key.clone(), revision); Ok(ManagedLock { lock_key, managed_lease, etcd: self.etcd.clone(), created_at_revision: revision, delete_signal_tx: self.delete_queue_tx.clone(), - revoke_callback_rx: watch_lock_delete.subscribe(), + revoke_callback, }) } @@ -487,17 +503,14 @@ impl LockManager { } }; - let watch_lock_delete = self - .etcd - .watch_client() - .watch_lock_key_change(lock_key.clone(), revision); + let revoke_callback = make_revoke_callback(self.etcd.clone(), lock_key.clone(), revision); Ok(ManagedLock { lock_key, managed_lease, etcd: self.etcd.clone(), created_at_revision: revision, delete_signal_tx: self.delete_queue_tx.clone(), - revoke_callback_rx: watch_lock_delete.subscribe(), + revoke_callback, }) } @@ -562,10 +575,7 @@ impl LockManager { lock_response.key().to_vec(), ); - let watch_lock_delete = self - .etcd - .watch_client() - .watch_lock_key_change(lock_key.clone(), revision); + let revoke_callback = make_revoke_callback(self.etcd.clone(), lock_key.clone(), revision); let managed_lock = ManagedLock { lock_key, @@ -573,7 +583,7 @@ impl LockManager { etcd: self.etcd.clone(), created_at_revision: revision, delete_signal_tx: self.delete_queue_tx.clone(), - revoke_callback_rx: watch_lock_delete.subscribe(), + revoke_callback, }; Ok(managed_lock) @@ -589,7 +599,7 @@ pub struct ManagedLock { pub created_at_revision: Revision, pub(crate) etcd: etcd_client::Client, delete_signal_tx: tokio::sync::mpsc::UnboundedSender, - revoke_callback_rx: broadcast::Receiver, + revoke_callback: Shared>, } impl fmt::Debug for ManagedLock { @@ -675,7 +685,7 @@ impl ManagedLock { /// pub fn get_revoke_notify(&self) -> ManagedLockRevokeNotify { ManagedLockRevokeNotify { - watch_lock_delete: self.revoke_callback_rx.resubscribe(), + watch_lock_delete: self.revoke_callback.clone(), lease_expired_notify: self.managed_lease.get_lease_expire_notify(), } } @@ -744,22 +754,10 @@ impl ManagedLock { F: FnOnce(ManagedLockGuard<'a>) -> Fut, Fut: Future + Send + 'a, { - let mut rx = self.revoke_callback_rx.resubscribe(); - - match rx.try_recv() { - Ok(_) => { - tracing::trace!("Lock revoked"); - return Err(LockError::LockRevoked); - } - Err(broadcast::error::TryRecvError::Closed) => { - tracing::trace!("Lock revoked"); - return Err(LockError::LockRevoked); - } - _ => {} - } + let revoke_callback = self.revoke_callback.clone(); tokio::select! { result = func(ManagedLockGuard { managed_lock: self }) => Ok(result), - _ = rx.recv() => Err(LockError::LockRevoked), + _ = revoke_callback => Err(LockError::LockRevoked), } } diff --git a/src/log.rs b/src/log.rs index 2ab6b15..577a910 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,10 +1,15 @@ use etcd_client::{Compare, CompareOp, Txn, TxnOp, WatchOptions}; -use serde::{de::DeserializeOwned, Serialize}; +use futures::StreamExt; +use serde::{Serialize, de::DeserializeOwned}; -use crate::{lock::ManagedLockGuard, retry::retry_etcd_txn, sync::watch, watcher::WatchClientExt}; +use crate::{ + lock::ManagedLockGuard, + retry::retry_etcd_txn, + watcher::{EtcdJsonPutWatchStream, WatchClientExt}, +}; pub struct LogWatcher { - rx: watch::Receiver, + stream: EtcdJsonPutWatchStream, } pub struct ExclusiveLogUpdater<'a, T> { @@ -105,33 +110,33 @@ where ) -> Result { let mut get_resp = etcd.get(log_name.as_ref(), None).await?; + // If the key exists, resume from its latest mod revision so callers can + // observe current state and any subsequent updates in order. + // If the key does not exist yet, start from header revision + 1 to avoid + // missing writes between the read and watch establishment. let maybe_watch_opts = get_resp .take_kvs() .into_iter() .map(|kv| kv.mod_revision()) .max() - .map(|max_mod_rev| WatchOptions::new().with_start_revision(max_mod_rev)); - - let mut rx = etcd + .map(|max_mod_rev| WatchOptions::new().with_start_revision(max_mod_rev)) + .or_else(|| { + get_resp + .header() + .map(|h| WatchOptions::new().with_start_revision(h.revision() + 1)) + }); + + let stream = etcd .watch_client() - .json_put_watch_channel::(log_name.as_ref(), maybe_watch_opts); - - let (mut wtx, wrx) = watch::watch::(); - - let _channel_handle = tokio::spawn(async move { - loop { - let (_revision, val) = rx.recv().await.expect("watch channel closed"); - let _ = wtx.update(val).await; - } - }); + .json_put_watch_stream::(log_name.as_ref(), maybe_watch_opts); - Ok(Self { rx: wrx }) + Ok(Self { stream }) } /// /// Observes the log for new entries. /// pub async fn observe(&mut self) -> Option { - self.rx.recv().await + self.stream.next().await.map(|(_revision, val)| val) } } diff --git a/src/retry.rs b/src/retry.rs index d4167e1..e605d0a 100644 --- a/src/retry.rs +++ b/src/retry.rs @@ -15,39 +15,54 @@ use { /// pub fn is_transient(err: &etcd_client::Error) -> bool { match err { - etcd_client::Error::GRpcStatus(status) => match status.code() { - tonic::Code::Ok => false, - tonic::Code::Cancelled => false, - tonic::Code::Unknown => { - match status.source() { - Some(e) => { - match e.downcast_ref::() { - Some(_) => { - // Because if the error is a transport error, it's likely a transient error due to connection reset. - true - } - None => false, + etcd_client::Error::TransportError(_) => true, + etcd_client::Error::GRpcStatus(status) => { + match status.source() { + Some(e) => { + match e.downcast_ref::() { + Some(_) => { + // Because if the error is a transport error, it's likely a transient error due to connection reset. + true } + None => status_code_is_transient(status), } - None => true, } + None => status_code_is_transient(status), } - tonic::Code::InvalidArgument => false, - tonic::Code::DeadlineExceeded => true, - tonic::Code::NotFound => false, - tonic::Code::AlreadyExists => false, - tonic::Code::PermissionDenied => false, - tonic::Code::ResourceExhausted => true, - tonic::Code::FailedPrecondition => false, - tonic::Code::Aborted => false, - tonic::Code::OutOfRange => false, - tonic::Code::Unimplemented => false, - tonic::Code::Internal => true, - tonic::Code::Unavailable => true, - tonic::Code::DataLoss => true, - tonic::Code::Unauthenticated => false, - }, - _ => false, + } + etcd_client::Error::InvalidArgs(_) => false, + etcd_client::Error::InvalidUri(_) => false, + etcd_client::Error::IoError(_) => true, + etcd_client::Error::WatchError(_) => true, + etcd_client::Error::Utf8Error(_) => false, + etcd_client::Error::LeaseKeepAliveError(_) => true, + etcd_client::Error::ElectError(_) => false, + etcd_client::Error::InvalidMetadataValue(_) => false, + etcd_client::Error::EndpointError(_) => false, + etcd_client::Error::EndpointsNotManaged => false, + etcd_client::Error::Internal(_) => true, + } +} + +pub fn status_code_is_transient(status: &tonic::Status) -> bool { + match status.code() { + tonic::Code::Ok => true, + tonic::Code::Cancelled => true, + tonic::Code::Unknown => true, + tonic::Code::InvalidArgument => false, + tonic::Code::DeadlineExceeded => true, + tonic::Code::NotFound => false, + tonic::Code::AlreadyExists => false, + tonic::Code::PermissionDenied => false, + tonic::Code::ResourceExhausted => true, + tonic::Code::FailedPrecondition => false, + tonic::Code::Aborted => false, + tonic::Code::OutOfRange => false, + tonic::Code::Unimplemented => false, + tonic::Code::Internal => true, + tonic::Code::Unavailable => true, + tonic::Code::DataLoss => true, + tonic::Code::Unauthenticated => false, } } diff --git a/src/sync/watch.rs b/src/sync/watch.rs index 2877ad3..90c2ac2 100644 --- a/src/sync/watch.rs +++ b/src/sync/watch.rs @@ -1,4 +1,4 @@ -use std::sync::{atomic::AtomicBool, Arc}; +use std::sync::{Arc, atomic::AtomicBool}; use tokio::sync::{Mutex, Notify}; struct Inner { diff --git a/src/tonic/discovery.rs b/src/tonic/discovery.rs new file mode 100644 index 0000000..12b9bfe --- /dev/null +++ b/src/tonic/discovery.rs @@ -0,0 +1,416 @@ +use std::{ + collections::HashSet, + hash::Hash, + pin::Pin, + sync::Arc, + task::{Context, Poll}, +}; + +use futures::Sink; +use futures::channel::mpsc as futures_mpsc; +use tokio::sync::mpsc; +use tokio_stream::Stream; +use tonic::transport::{Endpoint, channel::Change}; +use tower::{Service, discover::Change as TowerChange}; + +use crate::tonic::transport::ErrorReportingTransport; + +type QuarantinableTransport = + ErrorReportingTransport>; + +pub trait TransportBuilder { + type Transport: Service, Error = tonic::transport::Error>; + + fn build_transport(&self) -> Self::Transport; +} + +impl TransportBuilder for Endpoint { + type Transport = tonic::transport::Channel; + + fn build_transport(&self) -> Self::Transport { + self.connect_lazy() + } +} + +#[derive(Clone)] +pub struct TransportErrorSink { + discover_tx: futures_mpsc::UnboundedSender, + quarantine_event_tx: Option>, +} + +impl TransportErrorSink { + fn unbounded_send(&self, item: K) -> Result<(), futures_mpsc::TrySendError> { + let _ = self + .quarantine_event_tx + .as_ref() + .map(|tx| tx.send(item.clone())); + self.discover_tx.unbounded_send(item) + } +} + +impl Sink for TransportErrorSink { + type Error = std::io::Error; + + fn poll_ready(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn start_send(self: Pin<&mut Self>, item: K) -> Result<(), Self::Error> { + self.get_mut() + .unbounded_send(item) + .map_err(|_| std::io::Error::other("failed to publish transport error")) + } + + fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_close(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } +} + +/// A `Discover` implementation similar to tonic's internal dynamic service stream, +/// but with error-reporting-driven endpoint quarantine. +/// +/// Endpoints are inserted from the `changes` channel. If a wrapped channel reports a +/// transport error through the error-reporting sink, the key is moved to quarantine and a +/// `Remove` change is emitted for the balancer. +pub struct QuarantiningDiscover { + changes: mpsc::Receiver>, + transport_error_rx: futures_mpsc::UnboundedReceiver, + transport_error_tx: TransportErrorSink, + shared_call_counter: Option>>, + active: HashSet, + quarantined: HashSet, +} + +impl QuarantiningDiscover { + pub fn new(changes: mpsc::Receiver>) -> Self { + Self::new_with_quarantine_events_and_call_counter(changes, None, None) + } + + pub fn new_with_quarantine_events( + changes: mpsc::Receiver>, + quarantine_event_tx: Option>, + ) -> Self { + Self::new_with_quarantine_events_and_call_counter(changes, quarantine_event_tx, None) + } + + pub fn new_with_quarantine_events_and_call_counter( + changes: mpsc::Receiver>, + quarantine_event_tx: Option>, + shared_call_counter: Option>>, + ) -> Self { + let (discover_tx, transport_error_rx) = futures_mpsc::unbounded(); + let transport_error_tx = TransportErrorSink { + discover_tx, + quarantine_event_tx, + }; + Self { + changes, + transport_error_rx, + transport_error_tx, + shared_call_counter, + active: HashSet::new(), + quarantined: HashSet::new(), + } + } + + pub fn quarantined_keys(&self) -> &HashSet { + &self.quarantined + } +} + +impl QuarantiningDiscover +where + K: Hash + Eq + Clone + Send + 'static, + TB: TransportBuilder, +{ + fn poll_transport_errors( + &mut self, + cx: &mut Context<'_>, + ) -> Poll< + Option>, tower::BoxError>>, + > { + loop { + match Pin::new(&mut self.transport_error_rx).poll_next(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(None) => return Poll::Ready(None), + Poll::Ready(Some(key)) => { + if !self.quarantined.insert(key.clone()) { + continue; + } + if self.active.remove(&key) { + tracing::trace!("removing endpoint from discover due to transport error"); + return Poll::Ready(Some(Ok(TowerChange::Remove(key)))); + } + } + } + } + } + + fn poll_changes( + &mut self, + cx: &mut Context<'_>, + ) -> Poll< + Option>, tower::BoxError>>, + > { + loop { + match Pin::new(&mut self.changes).poll_recv(cx) { + Poll::Pending => return Poll::Pending, + Poll::Ready(None) => return Poll::Ready(None), + Poll::Ready(Some(change)) => match change { + Change::Insert(key, endpoint) => { + // Explicit insert acts as a retry signal and clears quarantine state. + self.quarantined.remove(&key); + self.active.insert(key.clone()); + let connection: QuarantinableTransport = + crate::tonic::error_reporting::ErrorReportingTransport::new_with_call_counter( + endpoint.build_transport(), + key.clone(), + self.transport_error_tx.clone(), + self.shared_call_counter.clone(), + ); + tracing::trace!("inserting endpoint into discover"); + return Poll::Ready(Some(Ok(TowerChange::Insert(key, connection)))); + } + Change::Remove(key) => { + tracing::trace!("removing endpoint from discover"); + self.quarantined.remove(&key); + if self.active.remove(&key) { + return Poll::Ready(Some(Ok(TowerChange::Remove(key)))); + } + } + }, + } + } + } +} + +impl Stream for QuarantiningDiscover +where + K: Hash + Eq + Clone + Send + 'static, + TB: TransportBuilder, +{ + type Item = Result>, tower::BoxError>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.as_mut().get_mut(); + match this.poll_transport_errors(cx) { + Poll::Ready(Some(change)) => return Poll::Ready(Some(change)), + Poll::Ready(None) | Poll::Pending => {} + } + + this.poll_changes(cx) + } +} + +impl Unpin + for QuarantiningDiscover +{ +} + +#[cfg(test)] +mod tests { + use super::*; + use futures::SinkExt; + use futures::task::noop_waker_ref; + use tower::discover::Discover; + + #[tokio::test] + async fn quarantines_key_when_error_reporting_transport_reports_transport_error() { + let (tx, rx) = mpsc::channel(4); + let mut discover = QuarantiningDiscover::new(rx); + let key = "ep-1".to_string(); + + let endpoint = Endpoint::from_static("http://127.0.0.1:2379"); + tx.send(Change::Insert(key.clone(), endpoint)) + .await + .expect("insert change should be sent"); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + let insert = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!( + insert, + Poll::Ready(Some(Ok(TowerChange::Insert(_, _)))) + )); + + discover + .transport_error_tx + .unbounded_send(key.clone()) + .expect("transport error key should be sent"); + + let remove = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!(remove, Poll::Ready(Some(Ok(TowerChange::Remove(k)))) if k == key)); + assert!(discover.quarantined_keys().contains(&key)); + } + + #[tokio::test] + async fn duplicate_transport_errors_do_not_emit_duplicate_remove() { + let (tx, rx) = mpsc::channel(4); + let mut discover = QuarantiningDiscover::new(rx); + let key = "ep-dup".to_string(); + + let endpoint = Endpoint::from_static("http://127.0.0.1:2379"); + tx.send(Change::Insert(key.clone(), endpoint)) + .await + .expect("insert change should be sent"); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + let insert = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!( + insert, + Poll::Ready(Some(Ok(TowerChange::Insert(_, _)))) + )); + + discover + .transport_error_tx + .unbounded_send(key.clone()) + .expect("first transport error should be sent"); + let first_remove = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!(first_remove, Poll::Ready(Some(Ok(TowerChange::Remove(k)))) if k == key)); + + discover + .transport_error_tx + .unbounded_send(key) + .expect("second transport error should be sent"); + let second_poll = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!(second_poll, Poll::Pending)); + } + + #[tokio::test] + async fn transport_error_for_inactive_key_marks_quarantine_without_remove() { + let (_tx, rx) = mpsc::channel::>(4); + let mut discover = QuarantiningDiscover::new(rx); + let key = "ep-inactive".to_string(); + + discover + .transport_error_tx + .unbounded_send(key.clone()) + .expect("transport error key should be sent"); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + let poll = Pin::new(&mut discover).poll_discover(&mut cx); + + assert!(matches!(poll, Poll::Pending)); + assert!(discover.quarantined_keys().contains(&key)); + } + + #[tokio::test] + async fn remove_for_inactive_key_clears_quarantine_without_emitting_remove() { + let (tx, rx) = mpsc::channel(4); + let mut discover = QuarantiningDiscover::new(rx); + let key = "ep-remove-inactive".to_string(); + + let endpoint = Endpoint::from_static("http://127.0.0.1:2379"); + tx.send(Change::Insert(key.clone(), endpoint)) + .await + .expect("insert change should be sent"); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + let insert = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!( + insert, + Poll::Ready(Some(Ok(TowerChange::Insert(_, _)))) + )); + + discover + .transport_error_tx + .unbounded_send(key.clone()) + .expect("transport error key should be sent"); + let remove_after_error = Pin::new(&mut discover).poll_discover(&mut cx); + assert!( + matches!(remove_after_error, Poll::Ready(Some(Ok(TowerChange::Remove(k)))) if k == key) + ); + assert!(discover.quarantined_keys().contains(&key)); + + tx.send(Change::Remove(key.clone())) + .await + .expect("remove change should be sent"); + let remove_inactive = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!(remove_inactive, Poll::Pending)); + assert!(!discover.quarantined_keys().contains(&key)); + } + + #[tokio::test] + async fn explicit_reinsert_clears_quarantine_and_emits_insert() { + let (tx, rx) = mpsc::channel(4); + let mut discover = QuarantiningDiscover::new(rx); + let key = "ep-reinsert".to_string(); + + let endpoint = Endpoint::from_static("http://127.0.0.1:2379"); + tx.send(Change::Insert(key.clone(), endpoint.clone())) + .await + .expect("insert change should be sent"); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + let first_insert = Pin::new(&mut discover).poll_discover(&mut cx); + assert!(matches!( + first_insert, + Poll::Ready(Some(Ok(TowerChange::Insert(_, _)))) + )); + + discover + .transport_error_tx + .unbounded_send(key.clone()) + .expect("transport error key should be sent"); + let remove_after_error = Pin::new(&mut discover).poll_discover(&mut cx); + assert!( + matches!(remove_after_error, Poll::Ready(Some(Ok(TowerChange::Remove(k)))) if k == key) + ); + assert!(discover.quarantined_keys().contains(&key)); + + tx.send(Change::Insert(key.clone(), endpoint)) + .await + .expect("retry insert change should be sent"); + let retry_insert = Pin::new(&mut discover).poll_discover(&mut cx); + assert!( + matches!(retry_insert, Poll::Ready(Some(Ok(TowerChange::Insert(k, _)))) if k == key) + ); + assert!(!discover.quarantined_keys().contains(&key)); + } + + #[tokio::test] + async fn transport_error_sink_fanout_sends_quarantine_event() { + let (discover_tx, _discover_rx) = futures_mpsc::unbounded::(); + let (quarantine_tx, mut quarantine_rx) = mpsc::unbounded_channel::(); + let mut sink = TransportErrorSink { + discover_tx, + quarantine_event_tx: Some(quarantine_tx), + }; + + sink.send("ep-fanout".to_string()) + .await + .expect("sink send should succeed"); + + let forwarded = quarantine_rx + .recv() + .await + .expect("quarantine event should be forwarded"); + assert_eq!(forwarded, "ep-fanout"); + } + + #[tokio::test] + async fn transport_error_sink_reports_error_when_discover_channel_closed() { + let (discover_tx, discover_rx) = futures_mpsc::unbounded::(); + drop(discover_rx); + + let mut sink = TransportErrorSink { + discover_tx, + quarantine_event_tx: None, + }; + + let result = sink.send("ep-closed".to_string()).await; + assert!(result.is_err()); + } +} diff --git a/src/tonic/mod.rs b/src/tonic/mod.rs new file mode 100644 index 0000000..4432f51 --- /dev/null +++ b/src/tonic/mod.rs @@ -0,0 +1,3 @@ +pub mod discovery; +pub mod transport; +pub use transport as error_reporting; diff --git a/src/tonic/transport.rs b/src/tonic/transport.rs new file mode 100644 index 0000000..d3a656f --- /dev/null +++ b/src/tonic/transport.rs @@ -0,0 +1,689 @@ +use std::{ + future::Future, + hash::Hash, + pin::Pin, + sync::Arc, + task::{Context, Poll, ready}, +}; + +use futures::channel::mpsc as futures_mpsc; +use futures::{Sink, SinkExt}; +use pin_project::pin_project; +use tower::{Service, load::Load}; + +/// Channel type wrapped with error-reporting transport behavior. +pub type ErrorReportingChannel> = + ErrorReportingTransport; + +/// Wraps a tonic transport service and reports transport errors specifically. +/// +/// This type does not report successful calls. It only forwards endpoint keys +/// to the configured sink when transport-layer operations fail: +/// - `poll_ready` returns a transport error +/// - `call` future resolves to a transport error +pub struct ErrorReportingTransport { + inner: Transport, + key: Key, + error_sink: ErrorSink, + shared_stat_map: Option>>, + ready_notify_state: ReadyNotifyState, + ready_transport_error: Option, +} + +enum ReadyNotifyState { + Idle, + NeedSend(K), + NeedFlush, +} + +impl ErrorReportingTransport { + pub fn new(inner: T, key: K, error_sink: ES) -> Self { + Self { + inner, + key, + error_sink, + shared_stat_map: None, + ready_notify_state: ReadyNotifyState::Idle, + ready_transport_error: None, + } + } + + pub fn new_with_call_counter( + inner: T, + key: K, + error_sink: ES, + shared_stat_map: Option>>, + ) -> Self { + Self { + inner, + key, + error_sink, + shared_stat_map, + ready_notify_state: ReadyNotifyState::Idle, + ready_transport_error: None, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum ErrorReportingTransportError { + #[error("Service error: {0}")] + Transport(#[from] tonic::transport::Error), + #[error("error sink error: {0}")] + ErrorSink(Box), +} + +enum SinkNotifyState { + Idle, + NeedSend(K), + NeedFlush, + Done, +} + +#[pin_project] +pub struct ErrorReportingTransportFuture { + #[pin] + inner: Fut, + #[pin] + error_sink: ErrorSink, + key: Key, + transport_error: Option, + sink_notify_state: SinkNotifyState, + _ok: std::marker::PhantomData, +} + +impl Future for ErrorReportingTransportFuture +where + Fut: Future>, + K: Clone + Send, + ES: Sink + Unpin, + >::Error: std::error::Error + Send + Sync + 'static, +{ + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut this = self.project(); + loop { + match this.sink_notify_state { + SinkNotifyState::Idle => match this.inner.as_mut().poll(cx) { + Poll::Ready(Ok(ok)) => return Poll::Ready(Ok(ok)), + Poll::Ready(Err(err)) => { + *this.transport_error = Some(err); + *this.sink_notify_state = SinkNotifyState::NeedSend(this.key.clone()); + } + Poll::Pending => return Poll::Pending, + }, + SinkNotifyState::NeedSend(key) => { + ready!( + this.error_sink + .poll_ready_unpin(cx) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e))) + )?; + this.error_sink + .start_send_unpin(key.clone()) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e)))?; + *this.sink_notify_state = SinkNotifyState::NeedFlush; + } + SinkNotifyState::NeedFlush => { + ready!( + this.error_sink + .poll_flush_unpin(cx) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e))) + )?; + *this.sink_notify_state = SinkNotifyState::Done; + let err = this + .transport_error + .take() + .expect("missing transport error while flushing sink"); + return Poll::Ready(Err(ErrorReportingTransportError::Transport(err))); + } + SinkNotifyState::Done => { + panic!("ErrorReportingTransportFuture polled after completion") + } + } + } + } +} + +impl Service> for ErrorReportingTransport +where + T: Service, Error = tonic::transport::Error>, + K: Clone + Eq + Hash + Send + 'static, + ES: Sink + Clone + Unpin + Send + 'static, + >::Error: std::error::Error + Send + Sync + 'static, +{ + type Response = T::Response; + type Error = ErrorReportingTransportError; + type Future = ErrorReportingTransportFuture; + + fn poll_ready( + &mut self, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + loop { + match &self.ready_notify_state { + ReadyNotifyState::Idle => { + ready!(self.error_sink.poll_ready_unpin(cx)) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e)))?; + + match self.inner.poll_ready(cx) { + Poll::Ready(Ok(())) => return Poll::Ready(Ok(())), + Poll::Ready(Err(err)) => { + self.ready_transport_error = Some(err); + self.ready_notify_state = ReadyNotifyState::NeedSend(self.key.clone()); + } + Poll::Pending => return Poll::Pending, + } + } + ReadyNotifyState::NeedSend(key) => { + ready!(self.error_sink.poll_ready_unpin(cx)) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e)))?; + self.error_sink + .start_send_unpin(key.clone()) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e)))?; + self.ready_notify_state = ReadyNotifyState::NeedFlush; + } + ReadyNotifyState::NeedFlush => { + ready!(self.error_sink.poll_flush_unpin(cx)) + .map_err(|e| ErrorReportingTransportError::ErrorSink(Box::new(e)))?; + self.ready_notify_state = ReadyNotifyState::Idle; + let err = self + .ready_transport_error + .take() + .expect("missing transport error while flushing poll_ready notification"); + return Poll::Ready(Err(ErrorReportingTransportError::Transport(err))); + } + } + } + } + + fn call(&mut self, req: http::Request) -> Self::Future { + if let Some(map) = &self.shared_stat_map { + map.entry(self.key.clone()) + .and_modify(|count| *count += 1) + .or_insert(1); + } + + let error_sink = self.error_sink.clone(); + let key = self.key.clone(); + ErrorReportingTransportFuture { + inner: self.inner.call(req), + error_sink, + key, + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + } + } +} + +impl Load for ErrorReportingTransport { + type Metric = usize; + + fn load(&self) -> Self::Metric { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use dashmap::DashMap; + use futures::task::noop_waker_ref; + use std::{ + future::Future, + sync::atomic::{AtomicUsize, Ordering}, + sync::{Arc, Mutex}, + time::Duration, + }; + + #[derive(Debug, Default)] + struct SinkState { + sent: Vec, + flush_count: usize, + ready_count: usize, + } + + #[derive(Debug, Clone)] + struct TestSink { + state: Arc>>, + pending_ready_once: bool, + pending_flush_once: bool, + } + + impl TestSink { + fn new(state: Arc>>) -> Self { + Self { + state, + pending_ready_once: false, + pending_flush_once: false, + } + } + + fn with_pending_once(state: Arc>>) -> Self { + Self { + state, + pending_ready_once: true, + pending_flush_once: true, + } + } + } + + impl Sink for TestSink { + type Error = std::io::Error; + + fn poll_ready( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + self.state.lock().expect("sink state lock").ready_count += 1; + if self.pending_ready_once { + self.pending_ready_once = false; + return Poll::Pending; + } + Poll::Ready(Ok(())) + } + + fn start_send(self: Pin<&mut Self>, item: K) -> Result<(), Self::Error> { + self.state.lock().expect("sink state lock").sent.push(item); + Ok(()) + } + + fn poll_flush( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + if self.pending_flush_once { + self.pending_flush_once = false; + return Poll::Pending; + } + self.state.lock().expect("sink state lock").flush_count += 1; + Poll::Ready(Ok(())) + } + + fn poll_close( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + } + + struct ErrorOnceFuture { + err: Option, + polled_once: bool, + } + + struct PollReadyErrorService { + poll_ready_calls: Arc, + err: Option, + } + + struct PollReadyOkCountingService { + poll_ready_calls: Arc, + } + + impl Service> for PollReadyErrorService { + type Response = http::Response; + type Error = tonic::transport::Error; + type Future = std::future::Ready>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + self.poll_ready_calls.fetch_add(1, Ordering::SeqCst); + Poll::Ready(Err(self + .err + .take() + .expect("poll_ready called more than once on error service"))) + } + + fn call(&mut self, _req: http::Request) -> Self::Future { + std::future::ready(Ok(http::Response::new(tonic::body::Body::empty()))) + } + } + + impl Service> for PollReadyOkCountingService { + type Response = http::Response; + type Error = tonic::transport::Error; + type Future = std::future::Ready>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + self.poll_ready_calls.fetch_add(1, Ordering::SeqCst); + Poll::Ready(Ok(())) + } + + fn call(&mut self, _req: http::Request) -> Self::Future { + std::future::ready(Ok(http::Response::new(tonic::body::Body::empty()))) + } + } + + #[derive(Clone)] + struct OkService; + + impl Service> for OkService { + type Response = http::Response; + type Error = tonic::transport::Error; + type Future = std::future::Ready>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, _req: http::Request) -> Self::Future { + std::future::ready(Ok(http::Response::new(tonic::body::Body::empty()))) + } + } + + impl Future for ErrorOnceFuture { + type Output = Result<(), tonic::transport::Error>; + + fn poll(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { + if self.polled_once { + panic!("inner future was polled after completion"); + } + self.polled_once = true; + Poll::Ready(Err(self.err.take().expect("missing transport error"))) + } + } + + #[derive(Clone)] + struct AlwaysErrorSink; + + impl Sink for AlwaysErrorSink { + type Error = std::io::Error; + + fn poll_ready( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Err(std::io::Error::other("poll_ready failed"))) + } + + fn start_send(self: Pin<&mut Self>, _item: String) -> Result<(), Self::Error> { + Ok(()) + } + + fn poll_flush( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + + fn poll_close( + self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + } + + async fn make_transport_error() -> tonic::transport::Error { + tonic::transport::Endpoint::from_static("http://192.0.2.1:12345") + .connect_timeout(Duration::from_millis(50)) + .connect() + .await + .expect_err("expected connect failure") + } + + #[tokio::test] + async fn error_reporting_transport_future_success_path_does_not_notify_sink() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink::new(Arc::clone(&state)); + + let fut = ErrorReportingTransportFuture { + inner: std::future::ready(Ok::<_, tonic::transport::Error>(123)), + error_sink: sink, + key: "endpoint-a".to_string(), + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + }; + + let out = fut.await.expect("expected success"); + assert_eq!(out, 123); + + let s = state.lock().expect("sink state lock"); + assert!(s.sent.is_empty()); + assert_eq!(s.flush_count, 0); + } + + #[tokio::test] + async fn error_reporting_transport_future_error_path_sends_and_flushes_once_without_repolling_inner() + { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink::with_pending_once(Arc::clone(&state)); + let err = make_transport_error().await; + + let mut fut = ErrorReportingTransportFuture { + inner: ErrorOnceFuture { + err: Some(err), + polled_once: false, + }, + error_sink: sink, + key: "endpoint-b".to_string(), + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + }; + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + assert!(matches!(Pin::new(&mut fut).poll(&mut cx), Poll::Pending)); + assert!(matches!(Pin::new(&mut fut).poll(&mut cx), Poll::Pending)); + let final_poll = Pin::new(&mut fut).poll(&mut cx); + assert!(matches!( + final_poll, + Poll::Ready(Err(ErrorReportingTransportError::Transport(_))) + )); + + let s = state.lock().expect("sink state lock"); + assert_eq!(s.sent, vec!["endpoint-b".to_string()]); + assert_eq!(s.flush_count, 1); + } + + #[tokio::test] + async fn error_reporting_transport_future_drop_before_notify_send_is_safe() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink { + state: Arc::clone(&state), + pending_ready_once: true, + pending_flush_once: false, + }; + let err = make_transport_error().await; + + let mut fut = ErrorReportingTransportFuture { + inner: ErrorOnceFuture { + err: Some(err), + polled_once: false, + }, + error_sink: sink, + key: "endpoint-cancel-1".to_string(), + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + }; + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + assert!(matches!(Pin::new(&mut fut).poll(&mut cx), Poll::Pending)); + + drop(fut); + + let s = state.lock().expect("sink state lock"); + assert!(s.sent.is_empty()); + assert_eq!(s.flush_count, 0); + } + + #[tokio::test] + async fn error_reporting_transport_future_drop_after_send_before_flush_is_safe() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink { + state: Arc::clone(&state), + pending_ready_once: false, + pending_flush_once: true, + }; + let err = make_transport_error().await; + + let mut fut = ErrorReportingTransportFuture { + inner: ErrorOnceFuture { + err: Some(err), + polled_once: false, + }, + error_sink: sink, + key: "endpoint-cancel-2".to_string(), + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + }; + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + assert!(matches!(Pin::new(&mut fut).poll(&mut cx), Poll::Pending)); + + drop(fut); + + let s = state.lock().expect("sink state lock"); + assert_eq!(s.sent, vec!["endpoint-cancel-2".to_string()]); + assert_eq!(s.flush_count, 0); + } + + #[tokio::test] + async fn error_reporting_transport_increments_optional_shared_call_counter() { + let map = Arc::new(DashMap::::new()); + let (error_sink, _error_rx) = futures_mpsc::unbounded::(); + + let mut spy = ErrorReportingTransport::new_with_call_counter( + OkService, + "endpoint-counter".to_string(), + error_sink, + Some(Arc::clone(&map)), + ); + + let req = http::Request::new(tonic::body::Body::empty()); + let _ = spy.call(req).await.expect("first request should succeed"); + let req = http::Request::new(tonic::body::Body::empty()); + let _ = spy.call(req).await.expect("second request should succeed"); + + let count = *map + .get("endpoint-counter") + .expect("missing endpoint call counter"); + assert_eq!(count, 2); + } + + #[tokio::test] + async fn error_reporting_poll_ready_waits_for_sink_before_polling_inner() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink { + state: Arc::clone(&state), + pending_ready_once: true, + pending_flush_once: false, + }; + let poll_ready_calls = Arc::new(AtomicUsize::new(0)); + + let mut spy = ErrorReportingTransport::new( + PollReadyOkCountingService { + poll_ready_calls: Arc::clone(&poll_ready_calls), + }, + "endpoint-ready-gate".to_string(), + sink, + ); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + assert!(matches!(spy.poll_ready(&mut cx), Poll::Pending)); + assert_eq!(poll_ready_calls.load(Ordering::SeqCst), 0); + + assert!(matches!(spy.poll_ready(&mut cx), Poll::Ready(Ok(())))); + assert_eq!(poll_ready_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn error_reporting_poll_ready_maps_sink_poll_ready_error() { + let poll_ready_calls = Arc::new(AtomicUsize::new(0)); + let mut spy = ErrorReportingTransport::new( + PollReadyOkCountingService { + poll_ready_calls: Arc::clone(&poll_ready_calls), + }, + "endpoint-ready-sink-error".to_string(), + AlwaysErrorSink, + ); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + let res = spy.poll_ready(&mut cx); + + assert!(matches!( + res, + Poll::Ready(Err(ErrorReportingTransportError::ErrorSink(_))) + )); + assert_eq!(poll_ready_calls.load(Ordering::SeqCst), 0); + } + + #[tokio::test] + #[should_panic(expected = "ErrorReportingTransportFuture polled after completion")] + async fn error_reporting_transport_future_panics_if_polled_after_completion() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink::new(Arc::clone(&state)); + let err = make_transport_error().await; + + let mut fut = ErrorReportingTransportFuture { + inner: ErrorOnceFuture { + err: Some(err), + polled_once: false, + }, + error_sink: sink, + key: "endpoint-done-repoll".to_string(), + transport_error: None, + sink_notify_state: SinkNotifyState::Idle, + _ok: std::marker::PhantomData, + }; + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + let first = Pin::new(&mut fut).poll(&mut cx); + assert!(matches!( + first, + Poll::Ready(Err(ErrorReportingTransportError::Transport(_))) + )); + + let _ = Pin::new(&mut fut).poll(&mut cx); + } + + #[tokio::test] + async fn error_reporting_poll_ready_does_not_repoll_inner_while_sink_flush_pending() { + let state = Arc::new(Mutex::new(SinkState::::default())); + let sink = TestSink { + state: Arc::clone(&state), + pending_ready_once: false, + pending_flush_once: true, + }; + let err = make_transport_error().await; + let poll_ready_calls = Arc::new(AtomicUsize::new(0)); + + let mut spy = ErrorReportingTransport::new( + PollReadyErrorService { + poll_ready_calls: Arc::clone(&poll_ready_calls), + err: Some(err), + }, + "endpoint-ready-error".to_string(), + sink, + ); + + let waker = noop_waker_ref(); + let mut cx = Context::from_waker(waker); + + assert!(matches!(spy.poll_ready(&mut cx), Poll::Pending)); + assert!(matches!( + spy.poll_ready(&mut cx), + Poll::Ready(Err(ErrorReportingTransportError::Transport(_))) + )); + + assert_eq!(poll_ready_calls.load(Ordering::SeqCst), 1); + let s = state.lock().expect("sink state lock"); + assert_eq!(s.sent, vec!["endpoint-ready-error".to_string()]); + assert_eq!(s.flush_count, 1); + } +} diff --git a/src/watcher.rs b/src/watcher.rs index 638d875..9eff787 100644 --- a/src/watcher.rs +++ b/src/watcher.rs @@ -1,12 +1,21 @@ use { - super::{retry::retry_etcd_legacy, Revision}, + super::{Revision, retry::retry_etcd_legacy}, crate::retry::is_transient, - etcd_client::{EventType, WatchClient, WatchFilterType, WatchOptions}, + etcd_client::{ + Error, EventType, WatchClient, WatchFilterType, WatchOptions, WatchResponse, + WatchStream as EtcdClientWatchStream, + }, + futures::{Future, Stream}, + pin_project::pin_project, retry::delay::Exponential, serde::de::DeserializeOwned, - tokio::sync::{broadcast, mpsc}, - tokio_stream::StreamExt, - tracing::{error, info, warn}, + std::{ + collections::VecDeque, + marker::PhantomData, + pin::{Pin, pin}, + task::{Context, Poll, ready}, + }, + tracing::{error, info}, }; /// @@ -27,6 +36,404 @@ pub enum WatchEvent { }, } +enum ReconnectState { + Disconnected, + Connecting(Pin> + Send>>), + Streaming { stream: S }, + Terminated, +} + +pub trait EtcdConnector { + type WatchStream: Stream> + Unpin + Send + 'static; + type ConnectFut: Future> + Send + 'static; + + fn connect_watch(&mut self, last_revision: Option) -> Self::ConnectFut; +} + +pub struct GrpcEtcdConenctor { + watch_client: WatchClient, + key: Vec, + watch_options_prototype: WatchOptions, +} + +impl GrpcEtcdConenctor { + pub fn new( + watch_client: WatchClient, + key: Vec, + watch_options_prototype: WatchOptions, + ) -> Self { + Self { + watch_client, + key, + watch_options_prototype, + } + } +} + +impl EtcdConnector for GrpcEtcdConenctor { + type WatchStream = EtcdClientWatchStream; + type ConnectFut = Pin> + Send>>; + + fn connect_watch(&mut self, last_revision: Option) -> Self::ConnectFut { + let wc = self.watch_client.clone(); + let key = self.key.clone(); + let mut wopts = self.watch_options_prototype.clone(); + if let Some(rev) = last_revision { + wopts = wopts.with_start_revision(rev); + } + + Box::pin(async move { + let retry_strategy = Exponential::from_millis_with_factor(10, 10.0).take(3); + retry_etcd_legacy(retry_strategy, move || { + let mut wc = wc.clone(); + let key = key.clone(); + let wopts = wopts.clone(); + async move { wc.watch(key.clone(), Some(wopts)).await } + }) + .await + }) + } +} + +pub struct AutoReconnectWatchStream +where + C: EtcdConnector, +{ + connector: C, + state: ReconnectState, + last_revision: Option, +} + +impl AutoReconnectWatchStream +where + C: EtcdConnector, +{ + pub fn new(connector: C) -> Self { + Self { + connector, + state: ReconnectState::Disconnected, + last_revision: None, + } + } +} + +impl Stream for AutoReconnectWatchStream +where + C: EtcdConnector + Unpin, +{ + type Item = WatchResponse; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.as_mut().get_mut(); + + loop { + match &mut this.state { + ReconnectState::Disconnected => { + let fut = this.connector.connect_watch(this.last_revision); + this.state = ReconnectState::Connecting(Box::pin(fut)); + } + ReconnectState::Connecting(fut) => { + let stream = match ready!(fut.as_mut().poll(cx)) { + Ok(v) => v, + Err(e) => { + error!("watch reconnect failed: {e}"); + this.state = ReconnectState::Terminated; + continue; + } + }; + this.state = ReconnectState::Streaming { stream }; + } + ReconnectState::Streaming { stream } => match Pin::new(stream).poll_next(cx) { + Poll::Ready(Some(Ok(watch_resp))) => { + if watch_resp.canceled() { + error!("watch cancelled: {watch_resp:?}"); + this.state = ReconnectState::Terminated; + continue; + } + + if let Some(revision) = watch_resp + .events() + .iter() + .filter_map(|ev| ev.kv()) + .max_by_key(|kv| kv.mod_revision()) + .map(|kv| kv.mod_revision()) + { + this.last_revision.replace(revision); + } + + return Poll::Ready(Some(watch_resp)); + } + Poll::Ready(Some(Err(e))) => { + if is_transient(&e) { + this.state = ReconnectState::Disconnected; + continue; + } + error!("watch stream failed with non-transient error: {e}"); + this.state = ReconnectState::Terminated; + } + Poll::Ready(None) => { + this.state = ReconnectState::Disconnected; + } + Poll::Pending => return Poll::Pending, + }, + ReconnectState::Terminated => return Poll::Ready(None), + } + } + } +} + +pub trait WatchStreamValueDecoder { + type Item; + type Error: std::error::Error + Send + 'static; + + fn decode_watch_response( + &mut self, + key: &[u8], + value: &[u8], + ) -> Result; +} + +pub struct JsonDecoder { + _phantom: PhantomData, +} + +impl Default for JsonDecoder { + fn default() -> Self { + Self { + _phantom: PhantomData, + } + } +} + +impl WatchStreamValueDecoder for JsonDecoder +where + V: DeserializeOwned, +{ + type Item = V; + type Error = serde_json::Error; + + fn decode_watch_response( + &mut self, + _key: &[u8], + value: &[u8], + ) -> Result { + serde_json::from_slice(value) + } +} + +#[pin_project] +pub struct ValueWatchStream { + #[pin] + inner: Source, + pending: VecDeque>, + decoder: Decoder, +} + +impl ValueWatchStream { + pub fn new(inner: S, decoder: D) -> Self { + Self { + inner, + pending: VecDeque::new(), + decoder, + } + } +} + +impl Stream for ValueWatchStream +where + D: WatchStreamValueDecoder, + S: Stream + Unpin, + V: DeserializeOwned, +{ + type Item = WatchEvent; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let mut this = self.project(); + loop { + if let Some(next) = this.pending.pop_front() { + return Poll::Ready(Some(next)); + } + let watch_resp = match ready!(this.inner.as_mut().poll_next(cx)) { + Some(v) => v, + None => return Poll::Ready(None), + }; + + for event in watch_resp.events() { + let parsed = match event.event_type() { + EventType::Put => { + let kv = event.kv().expect("put event with no kv"); + let key = Vec::from(kv.key()); + let value = this + .decoder + .decode_watch_response(&key, kv.value()) + .expect("failed to deserialize controller state"); + WatchEvent::Put { + key, + value, + revision: kv.mod_revision(), + } + } + EventType::Delete => { + let kv = event.kv().expect("delete event with no kv"); + let prev_value = event + .prev_kv() + .map(|prev_kv| { + this.decoder + .decode_watch_response(prev_kv.key(), prev_kv.value()) + }) + .transpose() + .expect("failed to deserialize prev controller state"); + let key = Vec::from(kv.key()); + WatchEvent::Delete { + key, + prev_value, + revision: kv.mod_revision(), + } + } + }; + + this.pending.push_back(parsed); + } + } + } +} + +#[pin_project] +pub struct PutWatchStream { + #[pin] + inner: Source, + _phantom: PhantomData, + decoder: Decoder, +} + +impl PutWatchStream { + pub fn new(inner: S, decoder: D) -> Self { + Self { + inner, + _phantom: PhantomData, + decoder, + } + } +} + +impl Stream for PutWatchStream +where + S: Stream + Unpin, + D: WatchStreamValueDecoder, +{ + type Item = (Revision, T); + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let mut this = self.project(); + loop { + let watch_resp = match ready!(Pin::new(&mut this.inner).poll_next(cx)) { + Some(v) => v, + None => return Poll::Ready(None), + }; + + let max_kv = watch_resp + .events() + .iter() + .filter_map(|ev| ev.kv()) + .max_by_key(|kv| kv.mod_revision()); + + if let Some(kv) = max_kv { + let revision = kv.mod_revision(); + let state = this + .decoder + .decode_watch_response(kv.key(), kv.value()) + .expect("failed to deserialize kv value"); + return Poll::Ready(Some((revision, state))); + } + } + } +} + +pub struct LockKeyChangeStream { + inner: S, + key: Vec, + key_mod_revision: Revision, + done: bool, +} + +impl LockKeyChangeStream { + pub fn new(inner: S, key: Vec, key_mod_revision: Revision) -> Self { + Self { + inner, + key, + key_mod_revision, + done: false, + } + } +} + +impl Stream for LockKeyChangeStream +where + S: Stream + Unpin, +{ + type Item = Revision; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.as_mut().get_mut(); + + if this.done { + return Poll::Ready(None); + } + + loop { + let watch_resp = match ready!(Pin::new(&mut this.inner).poll_next(cx)) { + Some(v) => v, + None => { + this.done = true; + return Poll::Ready(None); + } + }; + + for event in watch_resp.events() { + match event.event_type() { + EventType::Put => { + let kv = event.kv().expect("put event with no kv"); + if kv.key() == this.key { + continue; + } + let revision = kv.mod_revision(); + if revision <= this.key_mod_revision { + continue; + } + info!( + "watcher detected put event on key {:?} with revision {} > {}", + this.key, revision, this.key_mod_revision + ); + this.done = true; + return Poll::Ready(Some(revision)); + } + EventType::Delete => { + let kv = event.kv().expect("delete event with no kv"); + let revision = kv.mod_revision(); + if revision < this.key_mod_revision { + continue; + } + if kv.key() == this.key { + let key_label = String::from_utf8_lossy(&this.key); + info!( + "watcher detected delete event on key {:?} with revision {} >= {}", + key_label, revision, this.key_mod_revision + ); + this.done = true; + return Poll::Ready(Some(revision)); + } + } + } + } + } + } +} + +pub type EtcdReconnectWatchStream = AutoReconnectWatchStream; +pub type EtcdJsonWatchStream = ValueWatchStream>; +pub type EtcdJsonPutWatchStream = PutWatchStream>; +pub type EtcdLockKeyChangeStream = LockKeyChangeStream; + /// /// Extension trait for [`WatchClient`]. /// @@ -40,6 +447,68 @@ pub enum WatchEvent { pub trait WatchClientExt { fn get_watch_client(&self) -> WatchClient; + fn json_watch_stream( + &self, + key: impl Into>, + watch_options: Option, + ) -> EtcdJsonWatchStream + where + V: DeserializeOwned + Send + 'static, + { + self.value_watch_stream(key, watch_options, JsonDecoder::default()) + } + + fn value_watch_stream( + &self, + key: impl Into>, + watch_options: Option, + decoder: D, + ) -> ValueWatchStream + where + D: WatchStreamValueDecoder, + { + let wc = self.get_watch_client(); + let key: Vec = key.into(); + let wopts_prototype = watch_options.unwrap_or_default().with_prev_key(); + + let connector = GrpcEtcdConenctor::new(wc, key, wopts_prototype); + let reconnecting_stream = AutoReconnectWatchStream::new(connector); + + ValueWatchStream::new(reconnecting_stream, decoder) + } + + fn put_watch_stream( + &self, + key: impl Into>, + watch_options: Option, + decoder: D, + ) -> PutWatchStream + where + D: WatchStreamValueDecoder, + { + let wc = self.get_watch_client(); + let key: Vec = key.into(); + let wopts_prototype = watch_options + .unwrap_or_default() + .with_filters(vec![WatchFilterType::NoDelete]); + + let connector = GrpcEtcdConenctor::new(wc, key, wopts_prototype); + let reconnecting_stream = AutoReconnectWatchStream::new(connector); + + PutWatchStream::new(reconnecting_stream, decoder) + } + + fn json_put_watch_stream( + &self, + key: impl Into>, + watch_options: Option, + ) -> EtcdJsonPutWatchStream + where + T: DeserializeOwned + Send + 'static, + { + self.put_watch_stream(key, watch_options, JsonDecoder::default()) + } + /// /// Creates a channel that watches for changes to a key in etcd. /// @@ -53,247 +522,37 @@ pub trait WatchClientExt { &self, key: impl Into>, watch_options: Option, - ) -> mpsc::Receiver> + ) -> EtcdJsonWatchStream where V: DeserializeOwned + Send + 'static, { - let wc = self.get_watch_client(); - let (tx, rx) = tokio::sync::mpsc::channel(10); - let key: Vec = key.into(); - tokio::spawn(async move { - let wopts_prototype = watch_options.unwrap_or_default().with_prev_key(); - let mut last_revision = None; // 0 = latest revision - 'outer: loop { - let mut wopts = wopts_prototype.clone(); - if let Some(rev) = last_revision { - wopts = wopts.with_start_revision(rev); - } - let wc2 = wc.clone(); - let key2 = key.clone(); - let retry_strategy = Exponential::from_millis_with_factor(10, 10.0).take(3); - - let (mut watcher, mut stream) = retry_etcd_legacy(retry_strategy, move || { - let mut wc = wc2.clone(); - let key = key2.clone(); - let wopts = wopts.clone(); - async move { wc.watch(key.clone(), Some(wopts)).await } - }) - .await - .expect("watch retry failed"); - - 'inner: while let Some(watch_resp) = stream.next().await { - match watch_resp { - Ok(watch_resp) => { - if watch_resp.canceled() { - // This is probably because the compaction_revision < initial revision - error!("watch cancelled: {watch_resp:?}"); - break 'outer; - } - for event in watch_resp.events() { - let watch_event = match event.event_type() { - EventType::Put => { - let kv = event.kv().expect("put event with no kv"); - let key = Vec::from(kv.key()); - let value = serde_json::from_slice::(kv.value()) - .expect("failed to deserialize controller state"); - last_revision.replace(kv.mod_revision()); - WatchEvent::Put { - key, - value, - revision: kv.mod_revision(), - } - } - EventType::Delete => { - let kv = event.kv().expect("delete event with no kv"); - let prev_value = event - .prev_kv() - .map(|prev_kv| prev_kv.value()) - .map(|prev_v| serde_json::from_slice::(prev_v)) - .transpose() - .expect("failed to deserialize prev controller state"); - let key = Vec::from(kv.key()); - last_revision.replace(kv.mod_revision()); - WatchEvent::Delete { - key, - prev_value, - revision: kv.mod_revision(), - } - } - }; - if tx.send(watch_event).await.is_err() { - warn!("closed watch event receiver"); - break 'outer; - } - } - } - Err(e) => { - error!("watch stream error: {:?}", e); - break 'inner; - } - } - } - let _ = watcher.cancel().await; - } - }); - rx + self.json_watch_stream::(key, watch_options) } fn json_put_watch_channel( &self, key: impl Into>, watch_options: Option, - ) -> mpsc::Receiver<(Revision, T)> + ) -> EtcdJsonPutWatchStream where T: DeserializeOwned + Send + 'static, { - let wc = self.get_watch_client(); - let (tx, rx) = tokio::sync::mpsc::channel(10); - let key: Vec = key.into(); - tokio::spawn(async move { - let mut last_revision = None; // 0 = latest revision - let wopts_prototype = watch_options - .unwrap_or_default() - .with_filters(vec![WatchFilterType::NoDelete]); - 'outer: loop { - let mut wopts = wopts_prototype.clone(); - if let Some(rev) = last_revision { - wopts = wopts.with_start_revision(rev); - } - - let retry_strategy = Exponential::from_millis_with_factor(10, 10.0).take(3); - let wc2 = wc.clone(); - let key2 = key.clone(); - let (mut watcher, mut stream) = retry_etcd_legacy(retry_strategy, move || { - let mut wc = wc2.clone(); - let key = key2.clone(); - let wopts = wopts.clone(); - async move { wc.watch(key.clone(), Some(wopts)).await } - }) - .await - .expect("watch retry failed"); - - 'inner: while let Some(watch_resp) = stream.next().await { - match watch_resp { - Ok(watch_resp) => { - let max_kv = watch_resp - .events() - .iter() - .filter_map(|ev| ev.kv()) - .max_by_key(|kv| kv.mod_revision()); - if let Some(kv) = max_kv { - let revision = kv.mod_revision(); - last_revision.replace(revision); - - let state = serde_json::from_slice::(kv.value()) - .expect("failed to deserialize kv value"); - if tx.send((revision, state)).await.is_err() { - let key_str = String::from_utf8(key).expect("key is not utf8"); - warn!("json watch channel closed its receiving half for {key_str}"); - break 'outer; - } - } else if watch_resp.canceled() { - // This is probably because the compaction_revision < initial revision - error!("watch cancelled: {watch_resp:?}"); - break 'outer; - } - } - Err(e) => { - error!("watch stream error: {:?}", e); - break 'inner; - } - } - } - let _ = watcher.cancel().await; - } - }); - rx + self.json_put_watch_stream::(key, watch_options) } - /// - /// Creates a broadcast channel that watches for a lock key that gets deleted. - /// - fn watch_lock_key_change( + fn watch_lock_key_change_stream( &self, key: impl Into>, key_mod_revision: Revision, - ) -> broadcast::Sender { + ) -> EtcdLockKeyChangeStream { let wc = self.get_watch_client(); let key: Vec = key.into(); + let wopts_prototype = WatchOptions::new().with_start_revision(key_mod_revision); - let (tx, _) = broadcast::channel(1); + let connector = GrpcEtcdConenctor::new(wc, key.clone(), wopts_prototype); + let reconnecting_stream = AutoReconnectWatchStream::new(connector); - let tx2 = tx.clone(); - tokio::spawn(async move { - let tx = tx2; - 'outer: loop { - let key2 = key.clone(); - let key = key.clone(); - let wopts = WatchOptions::new().with_start_revision(key_mod_revision); - let retry_strategy = Exponential::from_millis_with_factor(10, 10.0).take(3); - let wc2 = wc.clone(); - let (mut watcher, mut stream) = retry_etcd_legacy(retry_strategy, move || { - let mut wc = wc2.clone(); - let key = key2.clone(); - let wopts = wopts.clone(); - async move { wc.watch(key.clone(), Some(wopts)).await } - }) - .await - .expect("watch retry failed"); - - 'inner: while let Some(result) = stream.next().await { - match result { - Ok(watch_resp) => { - if watch_resp.canceled() { - // This is probably because the compaction_revision < initial revision - error!("watch cancelled: {watch_resp:?}"); - break 'inner; - } - for event in watch_resp.events() { - match event.event_type() { - EventType::Put => { - let kv = event.kv().expect("put event with no kv"); - if kv.key() == key { - continue; - } - let revision = kv.mod_revision(); - if revision <= key_mod_revision { - continue 'inner; - } - info!("watcher detected put event on key {key:?} with revision {revision} > {key_mod_revision}"); - let _ = tx.send(revision); - let _ = watcher.cancel().await; - break 'outer; - } - EventType::Delete => { - let kv = event.kv().expect("delete event with no kv"); - let revision = kv.mod_revision(); - if revision < key_mod_revision { - continue; - } - - if kv.key() == key { - let key_label = String::from_utf8_lossy(&key); - info!("watcher detected delete event on key {key_label:?} with revision {revision} >= {key_mod_revision}"); - let _ = tx.send(revision); - let _ = watcher.cancel().await; - break 'outer; - } - } - } - } - } - Err(e) => { - if !is_transient(&e) { - tracing::error!("watch stream error: {e}"); - break 'outer; - } - } - } - } - let _ = watcher.cancel().await; - } - }); - tx + LockKeyChangeStream::new(reconnecting_stream, key, key_mod_revision) } } @@ -302,3 +561,199 @@ impl WatchClientExt for WatchClient { self.clone() } } + +#[cfg(test)] +mod tests { + use super::*; + use futures::{StreamExt, executor::block_on, future, stream}; + use std::{ + collections::VecDeque, + sync::{Arc, Mutex}, + }; + + fn make_watch_response(canceled: bool, revision: Option) -> WatchResponse { + let events = revision + .map(|rev| { + vec![etcd_client::proto::PbEvent { + r#type: 0, + kv: Some(etcd_client::proto::PbKeyValue { + mod_revision: rev, + ..Default::default() + }), + ..Default::default() + }] + }) + .unwrap_or_default(); + + WatchResponse(etcd_client::proto::PbWatchResponse { + canceled, + events, + ..Default::default() + }) + } + + fn transient_error() -> Error { + Error::GRpcStatus(tonic::Status::new(tonic::Code::Unavailable, "transient")) + } + + fn non_transient_error() -> Error { + Error::GRpcStatus(tonic::Status::new( + tonic::Code::InvalidArgument, + "non-transient", + )) + } + + struct TestConnector { + connect_called: Arc>, + } + + impl EtcdConnector for TestConnector { + type WatchStream = stream::Empty>; + type ConnectFut = future::Ready>; + + fn connect_watch(&mut self, _last_revision: Option) -> Self::ConnectFut { + *self.connect_called.lock().expect("mutex poisoned") = true; + future::ready(Err(Error::WatchError("connect failed".to_string()))) + } + } + + struct MockConnector { + calls: Arc>>>, + plans: VecDeque>, Error>>, + } + + impl MockConnector { + fn new(plans: Vec>, Error>>) -> Self { + Self { + calls: Arc::new(Mutex::new(Vec::new())), + plans: plans.into(), + } + } + } + + impl EtcdConnector for MockConnector { + type WatchStream = stream::Iter>>; + type ConnectFut = future::Ready>; + + fn connect_watch(&mut self, last_revision: Option) -> Self::ConnectFut { + self.calls + .lock() + .expect("mutex poisoned") + .push(last_revision); + let planned = self.plans.pop_front().expect("missing test plan"); + match planned { + Ok(items) => future::ready(Ok(stream::iter(items))), + Err(e) => future::ready(Err(e)), + } + } + } + + #[test] + fn reconnecting_stream_terminates_when_connect_fails() { + let connect_called = Arc::new(Mutex::new(false)); + let mut stream = AutoReconnectWatchStream::new(TestConnector { + connect_called: Arc::clone(&connect_called), + }); + + let next = block_on(stream.next()); + assert!(next.is_none()); + assert!(*connect_called.lock().expect("mutex poisoned")); + } + + #[test] + fn json_watch_stream_returns_none_when_inner_is_empty() { + let mut stream = + ValueWatchStream::<_, serde_json::Value, JsonDecoder>::new( + stream::empty(), + JsonDecoder::default(), + ); + let next = block_on(stream.next()); + assert!(next.is_none()); + } + + #[test] + fn json_put_watch_stream_returns_none_when_inner_is_empty() { + let mut stream = + PutWatchStream::<_, serde_json::Value, JsonDecoder>::new( + stream::empty(), + JsonDecoder::default(), + ); + let next = block_on(stream.next()); + assert!(next.is_none()); + } + + #[test] + fn lock_key_change_stream_returns_none_when_inner_is_empty() { + let mut stream = LockKeyChangeStream::new(stream::empty(), b"/lock/key".to_vec(), 42); + let next = block_on(stream.next()); + assert!(next.is_none()); + } + + #[test] + fn reconnecting_stream_terminates_on_non_transient_stream_error() { + let connector = MockConnector::new(vec![Ok(vec![Err(non_transient_error())])]); + let calls = Arc::clone(&connector.calls); + let mut stream = AutoReconnectWatchStream::new(connector); + + let next = block_on(stream.next()); + assert!(next.is_none()); + assert_eq!(*calls.lock().expect("mutex poisoned"), vec![None]); + } + + #[test] + fn reconnecting_stream_reconnects_on_transient_stream_error() { + let connector = MockConnector::new(vec![ + Ok(vec![Err(transient_error())]), + Ok(vec![Ok(make_watch_response(false, Some(3)))]), + ]); + let calls = Arc::clone(&connector.calls); + let mut stream = AutoReconnectWatchStream::new(connector); + + let next = block_on(stream.next()); + assert!(next.is_some()); + assert_eq!(*calls.lock().expect("mutex poisoned"), vec![None, None]); + } + + #[test] + fn reconnecting_stream_reconnects_when_inner_stream_ends() { + let connector = MockConnector::new(vec![ + Ok(vec![]), + Ok(vec![Ok(make_watch_response(false, Some(7)))]), + ]); + let calls = Arc::clone(&connector.calls); + let mut stream = AutoReconnectWatchStream::new(connector); + + let next = block_on(stream.next()); + assert!(next.is_some()); + assert_eq!(*calls.lock().expect("mutex poisoned"), vec![None, None]); + } + + #[test] + fn reconnecting_stream_stops_on_canceled_watch_response() { + let connector = MockConnector::new(vec![Ok(vec![Ok(make_watch_response(true, None))])]); + let mut stream = AutoReconnectWatchStream::new(connector); + + let next = block_on(stream.next()); + assert!(next.is_none()); + } + + #[test] + fn reconnecting_stream_tracks_last_revision_for_reconnect() { + let connector = MockConnector::new(vec![ + Ok(vec![Ok(make_watch_response(false, Some(11)))]), + Ok(vec![]), + Err(non_transient_error()), + ]); + let calls = Arc::clone(&connector.calls); + let mut stream = AutoReconnectWatchStream::new(connector); + + let first = block_on(stream.next()); + assert!(first.is_some()); + let second = block_on(stream.next()); + assert!(second.is_none()); + + let calls = calls.lock().expect("mutex poisoned"); + assert_eq!(calls[0], None); + assert!(calls.iter().skip(1).all(|v| *v == Some(11))); + } +} diff --git a/tests/common.rs b/tests/common.rs index 6f63f1c..edba17d 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -1,17 +1,29 @@ +use bytes::Bytes; +use http_body_util::Empty; +use hyper::{Request, Response, body::Incoming, server::conn::http2, service::service_fn}; +use hyper_util::rt::{TokioExecutor, TokioIo}; use std::io::{self, IsTerminal}; +use std::net::SocketAddr; +use std::sync::Arc; +use tokio::net::TcpListener; +use tokio::sync::Mutex; +use tokio::sync::oneshot; use tracing::level_filters::LevelFilter; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; +use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; +use rust_etcd_utils::channel::connect_with_reliable_balanced_channel; + +#[allow(dead_code)] pub async fn get_etcd_client() -> etcd_client::Client { let url = option_env!("ETCD_TEST_URL").unwrap_or("http://localhost:2379"); - etcd_client::Client::connect([url], None) + connect_with_reliable_balanced_channel([url], None) .await .expect("failed to connect to etcd") } #[allow(dead_code)] pub fn random_str(len: usize) -> String { - use rand::{distributions::Alphanumeric, thread_rng, Rng}; + use rand::{Rng, distributions::Alphanumeric, thread_rng}; let mut rng = thread_rng(); (&mut rng) .sample_iter(&Alphanumeric) @@ -38,3 +50,128 @@ pub fn setup_tracing() { .try_init() .expect("failed to setup tracing"); } + +#[allow(dead_code)] +pub fn reserve_localhost_addr() -> SocketAddr { + let listener = + std::net::TcpListener::bind("127.0.0.1:0").expect("failed to reserve localhost port"); + let addr = listener + .local_addr() + .expect("failed to read reserved localhost addr"); + drop(listener); + addr +} + +#[allow(dead_code)] +pub fn endpoint_url(addr: SocketAddr) -> String { + format!("http://{}", addr) +} + +#[allow(dead_code)] +pub struct FakeTcpServer { + shutdown_tx: Option>, + join: tokio::task::JoinHandle<()>, +} + +#[allow(dead_code)] +pub async fn spawn_fake_tcp_server(addr: SocketAddr) -> FakeTcpServer { + let listener = TcpListener::bind(addr) + .await + .expect("failed to bind fake tcp server"); + let (shutdown_tx, mut shutdown_rx) = oneshot::channel::<()>(); + + let join = tokio::spawn(async move { + loop { + tokio::select! { + _ = &mut shutdown_rx => break, + accept_res = listener.accept() => { + match accept_res { + Ok((stream, _)) => { + drop(stream); + } + Err(_) => break, + } + } + } + } + }); + + FakeTcpServer { + shutdown_tx: Some(shutdown_tx), + join, + } +} + +impl FakeTcpServer { + #[allow(dead_code)] + pub async fn shutdown(mut self) { + if let Some(tx) = self.shutdown_tx.take() { + let _ = tx.send(()); + } + let _ = self.join.await; + } +} + +#[allow(dead_code)] +pub struct MockH2Server { + shutdown_tx: Option>, + join: tokio::task::JoinHandle<()>, + conn_tasks: Arc>>>, +} + +#[allow(dead_code)] +pub async fn spawn_mock_h2_server(addr: SocketAddr) -> MockH2Server { + let listener = TcpListener::bind(addr) + .await + .expect("failed to bind mock h2 server"); + let (shutdown_tx, mut shutdown_rx) = oneshot::channel::<()>(); + let conn_tasks: Arc>>> = Arc::new(Mutex::new(Vec::new())); + let conn_tasks_bg = Arc::clone(&conn_tasks); + + let join = tokio::spawn(async move { + loop { + tokio::select! { + _ = &mut shutdown_rx => break, + accept_res = listener.accept() => { + let Ok((stream, _)) = accept_res else { + break; + }; + let task = tokio::spawn(async move { + let io = TokioIo::new(stream); + let service = service_fn(|_req: Request| async move { + Ok::<_, std::convert::Infallible>(Response::new(Empty::::new())) + }); + let _ = http2::Builder::new(TokioExecutor::new()) + .serve_connection(io, service) + .await; + }); + conn_tasks_bg.lock().await.push(task); + } + } + } + }); + + MockH2Server { + shutdown_tx: Some(shutdown_tx), + join, + conn_tasks, + } +} + +impl MockH2Server { + #[allow(dead_code)] + pub async fn shutdown(mut self) { + if let Some(tx) = self.shutdown_tx.take() { + let _ = tx.send(()); + } + + { + let mut tasks = self.conn_tasks.lock().await; + for task in tasks.drain(..) { + task.abort(); + } + } + + let _ = self.join.await; + } +} diff --git a/tests/test_channel.rs b/tests/test_channel.rs new file mode 100644 index 0000000..5f4f90c --- /dev/null +++ b/tests/test_channel.rs @@ -0,0 +1,643 @@ +use std::time::{Duration, Instant}; + +use etcd_client::BalancedChannelBuilder; +use tonic::transport::{Endpoint, channel::Change}; +use tower::{Service, ServiceExt}; + +use rust_etcd_utils::channel::{ + EndpointStatus, ReliableBalancedChannelBuilder, connect_with_reliable_balanced_channel, +}; + +mod common; + +/// Verifies the reliable builder works as a drop-in replacement for a single healthy endpoint. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn reliable_channel_connect_put_get_single_endpoint() { + let mut client = connect_with_reliable_balanced_channel(["http://localhost:2379"], None) + .await + .expect("failed to connect with reliable builder"); + + let key = format!("test-channel-single-{}", common::random_str(12)); + let value = "value-single"; + + client + .put(key.as_str(), value, None) + .await + .expect("put failed"); + + let resp = client.get(key.as_str(), None).await.expect("get failed"); + let kv = resp.kvs().first().expect("missing kv"); + assert_eq!(kv.value_str().expect("utf8 value"), value); +} + +/// Verifies unreachable endpoints do not prevent requests when at least one healthy endpoint exists. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn reliable_channel_works_with_unreachable_endpoint_in_list() { + let mut client = connect_with_reliable_balanced_channel( + ["http://127.0.0.1:1", "http://localhost:2379"], + None, + ) + .await + .expect("failed to connect with mixed endpoints"); + + let key = format!("test-channel-mixed-{}", common::random_str(12)); + let value = "value-mixed"; + + client + .put(key.as_str(), value, None) + .await + .expect("put failed with mixed endpoints"); + + let resp = client.get(key.as_str(), None).await.expect("get failed"); + let kv = resp.kvs().first().expect("missing kv"); + assert_eq!(kv.value_str().expect("utf8 value"), value); +} + +/// Verifies adding and removing a bad endpoint does not break ongoing operations. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn reliable_channel_add_and_remove_unreachable_endpoint() { + let mut client = connect_with_reliable_balanced_channel(["http://localhost:2379"], None) + .await + .expect("failed to connect with reliable builder"); + + client + .add_endpoint("http://127.0.0.1:1") + .await + .expect("failed to add unreachable endpoint"); + + let key1 = format!("test-channel-add-remove-{}", common::random_str(12)); + client + .put(key1.as_str(), "v1", None) + .await + .expect("put failed after add_endpoint"); + + client + .remove_endpoint("http://127.0.0.1:1") + .await + .expect("failed to remove unreachable endpoint"); + + let key2 = format!("test-channel-add-remove-{}", common::random_str(12)); + client + .put(key2.as_str(), "v2", None) + .await + .expect("put failed after remove_endpoint"); +} + +/// Verifies explicit endpoint removal cleans both routing state and exposed status registry. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn removing_quarantined_endpoint_removes_it_from_registry() { + let mut builder = ReliableBalancedChannelBuilder::default(); + builder.probe_timeout = Duration::from_millis(100); + builder.quarantine_retry_interval = Duration::from_millis(250); + let stats_registry = builder.stats_registry(); + + let client = etcd_client::Client::connect_with_balanced_channel( + ["http://localhost:2379"], + None, + builder, + ) + .await + .expect("failed to connect with reliable builder"); + + let bad_endpoint = "http://127.0.0.1:1"; + client + .add_endpoint(bad_endpoint) + .await + .expect("failed to add unreachable endpoint"); + + let quarantined_seen = tokio::time::timeout(Duration::from_secs(3), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| k.contains("127.0.0.1:1") && *v == EndpointStatus::Quarantined) + { + break true; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for quarantined endpoint"); + assert!(quarantined_seen); + + client + .remove_endpoint(bad_endpoint) + .await + .expect("failed to remove unreachable endpoint"); + + tokio::time::timeout(Duration::from_secs(3), async { + loop { + if !stats_registry + .endpoint_status_snapshot() + .keys() + .any(|k| k.contains("127.0.0.1:1")) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for endpoint removal from registry"); +} + +/// Verifies a down endpoint is quarantined, then reactivated after the same endpoint comes back. +/// +/// Test phases: +/// 1. Add a fake endpoint while nothing is listening on its port. +/// 2. Wait for quarantine transition and assert stats were updated. +/// 3. Start a mock server on the same endpoint. +/// 4. Wait for active transition and assert quarantine count returns to zero. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn quarantined_endpoint_becomes_active_when_remote_comes_back_online() { + let mut builder = ReliableBalancedChannelBuilder::default(); + builder.probe_timeout = Duration::from_millis(100); + builder.quarantine_retry_interval = Duration::from_millis(150); + let stats_registry = builder.stats_registry(); + + let client = etcd_client::Client::connect_with_balanced_channel( + ["http://localhost:2379"], + None, + builder, + ) + .await + .expect("failed to connect with reliable builder"); + + let fake_addr = common::reserve_localhost_addr(); + let fake_endpoint = common::endpoint_url(fake_addr); + + // Phase 1: insert endpoint while down. + client + .add_endpoint(fake_endpoint.as_str()) + .await + .expect("failed to add fake endpoint"); + + // Phase 2: verify quarantine. + tokio::time::timeout(Duration::from_secs(3), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| { + k.contains(fake_endpoint.as_str()) && *v == EndpointStatus::Quarantined + }) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for fake endpoint quarantine"); + + let stats_after_quarantine = stats_registry.snapshot(); + assert!(stats_after_quarantine.quarantine_transitions >= 1); + assert!(stats_after_quarantine.quarantined_endpoints >= 1); + + // Phase 3: bring endpoint back. + let fake_server = common::spawn_fake_tcp_server(fake_addr).await; + + // Phase 4: verify recovery to active. + tokio::time::timeout(Duration::from_secs(5), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| k.contains(fake_endpoint.as_str()) && *v == EndpointStatus::Active) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for fake endpoint recovery to active"); + + let stats_after_recovery = stats_registry.snapshot(); + assert!(stats_after_recovery.active_endpoints >= 2); + assert_eq!(stats_after_recovery.quarantined_endpoints, 0); + + fake_server.shutdown().await; +} + +/// Verifies request path behavior across endpoint outage and restart using a mock HTTP/2 service. +/// +/// Test phases: +/// 1. Start endpoint and verify requests succeed. +/// 2. Stop endpoint and verify a new request fails. +/// 3. Drive additional requests while down and verify quarantine becomes visible. +/// 4. Restart endpoint, verify active status, and verify requests succeed again. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn request_fails_when_mock_endpoint_goes_down_then_recovers_after_restart() { + let mut builder = ReliableBalancedChannelBuilder::default(); + builder.probe_timeout = Duration::from_millis(100); + builder.quarantine_retry_interval = Duration::from_millis(150); + let stats_registry = builder.stats_registry(); + + let (mut channel, updater) = builder + .balanced_channel(16) + .expect("failed to build reliable balanced channel"); + + let addr = common::reserve_localhost_addr(); + let endpoint_url = common::endpoint_url(addr); + let mut mock_server = common::spawn_mock_h2_server(addr).await; + + let uri: http::Uri = endpoint_url.parse().expect("valid endpoint uri"); + let endpoint = Endpoint::from_shared(endpoint_url.clone()).expect("valid tonic endpoint"); + + // Phase 1: endpoint is up and should serve requests. + updater + .send(Change::Insert(uri.clone(), endpoint)) + .await + .expect("failed to insert mock endpoint"); + + tokio::time::timeout(Duration::from_secs(3), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| k.contains(endpoint_url.as_str()) && *v == EndpointStatus::Active) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for mock endpoint active"); + + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + let first_resp = channel + .ready() + .await + .expect("channel should be ready while mock server is up") + .call(req) + .await; + assert!( + first_resp.is_ok(), + "request should succeed when endpoint is up" + ); + assert_eq!( + stats_registry + .get_call_count(endpoint_url.as_str()) + .expect("missing call count for endpoint"), + 1 + ); + + // Phase 2: endpoint goes down and a new request should fail. + mock_server.shutdown().await; + + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + let second_resp = channel + .ready() + .await + .expect("channel should be ready before down request") + .call(req) + .await; + assert!( + second_resp.is_err(), + "request should fail after server shutdown" + ); + + // Phase 3: continue observing until error-reporting-driven quarantine becomes visible. + tokio::time::timeout(Duration::from_secs(3), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| { + k.contains(endpoint_url.as_str()) && *v == EndpointStatus::Quarantined + }) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for endpoint quarantine after failure"); + + // Phase 4: restart endpoint and verify recovery. + mock_server = common::spawn_mock_h2_server(addr).await; + + tokio::time::timeout(Duration::from_secs(5), async { + loop { + if stats_registry + .endpoint_status_snapshot() + .iter() + .any(|(k, v)| k.contains(endpoint_url.as_str()) && *v == EndpointStatus::Active) + { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for endpoint to become active after restart"); + + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + let third_resp = channel + .ready() + .await + .expect("channel should be ready after endpoint restart") + .call(req) + .await; + assert!( + third_resp.is_ok(), + "request should succeed after endpoint restart" + ); + assert!( + stats_registry + .get_call_count(endpoint_url.as_str()) + .expect("missing call count for endpoint") + >= 3 + ); + + mock_server.shutdown().await; +} + +/// Verifies load is distributed across two healthy mock endpoints. +/// +/// Test phases: +/// 1. Start two mock servers and insert both URIs. +/// 2. Wait until both endpoints are marked active. +/// 3. Send many requests through the balanced channel. +/// 4. Assert both endpoints receive a meaningful share of requests. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn requests_are_distributed_between_two_mock_endpoints() { + let mut builder = ReliableBalancedChannelBuilder::default(); + builder.probe_timeout = Duration::from_millis(100); + builder.quarantine_retry_interval = Duration::from_millis(150); + let stats_registry = builder.stats_registry(); + + let (mut channel, updater) = builder + .balanced_channel(32) + .expect("failed to build reliable balanced channel"); + + let addr_a = common::reserve_localhost_addr(); + let addr_b = common::reserve_localhost_addr(); + let endpoint_a = common::endpoint_url(addr_a); + let endpoint_b = common::endpoint_url(addr_b); + + let server_a = common::spawn_mock_h2_server(addr_a).await; + let server_b = common::spawn_mock_h2_server(addr_b).await; + + let uri_a: http::Uri = endpoint_a.parse().expect("valid endpoint a uri"); + let uri_b: http::Uri = endpoint_b.parse().expect("valid endpoint b uri"); + updater + .send(Change::Insert( + uri_a, + Endpoint::from_shared(endpoint_a.clone()).expect("valid tonic endpoint a"), + )) + .await + .expect("failed to insert endpoint a"); + updater + .send(Change::Insert( + uri_b, + Endpoint::from_shared(endpoint_b.clone()).expect("valid tonic endpoint b"), + )) + .await + .expect("failed to insert endpoint b"); + + tokio::time::timeout(Duration::from_secs(3), async { + loop { + let snapshot = stats_registry.endpoint_status_snapshot(); + let a_active = snapshot + .iter() + .any(|(k, v)| k.contains(endpoint_a.as_str()) && *v == EndpointStatus::Active); + let b_active = snapshot + .iter() + .any(|(k, v)| k.contains(endpoint_b.as_str()) && *v == EndpointStatus::Active); + if a_active && b_active { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for both endpoints to become active"); + + let total_requests = 400usize; + for _ in 0..total_requests { + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + let resp = channel + .ready() + .await + .expect("channel should be ready") + .call(req) + .await; + assert!( + resp.is_ok(), + "request should succeed with two healthy endpoints" + ); + } + + let a_count = stats_registry + .get_call_count(endpoint_a.as_str()) + .expect("missing call count for endpoint a"); + let b_count = stats_registry + .get_call_count(endpoint_b.as_str()) + .expect("missing call count for endpoint b"); + let observed_total = a_count + b_count; + + assert_eq!( + observed_total, total_requests, + "all requests should be attributed to one of the two endpoints" + ); + + // Keep this broad to avoid flakiness while still proving non-trivial distribution. + let min_share = total_requests / 5; // 20% + assert!( + a_count >= min_share, + "endpoint a received too few requests: {a_count}/{total_requests}" + ); + assert!( + b_count >= min_share, + "endpoint b received too few requests: {b_count}/{total_requests}" + ); + + server_a.shutdown().await; + server_b.shutdown().await; +} + +/// Verifies fair distribution before failure, then successful drain after one endpoint is killed. +/// +/// Test phases: +/// 1. Start two mock servers and insert both URIs. +/// 2. Send 500 requests and assert both endpoints receive a fair share. +/// 3. Kill one endpoint. +/// 4. Retry requests until 1000 successful sends are reached or 30 seconds pass. +/// 5. Assert one endpoint handled at least 500 more requests than the other. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn fair_share_then_failover_drains_to_1000_successful_requests() { + let mut builder = ReliableBalancedChannelBuilder::default(); + builder.probe_timeout = Duration::from_millis(100); + builder.quarantine_retry_interval = Duration::from_millis(150); + let stats_registry = builder.stats_registry(); + + let (mut channel, updater) = builder + .balanced_channel(32) + .expect("failed to build reliable balanced channel"); + + let addr_a = common::reserve_localhost_addr(); + let addr_b = common::reserve_localhost_addr(); + let endpoint_a = common::endpoint_url(addr_a); + let endpoint_b = common::endpoint_url(addr_b); + + let mut server_a = Some(common::spawn_mock_h2_server(addr_a).await); + let mut server_b = Some(common::spawn_mock_h2_server(addr_b).await); + + let uri_a: http::Uri = endpoint_a.parse().expect("valid endpoint a uri"); + let uri_b: http::Uri = endpoint_b.parse().expect("valid endpoint b uri"); + updater + .send(Change::Insert( + uri_a, + Endpoint::from_shared(endpoint_a.clone()).expect("valid tonic endpoint a"), + )) + .await + .expect("failed to insert endpoint a"); + updater + .send(Change::Insert( + uri_b, + Endpoint::from_shared(endpoint_b.clone()).expect("valid tonic endpoint b"), + )) + .await + .expect("failed to insert endpoint b"); + + tokio::time::timeout(Duration::from_secs(3), async { + loop { + let snapshot = stats_registry.endpoint_status_snapshot(); + let a_active = snapshot + .iter() + .any(|(k, v)| k.contains(endpoint_a.as_str()) && *v == EndpointStatus::Active); + let b_active = snapshot + .iter() + .any(|(k, v)| k.contains(endpoint_b.as_str()) && *v == EndpointStatus::Active); + if a_active && b_active { + break; + } + tokio::time::sleep(Duration::from_millis(25)).await; + } + }) + .await + .expect("timed out waiting for both endpoints to become active"); + + let total_requests = 1000usize; + let first_phase_requests = 500usize; + + for _ in 0..first_phase_requests { + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + let resp = channel + .ready() + .await + .expect("channel should be ready") + .call(req) + .await; + assert!( + resp.is_ok(), + "request should succeed with two healthy endpoints" + ); + } + + let a_after_500 = stats_registry + .get_call_count(endpoint_a.as_str()) + .unwrap_or(0); + let b_after_500 = stats_registry + .get_call_count(endpoint_b.as_str()) + .unwrap_or(0); + assert_eq!(a_after_500 + b_after_500, first_phase_requests); + + let min_share_after_500 = first_phase_requests / 5; // 20% + assert!( + a_after_500 >= min_share_after_500, + "endpoint a received too few first-phase requests: {a_after_500}/{first_phase_requests}" + ); + assert!( + b_after_500 >= min_share_after_500, + "endpoint b received too few first-phase requests: {b_after_500}/{first_phase_requests}" + ); + + // Kill the endpoint with fewer first-phase requests so the survivor ends with a clear +500 skew. + let killed_a = a_after_500 <= b_after_500; + if killed_a { + server_a + .take() + .expect("server a should exist") + .shutdown() + .await; + } else { + server_b + .take() + .expect("server b should exist") + .shutdown() + .await; + } + + let mut successful_requests = first_phase_requests; + let deadline = Instant::now() + Duration::from_secs(30); + while successful_requests < total_requests && Instant::now() < deadline { + let req = http::Request::builder() + .uri("http://mock.local/ping") + .body(tonic::body::Body::empty()) + .expect("failed to build request"); + + match channel.ready().await { + Ok(ready) => { + if ready.call(req).await.is_ok() { + successful_requests += 1; + } else { + tokio::time::sleep(Duration::from_millis(10)).await; + } + } + Err(_) => { + tokio::time::sleep(Duration::from_millis(10)).await; + } + } + } + + assert_eq!( + successful_requests, total_requests, + "expected to complete {total_requests} successful requests within 30 seconds" + ); + + let a_final = stats_registry + .get_call_count(endpoint_a.as_str()) + .unwrap_or(0); + let b_final = stats_registry + .get_call_count(endpoint_b.as_str()) + .unwrap_or(0); + assert!( + a_final + b_final >= total_requests, + "expected at least {total_requests} attributed calls, got {}", + a_final + b_final + ); + + let skew = a_final.abs_diff(b_final); + assert!( + skew >= 500, + "expected at least 500 request skew after failover, got {skew} (a={a_final}, b={b_final})" + ); + + if let Some(server) = server_a.take() { + server.shutdown().await; + } + if let Some(server) = server_b.take() { + server.shutdown().await; + } +} diff --git a/tests/test_lock.rs b/tests/test_lock.rs index b244dea..a7ce087 100644 --- a/tests/test_lock.rs +++ b/tests/test_lock.rs @@ -3,7 +3,7 @@ use std::time::Duration; use common::random_str; use rust_etcd_utils::{ lease::ManagedLeaseFactory, - lock::{spawn_lock_manager, spawn_lock_manager_with_lease_factory, TryLockError}, + lock::{TryLockError, spawn_lock_manager, spawn_lock_manager_with_lease_factory}, }; mod common; diff --git a/tests/test_log.rs b/tests/test_log.rs index f768c84..5347632 100644 --- a/tests/test_log.rs +++ b/tests/test_log.rs @@ -3,7 +3,7 @@ use std::time::Duration; use common::random_str; use rust_etcd_utils::{ lease::ManagedLeaseFactory, - lock::{spawn_lock_manager_with_lease_factory, ManagedLockGuard}, + lock::{ManagedLockGuard, spawn_lock_manager_with_lease_factory}, log::{ExclusiveLogUpdater, LogWatcher, WriteError}, }; use serde::{Deserialize, Serialize}; diff --git a/tests/test_watcher.rs b/tests/test_watcher.rs new file mode 100644 index 0000000..aafc88f --- /dev/null +++ b/tests/test_watcher.rs @@ -0,0 +1,269 @@ +use std::time::Duration; + +use futures::StreamExt; +use rust_etcd_utils::{ + Revision, + watcher::{AutoReconnectWatchStream, EtcdConnector, WatchClientExt, WatchEvent}, +}; +use serde::{Deserialize, Serialize}; +use std::{ + collections::VecDeque, + sync::{Arc, Mutex}, +}; + +mod common; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct DummyValue { + value: i64, +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn json_watch_stream_emits_put_then_delete_with_prev_value() { + let mut etcd = common::get_etcd_client().await; + let key = format!("watch-json-{}", common::random_str(12)); + + let _ = etcd.delete(key.as_str(), None).await; + + let snapshot = etcd + .get(key.as_str(), None) + .await + .expect("snapshot get failed"); + let start_revision = snapshot + .header() + .expect("missing snapshot header") + .revision() + + 1; + + let mut stream = etcd.watch_client().json_watch_stream::( + key.clone(), + Some(etcd_client::WatchOptions::new().with_start_revision(start_revision)), + ); + + let put_value = DummyValue { value: 10 }; + etcd.kv_client() + .put( + key.as_str(), + serde_json::to_string(&put_value).expect("json"), + None, + ) + .await + .expect("put failed"); + + let put_event = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("watch timeout on put") + .expect("watch stream ended unexpectedly"); + + match put_event { + WatchEvent::Put { + key: event_key, + value, + revision: _, + } => { + assert_eq!(event_key, key.as_bytes().to_vec()); + assert_eq!(value, put_value); + } + WatchEvent::Delete { .. } => panic!("expected put event"), + } + + etcd.kv_client() + .delete( + key.as_str(), + Some(etcd_client::DeleteOptions::new().with_prev_key()), + ) + .await + .expect("delete failed"); + + let delete_event = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("watch timeout on delete") + .expect("watch stream ended unexpectedly"); + + match delete_event { + WatchEvent::Delete { + key: event_key, + prev_value, + revision: _, + } => { + assert_eq!(event_key, key.as_bytes().to_vec()); + assert_eq!(prev_value, Some(put_value)); + } + WatchEvent::Put { .. } => panic!("expected delete event"), + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn json_put_watch_stream_emits_put_events_and_filters_delete() { + let mut etcd = common::get_etcd_client().await; + let key = format!("watch-json-put-{}", common::random_str(12)); + + let _ = etcd.delete(key.as_str(), None).await; + + let snapshot = etcd + .get(key.as_str(), None) + .await + .expect("snapshot get failed"); + let start_revision = snapshot + .header() + .expect("missing snapshot header") + .revision() + + 1; + + let mut stream = etcd.watch_client().json_put_watch_stream::( + key.clone(), + Some(etcd_client::WatchOptions::new().with_start_revision(start_revision)), + ); + + let first = DummyValue { value: 1 }; + etcd.kv_client() + .put( + key.as_str(), + serde_json::to_string(&first).expect("json"), + None, + ) + .await + .expect("first put failed"); + + let (_rev1, got_first) = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("watch timeout on first put") + .expect("watch stream ended unexpectedly"); + assert_eq!(got_first, first); + + etcd.kv_client() + .delete(key.as_str(), None) + .await + .expect("delete failed"); + + let second = DummyValue { value: 2 }; + etcd.kv_client() + .put( + key.as_str(), + serde_json::to_string(&second).expect("json"), + None, + ) + .await + .expect("second put failed"); + + let (_rev2, got_second) = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("watch timeout on second put") + .expect("watch stream ended unexpectedly"); + assert_eq!(got_second, second); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn watch_lock_key_change_stream_emits_on_lock_key_delete() { + let etcd = common::get_etcd_client().await; + let key = format!("watch-lock-delete-{}", common::random_str(12)); + + let put_resp = etcd + .kv_client() + .put(key.as_str(), "seed", None) + .await + .expect("seed put failed"); + let key_mod_revision = put_resp.header().expect("missing put header").revision(); + + let mut stream = etcd + .watch_client() + .watch_lock_key_change_stream(key.clone(), key_mod_revision); + + etcd.kv_client() + .delete(key.as_str(), None) + .await + .expect("delete failed"); + + let revision = tokio::time::timeout(Duration::from_secs(5), stream.next()) + .await + .expect("watch timeout on lock delete") + .expect("watch stream ended unexpectedly"); + + assert!(revision >= key_mod_revision); +} + +fn make_watch_response(revision: Revision) -> etcd_client::WatchResponse { + etcd_client::WatchResponse(etcd_client::proto::PbWatchResponse { + canceled: false, + events: vec![etcd_client::proto::PbEvent { + r#type: 0, + kv: Some(etcd_client::proto::PbKeyValue { + mod_revision: revision, + ..Default::default() + }), + ..Default::default() + }], + ..Default::default() + }) +} + +fn transient_error() -> etcd_client::Error { + etcd_client::Error::GRpcStatus(tonic::Status::new(tonic::Code::Unavailable, "transient")) +} + +struct ScriptedConnector { + calls: Arc>>>, + plans: VecDeque< + Result>, etcd_client::Error>, + >, +} + +impl ScriptedConnector { + fn new( + plans: Vec< + Result>, etcd_client::Error>, + >, + ) -> Self { + Self { + calls: Arc::new(Mutex::new(Vec::new())), + plans: plans.into(), + } + } +} + +impl EtcdConnector for ScriptedConnector { + type WatchStream = futures::stream::Iter< + std::vec::IntoIter>, + >; + type ConnectFut = futures::future::Ready>; + + fn connect_watch(&mut self, last_revision: Option) -> Self::ConnectFut { + self.calls + .lock() + .expect("mutex poisoned") + .push(last_revision); + + let planned = self.plans.pop_front().expect("missing reconnect plan"); + match planned { + Ok(items) => futures::future::ready(Ok(futures::stream::iter(items))), + Err(e) => futures::future::ready(Err(e)), + } + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn autoreconnect_stream_reconnects_after_transient_error() { + let connector = ScriptedConnector::new(vec![ + Ok(vec![Err(transient_error())]), + Ok(vec![Ok(make_watch_response(123))]), + ]); + let calls = Arc::clone(&connector.calls); + + let mut stream = AutoReconnectWatchStream::new(connector); + let next = tokio::time::timeout(Duration::from_secs(3), stream.next()) + .await + .expect("timeout waiting for reconnect") + .expect("stream ended unexpectedly"); + + let max_revision = next + .events() + .iter() + .filter_map(|ev| ev.kv()) + .map(|kv| kv.mod_revision()) + .max() + .expect("missing kv in reconnect event"); + assert_eq!(max_revision, 123); + + let calls = calls.lock().expect("mutex poisoned"); + assert_eq!(*calls, vec![None, None]); +}