From c83d8021504d42f361e033f0c1cb65dd843715a4 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 15 Dec 2025 14:16:29 +0100 Subject: [PATCH 01/14] devenv, rust env with tonic and protobuf-dev and readme - readme for dev-env - initialize the proj naming crate "req_packager" --- .envrc | 12 +++++ .gitignore | 14 ++++++ Cargo.toml | 3 +- devenv.lock | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ devenv.nix | 66 ++++++++++++++++++++++++++++ devenv.yaml | 8 ++++ 6 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 .envrc create mode 100644 devenv.lock create mode 100644 devenv.nix create mode 100644 devenv.yaml diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..cc5c18b --- /dev/null +++ b/.envrc @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +export DIRENV_WARN_TIMEOUT=20s + +eval "$(devenv direnvrc)" + +# `use devenv` supports the same options as the `devenv shell` command. +# +# To silence all output, use `--quiet`. +# +# Example usage: use devenv --quiet --impure --option services.postgres.enable:bool true +use devenv diff --git a/.gitignore b/.gitignore index ea8c4bf..3658b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,15 @@ +# Devenv +.devenv* +devenv.local.nix +devenv.local.yaml + +# direnv +.direnv + +# pre-commit +.pre-commit-config.yaml + + +# Added by cargo + /target diff --git a/Cargo.toml b/Cargo.toml index 1a80f9d..e401bd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [package] +name = "req_packager" name = "poc-ui" version = "0.1.0" -edition = "2024" +edition = "2021" [dependencies] axum = { version = "0.8.8", features = ["tokio", "http2"] } diff --git a/devenv.lock b/devenv.lock new file mode 100644 index 0000000..f587c95 --- /dev/null +++ b/devenv.lock @@ -0,0 +1,123 @@ +{ + "nodes": { + "devenv": { + "locked": { + "dir": "src/modules", + "lastModified": 1765800147, + "owner": "cachix", + "repo": "devenv", + "rev": "ada06017aef4941e91aabe7507e33d750757ae0c", + "type": "github" + }, + "original": { + 
"dir": "src/modules", + "owner": "cachix", + "repo": "devenv", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1765121682, + "owner": "edolstra", + "repo": "flake-compat", + "rev": "65f23138d8d09a92e30f1e5c87611b23ef451bf3", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "git-hooks": { + "inputs": { + "flake-compat": "flake-compat", + "gitignore": "gitignore", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1765464257, + "owner": "cachix", + "repo": "git-hooks.nix", + "rev": "09e45f2598e1a8499c3594fe11ec2943f34fe509", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "git-hooks.nix", + "type": "github" + } + }, + "gitignore": { + "inputs": { + "nixpkgs": [ + "git-hooks", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1762808025, + "owner": "hercules-ci", + "repo": "gitignore.nix", + "rev": "cb5e3fdca1de58ccbc3ef53de65bd372b48f567c", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "gitignore.nix", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1764580874, + "owner": "cachix", + "repo": "devenv-nixpkgs", + "rev": "dcf61356c3ab25f1362b4a4428a6d871e84f1d1d", + "type": "github" + }, + "original": { + "owner": "cachix", + "ref": "rolling", + "repo": "devenv-nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "devenv": "devenv", + "git-hooks": "git-hooks", + "nixpkgs": "nixpkgs", + "pre-commit-hooks": [ + "git-hooks" + ], + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1765766816, + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "4f53a635709d82652567f51ef7af4365fbc0c88b", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git 
a/devenv.nix b/devenv.nix new file mode 100644 index 0000000..c9a3227 --- /dev/null +++ b/devenv.nix @@ -0,0 +1,66 @@ +{ + pkgs, + lib, + config, + inputs, + ... +}: + +{ + # https://devenv.sh/basics/ + # env.GREET = "devenv"; + + # https://devenv.sh/packages/ + packages = [ + pkgs.git + pkgs.protobuf_32 + ]; + + # https://devenv.sh/languages/ + languages.rust = { + enable = true; + channel = "stable"; + version = "1.81.0"; + components = [ + "rustc" + "cargo" + "clippy" + "rustfmt" + "rust-analyzer" + ]; + }; + + # https://devenv.sh/processes/ + # processes.dev.exec = "${lib.getExe pkgs.watchexec} -n -- ls -la"; + + # https://devenv.sh/services/ + # services.postgres.enable = true; + + # https://devenv.sh/scripts/ + # scripts.hello.exec = '' + # echo hello from $GREET + # ''; + + # https://devenv.sh/basics/ + # enterShell = '' + # hello # Run scripts directly + # git --version # Use packages + # ''; + + # https://devenv.sh/tasks/ + # tasks = { + # "myproj:setup".exec = "mytool build"; + # "devenv:enterShell".after = [ "myproj:setup" ]; + # }; + + # https://devenv.sh/tests/ + # enterTest = '' + # echo "Running tests" + # git --version | grep --color=auto "${pkgs.git.version}" + # ''; + + # https://devenv.sh/git-hooks/ + # git-hooks.hooks.shellcheck.enable = true; + + # See full reference at https://devenv.sh/reference/options/ +} diff --git a/devenv.yaml b/devenv.yaml new file mode 100644 index 0000000..37e8589 --- /dev/null +++ b/devenv.yaml @@ -0,0 +1,8 @@ +inputs: + nixpkgs: + url: github:cachix/devenv-nixpkgs/rolling + rust-overlay: + url: github:oxalica/rust-overlay + inputs: + nixpkgs: + follows: nixpkgs From e06a259f53f37e91e006a0837f983ddb1c05c493 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 15 Dec 2025 15:35:49 +0100 Subject: [PATCH 02/14] hello world boilerplate --- Cargo.lock | 710 +++++++++++++++++++-------------------- Cargo.toml | 21 +- build.rs | 4 + devenv.nix | 3 +- proto/req_packager.proto | 14 + src/client.rs | 19 ++ src/server.rs | 39 +++
7 files changed, 445 insertions(+), 365 deletions(-) create mode 100644 build.rs create mode 100644 proto/req_packager.proto create mode 100644 src/client.rs create mode 100644 src/server.rs diff --git a/Cargo.lock b/Cargo.lock index c27cbd2..48accfc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,23 @@ dependencies = [ "memchr", ] +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -19,19 +36,16 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "axum" -version = "0.8.8" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" dependencies = [ "axum-core", "bytes", - "form_urlencoded", "futures-util", "http", "http-body", "http-body-util", - "hyper", - "hyper-util", "itoa", "matchit", "memchr", @@ -39,22 +53,17 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde_core", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", "sync_wrapper", - "tokio", "tower", "tower-layer", "tower-service", - "tracing", ] [[package]] name = "axum-core" -version = "0.5.6" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" dependencies = [ "bytes", "futures-core", @@ -66,39 +75,25 @@ 
dependencies = [ "sync_wrapper", "tower-layer", "tower-service", - "tracing", ] [[package]] -name = "bitflags" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" - -[[package]] -name = "block-buffer" -version = "0.10.4" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "bstr" -version = "1.12.1" +name = "bitflags" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" -dependencies = [ - "memchr", - "serde", -] +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bytes" -version = "1.11.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "cfg-if" @@ -107,64 +102,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] -name = "cpufeatures" -version = "0.2.17" +name = "either" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "crypto-common" -version = "0.1.7" +name = "errno" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ - "generic-array", - "typenum", + "libc", + "windows-sys 0.61.2", ] [[package]] -name = "digest" -version = "0.10.7" +name = "fastrand" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] -name = "equivalent" -version = "1.0.2" +name = "fixedbitset" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "fnv" @@ -172,15 +141,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 
-[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -221,44 +181,22 @@ dependencies = [ ] [[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "globset" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" -dependencies = [ - "aho-corasick", - "bstr", - "log", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "globwalk" -version = "0.9.1" +name = "getrandom" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ - "bitflags", - "ignore", - "walkdir", + "cfg-if", + "libc", + "r-efi", + "wasip2", ] [[package]] name = "h2" -version = "0.4.13" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ "atomic-waker", "bytes", @@ -279,6 +217,12 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "http" version = "1.4.0" @@ -312,12 +256,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" - [[package]] name = "httparse" version = "1.10.1" @@ -330,15 +268,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" -[[package]] -name = "humansize" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" -dependencies = [ - "libm", -] - [[package]] name = "hyper" version = "1.8.1" @@ -359,72 +288,79 @@ dependencies = [ "pin-utils", "smallvec", "tokio", + "want", ] [[package]] -name = "hyper-util" -version = "0.1.20" +name = "hyper-timeout" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "bytes", - "http", - "http-body", "hyper", + "hyper-util", "pin-project-lite", "tokio", "tower-service", ] [[package]] -name = "ignore" -version = "0.4.25" +name = "hyper-util" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "crossbeam-deque", - "globset", - "log", - "memchr", - "regex-automata", - "same-file", - "walkdir", - "winapi-util", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "hyper", + "libc", + 
"pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", ] [[package]] name = "indexmap" -version = "2.13.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", "hashbrown", ] [[package]] -name = "itoa" -version = "1.0.17" +name = "itertools" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "itoa" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "libc" -version = "0.2.181" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] -name = "libm" -version = "0.2.16" +name = "linux-raw-sys" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" @@ -440,9 +376,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "memchr" -version = "2.8.0" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "mime" @@ -450,16 +386,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" -dependencies = [ - "mime", - "unicase", -] - [[package]] name = "mio" version = "1.1.1" @@ -471,6 +397,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + [[package]] name = "once_cell" version = "1.21.3" @@ -484,48 +416,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] -name = "pest" -version = "2.8.6" +name = "petgraph" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "memchr", - "ucd-trie", + "fixedbitset", + "indexmap", ] [[package]] -name = "pest_derive" -version = "2.8.6" +name = "pin-project" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" dependencies = [ - "pest", - "pest_generator", + "pin-project-internal", ] [[package]] -name = "pest_generator" -version = "2.8.6" +name = "pin-project-internal" +version = 
"1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ - "pest", - "pest_meta", "proc-macro2", "quote", "syn", ] -[[package]] -name = "pest_meta" -version = "2.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" -dependencies = [ - "pest", - "sha2", -] - [[package]] name = "pin-project-lite" version = "0.2.16" @@ -539,161 +458,191 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "poc-ui" -version = "0.1.0" +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ - "axum", - "humansize", - "serde", - "tera", - "tokio", - "tower-http", + "proc-macro2", + "syn", ] [[package]] name = "proc-macro2" -version = "1.0.106" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] -name = "quote" -version = "1.0.44" +name = "prost" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ - "proc-macro2", + "bytes", + "prost-derive", ] [[package]] -name = "regex" -version = "1.12.3" +name = "prost-build" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "pulldown-cmark", + "pulldown-cmark-to-cmark", + "regex", + "syn", + "tempfile", ] [[package]] -name = "regex-automata" -version = "0.4.14" +name = "prost-derive" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "regex-syntax" -version = "0.8.9" +name = "prost-types" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +dependencies = [ + "prost", +] [[package]] -name = "ryu" -version = "1.0.23" +name = "pulldown-cmark" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] [[package]] -name = "same-file" -version = "1.0.6" +name = "pulldown-cmark-to-cmark" +version = "21.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" dependencies = [ - "winapi-util", + "pulldown-cmark", 
] [[package]] -name = "serde" -version = "1.0.228" +name = "quote" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ - "serde_core", - "serde_derive", + "proc-macro2", ] [[package]] -name = "serde_core" -version = "1.0.228" +name = "r-efi" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ - "serde_derive", + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] -name = "serde_derive" -version = "1.0.228" +name = "regex-automata" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ - "proc-macro2", - "quote", - "syn", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] -name = "serde_json" -version = "1.0.149" +name = "regex-syntax" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "req_packager" +version = "0.1.0" dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", + "prost", + "tokio", + "tonic", + "tonic-prost", + "tonic-prost-build", ] [[package]] -name = "serde_path_to_error" -version 
= "0.1.20" +name = "rustix" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "itoa", - "serde", - "serde_core", + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", ] [[package]] -name = "serde_urlencoded" -version = "0.7.1" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", + "serde_derive", ] [[package]] -name = "sha2" -version = "0.10.9" +name = "serde_derive" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "slab" -version = "0.4.12" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "smallvec" @@ -703,9 +652,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", "windows-sys 0.60.2", @@ -713,9 +662,9 @@ 
dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -729,26 +678,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" [[package]] -name = "tera" -version = "1.20.1" +name = "tempfile" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "globwalk", - "lazy_static", - "pest", - "pest_derive", - "regex", - "serde", - "serde_json", - "unicode-segmentation", + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.61.2", ] [[package]] name = "tokio" -version = "1.49.0" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ "bytes", "libc", @@ -770,11 +716,22 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" -version = "0.7.18" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = 
[ "bytes", "futures-core", @@ -784,40 +741,85 @@ dependencies = [ ] [[package]] -name = "tower" -version = "0.5.3" +name = "tonic" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ - "futures-core", - "futures-util", - "pin-project-lite", + "async-trait", + "axum", + "base64", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "socket2", "sync_wrapper", "tokio", + "tokio-stream", + "tower", "tower-layer", "tower-service", "tracing", ] [[package]] -name = "tower-http" -version = "0.6.8" +name = "tonic-build" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +dependencies = [ + "prettyplease", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tonic-prost" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" dependencies = [ - "bitflags", "bytes", + "prost", + "tonic", +] + +[[package]] +name = "tonic-prost-build" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn", + "tempfile", + "tonic-build", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ "futures-core", 
"futures-util", - "http", - "http-body", - "http-body-util", - "http-range-header", - "httpdate", - "mime", - "mime_guess", - "percent-encoding", + "indexmap", "pin-project-lite", + "slab", + "sync_wrapper", "tokio", "tokio-util", "tower-layer", @@ -839,68 +841,60 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.44" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ - "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] -name = "tracing-core" -version = "0.1.36" +name = "tracing-attributes" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ - "once_cell", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "typenum" -version = "1.19.0" +name = "tracing-core" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" +dependencies = [ + "once_cell", +] [[package]] -name = "ucd-trie" -version = "0.1.7" +name = "try-lock" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicase" -version = "2.9.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" 
+checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" - -[[package]] -name = "unicode-segmentation" -version = "1.12.0" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] -name = "version_check" -version = "0.9.5" +name = "want" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" dependencies = [ - "same-file", - "winapi-util", + "try-lock", ] [[package]] @@ -910,12 +904,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "winapi-util" -version = "0.1.11" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "windows-sys 0.61.2", + "wit-bindgen", ] [[package]] @@ -1008,7 +1002,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "zmij" -version = "1.0.20" +name = "wit-bindgen" +version = "0.46.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de98dfa5d5b7fef4ee834d0073d560c9ca7b6c46a71d058c48db7960f8cfaf7" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" diff --git a/Cargo.toml b/Cargo.toml index e401bd3..c37d3f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,19 @@ name = "poc-ui" version = "0.1.0" edition = "2021" +[[bin]] +name = "rp_server" +path = "src/server.rs" + +[[bin]] +name = "rp_client" +path = "src/client.rs" + [dependencies] -axum = { version = "0.8.8", features = ["tokio", "http2"] } -tokio = { version = "1.49.0", features = ["rt", "rt-multi-thread"] } -tower-http = { version = "0.6.8", features = ["fs"] } -tera = { version = "1", default-features = false } -serde = { version = "1.0.228", features = ["derive"] } -humansize = "2.1.3" +prost = "0.14.1" +tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] } +tonic = "0.14.2" +tonic-prost = "0.14.2" + +[build-dependencies] +tonic-prost-build = "*" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..4b6501c --- /dev/null +++ b/build.rs @@ -0,0 +1,4 @@ +fn main() -> Result<(), Box> { + tonic_prost_build::compile_protos("proto/req_packager.proto")?; + Ok(()) +} diff --git a/devenv.nix b/devenv.nix index c9a3227..0014d50 100644 --- a/devenv.nix +++ b/devenv.nix @@ -14,13 +14,14 @@ packages = [ pkgs.git pkgs.protobuf_32 + pkgs.grpcurl ]; # https://devenv.sh/languages/ languages.rust = { enable = true; channel = "stable"; - version = "1.81.0"; + version = "1.82.0"; components = [ "rustc" "cargo" diff --git a/proto/req_packager.proto b/proto/req_packager.proto new file mode 100644 index 0000000..7c20e1b --- /dev/null +++ b/proto/req_packager.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; +package req_packager; + +service Greeter { + rpc SayHello (HelloRequest) returns (HelloReply); +} + +message HelloRequest { + string name = 1; +} + +message HelloReply { + string message = 1; +} diff --git a/src/client.rs 
b/src/client.rs new file mode 100644 index 0000000..f7781bc --- /dev/null +++ b/src/client.rs @@ -0,0 +1,19 @@ +pub mod req_packager { + tonic::include_proto!("req_packager"); +} + +use req_packager::{greeter_client::GreeterClient, HelloRequest}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let mut client = GreeterClient::connect("http://[::1]:50051").await?; + + let request = tonic::Request::new(HelloRequest { + name: "Tonic".into(), + }); + let response = client.say_hello(request).await?; + + println!("RESPONSE={response:?}"); + + Ok(()) +} diff --git a/src/server.rs b/src/server.rs new file mode 100644 index 0000000..8f0b315 --- /dev/null +++ b/src/server.rs @@ -0,0 +1,39 @@ +pub mod req_packager { + tonic::include_proto!("req_packager"); +} + +use req_packager::{ + greeter_server::{Greeter, GreeterServer}, + HelloReply, HelloRequest, +}; +use tonic::{transport::Server, Request, Response, Status}; + +#[derive(Debug, Default)] +pub struct MyGreeter {} + +#[tonic::async_trait] +impl Greeter for MyGreeter { + async fn say_hello( + &self, + request: Request, + ) -> Result, Status> { + println!("Got a request: {request:?}"); + + let reply = HelloReply { + message: format!("Hello {}!", request.into_inner().name), + }; + + Ok(Response::new(reply)) + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let addr = "[::1]:50051".parse()?; + let greeter = MyGreeter::default(); + Server::builder() + .add_service(GreeterServer::new(greeter)) + .serve(addr) + .await?; + Ok(()) +} From 04ea46f2e76efa5e9d00726261bb913a4127ca1c Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Fri, 19 Dec 2025 14:41:40 +0100 Subject: [PATCH 03/14] dataset service protobuf --- Cargo.lock | 2 + Cargo.toml | 2 + proto/req_packager.proto | 118 ++++++++++++++++++++++++++++++++++++--- src/client.rs | 20 ++++--- src/server.rs | 92 +++++++++++++++++++++++++----- 5 files changed, 206 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48accfc..b1c1fff 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -599,7 +599,9 @@ name = "req_packager" version = "0.1.0" dependencies = [ "prost", + "prost-types", "tokio", + "tokio-stream", "tonic", "tonic-prost", "tonic-prost-build", diff --git a/Cargo.toml b/Cargo.toml index c37d3f1..c47148a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,9 @@ path = "src/client.rs" [dependencies] prost = "0.14.1" +prost-types = "0.14.1" tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] } +tokio-stream = "0.1.17" tonic = "0.14.2" tonic-prost = "0.14.2" diff --git a/proto/req_packager.proto b/proto/req_packager.proto index 7c20e1b..d9487a9 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -1,14 +1,118 @@ syntax = "proto3"; -package req_packager; -service Greeter { - rpc SayHello (HelloRequest) returns (HelloReply); +package req_packager.v1; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/struct.proto"; + +service DatasetService { + // Lazily retrieve file hierarchy or file info for a dataset + rpc BrowseDataset(BrowseDatasetRequest) + returns (stream BrowseDatasetResponse); +} + +message BrowseDatasetRequest { + // Data repo identifier by url (opaque to client) + string datarepo_url = 1; + // Dataset identifier (opaque to client) + string dataset_id = 2; +} + +message BrowseDatasetResponse { + // Server MUST send dataset_info as first message at INIT phase + // Server MUST not send any more messages after COMPLETE phase + enum BrowsePhase { + PHASE_UNSPECIFIED = 0; + PHASE_INIT = 1; + PHASE_BROWSING = 2; + PHASE_COMPLETED = 3; + } + + BrowsePhase phase = 1; + + oneof event { + // summury or basic metadata + DatasetInfo dataset_info = 2; + // entry entity of file + FileEntry file_entry = 3; + // progress for how much is done already + BrowseProgress progress = 4; + // if any error happing when browsing + BrowseError error = 5; + // complete signal + BrowseComplete complete = 6; + } } -message HelloRequest { - string name = 1; +message 
DatasetInfo { + string repo_url = 1; + string dataset_id = 2; + string description = 3; + + optional uint64 total_files = 4; + optional uint64 total_size_bytes = 5; + + google.protobuf.Timestamp created_at = 6; + google.protobuf.Timestamp updated_at = 7; + + map tags = 8; } -message HelloReply { - string message = 1; +// structure partially borrow from unix file handler +message FileEntry { + // abs path, root from dataset + string path = 1; + // basename + string name = 2; + // no matter basename or path ended with '/' or not, this is the only source of truth on + // it is a file or dir + bool is_dir = 3; + + // size in bytes + uint64 size_bytes = 4; + // mime_type, unset if unknown + optional string mime_type = 5; + // checksum, sha256 + optional string checksum = 6; + + // latest time the file is modified + google.protobuf.Timestamp modified_at = 7; +} + +message BrowseProgress { + uint64 files_scanned = 1; + uint64 bytes_scanned = 2; + + // 0-100 + uint32 percent = 3; + // path of file in processing + string path = 4; +} + +message BrowseError { + enum ErrorCode { + // UNKNOWN at the moment cover most of errors, more specific error type will be added + // when it is clear on which errors might happen. + UNKNOWN = 0; + // DATASET is not available, for example, havester create the entry but the data repo might offline etc. + INVALID_DATASET = 1; + // IO_ERROR + // TIMEOUT (for single file or for one dataset??) + } + + ErrorCode code = 1; + string message = 2; + + // path of file under processing where error raised + string path = 3; + // if fatal bail the browse process, otherwise can continue on other files. 
+ bool fatal = 4; +} + +message BrowseComplete { + uint64 total_files = 1; + uint64 total_size_bytes = 2; + + bool success = 3; + google.protobuf.Timestamp finish_at = 4; } diff --git a/src/client.rs b/src/client.rs index f7781bc..da941ab 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,19 +1,25 @@ pub mod req_packager { - tonic::include_proto!("req_packager"); + tonic::include_proto!("req_packager.v1"); } -use req_packager::{greeter_client::GreeterClient, HelloRequest}; +use req_packager::{dataset_service_client::DatasetServiceClient, BrowseDatasetRequest}; #[tokio::main] async fn main() -> Result<(), Box> { - let mut client = GreeterClient::connect("http://[::1]:50051").await?; + let mut client = DatasetServiceClient::connect("http://[::1]:50051").await?; - let request = tonic::Request::new(HelloRequest { - name: "Tonic".into(), + // made up repo url and dataset id, should be mocked for test + let datarepo_url = "http://onedata.com".to_string(); + let dataset_id = "xxx-pid".to_string(); + let request = tonic::Request::new(BrowseDatasetRequest { + datarepo_url, + dataset_id, }); - let response = client.say_hello(request).await?; - println!("RESPONSE={response:?}"); + let mut stream = client.browse_dataset(request).await?.into_inner(); + while let Some(resp) = stream.message().await? 
{ + println!("resp: {:?}", resp); + } Ok(()) } diff --git a/src/server.rs b/src/server.rs index 8f0b315..0de5d3b 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,38 +1,102 @@ pub mod req_packager { - tonic::include_proto!("req_packager"); + tonic::include_proto!("req_packager.v1"); } +use prost_types::Timestamp; +use std::{collections::HashMap, pin::Pin}; +use tokio::sync::mpsc; +use tokio_stream::{wrappers::ReceiverStream, Stream}; + use req_packager::{ - greeter_server::{Greeter, GreeterServer}, - HelloReply, HelloRequest, + browse_dataset_response::{BrowsePhase, Event}, + dataset_service_server::{DatasetService, DatasetServiceServer}, + BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, DatasetInfo, }; use tonic::{transport::Server, Request, Response, Status}; #[derive(Debug, Default)] -pub struct MyGreeter {} +pub struct Packager { + // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry + // API call address. + // TODO: source of type-registry, mocked by a JSON + // TODO: source of data repositories, mocked by a sqlite, the arch here not clear, should this + // all behind the filemetrix? Or get from filemetrix (seems better because I don't want RP + // tangled directly with DB, it is good to have operations behind filemetrix and this is one of + // the roles filemetrix need to play) the basic info and query from DB after? +} +// XXX: the logic and transport mixed here, I need to have a DatasetBrowser for the inner browse +// logic, then I can do the same no matter for filemetrix, or self directy service, or mocked test. #[tonic::async_trait] -impl Greeter for MyGreeter { - async fn say_hello( +impl DatasetService for Packager { + type BrowseDatasetStream = ReceiverStream>; + + /// browse dataset through filemetrix API calls. + /// XXX: I am expecting more than what filemetrix can provide. + /// I mock those functionalities here and request filemetrix to have thoes implemneted. 
+ /// I need a service to downlead files for quick assessing (like a caching, caching <100k files). + async fn browse_dataset( &self, - request: Request, - ) -> Result, Status> { + request: Request, + ) -> Result, Status> { println!("Got a request: {request:?}"); + let (tx, rx) = mpsc::channel(16); + + tokio::spawn(async move { + // INIT Phase + let req = request.get_ref(); + let repo_url = &req.datarepo_url; + let id = &req.dataset_id; + + // TODO: make an API call using url + id (or just PID based on the API of filemetrix) to the filemetrix + let dataset_info = DatasetInfo { + // mock all fields, they are from filemetrix API call. + repo_url: repo_url.to_string(), + dataset_id: id.to_string(), + description: "example01".to_string(), + total_files: None, + total_size_bytes: None, + created_at: Some(Timestamp::default()), + updated_at: Some(Timestamp::default()), + tags: HashMap::new(), + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::DatasetInfo(dataset_info)), + })) + .await + .ok(); + + // TODO: Browsing - let reply = HelloReply { - message: format!("Hello {}!", request.into_inner().name), - }; + // COMPLETED + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseCompleted as i32, + event: Some(Event::Complete(BrowseComplete { + total_files: 100, + total_size_bytes: 100, + success: true, + finish_at: None, + })), + })) + .await + .ok(); + }); - Ok(Response::new(reply)) + Ok(Response::new(ReceiverStream::new(rx))) } } #[tokio::main] async fn main() -> Result<(), Box> { let addr = "[::1]:50051".parse()?; - let greeter = MyGreeter::default(); + // XXX: when new type/tool added, do I want to reload the packager in the memory? + // pro: tool/type-registry is more static and based on their are less updated, query is faster + // (however there is not too much query needed, just index visiting). + // con: the packager need to be initialized, how freq it happens to take latest list? 
+ let packager = Packager::default(); Server::builder() - .add_service(GreeterServer::new(greeter)) + .add_service(DatasetServiceServer::new(packager)) .serve(addr) .await?; Ok(()) From 098a700244cbdd5aaa41babb8baaf72564d99591 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 22 Dec 2025 16:20:41 +0100 Subject: [PATCH 04/14] vre types and data structure prototype --- Cargo.lock | 30 ++++++++++++++++++ Cargo.toml | 2 ++ src/lib.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b1c1fff..a78d05a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -600,6 +600,8 @@ version = "0.1.0" dependencies = [ "prost", "prost-types", + "serde", + "serde_json", "tokio", "tokio-stream", "tonic", @@ -620,6 +622,21 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "ryu" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -640,6 +657,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217ca874ae0207aac254aa02c957ded05585a90892cc8d87f9e5fa49669dadd8" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "slab" version = "0.4.11" diff --git a/Cargo.toml b/Cargo.toml index c47148a..bff8709 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ path = "src/client.rs" [dependencies] prost = "0.14.1" prost-types = "0.14.1" +serde = "1.0.228" +serde_json = "1.0.146" tokio = { version = "1.48.0", features = 
["macros", "rt-multi-thread"] } tokio-stream = "0.1.17" tonic = "0.14.2" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..97dd1a4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,89 @@ +use std::path::PathBuf; + +// FIXME: look at EC2 etc, to have a better list of required fields +#[derive(Debug)] +struct EnvResource { + num_cpu: u32, + num_ram: u64, +} + +/// Config for how to launch the VRE, these are specifically for e.g. `.binder`. +/// The resource description is independent of this config. +/// The request packager do not (should not??, but if tool-registry also strong typed, maybe I can +/// constructed the type easily here??) know the exact format of the config. The format is +/// encoded in the tool-registry and know b +/// TODO: if the overall architecture and tech stack can not change (ask Enol whether he want to +/// uptake the grpc in more broad scope in dispacher and tool-registry). Otherwise, check if +/// RO-crate can provide such level of schema check. +#[derive(Debug)] +struct Config { + inner: serde_json::Value, +} + +#[derive(Debug)] +enum VirtualResearchEnv { + // tool that opened inline in the page. + EoscInline { + tool_id: String, + file: PathBuf, + }, + + // tool that redirect to 3rd-party site with the selected files + // such tools are very lightweight and do not need to specify resources. + BrowserNative { + tool_id: String, + files: Vec, + }, + + // tool that need VM resources and have resources attached (e.g. RRP, Galaxy) + Hosted { + tool_id: String, + config: Option, + files: Vec, + }, + + // (planned): + // Hosted but allow to allocating using EOSC resources. + HostedWithBuiltInRes { + tool_id: String, + config: Option, + files: Vec, + res: EnvResource, + }, + + // (planned): + // Hosted but allow to asking for tools that provide resourecs. 
+ HostedWithPluginRes { + tool_id: String, + config: Option, + res_id: String, + files: Vec, + res: EnvResource, + }, +} + +// TODO: have a protobuf defined for the VirtualResearchEnv and mapping conversion here +// +// impl From for VirtualResearchEnv { +// fn from(value: proto::VirtualResearchEnv) -> Self { +// match value { +// => +// => +// => +// => +// } +// } +// } + +// server side call this function to assemble a payload that can send to downstream dispacher +// XXX: the return type is a very generic json, I probably want a crate to handle ro-crate +// specificly. +fn assemble_vre_request(vre: &VirtualResearchEnv) -> serde_json::Value { + match vre { + VirtualResearchEnv::EoscInline { tool_id, file } => todo!(), + VirtualResearchEnv::BrowserNative { tool_id, files } => todo!(), + VirtualResearchEnv::Hosted { tool_id, config, files } => todo!(), + VirtualResearchEnv::HostedWithBuiltInRes { tool_id, config, files, res } => todo!(), + VirtualResearchEnv::HostedWithPluginRes { tool_id, config, res_id, files, res } => todo!(), + } +} From bf2018675130dec9e595ed833de13d17b201b4e1 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 5 Jan 2026 16:59:02 +0100 Subject: [PATCH 05/14] mock the filemetrix client --- Cargo.lock | 2 + Cargo.toml | 2 + proto/req_packager.proto | 33 ++++--- src/server.rs | 186 ++++++++++++++++++++++++++++++++++----- 4 files changed, 186 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a78d05a..e7d3d40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -598,6 +598,8 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" name = "req_packager" version = "0.1.0" dependencies = [ + "anyhow", + "async-trait", "prost", "prost-types", "serde", diff --git a/Cargo.toml b/Cargo.toml index bff8709..ab3fa72 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,8 @@ name = "rp_client" path = "src/client.rs" [dependencies] +anyhow = "1.0.100" +async-trait = "0.1.89" prost = "0.14.1" prost-types = "0.14.1" serde 
= "1.0.228" diff --git a/proto/req_packager.proto b/proto/req_packager.proto index d9487a9..d3821c8 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -31,9 +31,9 @@ message BrowseDatasetResponse { BrowsePhase phase = 1; oneof event { - // summury or basic metadata + // summury or basic metadata, send once at PHASE_INIT DatasetInfo dataset_info = 2; - // entry entity of file + // entry entity of file, send at PHASE_BROWSING FileEntry file_entry = 3; // progress for how much is done already BrowseProgress progress = 4; @@ -60,25 +60,30 @@ message DatasetInfo { // structure partially borrow from unix file handler message FileEntry { - // abs path, root from dataset + // abs path, root from dataset, include the basename. string path = 1; - // basename - string name = 2; // no matter basename or path ended with '/' or not, this is the only source of truth on // it is a file or dir - bool is_dir = 3; - + bool is_dir = 2; // size in bytes - uint64 size_bytes = 4; + uint64 size_bytes = 3; // mime_type, unset if unknown - optional string mime_type = 5; + optional string mime_type = 4; // checksum, sha256 - optional string checksum = 6; + optional string checksum = 5; // latest time the file is modified - google.protobuf.Timestamp modified_at = 7; + google.protobuf.Timestamp modified_at = 6; +} + +message FileChunk { + // which file this chunk belongs to, abs path, root from dataset, include the basename. + string path = 1; + // actual content bytes + bytes data = 2; } + message BrowseProgress { uint64 files_scanned = 1; uint64 bytes_scanned = 2; @@ -86,7 +91,7 @@ message BrowseProgress { // 0-100 uint32 percent = 3; // path of file in processing - string path = 4; + optional string path = 4; } message BrowseError { @@ -96,6 +101,8 @@ message BrowseError { UNKNOWN = 0; // DATASET is not available, for example, havester create the entry but the data repo might offline etc. 
INVALID_DATASET = 1; + UNAVAILABLE_FILEMETRIX = 2; + UNAVAILABLE_FILE = 3; // IO_ERROR // TIMEOUT (for single file or for one dataset??) } @@ -104,7 +111,7 @@ message BrowseError { string message = 2; // path of file under processing where error raised - string path = 3; + optional string path = 3; // if fatal bail the browse process, otherwise can continue on other files. bool fatal = 4; } diff --git a/src/server.rs b/src/server.rs index 0de5d3b..34d14b6 100644 --- a/src/server.rs +++ b/src/server.rs @@ -3,18 +3,71 @@ pub mod req_packager { } use prost_types::Timestamp; -use std::{collections::HashMap, pin::Pin}; +use std::{ + collections::HashMap, + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; use tokio::sync::mpsc; -use tokio_stream::{wrappers::ReceiverStream, Stream}; +use tokio_stream::wrappers::ReceiverStream; -use req_packager::{ +use crate::req_packager::{ browse_dataset_response::{BrowsePhase, Event}, + browse_error::ErrorCode, dataset_service_server::{DatasetService, DatasetServiceServer}, - BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, DatasetInfo, + BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, DatasetInfo, + FileEntry, }; use tonic::{transport::Server, Request, Response, Status}; -#[derive(Debug, Default)] +fn current_timestamp() -> Timestamp { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + Timestamp { + seconds: now.as_secs().cast_signed(), + nanos: now.subsec_nanos().cast_signed(), + } +} + +#[async_trait::async_trait] +trait FilemetrixClient: Send + Sync + 'static { + // get dataset information + async fn get_dataset_info(&self, repo_url: &str, id: &str) -> anyhow::Result; + // list files in the dataset + async fn list_files(&self, repo_url: &str, id: &str) -> anyhow::Result>; +} + +struct MockFilemetrixClient {} + +impl MockFilemetrixClient { + fn new() -> Self { + MockFilemetrixClient {} + } +} + +#[async_trait::async_trait] +impl FilemetrixClient for 
MockFilemetrixClient { + async fn get_dataset_info(&self, repo_url: &str, id: &str) -> anyhow::Result { + let dataset_info = DatasetInfo { + // mock all fields, they are from filemetrix API call. + repo_url: repo_url.to_string(), + dataset_id: id.to_string(), + description: "example01".to_string(), + total_files: None, + total_size_bytes: None, + created_at: Some(Timestamp::default()), + updated_at: Some(Timestamp::default()), + tags: HashMap::new(), + }; + Ok(dataset_info) + } + + async fn list_files(&self, repo_url: &str, id: &str) -> anyhow::Result> { + todo!() + } +} + pub struct Packager { // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry // API call address. @@ -23,10 +76,12 @@ pub struct Packager { // all behind the filemetrix? Or get from filemetrix (seems better because I don't want RP // tangled directly with DB, it is good to have operations behind filemetrix and this is one of // the roles filemetrix need to play) the basic info and query from DB after? + filemetrix: Arc, } // XXX: the logic and transport mixed here, I need to have a DatasetBrowser for the inner browse // logic, then I can do the same no matter for filemetrix, or self directy service, or mocked test. +#[allow(clippy::too_many_lines)] #[tonic::async_trait] impl DatasetService for Packager { type BrowseDatasetStream = ReceiverStream>; @@ -41,6 +96,7 @@ impl DatasetService for Packager { ) -> Result, Status> { println!("Got a request: {request:?}"); let (tx, rx) = mpsc::channel(16); + let filemetrix_client = Arc::clone(&self.filemetrix); tokio::spawn(async move { // INIT Phase @@ -48,35 +104,112 @@ impl DatasetService for Packager { let repo_url = &req.datarepo_url; let id = &req.dataset_id; - // TODO: make an API call using url + id (or just PID based on the API of filemetrix) to the filemetrix - let dataset_info = DatasetInfo { - // mock all fields, they are from filemetrix API call. 
- repo_url: repo_url.to_string(), - dataset_id: id.to_string(), - description: "example01".to_string(), - total_files: None, - total_size_bytes: None, - created_at: Some(Timestamp::default()), - updated_at: Some(Timestamp::default()), - tags: HashMap::new(), + let dataset_info = match filemetrix_client.get_dataset_info(repo_url, id).await { + Ok(info) => info, + Err(err) => { + let err = BrowseError { + code: ErrorCode::UnavailableFilemetrix as i32, + message: format!("unable to get dataset info of url: {repo_url} - id: {id}, because of filemetrix error: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + + return; + } }; tx.send(Ok(BrowseDatasetResponse { phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::DatasetInfo(dataset_info)), + event: Some(Event::DatasetInfo(dataset_info.clone())), + })) + .await + .ok(); + + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::Progress(req_packager::BrowseProgress { + files_scanned: 0, + bytes_scanned: 0, + percent: 0, + path: None, + })), })) .await .ok(); - // TODO: Browsing + // Browsing, keep on sending file info of the dataset asynchronously + let files = match filemetrix_client.list_files(repo_url, id).await { + Ok(files) => files, + Err(err) => { + let err = BrowseError { + code: ErrorCode::UnavailableFilemetrix as i32, + message: format!("unable to list files url: {repo_url} - id: {id}, because of filemetrix error: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + + return; + } + }; + + let mut files_count = 0; + let mut bytes_count = 0; + // TODO: I may want to have pagination to at most showing 100 entries by default. 
+ // I need then have sever wait for incomming message to continue, bilateral required + // and input needs to be a stream. + for file in files { + let filepath = file.path.clone(); + let sizebytes = file.size_bytes; + if let Err(err) = tx + .send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::FileEntry(file)), + })) + .await + { + let err = BrowseError { + code: ErrorCode::UnavailableFile as i32, + message: format!("unable to send file: {repo_url} - id: {id} - file: {filepath} to client, because of: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + } else { + files_count += 1; + bytes_count += sizebytes; + }; + + // TODO: further operations include: + // 1. file download. + // 2. relay file to the VREs. + } + + let success = files_count == dataset_info.total_files() + && bytes_count == dataset_info.total_size_bytes(); - // COMPLETED tx.send(Ok(BrowseDatasetResponse { phase: BrowsePhase::PhaseCompleted as i32, event: Some(Event::Complete(BrowseComplete { - total_files: 100, - total_size_bytes: 100, - success: true, - finish_at: None, + total_files: files_count, + total_size_bytes: bytes_count, + success, + finish_at: Some(current_timestamp()), })), })) .await @@ -94,7 +227,12 @@ async fn main() -> Result<(), Box> { // pro: tool/type-registry is more static and based on their are less updated, query is faster // (however there is not too much query needed, just index visiting). // con: the packager need to be initialized, how freq it happens to take latest list? 
- let packager = Packager::default(); + // + let filemetrix_client = Arc::new(MockFilemetrixClient::new()); + let packager = Packager { + filemetrix: filemetrix_client, + }; + Server::builder() .add_service(DatasetServiceServer::new(packager)) .serve(addr) From 1805938e0ada967f8e800dcbd444986536b7790a Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 5 Jan 2026 17:58:25 +0100 Subject: [PATCH 06/14] progress on file listing --- proto/req_packager.proto | 15 +++++++++++++++ src/server.rs | 22 ++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/proto/req_packager.proto b/proto/req_packager.proto index d3821c8..ea74ff5 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -5,12 +5,27 @@ package req_packager.v1; import "google/protobuf/timestamp.proto"; import "google/protobuf/struct.proto"; +// service related to dataset, and anything talk to filemetrix service DatasetService { // Lazily retrieve file hierarchy or file info for a dataset rpc BrowseDataset(BrowseDatasetRequest) returns (stream BrowseDatasetResponse); + + // rpc DownloadFile(DownloadFileRequest) + // returns (stream FileChunk); } +// service ToolService { +// rpc BrowseTools(BrowseToolsRequest) +// returns (stream BrowseToolsResponse); +// } + +// // get decisios from client to assemble the crate to dispatcher +// service AssembleService { +// rpc AssembleCrate(AssembleCrateRequest) +// returns (AssembleCrateResponse); +// } + message BrowseDatasetRequest { // Data repo identifier by url (opaque to client) string datarepo_url = 1; diff --git a/src/server.rs b/src/server.rs index 34d14b6..a830587 100644 --- a/src/server.rs +++ b/src/server.rs @@ -178,6 +178,7 @@ impl DatasetService for Packager { })) .await { + // Err let err = BrowseError { code: ErrorCode::UnavailableFile as i32, message: format!("unable to send file: {repo_url} - id: {id} - file: {filepath} to client, because of: {err}"), @@ -191,13 +192,30 @@ impl DatasetService for Packager { 
.await .ok(); } else { + // Ok files_count += 1; bytes_count += sizebytes; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::Progress(req_packager::BrowseProgress { + files_scanned: files_count, + bytes_scanned: bytes_count, + #[allow(clippy::cast_possible_truncation)] + percent: (files_count / dataset_info.total_files() * 100) as u32, + path: None, + })), + })) + .await + .ok(); }; // TODO: further operations include: - // 1. file download. - // 2. relay file to the VREs. + // 1. file download, provide here? yes and calling scanning for mime-type and + // checksum automatically if the file is small (this rely on the file size must + // know beforehead). + // 3. mime type deduct?? should this purely be the responsibility of filemetrix?? + // (yes here) + // 2. relay file to the VREs? in a separated step? (in the seprated step) } let success = files_count == dataset_info.total_files() From e3014fe8f27a87e75f3a09d08cac13d64ddcca1b Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Tue, 6 Jan 2026 10:33:26 +0100 Subject: [PATCH 07/14] Assemble service + rename to url_** and id_** for consistency --- proto/req_packager.proto | 98 +++++++++++++++++++++++++++++----------- src/client.rs | 8 ++-- src/server.rs | 26 +++++------ 3 files changed, 88 insertions(+), 44 deletions(-) diff --git a/proto/req_packager.proto b/proto/req_packager.proto index ea74ff5..2b2f8f0 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -5,6 +5,24 @@ package req_packager.v1; import "google/protobuf/timestamp.proto"; import "google/protobuf/struct.proto"; +// structure partially borrow from unix file handler +message FileEntry { + // abs path, root from dataset, include the basename. 
+ string path = 1; + // no matter basename or path ended with '/' or not, this is the only source of truth on + // it is a file or dir + bool is_dir = 2; + // size in bytes + uint64 size_bytes = 3; + // mime_type, unset if unknown + optional string mime_type = 4; + // checksum, sha256 + optional string checksum = 5; + + // latest time the file is modified + google.protobuf.Timestamp modified_at = 6; +} + // service related to dataset, and anything talk to filemetrix service DatasetService { // Lazily retrieve file hierarchy or file info for a dataset @@ -20,17 +38,61 @@ service DatasetService { // returns (stream BrowseToolsResponse); // } -// // get decisios from client to assemble the crate to dispatcher -// service AssembleService { -// rpc AssembleCrate(AssembleCrateRequest) -// returns (AssembleCrateResponse); -// } +// get decisios from client to assemble the crate to dispatcher +service AssembleService { + rpc PackageAssemble(PackageAssembleRequest) + returns (PackageAssembleResponse); +} + +// RFC 004 +enum VreTyp { + // Browser inline tool provided by Eosc + EoscInline = 0; + // Hosted Vre where resources are provided by the vre provider + Hosted = 1; +} + +message VreEntry { + string id_vre = 1; + VreTyp vre_type = 2; +} + + +// TODO: didn't cover the case that VRE require config files from user e.g. 
`.binder` +message PackageAssembleRequest { + // vre entry + VreEntry vre_entry = 1; + // file entries, list of files selected and passed from client + repeated FileEntry file_entries = 2; +} + +message VreHostedResp { + string tool_id = 1; + string url_callback = 2; + string version = 3; + // TODO: may need configuration, which can be a config file from request +} + +// information for client to go to assets server to get the tool and launch +message VreEoscInlineResp { + string tool_id = 1; + string url_entrypoint = 2; + string version = 3; +} + + +message PackageAssembleResponse { + oneof vre_resp { + VreEoscInlineResp eosc_inline = 1; + VreHostedResp hosted_inline = 2; + } +} message BrowseDatasetRequest { // Data repo identifier by url (opaque to client) - string datarepo_url = 1; + string url_datarepo = 1; // Dataset identifier (opaque to client) - string dataset_id = 2; + string id_dataset = 2; } message BrowseDatasetResponse { @@ -60,8 +122,8 @@ message BrowseDatasetResponse { } message DatasetInfo { - string repo_url = 1; - string dataset_id = 2; + string url_datarepo = 1; + string id_dataset = 2; string description = 3; optional uint64 total_files = 4; @@ -73,24 +135,6 @@ message DatasetInfo { map tags = 8; } -// structure partially borrow from unix file handler -message FileEntry { - // abs path, root from dataset, include the basename. - string path = 1; - // no matter basename or path ended with '/' or not, this is the only source of truth on - // it is a file or dir - bool is_dir = 2; - // size in bytes - uint64 size_bytes = 3; - // mime_type, unset if unknown - optional string mime_type = 4; - // checksum, sha256 - optional string checksum = 5; - - // latest time the file is modified - google.protobuf.Timestamp modified_at = 6; -} - message FileChunk { // which file this chunk belongs to, abs path, root from dataset, include the basename. 
string path = 1; diff --git a/src/client.rs b/src/client.rs index da941ab..18ddbf8 100644 --- a/src/client.rs +++ b/src/client.rs @@ -9,11 +9,11 @@ async fn main() -> Result<(), Box> { let mut client = DatasetServiceClient::connect("http://[::1]:50051").await?; // made up repo url and dataset id, should be mocked for test - let datarepo_url = "http://onedata.com".to_string(); - let dataset_id = "xxx-pid".to_string(); + let url_datarepo = "http://onedata.com".to_string(); + let id_dataset = "xxx-pid".to_string(); let request = tonic::Request::new(BrowseDatasetRequest { - datarepo_url, - dataset_id, + url_datarepo, + id_dataset, }); let mut stream = client.browse_dataset(request).await?.into_inner(); diff --git a/src/server.rs b/src/server.rs index a830587..eab5189 100644 --- a/src/server.rs +++ b/src/server.rs @@ -33,9 +33,9 @@ fn current_timestamp() -> Timestamp { #[async_trait::async_trait] trait FilemetrixClient: Send + Sync + 'static { // get dataset information - async fn get_dataset_info(&self, repo_url: &str, id: &str) -> anyhow::Result; + async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result; // list files in the dataset - async fn list_files(&self, repo_url: &str, id: &str) -> anyhow::Result>; + async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result>; } struct MockFilemetrixClient {} @@ -48,11 +48,11 @@ impl MockFilemetrixClient { #[async_trait::async_trait] impl FilemetrixClient for MockFilemetrixClient { - async fn get_dataset_info(&self, repo_url: &str, id: &str) -> anyhow::Result { + async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result { let dataset_info = DatasetInfo { // mock all fields, they are from filemetrix API call. 
- repo_url: repo_url.to_string(), - dataset_id: id.to_string(), + url_datarepo: url_datarepo.to_string(), + id_dataset: id.to_string(), description: "example01".to_string(), total_files: None, total_size_bytes: None, @@ -63,7 +63,7 @@ impl FilemetrixClient for MockFilemetrixClient { Ok(dataset_info) } - async fn list_files(&self, repo_url: &str, id: &str) -> anyhow::Result> { + async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result> { todo!() } } @@ -101,15 +101,15 @@ impl DatasetService for Packager { tokio::spawn(async move { // INIT Phase let req = request.get_ref(); - let repo_url = &req.datarepo_url; - let id = &req.dataset_id; + let url_datarepo = &req.url_datarepo; + let id = &req.id_dataset; - let dataset_info = match filemetrix_client.get_dataset_info(repo_url, id).await { + let dataset_info = match filemetrix_client.get_dataset_info(url_datarepo, id).await { Ok(info) => info, Err(err) => { let err = BrowseError { code: ErrorCode::UnavailableFilemetrix as i32, - message: format!("unable to get dataset info of url: {repo_url} - id: {id}, because of filemetrix error: {err}"), + message: format!("unable to get dataset info of url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), path: None, fatal: true, }; @@ -143,12 +143,12 @@ impl DatasetService for Packager { .ok(); // Browsing, keep on sending file info of the dataset asynchronously - let files = match filemetrix_client.list_files(repo_url, id).await { + let files = match filemetrix_client.list_files(url_datarepo, id).await { Ok(files) => files, Err(err) => { let err = BrowseError { code: ErrorCode::UnavailableFilemetrix as i32, - message: format!("unable to list files url: {repo_url} - id: {id}, because of filemetrix error: {err}"), + message: format!("unable to list files url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), path: None, fatal: true, }; @@ -181,7 +181,7 @@ impl DatasetService for Packager { // Err let err = BrowseError { code: 
ErrorCode::UnavailableFile as i32, - message: format!("unable to send file: {repo_url} - id: {id} - file: {filepath} to client, because of: {err}"), + message: format!("unable to send file: {url_datarepo} - id: {id} - file: {filepath} to client, because of: {err}"), path: None, fatal: true, }; From 4a05f9b616acee6b788a2db0016082d930b74c7e Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Tue, 6 Jan 2026 12:35:26 +0100 Subject: [PATCH 08/14] add scaffold for tool registry --- src/server.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/src/server.rs b/src/server.rs index eab5189..0b0b055 100644 --- a/src/server.rs +++ b/src/server.rs @@ -12,11 +12,12 @@ use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use crate::req_packager::{ + assemble_service_server::{AssembleService, AssembleServiceServer}, browse_dataset_response::{BrowsePhase, Event}, browse_error::ErrorCode, dataset_service_server::{DatasetService, DatasetServiceServer}, BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, DatasetInfo, - FileEntry, + FileEntry, PackageAssembleRequest, PackageAssembleResponse, }; use tonic::{transport::Server, Request, Response, Status}; @@ -68,7 +69,7 @@ impl FilemetrixClient for MockFilemetrixClient { } } -pub struct Packager { +pub struct DataRepoRelayer { // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry // API call address. // TODO: source of type-registry, mocked by a JSON @@ -83,7 +84,7 @@ pub struct Packager { // logic, then I can do the same no matter for filemetrix, or self directy service, or mocked test. #[allow(clippy::too_many_lines)] #[tonic::async_trait] -impl DatasetService for Packager { +impl DatasetService for DataRepoRelayer { type BrowseDatasetStream = ReceiverStream>; /// browse dataset through filemetrix API calls. 
@@ -238,6 +239,53 @@ impl DatasetService for Packager { } } +struct ToolInfo {} +struct ToolEntry {} + +#[async_trait::async_trait] +trait ToolRegistryClient: Send + Sync + 'static { + // get tool info by id + async fn get_tool_info(&self, id: &str) -> anyhow::Result; + // list tools in the registry, fine to return a Vec store in the ram can handle 10,000 entries. + async fn list_tools(&self) -> anyhow::Result>; +} + +struct MockToolRegistryClient {} + +impl MockToolRegistryClient { + fn new() -> Self { + MockToolRegistryClient {} + } +} + +#[async_trait::async_trait] +impl ToolRegistryClient for MockToolRegistryClient { + async fn get_tool_info(&self, id: &str) -> anyhow::Result { + todo!() + } + async fn list_tools(&self) -> anyhow::Result> { + todo!() + } +} + +pub struct ReqPackAssembler { + tool_registry: Arc, +} + +#[tonic::async_trait] +impl AssembleService for ReqPackAssembler { + async fn package_assemble( + &self, + request: Request, + ) -> Result, Status> { + println!("Got a request: {request:?}"); + tokio::spawn(async move { + // tool from tool registry and validate + }); + todo!(); + } +} + #[tokio::main] async fn main() -> Result<(), Box> { let addr = "[::1]:50051".parse()?; @@ -246,13 +294,15 @@ async fn main() -> Result<(), Box> { // (however there is not too much query needed, just index visiting). // con: the packager need to be initialized, how freq it happens to take latest list? 
// - let filemetrix_client = Arc::new(MockFilemetrixClient::new()); - let packager = Packager { - filemetrix: filemetrix_client, - }; + let filemetrix = Arc::new(MockFilemetrixClient::new()); + let relayer = DataRepoRelayer { filemetrix }; + + let tool_registry = Arc::new(MockToolRegistryClient::new()); + let assembler = ReqPackAssembler { tool_registry }; Server::builder() - .add_service(DatasetServiceServer::new(packager)) + .add_service(DatasetServiceServer::new(relayer)) + .add_service(AssembleServiceServer::new(assembler)) .serve(addr) .await?; Ok(()) From 38144e8f206c93a33ecacf7b3ca1b7a52a252d09 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Tue, 6 Jan 2026 17:07:11 +0100 Subject: [PATCH 09/14] mock dispatcher --- Cargo.lock | 266 +++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + proto/req_packager.proto | 21 ++-- src/lib.rs | 45 ++++--- src/server.rs | 123 ++++++++++++++---- 5 files changed, 405 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e7d3d40..fe1bc3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,6 +101,17 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -141,6 +152,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + [[package]] name = 
"futures-channel" version = "0.3.31" @@ -325,6 +345,108 @@ dependencies = [ "tracing", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + 
"displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.12.1" @@ -362,6 +484,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "log" version = "0.4.29" @@ -457,6 +585,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -609,6 +746,7 @@ dependencies = [ "tonic", "tonic-prost", "tonic-prost-build", + "url", ] [[package]] @@ -694,6 +832,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "syn" version = "2.0.111" @@ 
-711,6 +855,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tempfile" version = "3.23.0" @@ -724,6 +879,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" version = "1.48.0" @@ -922,6 +1087,24 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "want" version = "0.3.1" @@ -1040,3 +1223,86 @@ name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = 
"0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index ab3fa72..520810b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ tokio = { version = "1.48.0", features = ["macros", 
"rt-multi-thread"] } tokio-stream = "0.1.17" tonic = "0.14.2" tonic-prost = "0.14.2" +url = "2.5.8" [build-dependencies] tonic-prost-build = "*" diff --git a/proto/req_packager.proto b/proto/req_packager.proto index 2b2f8f0..521ee96 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -54,19 +54,22 @@ enum VreTyp { message VreEntry { string id_vre = 1; - VreTyp vre_type = 2; + string version = 2; + oneof vre { + VreEoscInline vre_eosc_inline = 3; + VreHosted vre_hosted = 4; + } } - // TODO: didn't cover the case that VRE require config files from user e.g. `.binder` message PackageAssembleRequest { - // vre entry - VreEntry vre_entry = 1; + // vre entry id + string id_vre = 1; // file entries, list of files selected and passed from client repeated FileEntry file_entries = 2; } -message VreHostedResp { +message VreHosted { string tool_id = 1; string url_callback = 2; string version = 3; @@ -74,18 +77,14 @@ message VreHostedResp { } // information for client to go to assets server to get the tool and launch -message VreEoscInlineResp { +message VreEoscInline { string tool_id = 1; string url_entrypoint = 2; string version = 3; } - message PackageAssembleResponse { - oneof vre_resp { - VreEoscInlineResp eosc_inline = 1; - VreHostedResp hosted_inline = 2; - } + VreEntry vre_entry = 1; } message BrowseDatasetRequest { diff --git a/src/lib.rs b/src/lib.rs index 97dd1a4..a2505ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ struct EnvResource { /// constructed the type easily here??) know the exact format of the config. The format is /// encoded in the tool-registry and know b /// TODO: if the overall architecture and tech stack can not change (ask Enol whether he want to -/// uptake the grpc in more broad scope in dispacher and tool-registry). Otherwise, check if +/// uptake the grpc in more broad scope in dispacher and tool-registry). Otherwise, check if /// RO-crate can provide such level of schema check. 
#[derive(Debug)] struct Config { @@ -21,23 +21,23 @@ struct Config { } #[derive(Debug)] -enum VirtualResearchEnv { +pub enum VirtualResearchEnv { // tool that opened inline in the page. EoscInline { - tool_id: String, + id: String, file: PathBuf, }, // tool that redirect to 3rd-party site with the selected files // such tools are very lightweight and do not need to specify resources. BrowserNative { - tool_id: String, + id: String, files: Vec, }, // tool that need VM resources and have resources attached (e.g. RRP, Galaxy) Hosted { - tool_id: String, + id: String, config: Option, files: Vec, }, @@ -45,7 +45,7 @@ enum VirtualResearchEnv { // (planned): // Hosted but allow to allocating using EOSC resources. HostedWithBuiltInRes { - tool_id: String, + id: String, config: Option, files: Vec, res: EnvResource, @@ -54,7 +54,7 @@ enum VirtualResearchEnv { // (planned): // Hosted but allow to asking for tools that provide resourecs. HostedWithPluginRes { - tool_id: String, + id: String, config: Option, res_id: String, files: Vec, @@ -67,23 +67,34 @@ enum VirtualResearchEnv { // impl From for VirtualResearchEnv { // fn from(value: proto::VirtualResearchEnv) -> Self { // match value { -// => -// => -// => -// => +// => +// => +// => +// => // } // } // } -// server side call this function to assemble a payload that can send to downstream dispacher +// server side call this function to assemble a payload that can send to downstream dispacher // XXX: the return type is a very generic json, I probably want a crate to handle ro-crate // specificly. 
fn assemble_vre_request(vre: &VirtualResearchEnv) -> serde_json::Value { match vre { - VirtualResearchEnv::EoscInline { tool_id, file } => todo!(), - VirtualResearchEnv::BrowserNative { tool_id, files } => todo!(), - VirtualResearchEnv::Hosted { tool_id, config, files } => todo!(), - VirtualResearchEnv::HostedWithBuiltInRes { tool_id, config, files, res } => todo!(), - VirtualResearchEnv::HostedWithPluginRes { tool_id, config, res_id, files, res } => todo!(), + VirtualResearchEnv::EoscInline { id, file } => todo!(), + VirtualResearchEnv::BrowserNative { id, files } => todo!(), + VirtualResearchEnv::Hosted { id, config, files } => todo!(), + VirtualResearchEnv::HostedWithBuiltInRes { + id, + config, + files, + res, + } => todo!(), + VirtualResearchEnv::HostedWithPluginRes { + id, + config, + res_id, + files, + res, + } => todo!(), } } diff --git a/src/server.rs b/src/server.rs index 0b0b055..b15a687 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,6 +1,16 @@ -pub mod req_packager { +pub mod req_packager_rpc { tonic::include_proto!("req_packager.v1"); } +use crate::req_packager_rpc::{ + assemble_service_server::{AssembleService, AssembleServiceServer}, + browse_dataset_response::{BrowsePhase, Event}, + browse_error::ErrorCode, + dataset_service_server::{DatasetService, DatasetServiceServer}, + vre_entry::Vre, + BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, DatasetInfo, + FileEntry, PackageAssembleRequest, PackageAssembleResponse, VreEntry, VreEoscInline, VreHosted, + VreTyp, +}; use prost_types::Timestamp; use std::{ @@ -10,16 +20,10 @@ use std::{ }; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; - -use crate::req_packager::{ - assemble_service_server::{AssembleService, AssembleServiceServer}, - browse_dataset_response::{BrowsePhase, Event}, - browse_error::ErrorCode, - dataset_service_server::{DatasetService, DatasetServiceServer}, - BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, 
DatasetInfo, - FileEntry, PackageAssembleRequest, PackageAssembleResponse, -}; use tonic::{transport::Server, Request, Response, Status}; +use url::Url; + +use req_packager::VirtualResearchEnv; fn current_timestamp() -> Timestamp { let now = SystemTime::now() @@ -133,7 +137,7 @@ impl DatasetService for DataRepoRelayer { tx.send(Ok(BrowseDatasetResponse { phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::Progress(req_packager::BrowseProgress { + event: Some(Event::Progress(req_packager_rpc::BrowseProgress { files_scanned: 0, bytes_scanned: 0, percent: 0, @@ -198,7 +202,7 @@ impl DatasetService for DataRepoRelayer { bytes_count += sizebytes; tx.send(Ok(BrowseDatasetResponse { phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::Progress(req_packager::BrowseProgress { + event: Some(Event::Progress(req_packager_rpc::BrowseProgress { files_scanned: files_count, bytes_scanned: bytes_count, #[allow(clippy::cast_possible_truncation)] @@ -239,15 +243,12 @@ impl DatasetService for DataRepoRelayer { } } -struct ToolInfo {} -struct ToolEntry {} - #[async_trait::async_trait] trait ToolRegistryClient: Send + Sync + 'static { // get tool info by id - async fn get_tool_info(&self, id: &str) -> anyhow::Result; + async fn get_tool(&self, id: &str) -> anyhow::Result; // list tools in the registry, fine to return a Vec store in the ram can handle 10,000 entries. 
- async fn list_tools(&self) -> anyhow::Result>; + async fn list_tools(&self) -> anyhow::Result>; } struct MockToolRegistryClient {} @@ -260,10 +261,43 @@ impl MockToolRegistryClient { #[async_trait::async_trait] impl ToolRegistryClient for MockToolRegistryClient { - async fn get_tool_info(&self, id: &str) -> anyhow::Result { + async fn get_tool(&self, id: &str) -> anyhow::Result { todo!() } - async fn list_tools(&self) -> anyhow::Result> { + async fn list_tools(&self) -> anyhow::Result> { + todo!() + } +} + +// this is supposed to be the ro-crate that contain all information to launch the vre with required +// data pointers, so dispatcher or vre (depends on design of the dispatcher) can access the data +// without the needs to store data in the middleware. +struct LaunchReq { + // blob: Type + id_vre: String, + files: Vec, +} + +struct InfoRequest {} + +#[async_trait::async_trait] +trait DispatcherClient: Send + Sync + 'static { + // list all vre requests and their status + async fn check_user_requests(&self, id_user: String) -> anyhow::Result>; + // launch a vre with the launch request, return the callback url when it is ready + async fn launch(&self, p: LaunchReq) -> anyhow::Result; +} + +struct MockDispatcherClient {} + +#[async_trait::async_trait] +impl DispatcherClient for MockDispatcherClient { + async fn check_user_requests(&self, id_user: String) -> anyhow::Result> { + todo!() + } + + // launch a vre with the launch request, return the callback url when it is ready + async fn launch(&self, p: LaunchReq) -> anyhow::Result { todo!() } } @@ -279,10 +313,53 @@ impl AssembleService for ReqPackAssembler { request: Request, ) -> Result, Status> { println!("Got a request: {request:?}"); - tokio::spawn(async move { - // tool from tool registry and validate - }); - todo!(); + let tool_registry = Arc::clone(&self.tool_registry); + + // tool from tool registry and validate + let req = request.get_ref(); + let id_vre = &req.id_vre; + let files = &req.file_entries; + + 
let tool = tool_registry.get_tool(id_vre).await.map_err(|e| { + // convert anyhow error to tonic status + println!("Failed to get tool from registry: {e:?}"); + Status::internal(format!("Failed to get tool from registry: {e}")) + })?; + + // TODO: assemble an ro-crate and send to dispatcher and get back the required vre callback + match tool { + VirtualResearchEnv::EoscInline { .. } => { + // check file number and simply relay (because I use same data structure for the + // tool registry api call) the entry to the client + + // Inline tool only support passing one file, there might be use cases the tool + // processes multiple files, but impl that when the case comes. + if files.len() != 1 { + let err_msg = format!( + "inline tool only processes on one file, get: {}", + files.len() + ); + // TODO: proper tracing log + println!("{err_msg}"); + return Err(Status::internal(err_msg)); + } + + // vre that not through dispatcher. + let resp = PackageAssembleResponse { + vre_entry: Some(vre_entry), + }; + Ok(Response::new(resp)) + } + VirtualResearchEnv::Hosted { .. } => { + // assamble a package and send to dispatcher that return a callback url + + // TODO: can check if the quota reached, users should not allowed to launch + // infinit amount of vres (avoiding ddos). 
+ + todo!() + } + _ => unimplemented!(), + } } } From 5f2a3cc69a1427f16275f872851a36f3a7e4e74f Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 12 Jan 2026 13:31:08 +0100 Subject: [PATCH 10/14] refac: move mock impl to bin's main and trait in lib --- proto/req_packager.proto | 35 ++-- src/client.rs | 6 +- src/lib.rs | 442 ++++++++++++++++++++++++++++++++++++--- src/server.rs | 369 +++++++------------------------- 4 files changed, 500 insertions(+), 352 deletions(-) diff --git a/proto/req_packager.proto b/proto/req_packager.proto index 521ee96..09e065f 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -52,12 +52,27 @@ enum VreTyp { Hosted = 1; } +// respose from assembler for client to redirect to the launched vre +message VreHosted { + string url_callback = 1; + // TODO: may need configuration, which can be a config file from request +} + +// information for client to go to assets server to get the tool and launch +message VreEoscInline { + string url_callback = 1; + // support single file to open with inline tool + FileEntry file_entry = 2; +} + + +// this is what response to client about the vre entity it can utilize. 
message VreEntry { string id_vre = 1; string version = 2; - oneof vre { - VreEoscInline vre_eosc_inline = 3; - VreHosted vre_hosted = 4; + oneof entry_point { + VreEoscInline eosc_inline = 3; + VreHosted hosted = 4; } } @@ -69,20 +84,6 @@ message PackageAssembleRequest { repeated FileEntry file_entries = 2; } -message VreHosted { - string tool_id = 1; - string url_callback = 2; - string version = 3; - // TODO: may need configuration, which can be a config file from request -} - -// information for client to go to assets server to get the tool and launch -message VreEoscInline { - string tool_id = 1; - string url_entrypoint = 2; - string version = 3; -} - message PackageAssembleResponse { VreEntry vre_entry = 1; } diff --git a/src/client.rs b/src/client.rs index 18ddbf8..0e63857 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,8 +1,4 @@ -pub mod req_packager { - tonic::include_proto!("req_packager.v1"); -} - -use req_packager::{dataset_service_client::DatasetServiceClient, BrowseDatasetRequest}; +use req_packager::grpc::{dataset_service_client::DatasetServiceClient, BrowseDatasetRequest}; #[tokio::main] async fn main() -> Result<(), Box> { diff --git a/src/lib.rs b/src/lib.rs index a2505ec..ee34f02 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,391 @@ -use std::path::PathBuf; +pub mod grpc { + tonic::include_proto!("req_packager.v1"); +} + +use grpc::{ + assemble_service_server::AssembleService, + browse_dataset_response::{BrowsePhase, Event}, + browse_error::ErrorCode, + dataset_service_server::DatasetService, + vre_entry::EntryPoint, + BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, BrowseProgress, + DatasetInfo, FileEntry, PackageAssembleRequest, PackageAssembleResponse, VreEntry, + VreEoscInline, VreHosted, +}; + +use prost_types::Timestamp; +use std::{ + path::PathBuf, + sync::Arc, + time::{SystemTime, UNIX_EPOCH}, +}; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{Request, Response, Status}; 
+use url::Url; + +fn current_timestamp() -> Timestamp { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + Timestamp { + seconds: now.as_secs().cast_signed(), + nanos: now.subsec_nanos().cast_signed(), + } +} + +#[async_trait::async_trait] +pub trait FilemetrixClient: Send + Sync + 'static { + // get dataset information + async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result; + // list files in the dataset + async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result>; +} + +#[derive(Debug)] +struct Dataset { + // XXX: I don't want to couple the grpc logic with business logic, so I need real type for both + // datasetinfo and fileentry. + info: DatasetInfo, + files: Vec, +} + +pub struct DataRepoRelayer { + // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry + // API call address. + // TODO: source of type-registry, mocked by a JSON + // TODO: source of data repositories, mocked by a sqlite, the arch here not clear, should this + // all behind the filemetrix? Or get from filemetrix (seems better because I don't want RP + // tangled directly with DB, it is good to have operations behind filemetrix and this is one of + // the roles filemetrix need to play) the basic info and query from DB after? + filemetrix: Arc, +} + +impl DataRepoRelayer { + pub fn new(filemetrix: Arc) -> Self { + Self { filemetrix } + } +} + +// XXX: the logic and transport mixed here, I need to have a DatasetBrowser for the inner browse +// logic, then I can do the same no matter for filemetrix, or self directy service, or mocked test. +#[allow(clippy::too_many_lines)] +#[tonic::async_trait] +impl DatasetService for DataRepoRelayer { + type BrowseDatasetStream = ReceiverStream>; + + /// browse dataset through filemetrix API calls. + /// XXX: I am expecting more than what filemetrix can provide. 
+ /// I mock those functionalities here and request filemetrix to have thoes implemneted. + /// I need a service to downlead files for quick assessing (like a caching, caching <100k files). + async fn browse_dataset( + &self, + request: Request, + ) -> Result, Status> { + println!("Got a request: {request:?}"); + let (tx, rx) = mpsc::channel(16); + let filemetrix_client = Arc::clone(&self.filemetrix); + + tokio::spawn(async move { + // INIT Phase + let req = request.get_ref(); + let url_datarepo = &req.url_datarepo; + let id = &req.id_dataset; + + let dataset_info = match filemetrix_client.get_dataset_info(url_datarepo, id).await { + Ok(info) => info, + Err(err) => { + let err = BrowseError { + code: ErrorCode::UnavailableFilemetrix as i32, + message: format!("unable to get dataset info of url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + + return; + } + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::DatasetInfo(dataset_info.clone())), + })) + .await + .ok(); + + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::Progress(BrowseProgress { + files_scanned: 0, + bytes_scanned: 0, + percent: 0, + path: None, + })), + })) + .await + .ok(); + + // Browsing, keep on sending file info of the dataset asynchronously + let files = match filemetrix_client.list_files(url_datarepo, id).await { + Ok(files) => files, + Err(err) => { + let err = BrowseError { + code: ErrorCode::UnavailableFilemetrix as i32, + message: format!("unable to list files url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + + 
return; + } + }; + + let mut files_count = 0; + let mut bytes_count = 0; + // TODO: I may want to have pagination to at most showing 100 entries by default. + // I need then have sever wait for incomming message to continue, bilateral required + // and input needs to be a stream. + for file in files { + let filepath = file.path.clone(); + let sizebytes = file.size_bytes; + if let Err(err) = tx + .send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::FileEntry(file)), + })) + .await + { + // Err + let err = BrowseError { + code: ErrorCode::UnavailableFile as i32, + message: format!("unable to send file: {url_datarepo} - id: {id} - file: {filepath} to client, because of: {err}"), + path: None, + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + } else { + // Ok + files_count += 1; + bytes_count += sizebytes; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::Progress(BrowseProgress { + files_scanned: files_count, + bytes_scanned: bytes_count, + #[allow(clippy::cast_possible_truncation)] + percent: (files_count / dataset_info.total_files() * 100) as u32, + path: None, + })), + })) + .await + .ok(); + }; + + // TODO: further operations include: + // 1. file download, provide here? yes and calling scanning for mime-type and + // checksum automatically if the file is small (this rely on the file size must + // know beforehead). + // 3. mime type deduct?? should this purely be the responsibility of filemetrix?? + // (yes here) + // 2. relay file to the VREs? in a separated step? 
(in the seprated step) + } + + let success = files_count == dataset_info.total_files() + && bytes_count == dataset_info.total_size_bytes(); + + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseCompleted as i32, + event: Some(Event::Complete(BrowseComplete { + total_files: files_count, + total_size_bytes: bytes_count, + success, + finish_at: Some(current_timestamp()), + })), + })) + .await + .ok(); + }); + + Ok(Response::new(ReceiverStream::new(rx))) + } +} + +#[async_trait::async_trait] +pub trait ToolRegistryClient: Send + Sync + 'static { + // get tool info by id + async fn get_tool(&self, id: &str) -> anyhow::Result; + // list tools in the registry, fine to return a Vec store in the ram can handle 10,000 entries. + async fn list_tools(&self) -> anyhow::Result>; +} + +// this is supposed to be the ro-crate that contain all information to launch the vre with required +// data pointers, so dispatcher or vre (depends on design of the dispatcher) can access the data +// without the needs to store data in the middleware. +// TODO: should not use tonic's FileEntry but a businiss faced own data structure. +pub struct LaunchRequset { + // blob: Type + id_vre: String, + files: Vec, +} + +pub struct InfoRequest {} + +#[async_trait::async_trait] +pub trait DispatcherClient: Send + Sync + 'static { + // list all vre requests and their status + async fn check_user_requests(&self, id_user: String) -> anyhow::Result>; + // launch a vre with the launch request, return the callback url when it is ready + async fn launch(&self, p: LaunchRequset) -> anyhow::Result; +} + +pub struct ReqPackAssembler { + pub tool_registry: Arc, + pub dispacher: Arc, +} + +// assemble service happens after user select which vre to use and what files to attach with vre. +// The recommendation is happened before this service. +// Therefore, the request contains vre id selected and file entries selected. 
+// As return, it response the result that client side can use to directly open the tool. +// The response is *not* streamed back but a single solide resp contains the information on how to +// redirect to the launched (or directly launch for the inline tool case) vre. +// +// For vres that need to be launched through dispatcher, the request is blocking until the vre is +// ready. We use grpc so other rpc calls are not blocked. +#[tonic::async_trait] +impl AssembleService for ReqPackAssembler { + // XXX: this rpc call may need to be separated into two calls, one use streams to get all + // information needed include resources whose necessity depends on the type of tools. + // Then send a whole pack and return resp after launch the vre. + async fn package_assemble( + &self, + mut request: Request, + ) -> Result, Status> { + println!("Got a request: {request:?}"); + let tool_registry = Arc::clone(&self.tool_registry); + let dispacher = Arc::clone(&self.dispacher); + + // client (by user) says which tool to use and which files are selected to launch with vre + let req = request.get_mut(); + let id_vre = &req.id_vre; + let files = &mut req.file_entries; + + let tool = tool_registry.get_tool(id_vre).await.map_err(|e| { + // convert anyhow error to tonic status + println!("Failed to get tool from registry: {e:?}"); + Status::internal(format!("Failed to get tool from registry: {e}")) + })?; + + // TODO: assemble an ro-crate and send to dispatcher and get back the required vre callback + match tool { + VirtualResearchEnv::EoscInline { id, version } => { + // check file number and simply relay (because I use same data structure for the + // tool registry api call) the entry to the client + + // Inline tool only support passing one file, there might be use cases the tool + // processes multiple files, but impl that when the case comes. 
+ if files.len() != 1 { + let err_msg = format!( + "inline tool only processes on one file, get: {}", + files.len() + ); + // TODO: proper tracing log + println!("{err_msg}"); + return Err(Status::internal(err_msg)); + } + + // TODO: impl From trait to do the conversion + // XXX: how inline tool get the file entry information? through payload? through + // url query? or other machenism?? + let file = files.remove(0); // pop the file entry since I don't need it anymore + + // attach the file entry info and send back to client + let vre = EntryPoint::EoscInline(VreEoscInline { + url_callback: "https://example.com".to_string(), + file_entry: Some(file), + }); + let vre_entry = VreEntry { + id_vre: id, + version, + entry_point: Some(vre), + }; + + // vre that not through dispatcher. + let resp = PackageAssembleResponse { + vre_entry: Some(vre_entry), + }; + Ok(Response::new(resp)) + } + VirtualResearchEnv::Hosted { + id, + version, + requirements, + } => { + // assamble a package and send to dispatcher that return a callback url + // TODO: can check if the quota reached, users should not allowed to launch + // infinit amount of vres (avoiding ddos). 
+ + let filenames = files + .iter() + .map(|f| { + let p = PathBuf::from(f.path.clone()); + // FIXME: dontpanic + let p = p.file_name().and_then(|n| n.to_str()).unwrap().to_string(); + p + }) + .collect::>(); + + if !requirements.iter().any(|r| filenames.contains(r)) { + let err_msg = format!("{requirements:?} not fullfilled",); + // TODO: proper tracing log + println!("{err_msg}"); + return Err(Status::internal(err_msg)); + } + + // talk to dispatcher to launch a vre + let launch_req = LaunchRequset { + id_vre: id.clone(), + files: files.clone(), + }; + let url_callback = dispacher.launch(launch_req).await.map_err(|e| { + // convert anyhow error to tonic status + Status::internal(format!("dispacher launch failed because of {e}")) + })?; + let url_callback = url_callback.to_string(); + + let vre = EntryPoint::Hosted(VreHosted { url_callback }); + let vre_entry = VreEntry { + id_vre: id.clone(), + version, + entry_point: Some(vre), + }; + + // vre that not through dispatcher. + let resp = PackageAssembleResponse { + vre_entry: Some(vre_entry), + }; + Ok(Response::new(resp)) + } + _ => unimplemented!(), + } + } +} // FIXME: look at EC2 etc, to have a better list of required fields #[derive(Debug)] @@ -25,7 +412,7 @@ pub enum VirtualResearchEnv { // tool that opened inline in the page. EoscInline { id: String, - file: PathBuf, + version: String, }, // tool that redirect to 3rd-party site with the selected files @@ -38,30 +425,33 @@ pub enum VirtualResearchEnv { // tool that need VM resources and have resources attached (e.g. RRP, Galaxy) Hosted { id: String, - config: Option, - files: Vec, + version: String, + // TODO: String is too vague, here I expect a describle requirements on configs and + // required files, that the server side can use to validate. + requirements: Vec, }, // (planned): - // Hosted but allow to allocating using EOSC resources. - HostedWithBuiltInRes { + // Hosted but required resources provided + // - allow to allocating using EOSC resources. 
+ // - allow to asking for tools that provide resourecs. + // I have a felling that this should be a special type of vre, because in the Assembler + // service, I make it non-stream rpc call, the resource requests need back and forth comm + // between client and server, therefore better managed with bilateral streams. + HostedWithoutRes { id: String, config: Option, files: Vec, res: EnvResource, }, - - // (planned): - // Hosted but allow to asking for tools that provide resourecs. - HostedWithPluginRes { - id: String, - config: Option, - res_id: String, - files: Vec, - res: EnvResource, - }, } +// impl VirtualResearchEnv { +// pub fn attach_files(files: &Vec) { +// todo!() +// } +// } + // TODO: have a protobuf defined for the VirtualResearchEnv and mapping conversion here // // impl From for VirtualResearchEnv { @@ -80,21 +470,9 @@ pub enum VirtualResearchEnv { // specificly. fn assemble_vre_request(vre: &VirtualResearchEnv) -> serde_json::Value { match vre { - VirtualResearchEnv::EoscInline { id, file } => todo!(), - VirtualResearchEnv::BrowserNative { id, files } => todo!(), - VirtualResearchEnv::Hosted { id, config, files } => todo!(), - VirtualResearchEnv::HostedWithBuiltInRes { - id, - config, - files, - res, - } => todo!(), - VirtualResearchEnv::HostedWithPluginRes { - id, - config, - res_id, - files, - res, - } => todo!(), + VirtualResearchEnv::EoscInline { .. } => todo!(), + VirtualResearchEnv::BrowserNative { .. } => todo!(), + VirtualResearchEnv::Hosted { .. } => todo!(), + VirtualResearchEnv::HostedWithoutRes { .. 
} => todo!(), } } diff --git a/src/server.rs b/src/server.rs index b15a687..317a051 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,53 +1,42 @@ -pub mod req_packager_rpc { - tonic::include_proto!("req_packager.v1"); -} -use crate::req_packager_rpc::{ - assemble_service_server::{AssembleService, AssembleServiceServer}, - browse_dataset_response::{BrowsePhase, Event}, - browse_error::ErrorCode, - dataset_service_server::{DatasetService, DatasetServiceServer}, - vre_entry::Vre, - BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, DatasetInfo, - FileEntry, PackageAssembleRequest, PackageAssembleResponse, VreEntry, VreEoscInline, VreHosted, - VreTyp, +use req_packager::{ + grpc::{ + assemble_service_server::AssembleServiceServer, + dataset_service_server::DatasetServiceServer, DatasetInfo, FileEntry, + }, + DataRepoRelayer, DispatcherClient, FilemetrixClient, InfoRequest, LaunchRequset, + ReqPackAssembler, ToolRegistryClient, }; use prost_types::Timestamp; -use std::{ - collections::HashMap, - sync::Arc, - time::{SystemTime, UNIX_EPOCH}, -}; -use tokio::sync::mpsc; -use tokio_stream::wrappers::ReceiverStream; -use tonic::{transport::Server, Request, Response, Status}; +use std::{collections::HashMap, sync::Arc}; +use tonic::transport::Server; use url::Url; use req_packager::VirtualResearchEnv; -fn current_timestamp() -> Timestamp { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - Timestamp { - seconds: now.as_secs().cast_signed(), - nanos: now.subsec_nanos().cast_signed(), - } +#[derive(Debug)] +struct Dataset { + // XXX: I don't want to couple the grpc logic with business logic, so I need real type for both + // datasetinfo and fileentry. 
+ info: DatasetInfo, + files: Vec, } -#[async_trait::async_trait] -trait FilemetrixClient: Send + Sync + 'static { - // get dataset information - async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result; - // list files in the dataset - async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result>; +struct MockFilemetrixClient { + datasets: HashMap<(String, String), Dataset>, } -struct MockFilemetrixClient {} - impl MockFilemetrixClient { - fn new() -> Self { - MockFilemetrixClient {} + fn new(datasets: Vec) -> Self { + let datasets: HashMap<(String, String), Dataset> = datasets + .into_iter() + .map(|ds| { + let info = ds.info.clone(); + let (url, id_ds) = (info.url_datarepo, info.id_dataset); + ((url, id_ds), ds) + }) + .collect(); + MockFilemetrixClient { datasets } } } @@ -73,184 +62,6 @@ impl FilemetrixClient for MockFilemetrixClient { } } -pub struct DataRepoRelayer { - // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry - // API call address. - // TODO: source of type-registry, mocked by a JSON - // TODO: source of data repositories, mocked by a sqlite, the arch here not clear, should this - // all behind the filemetrix? Or get from filemetrix (seems better because I don't want RP - // tangled directly with DB, it is good to have operations behind filemetrix and this is one of - // the roles filemetrix need to play) the basic info and query from DB after? - filemetrix: Arc, -} - -// XXX: the logic and transport mixed here, I need to have a DatasetBrowser for the inner browse -// logic, then I can do the same no matter for filemetrix, or self directy service, or mocked test. -#[allow(clippy::too_many_lines)] -#[tonic::async_trait] -impl DatasetService for DataRepoRelayer { - type BrowseDatasetStream = ReceiverStream>; - - /// browse dataset through filemetrix API calls. - /// XXX: I am expecting more than what filemetrix can provide. 
- /// I mock those functionalities here and request filemetrix to have thoes implemneted. - /// I need a service to downlead files for quick assessing (like a caching, caching <100k files). - async fn browse_dataset( - &self, - request: Request, - ) -> Result, Status> { - println!("Got a request: {request:?}"); - let (tx, rx) = mpsc::channel(16); - let filemetrix_client = Arc::clone(&self.filemetrix); - - tokio::spawn(async move { - // INIT Phase - let req = request.get_ref(); - let url_datarepo = &req.url_datarepo; - let id = &req.id_dataset; - - let dataset_info = match filemetrix_client.get_dataset_info(url_datarepo, id).await { - Ok(info) => info, - Err(err) => { - let err = BrowseError { - code: ErrorCode::UnavailableFilemetrix as i32, - message: format!("unable to get dataset info of url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), - path: None, - fatal: true, - }; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::Error(err)), - })) - .await - .ok(); - - return; - } - }; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::DatasetInfo(dataset_info.clone())), - })) - .await - .ok(); - - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::Progress(req_packager_rpc::BrowseProgress { - files_scanned: 0, - bytes_scanned: 0, - percent: 0, - path: None, - })), - })) - .await - .ok(); - - // Browsing, keep on sending file info of the dataset asynchronously - let files = match filemetrix_client.list_files(url_datarepo, id).await { - Ok(files) => files, - Err(err) => { - let err = BrowseError { - code: ErrorCode::UnavailableFilemetrix as i32, - message: format!("unable to list files url: {url_datarepo} - id: {id}, because of filemetrix error: {err}"), - path: None, - fatal: true, - }; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::Error(err)), - })) - .await - 
.ok(); - - return; - } - }; - - let mut files_count = 0; - let mut bytes_count = 0; - // TODO: I may want to have pagination to at most showing 100 entries by default. - // I need then have sever wait for incomming message to continue, bilateral required - // and input needs to be a stream. - for file in files { - let filepath = file.path.clone(); - let sizebytes = file.size_bytes; - if let Err(err) = tx - .send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::FileEntry(file)), - })) - .await - { - // Err - let err = BrowseError { - code: ErrorCode::UnavailableFile as i32, - message: format!("unable to send file: {url_datarepo} - id: {id} - file: {filepath} to client, because of: {err}"), - path: None, - fatal: true, - }; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::Error(err)), - })) - .await - .ok(); - } else { - // Ok - files_count += 1; - bytes_count += sizebytes; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::Progress(req_packager_rpc::BrowseProgress { - files_scanned: files_count, - bytes_scanned: bytes_count, - #[allow(clippy::cast_possible_truncation)] - percent: (files_count / dataset_info.total_files() * 100) as u32, - path: None, - })), - })) - .await - .ok(); - }; - - // TODO: further operations include: - // 1. file download, provide here? yes and calling scanning for mime-type and - // checksum automatically if the file is small (this rely on the file size must - // know beforehead). - // 3. mime type deduct?? should this purely be the responsibility of filemetrix?? - // (yes here) - // 2. relay file to the VREs? in a separated step? 
(in the seprated step) - } - - let success = files_count == dataset_info.total_files() - && bytes_count == dataset_info.total_size_bytes(); - - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseCompleted as i32, - event: Some(Event::Complete(BrowseComplete { - total_files: files_count, - total_size_bytes: bytes_count, - success, - finish_at: Some(current_timestamp()), - })), - })) - .await - .ok(); - }); - - Ok(Response::new(ReceiverStream::new(rx))) - } -} - -#[async_trait::async_trait] -trait ToolRegistryClient: Send + Sync + 'static { - // get tool info by id - async fn get_tool(&self, id: &str) -> anyhow::Result; - // list tools in the registry, fine to return a Vec store in the ram can handle 10,000 entries. - async fn list_tools(&self) -> anyhow::Result>; -} - struct MockToolRegistryClient {} impl MockToolRegistryClient { @@ -269,27 +80,20 @@ impl ToolRegistryClient for MockToolRegistryClient { } } -// this is supposed to be the ro-crate that contain all information to launch the vre with required -// data pointers, so dispatcher or vre (depends on design of the dispatcher) can access the data -// without the needs to store data in the middleware. 
-struct LaunchReq { - // blob: Type - id_vre: String, - files: Vec, +struct MockDispatcherClient { + // I assume dispatcher knows and communicate with tool registry as well + // It can be generic out to the `ToolRegistryClient` trait + tool_registry: MockToolRegistryClient, } -struct InfoRequest {} - -#[async_trait::async_trait] -trait DispatcherClient: Send + Sync + 'static { - // list all vre requests and their status - async fn check_user_requests(&self, id_user: String) -> anyhow::Result>; - // launch a vre with the launch request, return the callback url when it is ready - async fn launch(&self, p: LaunchReq) -> anyhow::Result; +impl MockDispatcherClient { + fn new() -> Self { + MockDispatcherClient { + tool_registry: MockToolRegistryClient::new(), + } + } } -struct MockDispatcherClient {} - #[async_trait::async_trait] impl DispatcherClient for MockDispatcherClient { async fn check_user_requests(&self, id_user: String) -> anyhow::Result> { @@ -297,69 +101,34 @@ impl DispatcherClient for MockDispatcherClient { } // launch a vre with the launch request, return the callback url when it is ready - async fn launch(&self, p: LaunchReq) -> anyhow::Result { - todo!() - } -} - -pub struct ReqPackAssembler { - tool_registry: Arc, -} - -#[tonic::async_trait] -impl AssembleService for ReqPackAssembler { - async fn package_assemble( - &self, - request: Request, - ) -> Result, Status> { - println!("Got a request: {request:?}"); - let tool_registry = Arc::clone(&self.tool_registry); - - // tool from tool registry and validate - let req = request.get_ref(); - let id_vre = &req.id_vre; - let files = &req.file_entries; - - let tool = tool_registry.get_tool(id_vre).await.map_err(|e| { - // convert anyhow error to tonic status - println!("Failed to get tool from registry: {e:?}"); - Status::internal(format!("Failed to get tool from registry: {e}")) - })?; + async fn launch(&self, p: LaunchRequset) -> anyhow::Result { + // TODO: in the production impl, the launchReq -> ro-crate 
that carry information to launch + // a vre. + // It will be things like + // + // ```rust + // struct RoCrate { + // + // } + // let launch_pack: RoCrate = p.into(); + // let url = self.post(launch_pack).await?; + // return url; + // ``` + + // TODO: dispatcher talk to tool registry to validate the tool request, this comes with the + // question, should dispatcher fully trust req-packager that it always give the correct + // tool id and type to launch. After all it is dispatcher's side decision whether do the + // validation. + // XXX: the LaunchRequset should contain the id of tool registry as well because dispatcher + // in principle can support dispatch to different tool registry, but now only one is + // enough. + // + // it also relates to the auth problem, who has the access to the vre? who should control + // the permission of vre. I think it should be the vre provider and somewhere there is a + // mapping for what eosc user can access which vres. Should this all kept in an auth server + // (assume it will be one), or dispatcher maintain the table and mapping?? - // TODO: assemble an ro-crate and send to dispatcher and get back the required vre callback - match tool { - VirtualResearchEnv::EoscInline { .. } => { - // check file number and simply relay (because I use same data structure for the - // tool registry api call) the entry to the client - - // Inline tool only support passing one file, there might be use cases the tool - // processes multiple files, but impl that when the case comes. - if files.len() != 1 { - let err_msg = format!( - "inline tool only processes on one file, get: {}", - files.len() - ); - // TODO: proper tracing log - println!("{err_msg}"); - return Err(Status::internal(err_msg)); - } - - // vre that not through dispatcher. - let resp = PackageAssembleResponse { - vre_entry: Some(vre_entry), - }; - Ok(Response::new(resp)) - } - VirtualResearchEnv::Hosted { .. 
} => { - // assamble a package and send to dispatcher that return a callback url - - // TODO: can check if the quota reached, users should not allowed to launch - // infinit amount of vres (avoiding ddos). - - todo!() - } - _ => unimplemented!(), - } + todo!() } } @@ -371,11 +140,15 @@ async fn main() -> Result<(), Box> { // (however there is not too much query needed, just index visiting). // con: the packager need to be initialized, how freq it happens to take latest list? // - let filemetrix = Arc::new(MockFilemetrixClient::new()); - let relayer = DataRepoRelayer { filemetrix }; + let filemetrix = Arc::new(MockFilemetrixClient::new(vec![])); + let relayer = DataRepoRelayer::new(filemetrix); let tool_registry = Arc::new(MockToolRegistryClient::new()); - let assembler = ReqPackAssembler { tool_registry }; + let dispacher = Arc::new(MockDispatcherClient::new()); + let assembler = ReqPackAssembler { + tool_registry, + dispacher, + }; Server::builder() .add_service(DatasetServiceServer::new(relayer)) From 7d14d4f25275614d09550251ca80c84078f64aed Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 23 Feb 2026 16:03:10 +0100 Subject: [PATCH 11/14] further seperate mock from traits --- Cargo.lock | 492 +++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 8 +- src/lib.rs | 115 +++++++----- src/server.rs | 237 +++++++++++++++++++++--- 4 files changed, 777 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe1bc3b..5d0c9cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,43 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 
+[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -34,6 +65,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "axum" version = "0.8.7" @@ -89,18 +126,74 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + [[package]] name = "bytes" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "cc" +version = "1.2.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures", + "rand_core", +] + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -140,6 +233,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -152,6 +251,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = 
"1.2.2" @@ -176,6 +281,17 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -195,9 +311,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures-core", + "futures-macro", "futures-task", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -212,6 +330,20 @@ dependencies = [ "wasip2", ] +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "rand_core", + "wasip2", + "wasip3", +] + [[package]] name = "h2" version = "0.4.12" @@ -231,6 +363,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -345,6 +486,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -426,6 +591,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "idna" version = "1.1.0" @@ -454,7 +625,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -472,6 +645,22 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" version = "0.2.178" @@ -531,6 +720,15 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -702,6 +900,23 @@ version = "5.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.1", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "regex" version = "1.12.2" @@ -736,9 +951,14 @@ name = "req_packager" version = "0.1.0" dependencies = [ "anyhow", + "async-stream", "async-trait", + "chrono", + "futures-core", + "futures-util", "prost", "prost-types", + "rand", "serde", "serde_json", "tokio", @@ -747,6 +967,7 @@ dependencies = [ "tonic-prost", "tonic-prost-build", "url", + "uuid", ] [[package]] @@ -762,12 +983,24 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -775,6 +1008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] @@ -810,6 +1044,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "shlex" 
+version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slab" version = "0.4.11" @@ -873,7 +1113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1087,6 +1327,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "url" version = "2.5.8" @@ -1105,6 +1351,17 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "uuid" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" +dependencies = [ + "getrandom 0.4.1", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "want" version = "0.3.1" @@ -1126,7 +1383,130 @@ version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.46.0", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1135,6 +1515,24 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.60.2" @@ -1224,6 +1622,94 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = 
"wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 520810b..1f64e12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,16 +14,22 @@ path = "src/client.rs" [dependencies] anyhow = "1.0.100" +async-stream = "0.3.6" async-trait = "0.1.89" +chrono = { version = "0.4.42", 
features = ["serde"] } +futures-core = "0.3.31" +futures-util = "0.3.31" prost = "0.14.1" prost-types = "0.14.1" -serde = "1.0.228" +rand = "0.10.0" +serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.146" tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] } tokio-stream = "0.1.17" tonic = "0.14.2" tonic-prost = "0.14.2" url = "2.5.8" +uuid = { version = "1.21.0", features = ["v4"] } [build-dependencies] tonic-prost-build = "*" diff --git a/src/lib.rs b/src/lib.rs index ee34f02..6e2a341 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,9 @@ pub mod grpc { tonic::include_proto!("req_packager.v1"); } +use futures_util::{StreamExt, TryStreamExt}; +use futures_core::stream::BoxStream; use grpc::{ assemble_service_server::AssembleService, browse_dataset_response::{BrowsePhase, Event}, @@ -38,8 +40,14 @@ fn current_timestamp() -> Timestamp { pub trait FilemetrixClient: Send + Sync + 'static { // get dataset information async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result; - // list files in the dataset - async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result>; + /// list files in the dataset + /// # Errors + /// ??? + fn list_files( + &self, + url_datarepo: &str, + id: &str, + ) -> anyhow::Result>; } #[derive(Debug)] @@ -92,6 +100,9 @@ impl DatasetService for DataRepoRelayer { let url_datarepo = &req.url_datarepo; let id = &req.id_dataset; + // TODO: + // NOTE: datasets are with versions + // while files are with modified/updated timestamps. 
let dataset_info = match filemetrix_client.get_dataset_info(url_datarepo, id).await { Ok(info) => info, Err(err) => { @@ -131,7 +142,7 @@ impl DatasetService for DataRepoRelayer { .ok(); // Browsing, keep on sending file info of the dataset asynchronously - let files = match filemetrix_client.list_files(url_datarepo, id).await { + let files = match filemetrix_client.list_files(url_datarepo, id) { Ok(files) => files, Err(err) => { let err = BrowseError { @@ -156,55 +167,59 @@ impl DatasetService for DataRepoRelayer { // TODO: I may want to have pagination to at most showing 100 entries by default. // I need then have sever wait for incomming message to continue, bilateral required // and input needs to be a stream. - for file in files { - let filepath = file.path.clone(); - let sizebytes = file.size_bytes; - if let Err(err) = tx - .send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::FileEntry(file)), - })) - .await - { - // Err - let err = BrowseError { - code: ErrorCode::UnavailableFile as i32, - message: format!("unable to send file: {url_datarepo} - id: {id} - file: {filepath} to client, because of: {err}"), - path: None, - fatal: true, - }; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseInit as i32, - event: Some(Event::Error(err)), - })) - .await - .ok(); - } else { - // Ok - files_count += 1; - bytes_count += sizebytes; - tx.send(Ok(BrowseDatasetResponse { - phase: BrowsePhase::PhaseBrowsing as i32, - event: Some(Event::Progress(BrowseProgress { - files_scanned: files_count, - bytes_scanned: bytes_count, - #[allow(clippy::cast_possible_truncation)] - percent: (files_count / dataset_info.total_files() * 100) as u32, + files.for_each_concurrent(10, |file| { + let tx = tx.clone(); + let dataset_info = dataset_info.clone(); + async move { + let filepath = file.path.clone(); + let sizebytes = file.size_bytes; + if let Err(err) = tx + .send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as 
i32, + event: Some(Event::FileEntry(file)), + })) + .await + { + // Err + let err = BrowseError { + code: ErrorCode::UnavailableFile as i32, + message: format!("unable to send file: {url_datarepo} - id: {id} - file: {filepath} to client, because of: {err}"), path: None, - })), - })) - .await - .ok(); - }; + fatal: true, + }; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseInit as i32, + event: Some(Event::Error(err)), + })) + .await + .ok(); + } else { + // Ok + files_count += 1; + bytes_count += sizebytes; + tx.send(Ok(BrowseDatasetResponse { + phase: BrowsePhase::PhaseBrowsing as i32, + event: Some(Event::Progress(BrowseProgress { + files_scanned: files_count, + bytes_scanned: bytes_count, + #[allow(clippy::cast_possible_truncation)] + percent: (files_count / dataset_info.total_files() * 100) as u32, + path: None, + })), + })) + .await + .ok(); + }; - // TODO: further operations include: - // 1. file download, provide here? yes and calling scanning for mime-type and - // checksum automatically if the file is small (this rely on the file size must - // know beforehead). - // 3. mime type deduct?? should this purely be the responsibility of filemetrix?? - // (yes here) - // 2. relay file to the VREs? in a separated step? (in the seprated step) - } + // TODO: further operations include: + // 1. file download, provide here? yes and calling scanning for mime-type and + // checksum automatically if the file is small (this rely on the file size must + // know beforehead). + // 3. mime type deduct?? should this purely be the responsibility of filemetrix?? + // (yes here) + // 2. relay file to the VREs? in a separated step? 
(in the seprated step) + } + }).await; let success = files_count == dataset_info.total_files() && bytes_count == dataset_info.total_size_bytes(); diff --git a/src/server.rs b/src/server.rs index 317a051..c9f6b4f 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,20 +1,27 @@ +use async_stream::stream; +use futures_core::stream::BoxStream; +use prost_types::Timestamp; +use rand::{rng, seq::IndexedRandom, RngExt}; use req_packager::{ grpc::{ - assemble_service_server::AssembleServiceServer, - dataset_service_server::DatasetServiceServer, DatasetInfo, FileEntry, + self, assemble_service_server::AssembleServiceServer, + dataset_service_server::DatasetServiceServer, }, DataRepoRelayer, DispatcherClient, FilemetrixClient, InfoRequest, LaunchRequset, ReqPackAssembler, ToolRegistryClient, }; -use prost_types::Timestamp; +use chrono::{DateTime, Duration, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + use std::{collections::HashMap, sync::Arc}; use tonic::transport::Server; use url::Url; use req_packager::VirtualResearchEnv; -#[derive(Debug)] +#[derive(Clone)] struct Dataset { // XXX: I don't want to couple the grpc logic with business logic, so I need real type for both // datasetinfo and fileentry. @@ -23,6 +30,8 @@ struct Dataset { } struct MockFilemetrixClient { + // the key is a tuple, where 1st element is for datarepo url and the second is the id of the + // dataset in the datarepo. 
datasets: HashMap<(String, String), Dataset>, } @@ -32,7 +41,7 @@ impl MockFilemetrixClient { .into_iter() .map(|ds| { let info = ds.info.clone(); - let (url, id_ds) = (info.url_datarepo, info.id_dataset); + let (url, id_ds) = (info.url, info.id); ((url, id_ds), ds) }) .collect(); @@ -42,23 +51,51 @@ impl MockFilemetrixClient { #[async_trait::async_trait] impl FilemetrixClient for MockFilemetrixClient { - async fn get_dataset_info(&self, url_datarepo: &str, id: &str) -> anyhow::Result { - let dataset_info = DatasetInfo { - // mock all fields, they are from filemetrix API call. - url_datarepo: url_datarepo.to_string(), - id_dataset: id.to_string(), - description: "example01".to_string(), - total_files: None, - total_size_bytes: None, - created_at: Some(Timestamp::default()), - updated_at: Some(Timestamp::default()), - tags: HashMap::new(), - }; - Ok(dataset_info) + async fn get_dataset_info( + &self, + url_datarepo: &str, + id: &str, + ) -> anyhow::Result { + match self + .datasets + .get(&(url_datarepo.to_string(), id.to_string())) + { + Some(dataset) => { + let info = dataset.info.clone(); + Ok(info.into()) + } + _ => { + anyhow::bail!("didn't find the dataset with {:?}", (url_datarepo, id)) + } + } } - async fn list_files(&self, url_datarepo: &str, id: &str) -> anyhow::Result> { - todo!() + fn list_files( + &self, + url_datarepo: &str, + id: &str, + ) -> anyhow::Result> { + match self + .datasets + .get(&(url_datarepo.to_string(), id.to_string())) + { + Some(dataset) => { + let files = dataset + .files + .iter() + .map(|f| f.clone().into()) + .collect::>(); + let stream = Box::pin(stream! 
{ + for file in files { + yield file; + } + }); + Ok(stream) + } + _ => { + anyhow::bail!("didn't find the dataset with {:?}", (url_datarepo, id)) + } + } } } @@ -132,6 +169,163 @@ impl DispatcherClient for MockDispatcherClient { } } +#[derive(Serialize, Deserialize, Clone)] +struct DatasetInfo { + url: String, + id: String, + description: String, + total_files: Option, + total_size_bytes: Option, + create_at: DateTime, + updated_at: DateTime, + tags: HashMap, +} + +impl From for grpc::DatasetInfo { + fn from(d: DatasetInfo) -> Self { + let created_at = Timestamp { + seconds: d.create_at.timestamp(), + nanos: 0, + }; + let updated_at = Timestamp { + seconds: d.updated_at.timestamp(), + nanos: 0, + }; + grpc::DatasetInfo { + url_datarepo: d.url, + id_dataset: d.id, + description: d.description, + total_files: d.total_files, + total_size_bytes: d.total_size_bytes, + created_at: Some(created_at), + updated_at: Some(updated_at), + tags: d.tags, + } + } +} + +#[derive(Serialize, Deserialize, Clone)] +struct FileEntry { + path: String, + is_dir: bool, + size_bytes: u64, + mime_type: Option, + checksum: Option, + modified_at: DateTime, +} + +impl From for grpc::FileEntry { + fn from(f: FileEntry) -> Self { + let modified_at = Timestamp { + seconds: f.modified_at.timestamp(), + nanos: 0, + }; + grpc::FileEntry { + path: f.path, + is_dir: f.is_dir, + size_bytes: f.size_bytes, + mime_type: f.mime_type, + checksum: f.checksum, + modified_at: Some(modified_at), + } + } +} + +fn generate_fake_files(total: u64) -> Vec { + let mut rng = rng(); + let now = Utc::now(); + + let mime_types = [ + "text/csv", + "application/json", + "application/parquet", + "image/png", + "application/octet-stream", + ]; + + let mut entries = Vec::new(); + + // Create some directory structure first + let dirs = vec!["raw", "processed", "results", "metadata"]; + + for dir in &dirs { + entries.push(FileEntry { + path: dir.to_string(), + is_dir: true, + size_bytes: 0, + mime_type: None, + checksum: None, 
+ modified_at: now - Duration::days(rng.random_range(1..30)), + }); + } + + // Generate files inside directories + for i in 0..total { + let parent = dirs.choose(&mut rng).unwrap(); + + let size = rng.random_range(10_000..10_000_000); + let modified = now - Duration::days(rng.random_range(0..30)); + + let mime = mime_types.choose(&mut rng).unwrap(); + + entries.push(FileEntry { + path: format!("{parent}/file_{i}.dat"), + is_dir: false, + size_bytes: size, + mime_type: Some(mime.to_string()), + checksum: Some(Uuid::new_v4().to_string()), + modified_at: modified, + }); + } + + entries +} + +fn generate_datasets() -> Vec { + let mut rng = rng(); + + let mut datasets = Vec::new(); + + let sample_tags = [ + ("domain", "physics"), + ("type", "simulation"), + ("format", "csv"), + ("owner", "research-team"), + ("status", "validated"), + ]; + + for i in 0..5 { + let now = Utc::now(); + let created = now - Duration::days(rng.random_range(10..100)); + let updated = created + Duration::days(rng.random_range(1..10)); + + let total_files = rng.random_range(5..50); + let total_size_bytes = rng.random_range(10_000_000..500_000_000); + + let mut tags = HashMap::new(); + for (k, v) in sample_tags.sample(&mut rng, 3) { + tags.insert(k.to_string(), v.to_string()); + } + + let info = DatasetInfo { + url: format!("https://example.com/datasets/{i}"), + id: Uuid::new_v4().to_string(), + description: format!("Mock dataset number {i}"), + total_files: Some(total_files), + total_size_bytes: Some(total_size_bytes), + create_at: created, + updated_at: updated, + tags, + }; + + let files = generate_fake_files(total_files); + + datasets.push(Dataset { info, files }); + } + + datasets +} + #[tokio::main] async fn main() -> Result<(), Box> { let addr = "[::1]:50051".parse()?; @@ -140,7 +334,8 @@ async fn main() -> Result<(), Box> { // (however there is not too much query needed, just index visiting). // con: the packager need to be initialized, how freq it happens to take latest list? 
// - let filemetrix = Arc::new(MockFilemetrixClient::new(vec![])); + let datasets = generate_datasets(); + let filemetrix = Arc::new(MockFilemetrixClient::new(datasets)); let relayer = DataRepoRelayer::new(filemetrix); let tool_registry = Arc::new(MockToolRegistryClient::new()); From 213bd39fc0d9f5adf1d0a18c078c33c9d2b19409 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Tue, 24 Feb 2026 16:49:27 +0100 Subject: [PATCH 12/14] endpoint for getting file list done --- src/client.rs | 11 +++++++++-- src/lib.rs | 2 ++ src/server.rs | 11 ++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/client.rs b/src/client.rs index 0e63857..b8b930e 100644 --- a/src/client.rs +++ b/src/client.rs @@ -2,11 +2,18 @@ use req_packager::grpc::{dataset_service_client::DatasetServiceClient, BrowseDat #[tokio::main] async fn main() -> Result<(), Box> { + // TODO: + // - client list all the files in a dataset + // - client select some files and ask "what vre should I use"? + // - with files and vre, create a ro-crate that contains all information inside (with + // information of which file is in which vre input slots). + // - client get the realtime status update from VRE and get a callback link send back when it + // is ready. let mut client = DatasetServiceClient::connect("http://[::1]:50051").await?; // made up repo url and dataset id, should be mocked for test - let url_datarepo = "http://onedata.com".to_string(); - let id_dataset = "xxx-pid".to_string(); + let url_datarepo = "https://example.com/datasets".to_string(); + let id_dataset = "1".to_string(); let request = tonic::Request::new(BrowseDatasetRequest { url_datarepo, id_dataset, diff --git a/src/lib.rs b/src/lib.rs index 6e2a341..6782c94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,8 @@ struct Dataset { files: Vec, } +// TODO: rename to DataRepositoryProxy?? +// This plays the role of relaying the API calls to the source data repository through the filemetrix service.
pub struct DataRepoRelayer { // TODO: source of tool-registry, mocked by a JSON, in production can be just tool-registry // API call address. diff --git a/src/server.rs b/src/server.rs index c9f6b4f..8154458 100644 --- a/src/server.rs +++ b/src/server.rs @@ -21,7 +21,7 @@ use url::Url; use req_packager::VirtualResearchEnv; -#[derive(Clone)] +#[derive(Clone, Debug)] struct Dataset { // XXX: I don't want to couple the grpc logic with business logic, so I need real type for both // datasetinfo and fileentry. @@ -56,6 +56,7 @@ impl FilemetrixClient for MockFilemetrixClient { url_datarepo: &str, id: &str, ) -> anyhow::Result { + // XXX: very fragile to use url+id, should be a PID or other primary key in DB. match self .datasets .get(&(url_datarepo.to_string(), id.to_string())) @@ -169,7 +170,7 @@ impl DispatcherClient for MockDispatcherClient { } } -#[derive(Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone, Debug)] struct DatasetInfo { url: String, id: String, @@ -204,7 +205,7 @@ impl From for grpc::DatasetInfo { } } -#[derive(Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone, Debug)] struct FileEntry { path: String, is_dir: bool, @@ -308,8 +309,8 @@ fn generate_datasets() -> Vec { } let info = DatasetInfo { - url: format!("https://example.com/datasets/{i}"), - id: Uuid::new_v4().to_string(), + url: "https://example.com/datasets".to_string(), + id: format!("{i}"), description: format!("Mock dataset number {i}"), total_files: Some(total_files), total_size_bytes: Some(total_size_bytes), From 100435e53c2e0049624083d5eb9210fff5c0c655 Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Fri, 27 Feb 2026 17:18:50 +0100 Subject: [PATCH 13/14] wip, toolregistry service and player service --- proto/req_packager.proto | 223 +++++++++++++++++++++++++++------------ src/client.rs | 45 +++++++- src/lib.rs | 9 +- 3 files changed, 203 insertions(+), 74 deletions(-) diff --git a/proto/req_packager.proto b/proto/req_packager.proto index 
09e065f..58ef18b 100644 --- a/proto/req_packager.proto +++ b/proto/req_packager.proto @@ -5,6 +5,19 @@ package req_packager.v1; import "google/protobuf/timestamp.proto"; import "google/protobuf/struct.proto"; +// -------------- Dataset Service ------------- + +// service related to dataset, and anything talk to filemetrix +service DatasetService { + // Lazily retrieve file hierarchy or file info for a dataset + rpc BrowseDataset(BrowseDatasetRequest) + returns (stream BrowseDatasetResponse); + + // rpc DownloadFile(DownloadFileRequest) + // returns (stream FileChunk); +} + + // structure partially borrow from unix file handler message FileEntry { // abs path, root from dataset, include the basename. @@ -23,71 +36,6 @@ message FileEntry { google.protobuf.Timestamp modified_at = 6; } -// service related to dataset, and anything talk to filemetrix -service DatasetService { - // Lazily retrieve file hierarchy or file info for a dataset - rpc BrowseDataset(BrowseDatasetRequest) - returns (stream BrowseDatasetResponse); - - // rpc DownloadFile(DownloadFileRequest) - // returns (stream FileChunk); -} - -// service ToolService { -// rpc BrowseTools(BrowseToolsRequest) -// returns (stream BrowseToolsResponse); -// } - -// get decisios from client to assemble the crate to dispatcher -service AssembleService { - rpc PackageAssemble(PackageAssembleRequest) - returns (PackageAssembleResponse); -} - -// RFC 004 -enum VreTyp { - // Browser inline tool provided by Eosc - EoscInline = 0; - // Hosted Vre where resources are provided by the vre provider - Hosted = 1; -} - -// respose from assembler for client to redirect to the launched vre -message VreHosted { - string url_callback = 1; - // TODO: may need configuration, which can be a config file from request -} - -// information for client to go to assets server to get the tool and launch -message VreEoscInline { - string url_callback = 1; - // support single file to open with inline tool - FileEntry file_entry = 2; -} - - -// 
this is what response to client about the vre entity it can utilize. -message VreEntry { - string id_vre = 1; - string version = 2; - oneof entry_point { - VreEoscInline eosc_inline = 3; - VreHosted hosted = 4; - } -} - -// TODO: didn't cover the case that VRE require config files from user e.g. `.binder` -message PackageAssembleRequest { - // vre entry id - string id_vre = 1; - // file entries, list of files selected and passed from client - repeated FileEntry file_entries = 2; -} - -message PackageAssembleResponse { - VreEntry vre_entry = 1; -} - message BrowseDatasetRequest { // Data repo identifier by url (opaque to client) string url_datarepo = 1; @@ -182,3 +130,148 @@ message BrowseComplete { bool success = 3; google.protobuf.Timestamp finish_at = 4; } + +// -------------- End of Dataset Service ------------- + +// -------------- Start of Tool Service -------------- +// this is what response to client about the vre entity it can utilize. +message ToolMeta { + // id in the tool registry + string id = 1; + string version = 2; +} + +message BrowseToolsRequest {} +message BrowseToolsResponse {} + +message GetToolRequest {} +message ToolResponse { + ToolMeta tool = 1; +} + +message FindToolsRequest { + repeated FileEntry files = 2; +} +// Find Tools response include the tool response and the state of the stream +message FindToolsResponse { + repeated ToolMeta tools = 1; +} + +// the tool service as in front of tool-registry DB, this actually can be the packaging hub. +service ToolService { + // Get a specific tool by id + rpc GetTool(GetToolRequest) returns (ToolResponse); + + // Find tools from the file list input provided, and maybe some information from user profile. 
+ rpc FindTools(FindToolsRequest) returns (FindToolsResponse); + + rpc BrowseTools(BrowseToolsRequest) + returns (stream BrowseToolsResponse); +} + +// -------------- End of Tool Service -------------- + +// -------------- Start of Dataplayer Service -------------- +// The tool type here refers to a tool artifact that runs on a real machine (including locally on wasm/js or even on the **desktop**). + +// A lightweight tool that does not need resources can always be ready. +// A tool that needs resources prepared starts in the preparing state and ends with the resources being dropped. +message ToolStatus { + enum State { + PREPARING = 0; + reserved 1, 2, 3, 4; + READY = 8; + DROPPED = 9; + } + + string log = 1; + State state = 2; +} + +// XXX: this is already an abstraction that assumes the tool needs, and only needs, files as input to start. +// But in fact, some tools need config files that are independent of the data files passed in. +// We may also want to extend this, as a tool can be a combination of multiple tools, such as a resource provider tool. +// This interface will most likely be fully redefined. +message LaunchRequest { + ToolMeta tool = 1; + repeated FileEntry files = 2; +} + +message User_Id { + string inner = 1; +} + +message ToolHandler { + ToolStatus state = 1; + User_Id owner = 2; + string id = 3; +} + +message LaunchResponse { + ToolHandler handler = 1; +} +message StatusMonitorRequest { + string id = 1; +} +message StatusMonitorResponse { + ToolStatus status = 1; +} + +message DropRequest { + string id = 1; +} +message DropResponse { + string ack = 1; +} + +message QueryUserRequest { + User_Id user_id = 1; +} +message QueryUserResponse { + ToolHandler th = 1; +} + +message GetArtifactRequest { + string handler_id = 1; +} + +message HostedTool { + string callback_url = 1; +} + +message EoscInlineTool { + string callback_url = 1; +} + +message DesktopTool { + // ?? how??
+} + +message GetArtifactResponse { + oneof entry_point { + EoscInlineTool eosc_inline = 1; + HostedTool hosted = 2; + } +} + +// this is the service in front of the dispatcher, or, say, of all the other real vre artifacts. +service DataplayerService { + // launch the tool + rpc Launch(LaunchRequest) returns (LaunchResponse); + + // find all instances of a user: a kv table search, in-memory if ephemeral, or using sqlite on the FS if persistence is needed. + rpc Query(QueryUserRequest) returns (QueryUserResponse); + + // Get the artifact from id. + rpc GetArtifact(GetArtifactRequest) returns (GetArtifactResponse); + + // check the status of one running session + rpc StatusMonitor(StatusMonitorRequest) returns (StatusMonitorResponse); + + // ??? who should drop? does the vre call drop, or does the edc system call drop? + // Is the use case always that, after control moves to the vre, it never goes back to eosc? + rpc Drop(DropRequest) returns (DropResponse); +} + +// -------------- End of Dataplayer Service -------------- + diff --git a/src/client.rs b/src/client.rs index b8b930e..00b8aa1 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,10 +1,18 @@ -use req_packager::grpc::{dataset_service_client::DatasetServiceClient, BrowseDatasetRequest}; +use req_packager::grpc::{ + assemble_service_client::AssembleServiceClient, + dataplayer_service_client::DataplayerServiceClient, + dataset_service_client::DatasetServiceClient, get_artifact_response::EntryPoint, + tool_service_client::ToolServiceClient, tool_service_server::ToolService, BrowseDatasetRequest, + BrowseDatasetResponse, EoscInlineTool, GetArtifactRequest, HostedTool, PackageAssembleRequest, +}; #[tokio::main] async fn main() -> Result<(), Box> { - // TODO: + // TODO: // - client list all the files in a dataset - // - client select some files and ask "what vre should I use"?
+ // |- for every file, the client goes and gets a lightweight tool to preview it + // - client select some files and ask (throttle 200ms) "what vre should I use" (streaming back + // the results)? // - with files and vre, create a ro-crate that contains all information inside (with // information of which file is in which vre input slots). // - client get the realtime status update from VRE and get a callback link send back when it @@ -24,5 +32,36 @@ async fn main() -> Result<(), Box> { println!("resp: {:?}", resp); } + // assemble the package from 1. the selected files, 2. the selected vre, 3. misc config if there + // is any. The same information is used to construct the ro-crate on the client side. Use the same function (in + // shared util, that is the request package), to construct the ro-crate in the grpc server + // side (the one in front of dispatcher). (is the ro-crate very very important? it actually makes the interface flaky, + // because it is not programmatically type safe). + // The idea on ro-crate: the ro-crate will not be transferred over the tcp wire, but it is constructed at + // both ends using the same function. This lets the two ends use a strong type system to + // formalize the message instead of using ro-crate which is not so easy to work with.
+ + let mut client = ToolServiceClient::connect("http://[::1]:50051").await?; + // TODO: request to find tools from selected files + let resp = client.find_tools(request).await?.into_inner(); + let tools = resp.tools; + + let mut client = DataplayerServiceClient::connect("http://[::1]:50051").await?; + let tool = tools[1]; + let resp = client.launch(request).await?.into_inner(); + let tool_handler = resp.handler; + if let Some(handler) = tool_handler { + let id = handler.id; + let req = GetArtifactRequest { + handler_id: todo!(), + }; + let artifact = client.get_artifact(req).await?.into_inner(); + let ep = artifact.entry_point.unwrap(); + let callback_url = match ep { + EntryPoint::EoscInline(t) => t.callback_url, + EntryPoint::Hosted(t) => t.callback_url, + }; + } + Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 6782c94..648c132 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,14 +5,11 @@ use futures_util::{StreamExt, TryStreamExt}; use futures_core::stream::BoxStream; use grpc::{ - assemble_service_server::AssembleService, browse_dataset_response::{BrowsePhase, Event}, browse_error::ErrorCode, dataset_service_server::DatasetService, - vre_entry::EntryPoint, BrowseComplete, BrowseDatasetRequest, BrowseDatasetResponse, BrowseError, BrowseProgress, - DatasetInfo, FileEntry, PackageAssembleRequest, PackageAssembleResponse, VreEntry, - VreEoscInline, VreHosted, + DatasetInfo, FileEntry, }; use prost_types::Timestamp; @@ -271,7 +268,7 @@ pub trait DispatcherClient: Send + Sync + 'static { async fn launch(&self, p: LaunchRequset) -> anyhow::Result; } -pub struct ReqPackAssembler { +pub struct RequestPackager { pub tool_registry: Arc, pub dispacher: Arc, } @@ -286,7 +283,7 @@ pub struct ReqPackAssembler { // For vres that need to be launched through dispatcher, the request is blocking until the vre is // ready. We use grpc so other rpc calls are not blocked. 
#[tonic::async_trait] -impl AssembleService for ReqPackAssembler { +impl AssembleService for RequestPackager { // XXX: this rpc call may need to be separated into two calls, one use streams to get all // information needed include resources whose necessity depends on the type of tools. // Then send a whole pack and return resp after launch the vre. From ee8d40a73f280aefab1f53320a71f10d9822923d Mon Sep 17 00:00:00 2001 From: Jusong Yu Date: Mon, 2 Mar 2026 09:44:23 +0100 Subject: [PATCH 14/14] rebase after merge --- Cargo.lock | 351 +++++++++++++++++++++++++++++++++++++++- Cargo.toml | 25 ++- src/{lib.rs => lib_.rs} | 0 src/main.rs | 1 + 4 files changed, 366 insertions(+), 11 deletions(-) rename src/{lib.rs => lib_.rs} (100%) diff --git a/Cargo.lock b/Cargo.lock index 5d0c9cc..b03d100 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,16 +73,19 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "axum" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", + "form_urlencoded", "futures-util", "http", "http-body", "http-body-util", + "hyper", + "hyper-util", "itoa", "matchit", "memchr", @@ -90,10 +93,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", "sync_wrapper", + "tokio", "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -112,6 +120,7 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -126,6 +135,25 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.1" @@ -161,7 +189,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.3.0", "rand_core", ] @@ -185,6 +213,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "cpufeatures" version = "0.3.0" @@ -194,6 +231,51 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = 
"0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -318,6 +400,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -344,6 +436,30 @@ dependencies = [ "wasip3", ] +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags", + "ignore", + "walkdir", +] + [[package]] name = "h2" version = "0.4.12" @@ -417,6 +533,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-range-header" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" + [[package]] name = "httparse" version = "1.10.1" @@ -429,6 +551,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "hyper" version = "1.8.1" @@ -618,6 +749,22 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "2.12.1" @@ -655,6 +802,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -667,6 +820,12 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -703,6 +862,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "mio" version = "1.1.1" @@ 
-741,6 +910,49 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "petgraph" version = "0.7.1" @@ -953,19 +1165,23 @@ dependencies = [ "anyhow", "async-stream", "async-trait", + "axum", "chrono", "futures-core", "futures-util", + "humansize", "prost", "prost-types", "rand", "serde", "serde_json", + "tera", "tokio", "tokio-stream", "tonic", "tonic-prost", "tonic-prost-build", + "tower-http", "url", "uuid", ] @@ -995,6 +1211,15 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] 
name = "semver" version = "1.0.27" @@ -1044,6 +1269,40 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1119,6 +1378,22 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tera" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722" +dependencies = [ + "globwalk", + "lazy_static", + "pest", + "pest_derive", + "regex", + "serde", + "serde_json", + "unicode-segmentation", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1266,6 +1541,32 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "http-range-header", + "httpdate", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower-layer" version 
= "0.3.3" @@ -1284,6 +1585,7 @@ version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -1315,6 +1617,18 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicase" version = "2.8.1" @@ -1327,6 +1641,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -1362,6 +1682,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -1474,6 +1810,15 @@ dependencies = [ "semver", ] +[[package]] +name = "winapi-util" 
+version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index 1f64e12..3644fcd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,16 +1,19 @@ [package] name = "req_packager" -name = "poc-ui" version = "0.1.0" edition = "2021" -[[bin]] -name = "rp_server" -path = "src/server.rs" +# [[bin]] +# name = "rp_server" +# path = "src/server.rs" +# +# [[bin]] +# name = "rp_client" +# path = "src/client.rs" [[bin]] -name = "rp_client" -path = "src/client.rs" +name = "ui-demo" +path = "src/main.rs" [dependencies] anyhow = "1.0.100" @@ -22,14 +25,20 @@ futures-util = "0.3.31" prost = "0.14.1" prost-types = "0.14.1" rand = "0.10.0" -serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.146" -tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] } tokio-stream = "0.1.17" tonic = "0.14.2" tonic-prost = "0.14.2" url = "2.5.8" uuid = { version = "1.21.0", features = ["v4"] } +# ui +axum = { version = "0.8.8", features = ["tokio", "http2"] } +tower-http = { version = "0.6.8", features = ["fs"] } +tera = { version = "1", default-features = false } +humansize = "2.1.3" +# shared +tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread", "rt"] } +serde = { version = "1.0.228", features = ["derive"] } [build-dependencies] tonic-prost-build = "*" diff --git a/src/lib.rs b/src/lib_.rs similarity index 100% rename from src/lib.rs rename to src/lib_.rs diff --git a/src/main.rs b/src/main.rs index a700342..0bd9b4c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -351,6 +351,7 @@ async fn main() { let app = Router::new() .nest_service("/assets", ServeDir::new("ui/assets")) .route("/search-result", get(search_result)) + .route("/", get(search_result)) .route("/datasets/{id}", get(dataset)) 
.route("/datasets/{id}/repo", get(inspect_dataset_repo)) .route("/repo-additional", get(repo_additional))