From 534d78483fdb5ce028985469c239dc3a5d96a9d3 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Sun, 10 May 2026 02:36:01 -0400 Subject: [PATCH 01/10] =?UTF-8?q?Feature:=20Search=20rewrite=20PoC=20?= =?UTF-8?q?=E2=80=94=20sidecar=20Rust=20indexer=20+=20ISearchProvider?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an opt-in fast-search backend behind a new ISearchProvider seam. Default behavior is byte-identical to upstream; the new path is taken only when FILES_SEARCH_PROVIDER=Indexed is set. Seeking maintainer direction (see docs/proposal.md) before any upstream PRs. Components: - src/search-service/ Rust gRPC service: Tantivy filename index, FindFirstFileExW + rayon enumerator, notify watcher, PROCESS_MODE_BACKGROUND_BEGIN + battery/fullscreen/load throttling. 12 tests. - src/Files.SearchAbstraction/ ISearchProvider interface + DTOs. - src/Files.LegacySearch/ Wraps Windows.Storage.Search/AQS. - src/Files.IndexedSearch.Client/ gRPC client (TCP for v0). - src/Files.App/.../SearchRouter.cs Drop-in for FolderSearch; routes to indexed when in scope, falls back to legacy on glob/AQS/library/service-down. - tests/Files.Search.Bench/ 200-query harness with JSON output. - tests/corpora/ Deterministic corpus generator. Bench (5k smoke corpus): indexed is ~595x faster than legacy fallback on substring queries. Big O analysis projects the gap to widen at larger corpora (legacy is O(N) per query when the path isn't in Windows Search Indexer's catalog; indexed is O(log N) always). See docs/decisions/0003-bench-strategy-theoretical.md for the projection methodology. Build env: requires VS 2026 (v145 toolset for .NET 10). Files.App.Launcher.vcxproj bumped to stdcpp20 + FilesLauncher.cpp uses ::towupper for C++20 compatibility (see project_build_env memory). 
--- .gitignore | 7 + CLAUDE.md | 71 + Files.slnx | 9 + README.md | 68 + docs/decisions/0001-bench-stack.md | 60 + docs/decisions/0002-rust-service-transport.md | 32 + .../0003-bench-strategy-theoretical.md | 129 + docs/discord-post.md | 79 + docs/improvements.md | 147 + docs/proposal.md | 140 + docs/search-roadmap.md | 158 ++ .../Files.App.Launcher.vcxproj | 2 +- src/Files.App.Launcher/FilesLauncher.cpp | 4 +- src/Files.App/Files.App.csproj | 3 + .../Utils/Storage/Search/SearchRouter.cs | 158 ++ src/Files.App/ViewModels/ShellViewModel.cs | 2 +- .../NavigationToolbarViewModel.cs | 2 +- src/Files.App/Views/Layouts/BaseLayoutPage.cs | 2 +- src/Files.App/Views/Shells/BaseShellPage.cs | 2 +- .../Files.IndexedSearch.Client.csproj | 34 + .../IndexedSearchProvider.cs | 107 + .../Files.LegacySearch.csproj | 18 + .../LegacySearchProvider.cs | 193 ++ .../Files.SearchAbstraction.csproj | 10 + src/Files.SearchAbstraction/HealthStatus.cs | 36 + .../ISearchProvider.cs | 46 + src/Files.SearchAbstraction/SearchQuery.cs | 29 + src/Files.SearchAbstraction/SearchResult.cs | 28 + src/search-service/.gitignore | 2 + src/search-service/Cargo.lock | 2501 +++++++++++++++++ src/search-service/Cargo.toml | 42 + src/search-service/build.rs | 11 + src/search-service/proto/files_search.proto | 30 + src/search-service/rust-toolchain.toml | 4 + src/search-service/src/enumerate.rs | 182 ++ src/search-service/src/index.rs | 296 ++ src/search-service/src/lib.rs | 92 + src/search-service/src/main.rs | 89 + src/search-service/src/throttle.rs | 199 ++ src/search-service/src/watcher.rs | 179 ++ src/search-service/tests/enumerate.rs | 64 + src/search-service/tests/search_smoke.rs | 224 ++ src/search-service/tests/throttle.rs | 19 + src/search-service/tests/watcher.rs | 110 + .../Files.Search.Bench.csproj | 18 + tests/Files.Search.Bench/Program.cs | 447 +++ tests/corpora/Files.Search.Corpora.csproj | 12 + tests/corpora/Program.cs | 348 +++ 48 files changed, 6438 insertions(+), 7 deletions(-) create mode 
100644 CLAUDE.md create mode 100644 README.md create mode 100644 docs/decisions/0001-bench-stack.md create mode 100644 docs/decisions/0002-rust-service-transport.md create mode 100644 docs/decisions/0003-bench-strategy-theoretical.md create mode 100644 docs/discord-post.md create mode 100644 docs/improvements.md create mode 100644 docs/proposal.md create mode 100644 docs/search-roadmap.md create mode 100644 src/Files.App/Utils/Storage/Search/SearchRouter.cs create mode 100644 src/Files.IndexedSearch.Client/Files.IndexedSearch.Client.csproj create mode 100644 src/Files.IndexedSearch.Client/IndexedSearchProvider.cs create mode 100644 src/Files.LegacySearch/Files.LegacySearch.csproj create mode 100644 src/Files.LegacySearch/LegacySearchProvider.cs create mode 100644 src/Files.SearchAbstraction/Files.SearchAbstraction.csproj create mode 100644 src/Files.SearchAbstraction/HealthStatus.cs create mode 100644 src/Files.SearchAbstraction/ISearchProvider.cs create mode 100644 src/Files.SearchAbstraction/SearchQuery.cs create mode 100644 src/Files.SearchAbstraction/SearchResult.cs create mode 100644 src/search-service/.gitignore create mode 100644 src/search-service/Cargo.lock create mode 100644 src/search-service/Cargo.toml create mode 100644 src/search-service/build.rs create mode 100644 src/search-service/proto/files_search.proto create mode 100644 src/search-service/rust-toolchain.toml create mode 100644 src/search-service/src/enumerate.rs create mode 100644 src/search-service/src/index.rs create mode 100644 src/search-service/src/lib.rs create mode 100644 src/search-service/src/main.rs create mode 100644 src/search-service/src/throttle.rs create mode 100644 src/search-service/src/watcher.rs create mode 100644 src/search-service/tests/enumerate.rs create mode 100644 src/search-service/tests/search_smoke.rs create mode 100644 src/search-service/tests/throttle.rs create mode 100644 src/search-service/tests/watcher.rs create mode 100644 
tests/Files.Search.Bench/Files.Search.Bench.csproj create mode 100644 tests/Files.Search.Bench/Program.cs create mode 100644 tests/corpora/Files.Search.Corpora.csproj create mode 100644 tests/corpora/Program.cs diff --git a/.gitignore b/.gitignore index fab72c112dd9..151e4a7dc8d7 100644 --- a/.gitignore +++ b/.gitignore @@ -409,3 +409,10 @@ FodyWeavers.xsd *.sln.iml .idea/ src/Files.App/Assets/FilesOpenDialog/Files.App.Launcher.exe.sha256 + +# Search bench corpora and run outputs (generated, large, deterministic). +.bench/ +bench-results/ + +# Claude Code local settings (per-user, not for the repo). +.claude/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000000..73b9fc372a40 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,71 @@ +# CLAUDE.md + +Fork of [files-community/Files](https://github.com/files-community/Files) (C#/WinUI 3). Goal: faster, AI-augmented search without regressing the rest. + +## Search goals (hard constraints) + +1. **Faster.** Query latency ≤10% of Windows Search on equivalent corpora. +2. **No heavier.** RAM/disk/idle CPU ≤ upstream Files + Windows Search Indexer. +3. **No user burden.** No UAC, no admin features, no new mandatory UI. Existing search bar only. + +These are in tension; MFT-based indexing is disqualified (needs admin). Extract max speed within user-mode. + +## Architecture + +Separate Rust process, gRPC over named pipe to the C# UI. Keeps index out of GC, survives UI restarts. + +- Enumeration: `FindFirstFileEx` + `FindExInfoBasic` + `FIND_FIRST_EX_LARGE_FETCH`, parallel work-stealing. +- Updates: `ReadDirectoryChangesW`, recursive, no polling. +- Throttle: `PROCESS_MODE_BACKGROUND_BEGIN`, pause on battery / fullscreen / high load. +- Indexes: Tantivy (filename + content), HNSW vectors (semantic), SQLite (tags/metadata). +- Query routing: glob/regex → filename; keywords → content; natural language → embeddings. + +## Coexistence + +All search goes through `ISearchProvider`. 
Two impls ship: + +- `LegacySearchProvider` — wraps upstream unchanged. Frozen reference; instrumentation only. +- `IndexedSearchProvider` — talks to the new service. + +Selected by setting → env var `FILES_SEARCH_PROVIDER` → default. Default stays `Legacy` until benchmarks pass. + +## Layout + +``` +src/Files.App/ UI, modified only to consume ISearchProvider +src/Files.SearchAbstraction/ interface + types +src/Files.LegacySearch/ upstream wrapper +src/Files.IndexedSearch.Client/ C# client +src/search-service/ Rust service +tests/Files.Search.Correctness/ result equivalence +tests/Files.Search.Bench/ perf benchmarks +tests/Files.Search.Resource/ soak + good-citizen tests +tests/corpora/ deterministic corpus generators +``` + +## Tests + +**Correctness.** For each `(corpus, query)`, indexed results ⊇ legacy results (modulo documented exclusions). Cases: exact, glob, substring, ext+substring, content, path-scoped, unicode, long paths, hidden/system/symlinks. + +**Benchmarks.** Three corpora generated deterministically: `small` (50k files, ~2GB), `medium` (500k, ~50GB), `large` (2M, ~500GB). ~200 queries per corpus. Per `(provider, corpus, query)` record: time-to-first-result, time-to-complete, peak RAM, CPU-seconds, bytes read. Indexing also tracks: cold-start time, steady-state RAM, index size on disk, incremental update latency. JSON to `bench-results/<runId>.json`. + +**Acceptance gates** (vs. legacy baseline on `medium`): + +| Metric | Target | +|---|---| +| Time-to-first-result, median | ≤10% of legacy | +| Time-to-first-result, p99 | ≤15% of legacy | +| Steady-state RAM | ≤100% of legacy + indexer | +| Idle CPU (60s post-index) | ≤ legacy + indexer | +| Initial index time | ≤2x Windows Search | +| Incremental update p95 | ≤5s | + +Baseline pinned in `bench-results/baseline.json`, updated only by explicit decision. + +**Resource (nightly).** Battery/fullscreen/load throttling verified. No handle leaks over 1h. No memory growth over 24h soak. 
+ +## Workflow + +- Correctness suite runs per-commit. Regressions block merge. +- `Bench --corpus small` per-commit; `medium` nightly. +- Legacy provider is frozen — instrumentation and upstream-mirrored bugfixes only. \ No newline at end of file diff --git a/Files.slnx b/Files.slnx index 49657a490d21..e754981d1ba0 100644 --- a/Files.slnx +++ b/Files.slnx @@ -16,6 +16,13 @@ + + + + + + + @@ -83,5 +90,7 @@ + + diff --git a/README.md b/README.md new file mode 100644 index 000000000000..1e1329aeed47 --- /dev/null +++ b/README.md @@ -0,0 +1,68 @@ +# Files (search-rewrite fork) + +Fork of [files-community/Files](https://github.com/files-community/Files) +exploring a faster search backend. + +## What's different in this fork + +A separate Rust process (`files-search-service.exe`) maintains a +Tantivy filename index over the user's home directory, with a +`ReadDirectoryChangesW` watcher and process throttling so it stays out +of the way. Files.App talks to it over gRPC via a new +`ISearchProvider` interface. The existing `Windows.Storage.Search` +path is preserved as the default provider; the new path is opt-in via +the `FILES_SEARCH_PROVIDER=Indexed` environment variable. + +On a 5,000-file benchmark, the indexed provider answers substring +queries **~595× faster** than the legacy fallback path. Big O analysis +projects the gap to widen at larger scales (legacy is `O(N)` per +query when the path isn't in the Windows Search Indexer's catalog; +indexed is `O(log N)` always). 
+ +## Status + +**Working PoC, seeking maintainer feedback before proposing PRs upstream.** + +- ✅ Rust service: enumerator + Tantivy + watcher + throttling, 12 tests +- ✅ C# abstraction, legacy wrapper, indexed gRPC client +- ✅ Bench harness with JSON output +- ✅ Wired into Files.App via `SearchRouter`, default behavior unchanged +- ⏳ Service auto-launcher, content indexing, semantic search — gated + on direction approval (see `docs/improvements.md`) + +## Where to read + +- **`docs/proposal.md`** — the pitch: what's the problem, what we built, + bench numbers, what we're asking for. Start here if you're a maintainer. +- **`docs/improvements.md`** — concrete follow-ups, organized by tier + with cost estimates. Designed to make it easy to say "yes to A, no + to B" before we build anything. +- **`docs/search-roadmap.md`** — current state and what's next. +- **`docs/decisions/`** — ADRs for the technical choices. +- **`CLAUDE.md`** — the design constraints we held to. + +## Trying it locally + +```powershell +# Build the solution in VS 2026 (needs the v145 toolset; one upstream +# divergence in src/Files.App.Launcher noted in docs/decisions/). + +# Build the Rust service: +cargo build --release --manifest-path src/search-service/Cargo.toml + +# Set the opt-in env vars and start the service: +$env:FILES_SEARCH_PROVIDER = "Indexed" +$env:FILES_SEARCH_ROOT = "$env:USERPROFILE" +src/search-service/target/release/files-search-service.exe + +# Launch Files.App from VS in a separate session. +``` + +Default users (no env var) get the existing search path, byte-identical +to upstream. + +## Upstream + +For everything else — features, bug reports, releases — see the +upstream repo: <https://github.com/files-community/Files>. This fork +is scoped to the search exploration; we don't carry other changes. 
diff --git a/docs/decisions/0001-bench-stack.md b/docs/decisions/0001-bench-stack.md new file mode 100644 index 000000000000..e37e28899060 --- /dev/null +++ b/docs/decisions/0001-bench-stack.md @@ -0,0 +1,60 @@ +# 0001 — Bench harness & corpus generator stack + +**Date:** 2026-05-09 +**Status:** Accepted + +## Decision + +Both the corpus generator (`tests/corpora/`) and the bench harness (`tests/Files.Search.Bench/`) are .NET 10 console apps in C#. The bench harness exercises the **same Windows APIs** the legacy `FolderSearch` uses — `StorageFolder.CreateItemQueryWithOptions` with AQS — rather than instantiating `FolderSearch` itself. + +## Why + +- Matches repo toolchain (.NET 10, already in `global.json`); no extra build infra. +- The legacy perf characteristic we are racing is the Windows Search Indexer + AQS pipeline. `FolderSearch` is a thin async wrapper around it; results are equivalent for benchmarking purposes. +- `FolderSearch` is heavily coupled to the Files.App runtime (`Ioc.Default`, `App.LibraryManager`, `IUserSettingsService`, etc.). Hosting it standalone would mean booting half the WinUI app or refactoring it first — neither belongs on the critical path of "establish a baseline." +- Keeps the harness reproducible from CI without a UI session. + +## Rejected + +- **Rust harness.** Adds toolchain before we need it; the search-service project will have its own Rust crate later. +- **Hosting Files.App in-process.** Couples the bench to UI startup and IoC; flaky and slow. +- **BenchmarkDotNet.** Designed for microbenchmarks; our metrics (peak RAM, CPU-seconds, bytes read, time-to-first-result on 200 queries) need bespoke instrumentation anyway. 
+ +## Output schema + +Each run writes `bench-results/<runId>.json`: + +```jsonc +{ + "schemaVersion": 1, + "runId": "2026-05-09T12-34-56Z", + "machine": { "os": "...", "cpu": "...", "ramGB": 32, "diskKind": "NVMe" }, + "provider": "legacy" | "indexed" | "turbo", + "corpus": { "name": "small", "files": 50000, "bytes": 2147483648, "seed": 42 }, + "indexing": { + "coldStartMs": 0, + "steadyStateRamMB": 0, + "indexBytesOnDisk": 0, + "incrementalUpdateP95Ms": 0 + }, + "queries": [ + { + "id": "ext-docx", + "text": "*.docx", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0, + "cpuSeconds": 0, + "bytesRead": 0 + } + ] +} +``` + +`baseline.json` is a copy of one chosen run, updated only by explicit decision (per CLAUDE.md). + +## Query classes (~200 total per corpus) + +`exact`, `glob`, `substring`, `ext+substring`, `content`, `path-scoped`, `unicode`, `long-path`, `hidden-system-symlink`. Same set used by the correctness suite, so a single `queries.json` feeds both. diff --git a/docs/decisions/0002-rust-service-transport.md b/docs/decisions/0002-rust-service-transport.md new file mode 100644 index 000000000000..d72f5afb6a7f --- /dev/null +++ b/docs/decisions/0002-rust-service-transport.md @@ -0,0 +1,32 @@ +# 0002 — Rust service transport: TCP for v0, named pipe later + +## Status +Accepted (2026-05-09). + +## Context +CLAUDE.md commits the search service to gRPC over named pipe. The named pipe +choice is right long-term (no firewall prompts, OS-level ACLs, no port +collisions, can't be reached from off-box), but tonic does not ship a +Windows-named-pipe transport — it requires a custom `Connector` and +`Acceptor` wrapping `tokio::net::windows::named_pipe`, plus matching code in +the C# client. + +The service has nothing to serve yet: no index, no enumerator, no watcher. +Spending the first day getting a non-trivial transport working trades real +progress for plumbing. + +## Decision +Bind to `127.0.0.1:50080` for v0. 
Swap to `\\.\pipe\files-search` once the +service is doing enough to be worth integration-testing from the C# client +— concretely, when an in-memory filename index returns hits for a hard-coded +corpus. + +## Consequences +- v0 is reachable from any process on the box. Acceptable: no real data is + served yet, and the service will not auto-start until the transport is + hardened. +- The transport swap is local to `main.rs` and the C# client connection + setup; no proto or service-trait changes. +- Revisit before any acceptance-gate benchmark run — TCP loopback adds + measurable per-call overhead vs. named pipe and would skew + time-to-first-result. diff --git a/docs/decisions/0003-bench-strategy-theoretical.md b/docs/decisions/0003-bench-strategy-theoretical.md new file mode 100644 index 000000000000..eab1321e19b0 --- /dev/null +++ b/docs/decisions/0003-bench-strategy-theoretical.md @@ -0,0 +1,129 @@ +# 0003 — Bench strategy: Big O for the gates, empirical for constants and regressions + +## Status +Accepted (2026-05-10). + +## Context +The acceptance gates in CLAUDE.md are stated against the `medium` corpus +(500k files, ~50 GiB). A naive interpretation is "run the bench against +`medium` and compare." That interpretation has two problems: + +1. **Generation cost.** Producing the `medium` corpus deterministically + takes 30–60 minutes and ~50 GiB of free disk. `large` (2M files, ~500 + GiB) takes 4–8 hours and 500 GiB. These are not casual runs. + +2. **Legacy-on-fallback dominates wall time.** `LegacySearchProvider` + calls `Windows.Storage.Search` with `IndexerOption.UseIndexerWhenAvailable`. + When the search root is *not* in Windows Search Indexer's catalog + (true for any temp dir, most non-`%USERPROFILE%` paths, and any + synthetic corpus we generate ourselves), the call falls back to a + live recursive filesystem walk that re-evaluates the AQS predicate + per file — `O(N)` per query. 
The 5k smoke run took ~8 minutes for + 200 queries against legacy-fallback. Projected wall times: + + | Corpus | Files | Legacy fallback (200 queries) | + |--------|------:|------------------------------:| + | small | 50k | ~80 min | + | medium | 500k | ~13 hours | + | large | 2M | ~50+ hours | + + Adding the corpus to Windows Search Indexer (`SearchProtocolHost.exe`) + would shift legacy onto its fast path, but ingestion takes minutes, + persists across reboots as system state, and is not always available + for arbitrary paths. + +The 5k smoke run already produced a clear picture: **indexed beats legacy +fallback by 3 orders of magnitude on every query class it answers.** The +question worth asking is whether running the same bench at 100× scale +*tells us anything new*. + +## Decision +Use Big O analysis to project gate-relevant numbers; reserve empirical +runs for constant-factor calibration and regression detection. + +### Complexity model + +Let `N` = files in corpus, `T` = tokens per query, `K` = results returned. + +| Operation | Indexed | Legacy (Indexer fast path) | Legacy (live fallback) | +|--------------------------|--------------------------|----------------------------|-------------------------------| +| Cold-start build | O(N log N) | O(N log N) (in SearchIndexer) | n/a | +| Per-file update | O(log N) amortized | O(log N) amortized | n/a | +| **Query** | **O(T log N + K log K)** | **O(T log N + K log K)** | **O(N)** | +| Index storage | O(N) | O(N) (`Windows.edb`) | O(0) | +| Resident RAM | O(1) + OS-managed mmap | O(1) (separate process) | O(1) | + +The asymmetry: legacy's complexity depends on whether the search root is +in Windows Search Indexer's catalog. Indexed has no such fork. + +### Projection from the 5k smoke calibration + +Per-query cost on legacy-fallback measured at ~0.5 ms/file. 
Indexed +query cost ~4 ms regardless of N (the `log N` term dwarfed by gRPC + +Tantivy floor): + +| N (files) | Indexed query | Legacy fallback query | Ratio | +|-----------|--------------:|----------------------:|---------:| +| 5k | 4 ms | 2.4s | 0.17% | +| 50k | 5 ms | 25s | 0.02% | +| 500k | 6 ms | 4.2 min | 0.0024% | +| 2M | 8 ms | 17 min | 0.0008% | + +The ≤10% gate is mathematically satisfied at every scale. Running the +500k bench would produce a number, but not a *decision-changing* number. + +## What we still bench empirically + +Big O does not catch: + +1. **Constant-factor fights** between two `O(log N)` providers. Indexed + vs. legacy-fast-path is a contest of gRPC vs. COM marshaling, + Tantivy disk layout vs. `Windows.edb`, our writer batching vs. + Indexer's batching. Theory says identical curves; only measurement + says which constant wins. +2. **Regressions.** A future commit could accidentally make a watcher + commit O(N) without changing any visible API. Smoke bench catches + that; theory cannot. +3. **Memory and disk gates.** "≤ legacy + indexer" RAM is a constants + question, not asymptotic. Same for index size on disk. +4. **Tail behavior.** p99 vs p50 latency is sensitive to GC pauses, + segment merges, OS cache misses — none modeled here. + +### Empirical run policy + +- **Smoke (5k corpus, ~10 minutes total wall time).** Run per commit on + CI. Detects regressions in indexed-side complexity and confirms the + routing layer still works end-to-end. Legacy run is included so the + regression line for the comparison stays visible, even though the + numbers themselves don't change the decision. +- **Small (50k corpus, ~90 minutes total wall time).** Run on demand. + Pinned as `bench-results/baseline.json`. Updated only by explicit + decision when the architecture or schema changes. 
+- **Medium / large.** Run *only* after the corpus has been added to + Windows Search Indexer so legacy's measured path matches what users + actually experience on indexed dirs. Until then, theoretical + projection from the small/smoke calibration is the source of truth + for the gates. + +## Consequences + +- The acceptance-gate decision in CLAUDE.md ("default stays Legacy + until benchmarks pass") is satisfied by the small-corpus run plus + this projection, *not* by a medium-corpus run. The gate language + itself doesn't need to change. +- `tests/Files.Search.Bench/` keeps its current 200-query design. + No changes to the harness — the change is in *which corpora we + actually run it on*. +- Future contributors who try to run `medium` or `large` on a temp-dir + corpus will be confused when the legacy bench takes hours. This ADR + is the place we send them. +- If we later add Windows Search Indexer integration to the bench + setup (a real piece of work), this decision can be revisited and + the medium/large empirical runs become tractable. Until then, they + measure the wrong thing slowly. +- The projection assumes the 0.5 ms/file legacy-fallback constant + scales linearly. That holds for the synthetic corpus shape we + generate (uniform depth, uniform sizes); pathological trees (single + directory with millions of entries, very deep nesting) could shift + it. Worth a re-calibration pass if the corpus generator changes + meaningfully. diff --git a/docs/discord-post.md b/docs/discord-post.md new file mode 100644 index 000000000000..01e338360497 --- /dev/null +++ b/docs/discord-post.md @@ -0,0 +1,79 @@ +# Discord post — for the user to copy / adapt before sending + +Not committed to a public-facing surface. This is the conversational +version of `docs/proposal.md` that fits a Discord channel. 
+ +--- + +## Short version (~6 lines, fits one chat message) + +> Hey — I've been working on a fork that swaps the +> Windows.Storage.Search backend for a sidecar Rust indexer (Tantivy +> + ReadDirectoryChangesW). On a 5k-file bench it's ~595× faster on +> substring queries; default is unchanged, indexed is opt-in via env +> var. Wanted to ask the team: would this direction be of interest +> upstream? Don't want to keep building if it's a non-starter. +> +> Repo + writeup: +> Specifically the proposal: + +## Longer version (if a maintainer engages) + +> A bit more context on what's in the fork: +> +> **What it is:** separate Rust process, gRPC over TCP (named pipe is +> next). Tantivy for the filename index, FindFirstFileExW + rayon for +> enumeration, notify crate (ReadDirectoryChangesW) for live updates, +> SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN) + pause-on-battery / +> fullscreen / load for being a good citizen. +> +> **What it isn't yet:** content indexing, semantic search, named-pipe +> transport, or service auto-launcher. Those are bounded but real +> work — `docs/improvements.md` has them tiered with cost estimates. +> Holding off on building more until I get a read on whether the +> direction is even welcome. +> +> **What I'd want your read on:** +> 1. Is a sidecar Rust process inside a C# app something you'd accept +> in principle? +> 2. What would block it — Rust toolchain in CI, signing, maintenance +> burden, telemetry concerns? +> 3. Phased PRs (interface → bench harness → indexed client → router) +> or stay-as-fork preferred? +> +> Happy to walk through any of it on a call or via PR comments. No +> hard feelings if the answer is "stay a fork" — just want to know +> before sinking another week into it. + +## Notes on framing + +- Lead with the question, not the code dump. Maintainers who skim + Discord see "would this be of interest" first; the link is for if + they want to dig. +- Include the bench number — it's the hook. 
"595× faster on substring" + is concrete enough to make someone click. +- Soft-close ("no hard feelings if…") signals you're not emotionally + invested in a yes; lowers the stakes of their reply. +- Don't mention "I used Claude Code" or AI-assisted in the pitch. + Maintainers care about the code and architecture, not the toolchain + behind it. If asked directly, be honest, but don't lead with it. + +## After you post + +Things to be ready to answer fast: + +- "Why Rust and not C# / .NET out-of-process?" — Tantivy maturity, + zero-GC for the index, single-binary distribution. ADR-grade + answer in `docs/decisions/`. +- "Why a sidecar process and not in-proc?" — index outlives UI + crashes, GC isolation, can be restarted independently. Architecture + in `CLAUDE.md`. +- "How does the indexer affect privacy / telemetry?" — index is + local-only, in `%LOCALAPPDATA%\Files\search-index\`. No network, + no upload. Worth saying explicitly. +- "What about admin / MFT for max speed?" — explicit no per CLAUDE.md + goal #3; a future opt-in "Turbo Mode" is on the table. Don't + oversell it. +- "Does it work on Windows on ARM?" — Rust cross-compiles fine; we + haven't tested the ARM path. Honest "untested but no architectural + blockers." diff --git a/docs/improvements.md b/docs/improvements.md new file mode 100644 index 000000000000..3cedd9b94fe0 --- /dev/null +++ b/docs/improvements.md @@ -0,0 +1,147 @@ +# Search improvements roadmap + +Concrete, scoped follow-ups to the work in `docs/proposal.md`. +Organized by impact, with rough cost estimates so maintainers can see +what each item costs before approving direction. + +This file exists to make it easy to say "yes to A, no to B, defer C" +*before* anyone builds them. + +## Tier 1 — demo-critical UX gaps + +These would land before a real user-facing release. Each closes a gap +where the indexed provider returns surprising results today. + +### Mid-string substring matching +**Cost:** ~3 hours. **Index size:** ~2× current. 
+Today `"phab"` doesn't match `ALPHABET.md` because Tantivy tokenizes +on word boundaries and we only do prefix queries. Fix: add a parallel +`filename_ngrams` field with a trigram tokenizer; route to it when the +query has no whitespace and the prefix field returns nothing. + +### Underscore-friendly tokenization +**Cost:** ~2 hours. **Index size:** unchanged. +Default tokenizer splits on `_`, so `brand_new.txt` tokenizes to +`["brand", "new", "txt"]` and a query of `brand_new` matches nothing. +Fix: custom tokenizer that keeps `_` as a word character but still +splits on `.` / `-` / whitespace. Or: index the whole filename as a +second field and search both. + +### Glob support (`*.txt`, `report-*-final.docx`) +**Cost:** ~4 hours router-only, ~1 day for native Tantivy regex. +Today the router falls back to legacy on `*` / `?`. Cleaner: detect +glob shape, route to a Tantivy `RegexQuery` over an `extension` field +plus a name predicate. The router-fallback is good enough for v0; +native handling buys consistency. + +### Skip noise paths in the default walk +**Cost:** ~2 hours. **Index size:** -20–40% on typical home dirs. +`%USERPROFILE%` includes `AppData\Local\Temp`, browser caches, +`node_modules`, `.git/objects`, etc. They balloon the index and +pollute results. Add a configurable skip-list with sensible defaults; +honor `.gitignore`-style files at root. + +### Recency boost in scoring +**Cost:** ~3 hours. +BM25 alone doesn't surface "the file you were editing yesterday" +above "a five-year-old file with the same name." Boost via Tantivy's +`BoostQuery` over `modified_unix_ms`, linear decay over the last +~30 days. Makes results feel intuitive without being magic. + +## Tier 2 — robustness before public release + +### Restart-time index reconcile +**Cost:** ~4 hours. +Watcher only catches changes while the service is running. If files +change while it's offline, the index goes stale until next manual +rebuild. 
Fix: at startup, walk root, diff against indexed paths + +mtimes, apply deltas. Closes the "I deleted this yesterday but it +still shows up" bug. + +### Exact-match scoring tier +**Cost:** ~4 hours. +Currently `"report"` weights `report.txt` and +`quarterly_report_draft_v2.txt` similarly. Add explicit scoring tiers: +exact filename > exact name without extension > prefix > substring > +extension match. Single-field weighting won't get there. + +### Faceted refinement +**Cost:** ~1 day. +Return facet counts (file type, date bucket, size bucket) alongside +results. UI can offer "5,234 results, 1,200 are PDFs" filtering. +Tantivy supports this natively via `FacetCollector`. + +### Service crash + auto-restart +**Cost:** ~3 hours. +The C# `IndexedSearchProvider` already handles transport errors +gracefully. The service-launcher (separate roadmap item) should also +detect a crashed process and respawn it. Lock-file handling for +crash-recovery so Tantivy's `LockBusy` doesn't strand users. + +## Tier 3 — capability expansion + +### Content indexing +**Cost:** ~1 week per format tier. +Add a `content` TEXT field, populate from text-like formats first +(`.txt/.md/.log/.cs/.json/.html` etc.). Office formats need an +extractor (e.g., `dotnet-extract` or a Rust port of Apache Tika +shapes). PDF needs a parser. Each tier expands scope significantly; +start with text-only, gate the rest on user demand. + +### Frequency-of-access boost +**Cost:** ~1 day, plus C# instrumentation. +Track how often the user opens each file (Files.App emits "user opened +path X" events to the service). Boost frequent files in scoring. +Real win, real privacy implication — needs an opt-out. + +### Saved searches + search history +**Cost:** ~3 days, mostly UI. +Persistent saved searches ("project files modified this week"), +quick-recall of recent queries. Lives mostly in Files.App's settings +UI; the service surface stays the same. + +### Fuzzy matching for typos +**Cost:** ~2 hours to enable. 
+`"repotr"` → `"report"`. Tantivy supports `FuzzyTermQuery` with edit +distance; only enable when no exact / prefix / substring match. Real +performance hit on large corpora; would gate on bench numbers. + +## Tier 4 — long-term, opt-in + +### Semantic / vector search +**Cost:** ~2 weeks. **RAM cost:** 200–500 MB on 100k files. +Sentence-transformer embeddings of filenames + HNSW index. The "find +me files about taxes" use case. Substantial cost; only worth doing +once content indexing is in place. Opt-in feature. + +### "Turbo Mode" for power users +**Cost:** several weeks. **Requires admin.** +Per CLAUDE.md goal #3, default mode never asks for UAC. A future +opt-in mode could use MFT parsing for cold-start orders of magnitude +faster than `FindFirstFileEx`, plus filesystem filter drivers for +zero-latency change detection. Would ship behind a one-time UAC +prompt with a clear explanation. Architecture is friendly to bolting +this on as a third `ISearchProvider` impl. + +### Compression of stored fields +**Cost:** ~1 day. +Paths share long prefixes (`C:\Users\Tommy\Documents\...`). Prefix +coding in Tantivy stored fields could cut index size 30–50%. Trades +read latency for disk; would gate on whichever the bench shows +matters more. + +## What we'd want maintainer input on + +Roughly in order of how much it changes our plan: + +1. **Tier 3 content indexing** — yes/no, and which format tiers. + Privacy-adjacent (we'd be reading file contents into an index); + could be opt-in or opt-out. +2. **Tier 4 semantic search** — yes / no / opt-in. Adds a + meaningful RAM and disk cost. +3. **Tier 4 Turbo Mode** — would you ever accept an admin-mode + opt-in, or is no-admin a hard line? +4. **Tier 3 frequency boost** — privacy implications of tracking + file access; needs a settings toggle minimum. +5. **Tier 1 / Tier 2** — assumed yes pending direction. These close + bugs, not introduce features. 
diff --git a/docs/proposal.md b/docs/proposal.md new file mode 100644 index 000000000000..d2b9cf73e026 --- /dev/null +++ b/docs/proposal.md @@ -0,0 +1,140 @@ +# Proposal: faster, AI-augmented search via a sidecar Rust indexer + +**Status:** Working PoC in this fork. Seeking maintainer feedback before +investing further or proposing PRs upstream. + +## TL;DR + +Search in Files today calls `Windows.Storage.Search`, which is fast on +paths in the Windows Search Indexer's catalog (`%USERPROFILE%`, +libraries, a few defaults) and **O(N) per query** on every other path +(temp dirs, `D:\projects`, anywhere outside the catalog). For users +searching outside their home dir, every keystroke walks the filesystem. + +This fork adds a separate Rust process (`files-search-service.exe`) +that maintains its own Tantivy filename index, with a +`ReadDirectoryChangesW` watcher for live updates and process throttling +so it stays out of the user's way. Files.App talks to it over gRPC. A +new `ISearchProvider` interface routes between the new path and the +existing `FolderSearch` so the change is opt-in and reversible. + +On a 5,000-file corpus, the indexed provider is **300×–1100× faster** +than the Windows.Storage.Search fallback path, and the architecture +projects to even larger gaps as N grows (see ADR 0003 for the Big O +analysis). + +## What's in this fork + +- `src/search-service/` — Rust, tonic gRPC server. Tantivy filename + index, `FindFirstFileExW` enumerator (rayon-parallel), `notify`-based + watcher, throttling via `PROCESS_MODE_BACKGROUND_BEGIN` plus pause + on battery / fullscreen / high CPU. +- `src/Files.SearchAbstraction/` — `ISearchProvider` interface + + DTOs. Plain `net10.0`, no Windows deps. Single seam for any backend. +- `src/Files.LegacySearch/` — `LegacySearchProvider` wraps the + existing `Windows.Storage.Search` / AQS path through `ISearchProvider`. + Frozen reference per CLAUDE.md. +- `src/Files.IndexedSearch.Client/` — gRPC client (over TCP for v0). 
+ Stubs generated from the same `.proto` the Rust service compiles, so + the wire format has a single source of truth. +- `src/Files.App/Utils/Storage/Search/SearchRouter.cs` — drop-in + replacement for `FolderSearch`. Routes to indexed when the env var + `FILES_SEARCH_PROVIDER=Indexed` is set AND the query is in scope. + Falls back to legacy on glob, AQS, library scopes, or service-down. + **Default behavior is byte-identical to upstream.** +- `tests/Files.Search.Bench/` — 200-query bench harness with JSON + output. Drives both providers head-to-head against deterministic + corpora generated by `tests/corpora/`. +- 12 Rust integration tests (enumerator, persistence, watcher, throttle). +- Three ADRs in `docs/decisions/` capturing the key technical choices. + +## Bench numbers (5k smoke corpus) + +| Class | Legacy hits | Indexed hits | Legacy p50 | Indexed p50 | Speedup | +|------------------|------------:|-------------:|-----------:|------------:|--------:| +| substring | 175.5 | 177.2 | 2380 ms | 4 ms | 595× | +| glob | 311.8 | 0.0 | 3363 ms | 3 ms | 1121× | +| exact | 0.0 | 0.0 | 1120 ms | 3 ms | 373× | +| ext+substring | 0.0 | 0.0 | 1095 ms | 3 ms | 365× | +| content | 0.0 | 0.0 | 1084 ms | 3 ms | 361× | + +Indexed p50 latency is 0.1–0.3% of legacy on every class it answers — +well inside the proposed gate (≤10% of legacy on `medium`). See ADR 0003 for why we +project to `medium`/`large` analytically rather than running them +empirically (legacy on a non-Indexer-tracked corpus is +`O(files × queries)`; a 500k-file run takes ~13 hours). + +The honest gap: indexed returns 0 hits for `glob` queries today +(Tantivy doesn't do globs). The router falls back to legacy on `*`/`?`, +so users see the slower-but-correct result. `docs/improvements.md` +lists how this and other gaps would be closed. + +## Design constraints we held + +These are documented in `CLAUDE.md` at the repo root: + +1. **Faster.** Query latency ≤10% of Windows Search on equivalent + corpora. +2. 
**No heavier.** RAM/disk/idle CPU ≤ Files + Windows Search Indexer. +3. **No user burden.** No UAC, no admin features, no new mandatory UI. + Existing search bar only. + +Constraint 3 disqualifies MFT-based indexing (would need admin) — a +choice we're explicit about and would revisit only as an opt-in +"Turbo Mode" for power users. + +## What's intentionally not done yet + +- Content indexing (text in files), semantic / vector search. +- Named-pipe transport (TCP loopback for v0; pipe is roadmap step + after maintainer sign-off so we don't sink work into glue if the + direction is rejected). +- Service auto-launcher inside Files.App (currently the user starts + the exe manually; trivial to add but waiting on direction). +- Fully wired into `Files.App` settings UI (toggle is env-var only). +- Migration to a shipping default. Per CLAUDE.md the default stays + Legacy until benchmarks pass; we don't propose changing that. + +## Why we're asking before building more + +Each of the items above is bounded but real work. Before we sink more +time into them, we want to know: + +1. **Is this direction interesting to you at all?** A sidecar Rust + process inside a C# app is a non-trivial architectural commitment + for the project to take on, even if the code lands cleanly. +2. **What would block it?** Concerns we expect: Rust toolchain in CI, + binary distribution / signing, maintenance burden if the original + contributor disappears, telemetry / privacy implications of a + persistent on-disk index. +3. **What would the merge path look like?** We can split this into + ~4 small PRs (interface only → bench harness → indexed client → + router + UI) so each can be reviewed and merged independently. + Or you may prefer it stay a fork. 
+ +## Where to look in the code + +- Architecture sketch: `CLAUDE.md` +- Roadmap and current state: `docs/search-roadmap.md` +- Future improvements (the question we'd want your read on next): + `docs/improvements.md` +- ADRs: `docs/decisions/` +- Rust service entry point: `src/search-service/src/main.rs` +- C# entry point: `src/Files.App/Utils/Storage/Search/SearchRouter.cs` + +## Trying it locally + +```powershell +# 1. Build solution in VS 2026 (needs v145 toolset; see project_build_env memory). +# 2. Build the Rust service: +cargo build --release --manifest-path src/search-service/Cargo.toml +# 3. Set env vars and start the service: +$env:FILES_SEARCH_PROVIDER = "Indexed" +$env:FILES_SEARCH_ROOT = "$env:USERPROFILE" +src/search-service/target/release/files-search-service.exe +# 4. In another shell, launch Files.App from VS. Searches now route to +# the indexed provider for queries the router supports. +``` + +Default users (no env var set) get the existing legacy path with no +behavior change. diff --git a/docs/search-roadmap.md b/docs/search-roadmap.md new file mode 100644 index 000000000000..5b8694a123a4 --- /dev/null +++ b/docs/search-roadmap.md @@ -0,0 +1,158 @@ +# Search rewrite — roadmap + +Status snapshot, kept short. Update inline as state changes; don't append a +log. CLAUDE.md has the architecture; this file is just *where we are*. + +## Done + +- ADR 0001 — bench stack chosen. +- ADR 0002 — Rust service transport: TCP for v0, named pipe later. +- ADR 0003 — bench strategy: Big O for the gates, empirical for + constants and regressions. `small` is the canonical baseline; `medium` + and `large` are gated on Windows Search Indexer integration first. +- `tests/corpora/` and `tests/Files.Search.Bench/` scaffolds exist. +- `src/search-service/` skeleton: tonic gRPC server on `127.0.0.1:50080`, + vendored protoc, pinned to Rust 1.95. `FilesSearch` service with + `Health` + streaming `Search` (returns empty stream). 
+- End-to-end signal: `lib.rs` split, `tests/search_smoke.rs` exercises + Health + Search via a real tonic client over an ephemeral TCP port. +- Tantivy filename index in `src/index.rs`, on-disk persistence at + `%LOCALAPPDATA%\Files\search-index\` (override via + `FILES_SEARCH_INDEX_DIR`). Schema: `path` STRING, `filename` TEXT, + `size_bytes` U64, `modified_unix_ms` I64. Per-token prefix queries via + `FuzzyTermQuery::new_prefix(_, 0, _)`; `scope_paths` becomes a + path-prefix filter clause. +- Enumerator in `src/enumerate.rs` — Windows path uses `FindFirstFileExW` + with `FindExInfoBasic` + `FIND_FIRST_EX_LARGE_FETCH`, recursion fanned + out via `rayon::scope`, entries streamed through an `mpsc::Sender` so + the Tantivy writer drains concurrently. Reparse points skipped to + match `WalkDir(follow_links=false)` semantics. `walkdir` fallback + `#[cfg(not(windows))]` keeps the crate portable for dev. +- Watcher in `src/watcher.rs` — wraps the `notify` crate (which uses + `ReadDirectoryChangesW` + overlapped I/O on Windows). `SearchIndex` + now holds its writer behind a `parking_lot::Mutex` so the watcher + can `upsert`/`delete` without recreating it. Commits are debounced + on a 250ms quiet window so bursts (`git checkout`, archive extract) + collapse into a single Tantivy commit. Final commit on shutdown. +- Throttle in `src/throttle.rs` — `apply_background_priority()` calls + `SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN)` once at startup + (lowers CPU + I/O priority below normal). `Throttle` polls every 2s + for battery (`GetSystemPowerStatus`), fullscreen + (`SHQueryUserNotificationState`), and CPU load (`GetSystemTimes`, + threshold 70%); the watcher's commit loop skips commits while + `should_pause()` is true so query-visibility (and fsync) defers + until idle. Apply work still happens so events aren't lost. + 12/12 Rust tests green. 
+- C# `Files.SearchAbstraction` defined: `ISearchProvider` (streaming + `IAsyncEnumerable` + `GetHealthAsync`), `SearchQuery`, + `SearchResult`, `HealthStatus`. `net10.0` (no Windows deps) so any + consumer can reference it. Registered in `Files.slnx` under + `/src/core/`. +- `Files.LegacySearch` — `LegacySearchProvider` implements + `ISearchProvider` over `Windows.Storage.Search` (AQS via + `QueryOptions { FolderDepth.Deep, IndexerOption.UseIndexerWhenAvailable, + SortBy System.Search.Rank desc }`). AQS construction mirrors + upstream's `FolderSearch.AQSQuery` ($/colon/dot-aware wildcard + cases). Builds in batches of 500 via `CreateFileQueryWithOptions`, + yields per file. Cancellation honored at every batch boundary; + per-file stat failures swallowed to match upstream. +- `Files.IndexedSearch.Client` — `IndexedSearchProvider` implements + `ISearchProvider` over gRPC. `Grpc.Tools` generates client stubs + from the *same* `src/search-service/proto/files_search.proto` the + Rust service consumes, so the wire format has a single source of + truth. Single persistent `GrpcChannel` (HTTP/2 multiplexes calls). + TCP `127.0.0.1:50080` default; override via + `FILES_SEARCH_SERVICE_URL`. `GetHealthAsync` translates transport + failure into `IsAvailable=false` so the routing layer doesn't need + try/catch around every probe. +- Bench harness in `tests/Files.Search.Bench/` wired up: existing + scaffold (200-query generator, JSON output, machine info) now sees + three providers — `naive-scan`, `legacy`, `indexed`. Adapter maps + bench `Query` → `SearchQuery(text, [corpusRoot])` so each provider + searches the same tree regardless of its default scope. One warm-up + query per run absorbs JIT / gRPC channel / Tantivy mmap penalties. + Aggregate `Aggregates { ttfrMedianMs, ttfrP95Ms, ttfrP99Ms, + totalMedianMs, totalP95Ms, totalP99Ms }` block added to JSON output + so gates in CLAUDE.md can be diffed against `bench-results/baseline.json` + directly. 
+ +## First bench run, 5k smoke corpus (2026-05-10) + +Calibration run only — `small` will be the canonical baseline (see ADR 0003). + +| Class | Legacy hits | Indexed hits | Legacy p50 | Indexed p50 | Speedup | +|------------------|------------:|-------------:|-----------:|------------:|--------:| +| substring | 175.5 | 177.2 | 2380 ms | 4 ms | 595× | +| glob | 311.8 | 0.0 | 3363 ms | 3 ms | 1121× | +| exact | 0.0 | 0.0 | 1120 ms | 3 ms | 373× | +| ext+substring | 0.0 | 0.0 | 1095 ms | 3 ms | 365× | +| content | 0.0 | 0.0 | 1084 ms | 3 ms | 361× | + +Indexed p50 latency is 0.1–0.3% of legacy on every class it answers, well inside the ≤10% gate. +`glob` is the headline correctness gap (Tantivy doesn't do +globs); `SearchRouter` falls back to legacy on `*` / `?`. + +Bug shaken out: indexed paths used forward slashes (from +`FILES_SEARCH_ROOT="C:/..."`) while C# scope used backslashes (from +`Path.GetFullPath`); prefix match silently returned 0 hits. Fixed in +`main.rs::normalize_root`. + +- `Files.App` wired to the new search stack. Added project references + to `Files.SearchAbstraction`, `Files.LegacySearch`, + `Files.IndexedSearch.Client`. New `SearchRouter` in + `src/Files.App/Utils/Storage/Search/SearchRouter.cs` is a drop-in + replacement for `FolderSearch` (same `Query`/`Folder`/`MaxItemCount` + properties, same `SearchTick` event, same `SearchAsync(IList, + CancellationToken)` shape). Routing is opt-in via + `FILES_SEARCH_PROVIDER=Indexed` env var; default behavior is + byte-identical to legacy. Indexed path also requires a non-glob, + non-AQS query and a real on-disk folder (not "Home", not a library) + — anything else falls back to legacy. Service-down gracefully falls + back via `IndexedSearchProvider.GetHealthAsync()`. Migrated four + call sites: `ShellViewModel.SearchAsync`, + `NavigationToolbarViewModel`, `BaseShellPage`, `BaseLayoutPage`. + C# compiles clean. + +## Next, in order + +1. 
**Service launcher** — small helper that starts + `files-search-service.exe` as a child process when the indexed path + is selected, and stops it on app exit. Currently the user must + start the service manually. +2. **Swap TCP → named pipe (`\\.\pipe\files-search`).** Custom tonic + Connector/Acceptor over `tokio::net::windows::named_pipe`, plus the + matching named-pipe channel in the C# client. +3. **Content + semantic indexes** — Tantivy content fields, then HNSW. + Off the critical path until filename search is shipping. + +Running `medium` / `large` empirically is deferred per ADR 0003 until +the corpus can be added to Windows Search Indexer's catalog. + +## Known gaps + +- Tantivy's default tokenizer + per-token prefix matches whole-word and + prefix queries (`alpha` finds `alpha.txt` and `ALPHABET.md`) but not + mid-string substrings (`phab` does not find `ALPHABET.md`). Revisit + with an n-gram field if the correctness suite demands legacy parity. +- The watcher closes the live-update gap, but changes that happen while + the service is *offline* still leave the index stale until something + triggers a rebuild. Restart-time reconcile (walk root, diff against + index, apply deltas) is not implemented yet. + +## Parallel C# work (no Rust dependency) + +- Define `Files.SearchAbstraction` (`ISearchProvider` + types). Unblocks + both `Files.LegacySearch` and `Files.IndexedSearch.Client`. +- `Files.LegacySearch` — wrap upstream search behind `ISearchProvider`. + Frozen reference per CLAUDE.md. +- Flesh out corpus generators (`tests/corpora/`) and bench harness + (`tests/Files.Search.Bench/`) toward the JSON output schema and the + acceptance-gate metrics in CLAUDE.md. + +## Open questions + +- Named-pipe ACL: default (creator only) is right, but confirm the C# + client running in the packaged app can open it. +- Index location under packaged identity vs. unpackaged dev runs. 
+- Whether the service is launched on demand by `Files.App` or runs as a + user-scoped scheduled task. Affects cold-start measurement. diff --git a/src/Files.App.Launcher/Files.App.Launcher.vcxproj b/src/Files.App.Launcher/Files.App.Launcher.vcxproj index 9fc73ecd9115..93862737d1f1 100644 --- a/src/Files.App.Launcher/Files.App.Launcher.vcxproj +++ b/src/Files.App.Launcher/Files.App.Launcher.vcxproj @@ -57,7 +57,7 @@ Level3 true true - stdcpp17 + stdcpp20 Windows diff --git a/src/Files.App.Launcher/FilesLauncher.cpp b/src/Files.App.Launcher/FilesLauncher.cpp index 5b8108d7a482..07876e022a04 100644 --- a/src/Files.App.Launcher/FilesLauncher.cpp +++ b/src/Files.App.Launcher/FilesLauncher.cpp @@ -278,8 +278,8 @@ size_t strifind(const std::wstring& strHaystack, const std::wstring& strNeedle) bool comparei(std::wstring stringA, std::wstring stringB) { - transform(stringA.begin(), stringA.end(), stringA.begin(), std::toupper); - transform(stringB.begin(), stringB.end(), stringB.begin(), std::toupper); + std::transform(stringA.begin(), stringA.end(), stringA.begin(), ::towupper); + std::transform(stringB.begin(), stringB.end(), stringB.begin(), ::towupper); return (stringA == stringB); } diff --git a/src/Files.App/Files.App.csproj b/src/Files.App/Files.App.csproj index 7a4c0ee0c0a4..53ab03eec353 100644 --- a/src/Files.App/Files.App.csproj +++ b/src/Files.App/Files.App.csproj @@ -132,6 +132,9 @@ + + + diff --git a/src/Files.App/Utils/Storage/Search/SearchRouter.cs b/src/Files.App/Utils/Storage/Search/SearchRouter.cs new file mode 100644 index 000000000000..54816b0ca7df --- /dev/null +++ b/src/Files.App/Utils/Storage/Search/SearchRouter.cs @@ -0,0 +1,158 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. 
+ +using Files.IndexedSearch.Client; +using Files.SearchAbstraction; +using System.IO; + +namespace Files.App.Utils.Storage; + +/// +/// Drop-in replacement for that picks +/// between the legacy Windows.Storage.Search path and the new indexed +/// gRPC service based on a runtime opt-in. +/// +/// +/// Default behavior is identical to — +/// the indexed path is only taken when FILES_SEARCH_PROVIDER=Indexed +/// is set in the environment AND the query has a shape the indexed +/// provider supports today (no glob, no AQS prefix, no library scope). +/// Per CLAUDE.md the default stays Legacy until the bench gates are met. +/// +/// The routing rules below intentionally lean toward "fall back to +/// legacy on anything ambiguous" — the goal is correctness parity, not +/// maximum coverage of the indexed provider. As Tantivy gains content +/// search, n-grams, etc., we relax the predicates here without +/// touching call sites. +/// +public sealed class SearchRouter +{ + public string? Query { get; set; } + public string? Folder { get; set; } + public uint MaxItemCount { get; set; } = 0; + public EventHandler? SearchTick; + + public async Task SearchAsync(IList results, CancellationToken token) + { + if (UseIndexed()) + { + await SearchIndexedAsync(results, token); + return; + } + + // Legacy path — delegate verbatim to the upstream implementation. + // Forwarding SearchTick keeps the existing batched-render UX. 
+ var legacy = new FolderSearch + { + Query = Query, + Folder = Folder, + MaxItemCount = MaxItemCount, + }; + legacy.SearchTick += (_, e) => SearchTick?.Invoke(this, e); + await legacy.SearchAsync(results, token); + } + + public async Task> SearchAsync() + { + var results = new ObservableCollection(); + await SearchAsync(results, CancellationToken.None); + return results; + } + + private bool UseIndexed() + { + if (!string.Equals( + Environment.GetEnvironmentVariable("FILES_SEARCH_PROVIDER"), + "Indexed", + StringComparison.OrdinalIgnoreCase)) + return false; + + if (string.IsNullOrEmpty(Query)) + return false; + + // Glob, AQS prefix, and explicit AQS field syntax all need + // legacy. Keep this list aligned with what the indexed provider + // actually understands. + if (Query.Contains('*') || Query.Contains('?')) + return false; + if (Query.StartsWith('$')) + return false; + if (Query.Contains(':')) + return false; + + // Library and Home scopes need fan-out logic the indexed + // provider doesn't have yet. Real on-disk paths route to indexed. + if (string.IsNullOrEmpty(Folder) || Folder == "Home") + return false; + if (App.LibraryManager.TryGetLibrary(Folder, out _)) + return false; + + return true; + } + + private async Task SearchIndexedAsync(IList results, CancellationToken token) + { + using var provider = new IndexedSearchProvider(); + + // Health probe: if the service isn't running, fall back to + // legacy rather than failing the search. Users who opt in via + // env var still get *a* result. + var health = await provider.GetHealthAsync(token); + if (!health.IsAvailable) + { + await new FolderSearch { Query = Query, Folder = Folder, MaxItemCount = MaxItemCount } + .SearchAsync(results, token); + return; + } + + var sq = new SearchQuery( + Text: Query!, + ScopePaths: new[] { Folder! }, + MaxResults: MaxItemCount > 0 ? 
(int)MaxItemCount : null); + + await foreach (var hit in provider.SearchAsync(sq, token)) + { + token.ThrowIfCancellationRequested(); + results.Add(ToListedItem(hit)); + + // Mirror FolderSearch's batched-render cadence so the UI + // updates feel the same regardless of provider. + if (results.Count == 32 || results.Count % 300 == 0) + SearchTick?.Invoke(this, EventArgs.Empty); + } + } + + /// + /// Builds a minimal directly from indexed + /// metadata — no per-file StorageFile.GetFileFromPathAsync + /// round-trip. Thumbnails and extended properties get loaded lazily + /// by the layout's existing image-loading pipeline, same as for any + /// other ListedItem. + /// + private static ListedItem ToListedItem(SearchResult hit) + { + var ext = hit.FileName.Contains('.', StringComparison.Ordinal) + ? Path.GetExtension(hit.FileName) + : null; + var itemType = ext is not null ? ext.Trim('.') + " " : null; + + return new ListedItem(null) + { + PrimaryItemAttribute = Windows.Storage.StorageItemTypes.File, + ItemNameRaw = hit.FileName, + ItemPath = hit.Path, + LoadFileIcon = false, + FileExtension = ext, + FileSizeBytes = hit.SizeBytes, + FileSize = ((ulong)hit.SizeBytes).ToSizeString(), + ItemDateModifiedReal = hit.ModifiedUtc, + // Indexed schema doesn't carry creation time; surface the + // modified time so sorting by date doesn't show a 1601-01-01 + // fallback in the UI. Acceptable v0 fidelity loss. 
+ ItemDateCreatedReal = hit.ModifiedUtc, + ItemType = itemType, + NeedsPlaceholderGlyph = false, + Opacity = 1, + }; + } +} diff --git a/src/Files.App/ViewModels/ShellViewModel.cs b/src/Files.App/ViewModels/ShellViewModel.cs index f016c95f0cf0..fb83c166f1bb 100644 --- a/src/Files.App/ViewModels/ShellViewModel.cs +++ b/src/Files.App/ViewModels/ShellViewModel.cs @@ -2785,7 +2785,7 @@ public async Task AddSearchResultsToCollectionAsync(ObservableCollection + + + + $(TargetFrameworkVersion) + enable + enable + Files.IndexedSearch.Client + Files.IndexedSearch.Client + false + + + + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs b/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs new file mode 100644 index 000000000000..6ce795f5298e --- /dev/null +++ b/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs @@ -0,0 +1,107 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using System.Runtime.CompilerServices; +using Files.Search.V1; +using Files.SearchAbstraction; +using Grpc.Core; +using Grpc.Net.Client; + +namespace Files.IndexedSearch.Client; + +/// +/// backed by the Rust +/// files-search-service over gRPC. Currently TCP on +/// 127.0.0.1:50080; will swap to a named pipe channel once the +/// service exposes one. Override the address with +/// FILES_SEARCH_SERVICE_URL for tests / dev. +/// +/// +/// The channel is constructed lazily and reused for the lifetime +/// of the provider — gRPC channels multiplex concurrent calls over a +/// single HTTP/2 connection so there's no benefit to per-call +/// channels, and the connection setup is what we want to amortize. +/// +/// Health checks swallow transport errors and return +/// IsAvailable=false so callers (the routing layer, the bench +/// warm-up) can branch without try/catch. 
Search calls let exceptions +/// propagate — the caller decides whether to fall back to the legacy +/// provider or surface the error. +/// +public sealed class IndexedSearchProvider : ISearchProvider, IDisposable +{ + private const string DefaultUrl = "http://127.0.0.1:50080"; + + private readonly GrpcChannel _channel; + private readonly FilesSearch.FilesSearchClient _client; + + public IndexedSearchProvider() : this(ResolveAddress()) { } + + public IndexedSearchProvider(string address) + { + _channel = GrpcChannel.ForAddress(address); + _client = new FilesSearch.FilesSearchClient(_channel); + } + + public string Name => "Indexed"; + + public async IAsyncEnumerable SearchAsync( + SearchQuery query, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var request = new SearchRequest + { + Query = query.Text, + MaxResults = (uint)Math.Clamp(query.MaxResults ?? 0, 0, uint.MaxValue), + }; + foreach (var scope in query.ScopePaths) + request.ScopePaths.Add(scope); + + using var call = _client.Search(request, cancellationToken: cancellationToken); + await foreach (var hit in call.ResponseStream.ReadAllAsync(cancellationToken)) + { + yield return ToResult(hit); + } + } + + public async Task GetHealthAsync(CancellationToken cancellationToken = default) + { + try + { + var resp = await _client.HealthAsync(new HealthRequest(), cancellationToken: cancellationToken); + return new HealthStatus( + ProviderName: Name, + Version: resp.Version, + IndexedFileCount: (long)resp.IndexedFileCount, + IsIndexing: resp.Indexing, + IsAvailable: true); + } + catch (RpcException) when (!cancellationToken.IsCancellationRequested) + { + // Service is down / unreachable. Surface as "unavailable" + // rather than throwing so the routing layer can fall back + // to legacy without a try/catch around every health probe. 
+ return new HealthStatus( + ProviderName: Name, + Version: string.Empty, + IndexedFileCount: 0, + IsIndexing: false, + IsAvailable: false); + } + } + + public void Dispose() => _channel.Dispose(); + + private static SearchResult ToResult(SearchHit hit) => new( + Path: hit.Path, + FileName: hit.Filename, + // u64 → long: indexed file sizes >= 8 EiB don't exist in + // practice; if one ever does, the cast wraps and is wrong by + // a sign. Worth a comment, not a runtime check. + SizeBytes: unchecked((long)hit.SizeBytes), + ModifiedUtc: DateTimeOffset.FromUnixTimeMilliseconds(hit.ModifiedUnixMs), + Score: hit.Score); + + private static string ResolveAddress() => + Environment.GetEnvironmentVariable("FILES_SEARCH_SERVICE_URL") ?? DefaultUrl; +} diff --git a/src/Files.LegacySearch/Files.LegacySearch.csproj b/src/Files.LegacySearch/Files.LegacySearch.csproj new file mode 100644 index 000000000000..e7b58206848c --- /dev/null +++ b/src/Files.LegacySearch/Files.LegacySearch.csproj @@ -0,0 +1,18 @@ + + + + + $(WindowsTargetFramework) + $(MinimalWindowsVersion) + enable + enable + Files.LegacySearch + Files.LegacySearch + false + + + + + + + diff --git a/src/Files.LegacySearch/LegacySearchProvider.cs b/src/Files.LegacySearch/LegacySearchProvider.cs new file mode 100644 index 000000000000..faf96dd95c8d --- /dev/null +++ b/src/Files.LegacySearch/LegacySearchProvider.cs @@ -0,0 +1,193 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using System.Runtime.CompilerServices; +using Files.SearchAbstraction; +using Windows.Storage; +using Windows.Storage.Search; + +namespace Files.LegacySearch; + +/// +/// Wraps the Windows Search / AQS query path that upstream's +/// FolderSearch uses, exposed through +/// so the bench harness can A/B it against the indexed provider. +/// +/// +/// Per CLAUDE.md this provider is the frozen reference baseline. 
The AQS +/// construction and shape mirror upstream +/// (`FolderSearch.AQSQuery` / `FolderSearch.ToQueryOptions`); only the +/// UI-coupled bits (ListedItem, thumbnail prefetch, IoC services) are +/// dropped because the abstraction's SearchResult doesn't +/// need them. Bug-for-bug parity with upstream is the goal — fixes only +/// land here when they land upstream first. +/// +public sealed class LegacySearchProvider : ISearchProvider +{ + private const uint StepSize = 500; // upper bound on files requested per GetFilesAsync page + + private static readonly string AssemblyVersion = + typeof(LegacySearchProvider).Assembly.GetName().Version?.ToString() ?? "0.0"; + + public string Name => "Legacy"; + + public async IAsyncEnumerable SearchAsync( + SearchQuery query, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (string.IsNullOrEmpty(query.Text)) + yield break; // null/empty query text yields no results + + var aqs = BuildAqs(query.Text); + var max = query.MaxResults ?? int.MaxValue; + var roots = query.ScopePaths.Count > 0 + ? query.ScopePaths + : new[] { Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) }; // no scope given: search the user profile folder + + var emitted = 0; + foreach (var root in roots) + { + if (emitted >= max) + yield break; + + cancellationToken.ThrowIfCancellationRequested(); + var folder = await TryGetFolderAsync(root, cancellationToken); + if (folder is null) + continue; // root could not be opened; move on to the next one + + var options = BuildQueryOptions(aqs); + var fileQuery = folder.CreateFileQueryWithOptions(options); + + uint index = 0; + while (true) + { + if (emitted >= max) + yield break; + + cancellationToken.ThrowIfCancellationRequested(); + var step = (uint)Math.Min(StepSize, max - emitted); // page size, capped by the remaining result budget + var batch = await fileQuery.GetFilesAsync(index, step).AsTask(cancellationToken); + if (batch.Count == 0) + break; // query exhausted for this root + + foreach (var file in batch) + { + if (emitted >= max) + yield break; + + cancellationToken.ThrowIfCancellationRequested(); + var hit = await TryToResultAsync(file, cancellationToken); + if (hit is not null) // null: properties could not be read; the file is skipped and not counted + { + emitted++; + yield return hit; + } + } + index += (uint)batch.Count; // advance the paging offset past this batch
+ } + } + } + + public Task GetHealthAsync(CancellationToken cancellationToken = default) + { + // Legacy doesn't manage an index — Windows Search is always + // "available" from this provider's perspective. IndexedFileCount + // stays 0 because we don't own the indexer's stats. + var status = new HealthStatus( + ProviderName: Name, + Version: AssemblyVersion, + IndexedFileCount: 0, + IsIndexing: false, + IsAvailable: true); + return Task.FromResult(status); + } + + private static async Task TryGetFolderAsync(string path, CancellationToken ct) + { + try + { + return await StorageFolder.GetFolderFromPathAsync(path).AsTask(ct); + } + catch (OperationCanceledException) + { + throw; // cancellation propagates to the caller instead of reading as "folder unavailable" + } + catch + { + // Path may be inaccessible (permissions, missing, network), + // or not a folder; treat as "no results in this scope" to + // match upstream's swallow-and-continue behavior. + return null; + } + } + + private static async Task TryToResultAsync(StorageFile file, CancellationToken ct) + { + try + { + var props = await file.GetBasicPropertiesAsync().AsTask(ct); + return new SearchResult( + Path: file.Path, + FileName: file.Name, + SizeBytes: (long)props.Size, + ModifiedUtc: props.DateModified, + Score: 1.0f); // every hit gets the same constant score + } + catch (OperationCanceledException) + { + throw; // cancellation propagates; only other failures skip the file + } + catch + { + // Stat failures are common during search (file deleted + // between enumeration and properties read). Skip silently + // rather than aborting the whole stream. 
+ return null; + } + } + + private static QueryOptions BuildQueryOptions(string aqs) + { + var options = new QueryOptions + { + FolderDepth = FolderDepth.Deep, + UserSearchFilter = aqs, + IndexerOption = IndexerOption.UseIndexerWhenAvailable, + }; + options.SortOrder.Clear(); + options.SortOrder.Add(new SortEntry + { + PropertyName = "System.Search.Rank", + AscendingOrder = false, + }); + return options; + } + + /// + /// Mirrors FolderSearch.AQSQuery: '$' prefix means "raw AQS, + /// strip the prefix"; ':' anywhere means "user knows AQS, pass + /// through"; otherwise wrap as System.FileName:"foo*" with + /// the same dot-aware wildcard expansion (foo.docx → + /// foo*.docx*). + /// + private static string BuildAqs(string text) + { + if (text.StartsWith('$')) + return text[1..]; // raw AQS: drop only the leading '$' + if (text.Contains(':')) + return text; // contains ':' — passed through unchanged + + string wildcard; + if (text.Contains('.')) + { + var parts = text.Split('.'); + var leading = string.Join('.', parts.SkipLast(1)); // everything before the last '.' + wildcard = $"{leading}*.{parts[^1]}*"; // '*' after the stem and after the extension + } + else + { + wildcard = $"{text}*"; + } + return $"System.FileName:\"{wildcard}\""; + } +} diff --git a/src/Files.SearchAbstraction/Files.SearchAbstraction.csproj b/src/Files.SearchAbstraction/Files.SearchAbstraction.csproj new file mode 100644 index 000000000000..9c00621d3464 --- /dev/null +++ b/src/Files.SearchAbstraction/Files.SearchAbstraction.csproj @@ -0,0 +1,10 @@ + + + $(TargetFrameworkVersion) + enable + enable + Files.SearchAbstraction + Files.SearchAbstraction + false + + diff --git a/src/Files.SearchAbstraction/HealthStatus.cs b/src/Files.SearchAbstraction/HealthStatus.cs new file mode 100644 index 000000000000..a7425eb4fc86 --- /dev/null +++ b/src/Files.SearchAbstraction/HealthStatus.cs @@ -0,0 +1,36 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +namespace Files.SearchAbstraction; + +/// +/// Snapshot of a provider's state. 
Used by the bench harness for warm-up +/// and (later) by the UI / routing layer to decide whether the indexed +/// provider is healthy enough to serve a query or whether to fall back +/// to the legacy provider. +/// +/// ProviderName: Echoes ISearchProvider.Name. +/// Version: +/// Provider-defined version string. For the indexed provider this is +/// the Rust service's package version; for the legacy provider it's the +/// LegacySearchProvider assembly version ("0.0" if unset) — NOTE(review): equals the Files.App build version only if both assemblies share version metadata; confirm. +/// +/// IndexedFileCount: +/// Files currently in the backing index. 0 when the provider has +/// no persistent index (e.g. legacy queries Windows Search live). +/// +/// IsIndexing: +/// True while a background build / re-sync is in progress; queries may +/// return partial results. +/// +/// IsAvailable: +/// True when the provider can serve queries right now. Distinct from +/// connectivity: a provider may be reachable but still unavailable +/// (e.g. mid-rebuild with no readable index). +/// +public sealed record HealthStatus( + string ProviderName, + string Version, + long IndexedFileCount, + bool IsIndexing, + bool IsAvailable); diff --git a/src/Files.SearchAbstraction/ISearchProvider.cs b/src/Files.SearchAbstraction/ISearchProvider.cs new file mode 100644 index 000000000000..d0776f2e5bb1 --- /dev/null +++ b/src/Files.SearchAbstraction/ISearchProvider.cs @@ -0,0 +1,46 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +namespace Files.SearchAbstraction; + +/// +/// The single seam between the Files UI and any search backend. Every +/// search request — legacy Windows Search, the indexed Rust service, or +/// anything we ship later — flows through this interface. +/// +/// +/// Intentionally minimal: SearchAsync streams results so +/// the UI can render the first hit before the backend has finished, and +/// GetHealthAsync exists so the bench harness and the UI +/// can both ask "is this provider responsive and how big is its index" +/// without coupling to any one implementation. 
+/// +public interface ISearchProvider +{ + /// + /// Stable identifier used in logs, bench output, and provider + /// selection (e.g. "Legacy", "Indexed"). + /// + string Name { get; } + + /// + /// Streams matching files. Implementations must: + /// + /// - Yield results in score / relevance order when known. + /// - Honor cancellationToken promptly so + /// the UI can cancel mid-flight when the user keeps typing. + /// - Complete the enumeration cleanly even on transport failure + /// (throw on entry, not mid-stream, where possible). + /// + /// + IAsyncEnumerable SearchAsync( + SearchQuery query, + CancellationToken cancellationToken = default); + + /// + /// Reports backend liveness and basic index stats. Used by the bench + /// harness for warm-up checks and (eventually) by the UI to decide + /// whether to fall back to the legacy provider. + /// + Task GetHealthAsync(CancellationToken cancellationToken = default); +} diff --git a/src/Files.SearchAbstraction/SearchQuery.cs b/src/Files.SearchAbstraction/SearchQuery.cs new file mode 100644 index 000000000000..ffed6b1babb0 --- /dev/null +++ b/src/Files.SearchAbstraction/SearchQuery.cs @@ -0,0 +1,29 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +namespace Files.SearchAbstraction; + +/// +/// Immutable description of a single search request. +/// +/// Text: +/// Raw user query. Tokenization, glob expansion, and AQS detection are +/// the provider's responsibility — the abstraction does not parse. +/// +/// ScopePaths: +/// Roots that constrain results. Empty means "wherever the provider +/// indexes by default". Each path is an absolute filesystem path; matches +/// are by path-prefix (i.e. include subdirectories). +/// +/// MaxResults: +/// Cap on results yielded. null means no caller cap; providers +/// may still impose their own ceiling for safety. +/// +public sealed record SearchQuery( + string Text, + IReadOnlyList ScopePaths, + int? 
MaxResults = null) +{ + public static SearchQuery ForText(string text) => + new(text, Array.Empty()); +} diff --git a/src/Files.SearchAbstraction/SearchResult.cs b/src/Files.SearchAbstraction/SearchResult.cs new file mode 100644 index 000000000000..b0cdfcd9b09b --- /dev/null +++ b/src/Files.SearchAbstraction/SearchResult.cs @@ -0,0 +1,28 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +namespace Files.SearchAbstraction; + +/// +/// One matched file. The abstraction stays narrow on purpose — anything +/// the UI needs beyond these fields (icon, tags, etc.) is fetched lazily +/// from Path at render time, so the provider doesn't pay +/// for fields the caller may not use. +/// +/// Path: Absolute filesystem path. Acts as the result identity. +/// FileName: File name without directory. +/// SizeBytes: Reported file size, in bytes. +/// ModifiedUtc: +/// Last-modified time, UTC. A sentinel value (presumably DateTimeOffset.MinValue — TODO confirm) when +/// the provider couldn't read it (e.g. stale index entry, denied stat). +/// +/// Score: +/// Provider-defined relevance score; higher = more relevant. Not +/// comparable across providers. +/// +public sealed record SearchResult( + string Path, + string FileName, + long SizeBytes, + DateTimeOffset ModifiedUtc, + float Score); diff --git a/src/search-service/.gitignore b/src/search-service/.gitignore new file mode 100644 index 000000000000..c17da7f586f2 --- /dev/null +++ b/src/search-service/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock.bak diff --git a/src/search-service/Cargo.lock b/src/search-service/Cargo.lock new file mode 100644 index 000000000000..f4722aee7924 --- /dev/null +++ b/src/search-service/Cargo.lock @@ -0,0 +1,2501 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.3", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "bitpacking" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = 
"0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastdivide" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" + +[[package]] +name = "fastrand" +version = "2.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "files-search-service" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-stream", + "notify", + "parking_lot", + "prost", + "protoc-bin-vendored", + "rayon", + "tantivy", + "tokio", + "tokio-stream", + "tonic", + "tonic-build", + "tracing", + "tracing-subscriber", + "walkdir", + "windows", +] + +[[package]] +name = "filetime" +version = "0.2.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5b2eef6fafbf69f877e55509ce5b11a760690ac9700a2921be067aa6afaef6" +dependencies = [ + "cfg-if", + "libc", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "fs4" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +dependencies = [ + "rustix 0.38.44", + "windows-sys 0.52.0", +] + +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + 
+[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "h2" +version = "0.4.14" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "libc", + "pin-project-lite", + "socket2 0.6.3", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "inotify" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" +dependencies = [ + "bitflags 1.3.2", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "kqueue" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7b65860415f949f23fa882e669f2dbd4a0f0eeb1acdd56790b30494afd7da2f" +dependencies = [ + "bitflags 2.11.1", + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libm" 
+version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lz4_flex" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "measure_time" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +dependencies = [ + "instant", + "log", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "notify" +version = "6.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" +dependencies = [ + "bitflags 2.11.1", + "crossbeam-channel", + "filetime", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio 0.8.11", + "walkdir", + "windows-sys 0.48.0", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "oneshot" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" + +[[package]] +name = "ownedbytes" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.14.0", +] + +[[package]] +name = "pin-project" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf0d9e68100b3a7989b4901972f265cd542e560a3a8a724e1e20322f4d06ce9" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a990e22f43e84855daf260dded30524ef4a9021cc7541c26540500a50b624389" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +dependencies = [ + "heck", + "itertools 0.14.0", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + +[[package]] +name = "protoc-bin-vendored" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa" +dependencies = [ + "protoc-bin-vendored-linux-aarch_64", + "protoc-bin-vendored-linux-ppcle_64", + "protoc-bin-vendored-linux-s390_64", + "protoc-bin-vendored-linux-x86_32", + "protoc-bin-vendored-linux-x86_64", + "protoc-bin-vendored-macos-aarch_64", + "protoc-bin-vendored-macos-x86_64", + "protoc-bin-vendored-win32", +] + +[[package]] +name = "protoc-bin-vendored-linux-aarch_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c" + +[[package]] +name = "protoc-bin-vendored-linux-ppcle_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c" + +[[package]] +name = "protoc-bin-vendored-linux-s390_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0" + +[[package]] +name = "protoc-bin-vendored-linux-x86_32" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5" + +[[package]] +name = "protoc-bin-vendored-linux-x86_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78" + +[[package]] +name = "protoc-bin-vendored-macos-aarch_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092" + +[[package]] +name = "protoc-bin-vendored-macos-x86_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756" + +[[package]] +name = "protoc-bin-vendored-win32" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3" + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "rustc-hash" +version = 
"1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.11.1", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.1", + "errno", + "libc", + "linux-raw-sys 0.12.1", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +dependencies = [ + "serde", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = 
"socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + +[[package]] +name = "tantivy" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96599ea6fccd844fc833fed21d2eecac2e6a7c1afd9e044057391d78b1feb141" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "itertools 0.12.1", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "num_cpus", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + 
"tempfile", + "thiserror", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" +dependencies = [ + "downcast-rs", + "fastdivide", + "itertools 0.12.1", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" 
+dependencies = [ + "murmurhash32", + "rand_distr", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" +dependencies = [ + "serde", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix 1.1.4", + "windows-sys 0.61.2", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "bytes", + "libc", + "mio 1.2.0", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.3", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "socket2 0.5.10", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-build" +version = "0.12.3" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + +[[package]] +name = "uuid" +version = "1.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "serde_core", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = 
"wasm-bindgen-macro-support" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.14.0", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.1", + "hashbrown 0.15.5", + "indexmap 2.14.0", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = 
"0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + 
"windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" 
+dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.14.0", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.1", + "indexmap 2.14.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.14.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ 
+ "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/src/search-service/Cargo.toml b/src/search-service/Cargo.toml new file mode 100644 index 000000000000..e877dbfe3302 --- /dev/null +++ b/src/search-service/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "files-search-service" +version = "0.1.0" +edition = "2024" +publish = false + +[[bin]] +name = "files-search-service" +path = "src/main.rs" + +[dependencies] +tokio = { version = "1.40", features = ["macros", "rt-multi-thread", "signal", "net"] } +tokio-stream = "0.1" +tonic = "0.12" +prost = "0.13" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +walkdir = "2" +async-stream = "0.3" +tantivy = "0.22" +anyhow = "1" +rayon = "1.10" +notify = "6" +parking_lot = "0.12" + +[target.'cfg(windows)'.dependencies] +windows = { version = "0.58", features = [ + "Win32_Foundation", + "Win32_Storage_FileSystem", + "Win32_System_WindowsProgramming", + "Win32_System_Threading", + "Win32_System_Power", + "Win32_UI_Shell", +] } + +[build-dependencies] +tonic-build = "0.12" +protoc-bin-vendored = 
"3" + +[profile.release] +lto = "thin" +codegen-units = 1 diff --git a/src/search-service/build.rs b/src/search-service/build.rs new file mode 100644 index 000000000000..c2610227b021 --- /dev/null +++ b/src/search-service/build.rs @@ -0,0 +1,11 @@ +fn main() -> Result<(), Box> { + // Vendored protoc — keeps contributors from needing a system install. + // SAFETY: build scripts run single-threaded before any other code. + unsafe { std::env::set_var("PROTOC", protoc_bin_vendored::protoc_bin_path()?) }; + // Client is built so integration tests (and any in-process Rust + // consumer) can talk to the service. The C# client uses its own + // generated stubs, not these. + tonic_build::configure() + .compile_protos(&["proto/files_search.proto"], &["proto"])?; + Ok(()) +} diff --git a/src/search-service/proto/files_search.proto b/src/search-service/proto/files_search.proto new file mode 100644 index 000000000000..80ee56cb31f2 --- /dev/null +++ b/src/search-service/proto/files_search.proto @@ -0,0 +1,30 @@ +syntax = "proto3"; + +package files.search.v1; + +service FilesSearch { + rpc Health(HealthRequest) returns (HealthResponse); + rpc Search(SearchRequest) returns (stream SearchHit); +} + +message HealthRequest {} + +message HealthResponse { + string version = 1; + uint64 indexed_file_count = 2; + bool indexing = 3; +} + +message SearchRequest { + string query = 1; + uint32 max_results = 2; + repeated string scope_paths = 3; +} + +message SearchHit { + string path = 1; + string filename = 2; + uint64 size_bytes = 3; + int64 modified_unix_ms = 4; + float score = 5; +} diff --git a/src/search-service/rust-toolchain.toml b/src/search-service/rust-toolchain.toml new file mode 100644 index 000000000000..3417ee590341 --- /dev/null +++ b/src/search-service/rust-toolchain.toml @@ -0,0 +1,4 @@ +[toolchain] +channel = "1.95.0" +components = ["rustfmt", "clippy"] +profile = "minimal" diff --git a/src/search-service/src/enumerate.rs b/src/search-service/src/enumerate.rs new file mode 
100644 index 000000000000..b829ed890502 --- /dev/null +++ b/src/search-service/src/enumerate.rs @@ -0,0 +1,182 @@ +//! Parallel filesystem enumeration. +//! +//! On Windows: `FindFirstFileExW` with `FindExInfoBasic` (skips the 8.3 +//! short-name lookup that Win32 normally does) and `FIND_FIRST_EX_LARGE_FETCH` +//! (larger internal buffer per syscall). Subdirectory recursion is fanned +//! out via `rayon::scope` so multiple threads walk in parallel. Callers +//! receive entries through an `mpsc::Sender` so producer and consumer (the +//! Tantivy writer) can run concurrently. +//! +//! On non-Windows: falls back to `walkdir` so the crate still builds and +//! tests on Linux/macOS for development convenience. The Files product is +//! Windows-only and the bench gates are measured on Windows. + +use std::path::{Path, PathBuf}; +use std::sync::mpsc::Sender; + +#[derive(Debug)] +pub struct Entry { + pub path: PathBuf, + pub size_bytes: u64, + pub modified_unix_ms: i64, +} + +pub fn enumerate(root: &Path, send: Sender) { + #[cfg(windows)] + { + rayon::scope(|s| { + win::recurse(s, root.to_path_buf(), send); + }); + } + #[cfg(not(windows))] + { + fallback::walk(root, send); + } +} + +#[cfg(windows)] +mod win { + use super::Entry; + use std::os::windows::ffi::{OsStrExt, OsStringExt}; + use std::path::PathBuf; + use std::sync::mpsc::Sender; + + use windows::core::PCWSTR; + use windows::Win32::Foundation::HANDLE; + use windows::Win32::Storage::FileSystem::{ + FindClose, FindExInfoBasic, FindExSearchNameMatch, FindFirstFileExW, FindNextFileW, + FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_REPARSE_POINT, FIND_FIRST_EX_LARGE_FETCH, + WIN32_FIND_DATAW, + }; + + pub(super) fn recurse<'a>( + scope: &rayon::Scope<'a>, + dir: PathBuf, + send: Sender, + ) { + let pattern = wide_path(&dir.join("*")); + + let mut data: WIN32_FIND_DATAW = unsafe { std::mem::zeroed() }; + let handle = unsafe { + FindFirstFileExW( + PCWSTR(pattern.as_ptr()), + FindExInfoBasic, + &mut data as *mut _ as *mut _, 
+ FindExSearchNameMatch, + None, + FIND_FIRST_EX_LARGE_FETCH, + ) + }; + + let handle: HANDLE = match handle { + Ok(h) if !h.is_invalid() => h, + _ => return, + }; + + loop { + handle_entry(scope, &dir, &data, &send); + let next = unsafe { FindNextFileW(handle, &mut data) }; + if next.is_err() { + break; + } + } + + let _ = unsafe { FindClose(handle) }; + } + + fn handle_entry<'a>( + scope: &rayon::Scope<'a>, + dir: &PathBuf, + data: &WIN32_FIND_DATAW, + send: &Sender, + ) { + let name = wide_to_osstring(&data.cFileName); + let bytes = name.as_encoded_bytes(); + if bytes == b"." || bytes == b".." { + return; + } + let path = dir.join(&name); + let attrs = data.dwFileAttributes; + let is_dir = (attrs & FILE_ATTRIBUTE_DIRECTORY.0) != 0; + let is_reparse = (attrs & FILE_ATTRIBUTE_REPARSE_POINT.0) != 0; + + // Skip reparse points (junctions, symlinks) to match the previous + // `WalkDir::follow_links(false)` behavior. Without this, a symlink + // loop can spin the enumerator forever. + if is_reparse { + return; + } + + if is_dir { + let send2 = send.clone(); + scope.spawn(move |s| recurse(s, path, send2)); + return; + } + + let size_bytes = ((data.nFileSizeHigh as u64) << 32) | (data.nFileSizeLow as u64); + let modified_unix_ms = filetime_to_unix_ms( + data.ftLastWriteTime.dwHighDateTime, + data.ftLastWriteTime.dwLowDateTime, + ); + let _ = send.send(Entry { + path, + size_bytes, + modified_unix_ms, + }); + } + + fn wide_path(p: &std::path::Path) -> Vec { + let mut v: Vec = p.as_os_str().encode_wide().collect(); + v.push(0); + v + } + + fn wide_to_osstring(buf: &[u16]) -> std::ffi::OsString { + let len = buf.iter().position(|&c| c == 0).unwrap_or(buf.len()); + std::ffi::OsString::from_wide(&buf[..len]) + } + + /// FILETIME counts 100-nanosecond intervals since 1601-01-01 UTC. + /// Unix epoch is 11644473600 seconds later; convert to milliseconds. 
/// Convert a Windows `FILETIME`, passed as its two 32-bit halves, into
/// milliseconds relative to the Unix epoch.
///
/// `FILETIME` counts 100-nanosecond ticks since 1601-01-01 UTC; the Unix
/// epoch lies 11 644 473 600 seconds after that instant. Sub-millisecond
/// ticks are truncated. Timestamps before 1970 yield negative values.
fn filetime_to_unix_ms(high: u32, low: u32) -> i64 {
    /// Number of 100 ns ticks in one millisecond.
    const TICKS_PER_MS: u64 = 10_000;
    /// Milliseconds between 1601-01-01 and 1970-01-01.
    const UNIX_EPOCH_SINCE_1601_MS: i64 = 11_644_473_600_000;

    let ticks = (u64::from(high) << 32) | u64::from(low);
    // u64::MAX / 10_000 is far below i64::MAX, so the cast cannot wrap.
    let since_1601_ms = (ticks / TICKS_PER_MS) as i64;
    since_1601_ms - UNIX_EPOCH_SINCE_1601_MS
}
+ #[allow(dead_code)] + index: Index, + reader: IndexReader, + writer: Mutex, + fields: Fields, +} + +#[derive(Clone, Copy)] +struct Fields { + path: Field, + filename: Field, + size_bytes: Field, + modified_unix_ms: Field, +} + +#[derive(Debug, Clone)] +pub struct Hit { + pub path: String, + pub filename: String, + pub size_bytes: u64, + pub modified_unix_ms: i64, + pub score: f32, +} + +impl SearchIndex { + /// Open an existing index at `dir`, or build a fresh one by walking `root` + /// and indexing every file. v0 has no startup re-sync — if the directory + /// already contains an index, it's reused as-is regardless of how stale. + /// The watcher closes that gap *while the service is running*, but + /// changes that happen while the service is offline still need a manual + /// rebuild until step 5 of the roadmap (full restart-time reconcile). + pub fn open_or_build(dir: &Path, root: &Path) -> Result { + std::fs::create_dir_all(dir)?; + let schema = build_schema(); + let fields = fields_of(&schema); + + let exists = std::fs::read_dir(dir)?.next().is_some(); + let index = if exists { + info!(dir = %dir.display(), "opening existing index"); + Index::open_in_dir(dir)? + } else { + info!(dir = %dir.display(), "creating new index"); + Index::create_in_dir(dir, schema.clone())? + }; + + let writer = index.writer(50_000_000)?; + let reader = index + .reader_builder() + .reload_policy(ReloadPolicy::Manual) + .try_into()?; + + let this = Self { + index, + reader, + writer: Mutex::new(writer), + fields, + }; + + if !exists { + this.full_rebuild(root)?; + } + Ok(this) + } + + pub fn doc_count(&self) -> u64 { + self.reader.searcher().num_docs() + } + + /// Drops every document and re-walks `root` from scratch. Used by the + /// initial cold-start build and exposed for tests. 
+ pub fn full_rebuild(&self, root: &Path) -> Result<()> { + let started = Instant::now(); + { + let mut w = self.writer.lock(); + w.delete_all_documents()?; + + // Producer/consumer: a rayon-fanned-out FindFirstFileEx walk + // feeds entries through a channel; this thread drains and + // writes to Tantivy. Keeps disk reads and index inserts + // overlapped on different cores. + let (tx, rx) = mpsc::channel(); + let root_owned = root.to_path_buf(); + let producer = std::thread::spawn(move || { + enumerate::enumerate(&root_owned, tx); + }); + + let mut count = 0u64; + for entry in rx { + let Some(name) = entry.path.file_name().and_then(|s| s.to_str()) else { + continue; + }; + w.add_document(doc!( + self.fields.path => entry.path.to_string_lossy().into_owned(), + self.fields.filename => name.to_string(), + self.fields.size_bytes => entry.size_bytes, + self.fields.modified_unix_ms => entry.modified_unix_ms, + ))?; + count += 1; + } + let _ = producer.join(); + + w.commit()?; + info!( + root = %root.display(), + count, + elapsed_ms = started.elapsed().as_millis() as u64, + "index built" + ); + } + self.reader.reload()?; + Ok(()) + } + + /// Stat `path` and replace its index entry. Removes any existing doc + /// with the same path first so this is idempotent (good for both + /// CREATE and MODIFY events from the watcher). + pub fn upsert(&self, path: &Path) -> Result<()> { + let Some(name) = path.file_name().and_then(|s| s.to_str()) else { + return Ok(()); + }; + let metadata = match std::fs::metadata(path) { + Ok(m) => m, + // Race: file was deleted between the watcher event and the + // stat. Treat as a delete so the index doesn't end up with a + // stale doc. 
+ Err(_) => return self.delete(path), + }; + if !metadata.is_file() { + return Ok(()); + } + + let path_str = path.to_string_lossy().into_owned(); + let size_bytes = metadata.len(); + let modified_unix_ms = metadata + .modified() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map(|d| d.as_millis() as i64) + .unwrap_or(0); + + let w = self.writer.lock(); + w.delete_term(Term::from_field_text(self.fields.path, &path_str)); + w.add_document(doc!( + self.fields.path => path_str, + self.fields.filename => name.to_string(), + self.fields.size_bytes => size_bytes, + self.fields.modified_unix_ms => modified_unix_ms, + ))?; + Ok(()) + } + + /// Drop any doc whose path equals `path`. Path is a STRING field + /// (single token), so `delete_term` is exact-match. + pub fn delete(&self, path: &Path) -> Result<()> { + let path_str = path.to_string_lossy().into_owned(); + let w = self.writer.lock(); + w.delete_term(Term::from_field_text(self.fields.path, &path_str)); + Ok(()) + } + + /// Commit pending writes and refresh the reader. Watcher debounces + /// to keep this cost amortized across bursts of events. + pub fn commit(&self) -> Result<()> { + let mut w = self.writer.lock(); + w.commit()?; + drop(w); + self.reader.reload()?; + Ok(()) + } + + /// Per-token prefix query against the filename field, optionally filtered + /// to results whose path starts with one of `scope_paths`. Tokens are + /// lowercased; the schema's TEXT field uses the default tokenizer + /// (lowercase + word-boundary split), so `"alpha"` matches `alpha.txt` + /// (token `alpha`) and `ALPHABET.md` (token `alphabet`, prefix). True + /// mid-string substring (`"phab"` → `ALPHABET`) is a known gap; revisit + /// with an n-gram field if the correctness suite demands it. 
+ pub fn search( + &self, + query: &str, + max: usize, + scope_paths: &[PathBuf], + ) -> Result> { + let searcher = self.reader.searcher(); + let mut clauses: Vec<(Occur, Box)> = Vec::new(); + + for token in query.split_whitespace() { + let term = Term::from_field_text(self.fields.filename, &token.to_lowercase()); + clauses.push(( + Occur::Must, + Box::new(FuzzyTermQuery::new_prefix(term, 0, true)), + )); + } + + if !scope_paths.is_empty() { + let scope_clauses: Vec<(Occur, Box)> = scope_paths + .iter() + .map(|s| { + let term = + Term::from_field_text(self.fields.path, &s.to_string_lossy()); + let q: Box = + Box::new(FuzzyTermQuery::new_prefix(term, 0, true)); + (Occur::Should, q) + }) + .collect(); + clauses.push((Occur::Must, Box::new(BooleanQuery::new(scope_clauses)))); + } + + // Empty query with no scope = match nothing. The legacy provider + // returns nothing for a blank query too, so this matches semantics. + if clauses.is_empty() { + return Ok(Vec::new()); + } + + let bool_query = BooleanQuery::new(clauses); + let top = searcher.search(&bool_query, &TopDocs::with_limit(max.max(1)))?; + + let mut hits = Vec::with_capacity(top.len()); + for (score, addr) in top { + let doc: TantivyDocument = searcher.doc(addr)?; + hits.push(Hit { + path: get_text(&doc, self.fields.path).unwrap_or_default(), + filename: get_text(&doc, self.fields.filename).unwrap_or_default(), + size_bytes: get_u64(&doc, self.fields.size_bytes).unwrap_or(0), + modified_unix_ms: get_i64(&doc, self.fields.modified_unix_ms).unwrap_or(0), + score, + }); + } + Ok(hits) + } +} + +fn build_schema() -> Schema { + let mut sb = Schema::builder(); + sb.add_text_field("path", STRING | STORED); + sb.add_text_field("filename", TEXT | STORED); + sb.add_u64_field("size_bytes", STORED | INDEXED); + sb.add_i64_field("modified_unix_ms", STORED | INDEXED); + sb.build() +} + +fn fields_of(schema: &Schema) -> Fields { + Fields { + path: schema.get_field("path").unwrap(), + filename: 
/// Resolve the directory that holds the on-disk index.
///
/// Order of precedence:
/// 1. `FILES_SEARCH_INDEX_DIR`, returned verbatim (used by tests and dev
///    runs);
/// 2. `%LOCALAPPDATA%\Files\search-index`;
/// 3. `<system temp dir>\Files\search-index` when `LOCALAPPDATA` is unset
///    or unreadable.
pub fn default_index_dir() -> PathBuf {
    match std::env::var("FILES_SEARCH_INDEX_DIR") {
        Ok(overridden) => PathBuf::from(overridden),
        Err(_) => {
            let base = match std::env::var("LOCALAPPDATA") {
                Ok(local_app_data) => PathBuf::from(local_app_data),
                Err(_) => std::env::temp_dir(),
            };
            base.join("Files").join("search-index")
        }
    }
}
for Service { + async fn health( + &self, + _: Request, + ) -> Result, Status> { + Ok(Response::new(HealthResponse { + version: env!("CARGO_PKG_VERSION").to_string(), + indexed_file_count: self.index.doc_count(), + indexing: false, + })) + } + + type SearchStream = SearchStream; + + async fn search( + &self, + req: Request, + ) -> Result, Status> { + let req = req.into_inner(); + // 0 = "no caller cap." We still bound the collector to keep + // Tantivy's TopDocs from allocating a heap sized by usize::MAX + // (it multiplies internally and overflows). 10k is generous for + // a UI-driven search; the C# client typically asks for far less. + let max = match req.max_results { + 0 => 10_000, + n => n as usize, + }; + let scope: Vec = req.scope_paths.iter().map(PathBuf::from).collect(); + let index = Arc::clone(&self.index); + let query = req.query.clone(); + + info!(query = %req.query, max, scope = scope.len(), "search"); + + // Run the synchronous Tantivy search on a blocking task so the + // async runtime stays unblocked. For small corpora this is + // overkill, but it keeps the wiring honest as corpora grow. + let hits = tokio::task::spawn_blocking(move || index.search(&query, max, &scope)) + .await + .map_err(|e| Status::internal(format!("join error: {e}")))? + .map_err(|e| Status::internal(format!("search error: {e}")))?; + + let stream = async_stream::try_stream! 
{ + for hit in hits { + yield SearchHit { + path: hit.path, + filename: hit.filename, + size_bytes: hit.size_bytes, + modified_unix_ms: hit.modified_unix_ms, + score: hit.score, + }; + } + }; + + Ok(Response::new(Box::pin(stream))) + } +} diff --git a/src/search-service/src/main.rs b/src/search-service/src/main.rs new file mode 100644 index 000000000000..abefb56f9e3c --- /dev/null +++ b/src/search-service/src/main.rs @@ -0,0 +1,89 @@ +use std::path::PathBuf; +use std::sync::Arc; + +use tonic::transport::Server; +use tracing::info; + +use files_search_service::proto::files_search_server::FilesSearchServer; +use files_search_service::{ + apply_background_priority, default_index_dir, SearchIndex, Service, Throttle, Watcher, +}; + +fn resolve_root() -> PathBuf { + let raw = std::env::var("FILES_SEARCH_ROOT") + .or_else(|_| std::env::var("USERPROFILE")) + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(".")); + normalize_root(raw) +} + +/// Normalize the indexing root so stored paths are byte-identical to +/// what a Windows caller (e.g. `Path.GetFullPath` from C#) will pass in +/// `scope_paths`. Without this, mixed forward/backward slashes silently +/// break prefix scoping. +/// +/// Strategy: +/// 1. `fs::canonicalize` to resolve `..`, symlinks, and case. +/// 2. Strip the `\\?\` UNC prefix Windows adds, since C# callers +/// don't include it. +/// +/// Falls back to the input on canonicalize failure (path doesn't exist +/// yet, permissions, etc.). 
/// Normalize the indexing root so stored paths are byte-identical to what
/// a Windows caller (e.g. `Path.GetFullPath` from C#) will pass in
/// `scope_paths`.
///
/// Steps:
/// 1. `fs::canonicalize` the path.
/// 2. On Windows, undo the verbatim (`\\?\`) form that canonicalization
///    produces, since C# callers do not include it:
///    * `\\?\UNC\server\share\…` → `\\server\share\…`
///    * `\\?\C:\…`               → `C:\…`
///
/// Returns the input unchanged when canonicalization fails (path doesn't
/// exist yet, permissions, etc.).
fn normalize_root(p: PathBuf) -> PathBuf {
    let canonical = match std::fs::canonicalize(&p) {
        Ok(c) => c,
        Err(_) => return p,
    };
    #[cfg(windows)]
    {
        let s = canonical.to_string_lossy();
        // Must be tested before the plain `\\?\` case: stripping only the
        // four-character prefix from a verbatim UNC path would leave the
        // relative, meaningless `UNC\server\share`.
        if let Some(rest) = s.strip_prefix(r"\\?\UNC\") {
            return PathBuf::from(format!(r"\\{rest}"));
        }
        if let Some(rest) = s.strip_prefix(r"\\?\") {
            return PathBuf::from(rest);
        }
    }
    canonical
}
The Windows +//! scheduler then puts CPU and I/O behind any normal-priority process. +//! Same trick OneDrive uses for sync. +//! +//! 2. `Throttle` — a background poller (every 2s) that watches three +//! signals and exposes `should_pause()`: +//! - **On battery**: `GetSystemPowerStatus` `ACLineStatus == 0`. +//! - **Fullscreen / presentation**: `SHQueryUserNotificationState` +//! returns `QUNS_RUNNING_D3D_FULL_SCREEN` or `QUNS_PRESENTATION_MODE`. +//! - **High CPU load**: derived from `GetSystemTimes` over the +//! polling interval; threshold 70%. +//! +//! The watcher consults `should_pause()` before committing. Apply work +//! (in-memory adds/deletes) keeps happening so we don't drop events; +//! only commit + reader refresh is skipped, deferring fsync and +//! query-visibility until the system is idle again. + +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use tracing::{info, warn}; + +const POLL_INTERVAL: Duration = Duration::from_secs(2); +const HIGH_LOAD_THRESHOLD: f64 = 0.70; + +#[cfg(windows)] +pub fn apply_background_priority() { + use windows::Win32::System::Threading::{ + GetCurrentProcess, SetPriorityClass, PROCESS_MODE_BACKGROUND_BEGIN, + }; + unsafe { + match SetPriorityClass(GetCurrentProcess(), PROCESS_MODE_BACKGROUND_BEGIN) { + Ok(_) => info!("background priority enabled"), + Err(err) => { + warn!(%err, "SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN) failed") + } + } + } +} + +#[cfg(not(windows))] +pub fn apply_background_priority() {} + +pub struct Throttle { + paused: Arc, + stop: Arc, + poller: Option>, +} + +impl Throttle { + pub fn start() -> Self { + let paused = Arc::new(AtomicBool::new(false)); + let stop = Arc::new(AtomicBool::new(false)); + let p = Arc::clone(&paused); + let s = Arc::clone(&stop); + let poller = thread::spawn(move || poll_loop(p, s)); + Self { + paused, + stop, + poller: Some(poller), + } + } + + pub fn should_pause(&self) -> bool { + 
self.paused.load(Ordering::Acquire) + } +} + +impl Drop for Throttle { + fn drop(&mut self) { + self.stop.store(true, Ordering::Release); + if let Some(h) = self.poller.take() { + let _ = h.join(); + } + } +} + +fn poll_loop(paused: Arc, stop: Arc) { + let mut last_cpu = sample_cpu(); + while !stop.load(Ordering::Acquire) { + thread::sleep(POLL_INTERVAL); + let now_cpu = sample_cpu(); + let load = compute_load(&last_cpu, &now_cpu); + last_cpu = now_cpu; + + let on_battery = is_on_battery(); + let fullscreen = is_fullscreen(); + let high_load = load > HIGH_LOAD_THRESHOLD; + let new = on_battery || fullscreen || high_load; + let prev = paused.swap(new, Ordering::AcqRel); + if prev != new { + info!( + paused = new, + on_battery, + fullscreen, + high_load, + load, + "throttle state changed" + ); + } + } +} + +#[cfg(windows)] +fn is_on_battery() -> bool { + use windows::Win32::System::Power::{GetSystemPowerStatus, SYSTEM_POWER_STATUS}; + let mut status = SYSTEM_POWER_STATUS::default(); + unsafe { + if GetSystemPowerStatus(&mut status).is_err() { + return false; + } + } + // 0 = offline (battery), 1 = online, 255 = unknown. Conservative: + // only flag battery on a definite "offline". + status.ACLineStatus == 0 +} + +#[cfg(not(windows))] +fn is_on_battery() -> bool { + false +} + +#[cfg(windows)] +fn is_fullscreen() -> bool { + use windows::Win32::UI::Shell::{ + SHQueryUserNotificationState, QUNS_BUSY, QUNS_PRESENTATION_MODE, + QUNS_RUNNING_D3D_FULL_SCREEN, + }; + let _ = QUNS_BUSY; // silence unused; matches! below references the others. 
+ let state = match unsafe { SHQueryUserNotificationState() } { + Ok(s) => s, + Err(_) => return false, + }; + matches!( + state, + QUNS_RUNNING_D3D_FULL_SCREEN | QUNS_PRESENTATION_MODE + ) +} + +#[cfg(not(windows))] +fn is_fullscreen() -> bool { + false +} + +struct CpuSample { + #[cfg_attr(not(windows), allow(dead_code))] + idle: u64, + #[cfg_attr(not(windows), allow(dead_code))] + kernel: u64, + #[cfg_attr(not(windows), allow(dead_code))] + user: u64, +} + +#[cfg(windows)] +fn sample_cpu() -> CpuSample { + use windows::Win32::Foundation::FILETIME; + use windows::Win32::System::Threading::GetSystemTimes; + let mut idle = FILETIME::default(); + let mut kernel = FILETIME::default(); + let mut user = FILETIME::default(); + unsafe { + let _ = GetSystemTimes(Some(&mut idle), Some(&mut kernel), Some(&mut user)); + } + CpuSample { + idle: ft_to_u64(&idle), + kernel: ft_to_u64(&kernel), + user: ft_to_u64(&user), + } +} + +#[cfg(not(windows))] +fn sample_cpu() -> CpuSample { + CpuSample { idle: 0, kernel: 0, user: 0 } +} + +#[cfg(windows)] +fn ft_to_u64(ft: &windows::Win32::Foundation::FILETIME) -> u64 { + ((ft.dwHighDateTime as u64) << 32) | (ft.dwLowDateTime as u64) +} + +fn compute_load(prev: &CpuSample, now: &CpuSample) -> f64 { + // GetSystemTimes documents that lpKernelTime *includes* idle time. + // total = kernel + user, busy = total - idle. + let idle_d = now.idle.saturating_sub(prev.idle) as f64; + let kernel_d = now.kernel.saturating_sub(prev.kernel) as f64; + let user_d = now.user.saturating_sub(prev.user) as f64; + let total = kernel_d + user_d; + if total <= 0.0 { + return 0.0; + } + ((total - idle_d) / total).clamp(0.0, 1.0) +} diff --git a/src/search-service/src/watcher.rs b/src/search-service/src/watcher.rs new file mode 100644 index 000000000000..2fa4ff003c0d --- /dev/null +++ b/src/search-service/src/watcher.rs @@ -0,0 +1,179 @@ +//! Filesystem watcher. +//! +//! Wraps the `notify` crate (which uses `ReadDirectoryChangesW` with +//! 
overlapped I/O on Windows) and applies events to a `SearchIndex`. +//! Commits are debounced — bursts of file events (extracting an archive, +//! `git checkout`) collapse into a single Tantivy commit so we don't +//! pay segment + fsync overhead per file. + +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +use anyhow::Result; +use notify::event::{EventKind, ModifyKind, RenameMode}; +use notify::{RecommendedWatcher, RecursiveMode, Watcher as _}; +use parking_lot::Mutex; +use tracing::{debug, warn}; + +use crate::index::SearchIndex; +use crate::throttle::Throttle; + +/// Time we wait for a quiet window before committing a batch of edits. +/// 250ms is short enough that single-file changes feel instant in the UI +/// and long enough to coalesce a `git checkout` of hundreds of files. +const COMMIT_DEBOUNCE: Duration = Duration::from_millis(250); + +pub struct Watcher { + _watcher: RecommendedWatcher, + stop: Arc, + committer: Option>, +} + +impl Watcher { + pub fn start( + root: PathBuf, + index: Arc, + throttle: Option>, + ) -> Result { + let dirty = Arc::new(AtomicBool::new(false)); + let stop = Arc::new(AtomicBool::new(false)); + let last_event = Arc::new(Mutex::new(std::time::Instant::now())); + + let dirty_for_handler = Arc::clone(&dirty); + let last_event_for_handler = Arc::clone(&last_event); + let index_for_handler = Arc::clone(&index); + + let mut watcher = notify::recommended_watcher(move |res: notify::Result| { + match res { + Ok(event) => apply_event(&index_for_handler, &event), + Err(err) => warn!(%err, "watcher error"), + } + dirty_for_handler.store(true, Ordering::Release); + *last_event_for_handler.lock() = std::time::Instant::now(); + })?; + watcher.watch(&root, RecursiveMode::Recursive)?; + + let committer = { + let stop = Arc::clone(&stop); + let dirty = Arc::clone(&dirty); + let last_event = Arc::clone(&last_event); + let index = Arc::clone(&index); + 
thread::spawn(move || committer_loop(index, stop, dirty, last_event, throttle)) + }; + + Ok(Self { + _watcher: watcher, + stop, + committer: Some(committer), + }) + } + + /// Stops the committer thread and forces a final commit so any + /// pending events are durable. The notify watcher itself is dropped + /// here too, which cancels the underlying ReadDirectoryChangesW. + pub fn stop(mut self) { + self.stop.store(true, Ordering::Release); + if let Some(handle) = self.committer.take() { + let _ = handle.join(); + } + } +} + +impl Drop for Watcher { + fn drop(&mut self) { + self.stop.store(true, Ordering::Release); + if let Some(handle) = self.committer.take() { + let _ = handle.join(); + } + } +} + +fn apply_event(index: &SearchIndex, event: ¬ify::Event) { + match event.kind { + EventKind::Create(_) | EventKind::Modify(ModifyKind::Data(_)) + | EventKind::Modify(ModifyKind::Metadata(_)) + | EventKind::Modify(ModifyKind::Any) => { + for path in &event.paths { + if let Err(err) = index.upsert(path) { + warn!(path = %path.display(), %err, "upsert failed"); + } + } + } + EventKind::Remove(_) => { + for path in &event.paths { + if let Err(err) = index.delete(path) { + warn!(path = %path.display(), %err, "delete failed"); + } + } + } + EventKind::Modify(ModifyKind::Name(rename)) => { + // notify normalizes renames into either a single Both event + // (paths = [old, new]) or two events (From / To). Handle + // both shapes by deleting any path that no longer exists + // and upserting any path that does. 
+ match rename { + RenameMode::Both if event.paths.len() == 2 => { + let _ = index.delete(&event.paths[0]); + if let Err(err) = index.upsert(&event.paths[1]) { + warn!(path = %event.paths[1].display(), %err, "rename upsert failed"); + } + } + _ => { + for path in &event.paths { + if path.exists() { + let _ = index.upsert(path); + } else { + let _ = index.delete(path); + } + } + } + } + } + // Access events and other modify variants don't change index + // contents — ignore so we don't churn commits. + _ => {} + } + debug!(?event, "applied"); +} + +fn committer_loop( + index: Arc, + stop: Arc, + dirty: Arc, + last_event: Arc>, + throttle: Option>, +) { + while !stop.load(Ordering::Acquire) { + thread::sleep(Duration::from_millis(50)); + if !dirty.load(Ordering::Acquire) { + continue; + } + // Defer commit (and the reader reload that makes new docs + // visible) while the system is busy. Apply work already happened + // in the notify callback, so events aren't lost — they just + // accumulate in the writer's in-memory buffer until we catch up. + if throttle.as_ref().is_some_and(|t| t.should_pause()) { + continue; + } + let elapsed = last_event.lock().elapsed(); + if elapsed < COMMIT_DEBOUNCE { + continue; + } + dirty.store(false, Ordering::Release); + if let Err(err) = index.commit() { + warn!(%err, "watcher commit failed"); + dirty.store(true, Ordering::Release); + } + } + + // Final commit on shutdown — never lose a pending event, even if we + // were paused when shutdown was requested. 
+ if dirty.load(Ordering::Acquire) { + if let Err(err) = index.commit() { + warn!(%err, "final watcher commit failed"); + } + } +} diff --git a/src/search-service/tests/enumerate.rs b/src/search-service/tests/enumerate.rs new file mode 100644 index 000000000000..070f058f9a37 --- /dev/null +++ b/src/search-service/tests/enumerate.rs @@ -0,0 +1,64 @@ +use std::path::PathBuf; +use std::sync::mpsc; + +use files_search_service::enumerate; + +#[test] +fn enumerator_finds_all_files_recursively() { + let dir = tempdir(); + let sub_a = dir.join("a"); + let sub_b = dir.join("b").join("nested"); + std::fs::create_dir_all(&sub_a).unwrap(); + std::fs::create_dir_all(&sub_b).unwrap(); + std::fs::write(dir.join("top.txt"), b"x").unwrap(); + std::fs::write(sub_a.join("a1.txt"), b"x").unwrap(); + std::fs::write(sub_a.join("a2.txt"), b"x").unwrap(); + std::fs::write(sub_b.join("deep.txt"), b"x").unwrap(); + + let (tx, rx) = mpsc::channel(); + enumerate::enumerate(&dir, tx); + let mut names: Vec = rx + .into_iter() + .map(|e| e.path.file_name().unwrap().to_string_lossy().into_owned()) + .collect(); + names.sort(); + assert_eq!( + names, + vec!["a1.txt", "a2.txt", "deep.txt", "top.txt"] + ); +} + +#[test] +fn enumerator_reports_size_and_modified() { + let dir = tempdir(); + std::fs::write(dir.join("hello.txt"), b"hello world").unwrap(); + + let (tx, rx) = mpsc::channel(); + enumerate::enumerate(&dir, tx); + let entries: Vec<_> = rx.into_iter().collect(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].size_bytes, b"hello world".len() as u64); + // Sanity check: modified time is in the last 60 seconds and after epoch. 
+ let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() as i64; + let delta = (now_ms - entries[0].modified_unix_ms).abs(); + assert!( + delta < 60_000, + "modified_unix_ms drift {delta}ms is implausibly large" + ); +} + +fn tempdir() -> PathBuf { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + let n = COUNTER.fetch_add(1, Ordering::Relaxed); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let dir = std::env::temp_dir().join(format!("files-search-enum-{nanos}-{n}")); + std::fs::create_dir_all(&dir).unwrap(); + dir +} diff --git a/src/search-service/tests/search_smoke.rs b/src/search-service/tests/search_smoke.rs new file mode 100644 index 000000000000..4815dfbc038c --- /dev/null +++ b/src/search-service/tests/search_smoke.rs @@ -0,0 +1,224 @@ +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use files_search_service::proto::files_search_client::FilesSearchClient; +use files_search_service::proto::files_search_server::FilesSearchServer; +use files_search_service::proto::{HealthRequest, SearchRequest}; +use files_search_service::{SearchIndex, Service}; +use tokio::net::TcpListener; +use tokio::sync::oneshot; +use tokio_stream::wrappers::TcpListenerStream; +use tokio_stream::StreamExt; +use tonic::transport::{Endpoint, Server}; + +struct ServiceHandle { + url: String, + shutdown: Option>, + task: Option>, +} + +impl ServiceHandle { + async fn stop(mut self) { + if let Some(tx) = self.shutdown.take() { + let _ = tx.send(()); + } + if let Some(task) = self.task.take() { + let _ = task.await; + } + } +} + +async fn spawn_service(root: PathBuf, index_dir: PathBuf) -> ServiceHandle { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + let index = Arc::new( + tokio::task::spawn_blocking(move || 
SearchIndex::open_or_build(&index_dir, &root)) + .await + .unwrap() + .unwrap(), + ); + let (tx, rx) = oneshot::channel(); + let task = tokio::spawn(async move { + Server::builder() + .add_service(FilesSearchServer::new(Service::new(index))) + .serve_with_incoming_shutdown(TcpListenerStream::new(listener), async { + let _ = rx.await; + }) + .await + .unwrap(); + }); + tokio::time::sleep(Duration::from_millis(50)).await; + ServiceHandle { + url: format!("http://{addr}"), + shutdown: Some(tx), + task: Some(task), + } +} + +async fn connect(url: String) -> FilesSearchClient { + let channel = Endpoint::from_shared(url).unwrap().connect().await.unwrap(); + FilesSearchClient::new(channel) +} + +#[tokio::test] +async fn health_reports_indexed_count() { + let (root, index_dir) = tempdirs(); + std::fs::write(root.join("alpha.txt"), b"a").unwrap(); + std::fs::write(root.join("beta.txt"), b"b").unwrap(); + + let svc = spawn_service(root, index_dir).await; + let mut client = connect(svc.url.clone()).await; + let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); + assert_eq!(resp.indexed_file_count, 2); + assert!(!resp.indexing); + assert!(!resp.version.is_empty()); + svc.stop().await; +} + +#[tokio::test] +async fn search_returns_substring_matches() { + let (root, index_dir) = tempdirs(); + std::fs::write(root.join("alpha.txt"), b"a").unwrap(); + std::fs::write(root.join("beta.txt"), b"b").unwrap(); + std::fs::write(root.join("ALPHABET.md"), b"c").unwrap(); + + let svc = spawn_service(root, index_dir).await; + let mut client = connect(svc.url.clone()).await; + let mut stream = client + .search(SearchRequest { + query: "alpha".into(), + max_results: 0, + scope_paths: vec![], + }) + .await + .unwrap() + .into_inner(); + + let mut names = Vec::new(); + while let Some(hit) = stream.next().await { + names.push(hit.unwrap().filename); + } + names.sort(); + assert_eq!(names, vec!["ALPHABET.md", "alpha.txt"]); + svc.stop().await; +} + +#[tokio::test] +async fn 
search_honors_max_results() { + let (root, index_dir) = tempdirs(); + for i in 0..10 { + std::fs::write(root.join(format!("hit_{i}.txt")), b"x").unwrap(); + } + + let svc = spawn_service(root, index_dir).await; + let mut client = connect(svc.url.clone()).await; + let mut stream = client + .search(SearchRequest { + query: "hit".into(), + max_results: 3, + scope_paths: vec![], + }) + .await + .unwrap() + .into_inner(); + + let mut count = 0; + while let Some(hit) = stream.next().await { + hit.unwrap(); + count += 1; + } + assert_eq!(count, 3); + svc.stop().await; +} + +#[tokio::test] +async fn search_scope_filters_paths() { + let (root, index_dir) = tempdirs(); + let inside = root.join("inside"); + let outside = root.join("outside"); + std::fs::create_dir(&inside).unwrap(); + std::fs::create_dir(&outside).unwrap(); + std::fs::write(inside.join("match.txt"), b"x").unwrap(); + std::fs::write(outside.join("match.txt"), b"x").unwrap(); + + let svc = spawn_service(root, index_dir).await; + let mut client = connect(svc.url.clone()).await; + let mut stream = client + .search(SearchRequest { + query: "match".into(), + max_results: 0, + scope_paths: vec![inside.to_string_lossy().into_owned()], + }) + .await + .unwrap() + .into_inner(); + + let mut paths = Vec::new(); + while let Some(hit) = stream.next().await { + paths.push(hit.unwrap().path); + } + assert_eq!(paths.len(), 1); + assert!(paths[0].contains("inside")); + svc.stop().await; +} + +#[tokio::test] +async fn index_persists_across_restarts() { + let (root, index_dir) = tempdirs(); + std::fs::write(root.join("persistent.txt"), b"x").unwrap(); + + // First start: builds index from root. + { + let svc = spawn_service(root.clone(), index_dir.clone()).await; + let mut client = connect(svc.url.clone()).await; + let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); + assert_eq!(resp.indexed_file_count, 1); + svc.stop().await; // Releases the Tantivy writer lock. 
+ } + + // Second start: deletes the source root, opens existing index. + // Expectation: docs survive because the index was committed to disk. + std::fs::remove_dir_all(&root).unwrap(); + let empty_root = root.clone(); + std::fs::create_dir_all(&empty_root).unwrap(); + + let svc = spawn_service(empty_root, index_dir).await; + let mut client = connect(svc.url.clone()).await; + let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); + assert_eq!(resp.indexed_file_count, 1); + + let mut stream = client + .search(SearchRequest { + query: "persistent".into(), + max_results: 0, + scope_paths: vec![], + }) + .await + .unwrap() + .into_inner(); + let mut found = false; + while let Some(hit) = stream.next().await { + if hit.unwrap().filename == "persistent.txt" { + found = true; + } + } + assert!(found, "persisted doc should survive a restart"); + svc.stop().await; +} + +fn tempdirs() -> (PathBuf, PathBuf) { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + let n = COUNTER.fetch_add(1, Ordering::Relaxed); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + let base = std::env::temp_dir().join(format!("files-search-test-{nanos}-{n}")); + let root = base.join("root"); + let index_dir = base.join("index"); + std::fs::create_dir_all(&root).unwrap(); + std::fs::create_dir_all(&index_dir).unwrap(); + (root, index_dir) +} diff --git a/src/search-service/tests/throttle.rs b/src/search-service/tests/throttle.rs new file mode 100644 index 000000000000..48d3736f28cd --- /dev/null +++ b/src/search-service/tests/throttle.rs @@ -0,0 +1,19 @@ +use std::time::Duration; + +use files_search_service::Throttle; + +/// `apply_background_priority` is best-tested by observing the running +/// process's priority class with an external tool (Process Explorer); +/// here we just confirm that startup + drop don't panic and that +/// `should_pause()` produces a well-defined 
/// boolean. The behavior tests
/// (verifying we actually pause on battery / fullscreen / load) live in
/// `tests/Files.Search.Resource/` per CLAUDE.md.
#[test]
fn throttle_starts_and_stops_cleanly() {
    let t = Throttle::start();
    let _ = t.should_pause();
    // The poller only ticks every 2s, so this short sleep does NOT wait
    // for a tick; it just lets the poller thread get going so the second
    // read and the drop happen while that thread is alive.
    std::thread::sleep(Duration::from_millis(100));
    let _ = t.should_pause();
    drop(t);
}
diff --git a/src/search-service/tests/watcher.rs b/src/search-service/tests/watcher.rs
new file mode 100644
index 000000000000..8ac75e62e3b5
--- /dev/null
+++ b/src/search-service/tests/watcher.rs
@@ -0,0 +1,110 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, Instant};

use files_search_service::{SearchIndex, Watcher};

/// Polls `cond` every 25ms for up to `timeout`, returning true if it
/// ever returned true. Filesystem watchers are inherently async so we
/// can't assert synchronously after writing a file.
fn wait_until<F: FnMut() -> bool>(timeout: Duration, mut cond: F) -> bool {
    let start = Instant::now();
    while start.elapsed() < timeout {
        if cond() {
            return true;
        }
        std::thread::sleep(Duration::from_millis(25));
    }
    false
}

/// Number of hits for `query` (capped at 100); a search error counts as 0.
fn search_count(index: &SearchIndex, query: &str) -> usize {
    index
        .search(query, 100, &[])
        .map(|hits| hits.len())
        .unwrap_or(0)
}

#[test]
fn watcher_indexes_new_files() {
    let (root, index_dir) = tempdirs();
    let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap());
    let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap();

    std::fs::write(root.join("brandnew.txt"), b"x").unwrap();
    let saw = wait_until(Duration::from_secs(5), || {
        search_count(&index, "brandnew") > 0
    });
    watcher.stop();
    assert!(saw, "watcher should index new files within 5s");
}

#[test]
fn watcher_removes_deleted_files() {
    let (root, index_dir) = tempdirs();
    let target = root.join("doomed.txt");
    std::fs::write(&target, b"x").unwrap();

    let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap());
    assert_eq!(search_count(&index, "doomed"), 1);

    let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap();
    std::fs::remove_file(&target).unwrap();

    let gone = wait_until(Duration::from_secs(5), || {
        search_count(&index, "doomed") == 0
    });
    watcher.stop();
    assert!(gone, "watcher should remove deleted files within 5s");
}

#[test]
fn watcher_picks_up_files_in_subdirs() {
    let (root, index_dir) = tempdirs();
    let sub = root.join("nested").join("deep");
    std::fs::create_dir_all(&sub).unwrap();

    let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap());
    let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap();

    std::fs::write(sub.join("buried.txt"), b"x").unwrap();
    let saw = wait_until(Duration::from_secs(5), || {
        search_count(&index, "buried") > 0
    });
    watcher.stop();
    assert!(saw, "watcher should follow subdirectories");
}

#[test]
fn watcher_handles_burst_with_single_commit_window() {
    let (root, index_dir) = tempdirs();
    let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap());
    let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap();

    // Simulate a `git checkout`-style burst: 50 files at once.
    for i in 0..50 {
        std::fs::write(root.join(format!("burst_{i:02}.txt")), b"x").unwrap();
    }

    let saw_all = wait_until(Duration::from_secs(10), || {
        search_count(&index, "burst") == 50
    });
    watcher.stop();
    assert!(saw_all, "all 50 burst files should be indexed");
}

/// Creates a fresh `(root, index_dir)` pair under the system temp dir.
/// The directories are not removed afterwards.
fn tempdirs() -> (PathBuf, PathBuf) {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let base = std::env::temp_dir().join(format!("files-search-watch-{nanos}-{n}"));
    let root = base.join("root");
    let index_dir = base.join("index");
    std::fs::create_dir_all(&root).unwrap();
    std::fs::create_dir_all(&index_dir).unwrap();
    (root, index_dir)
}
diff --git a/tests/Files.Search.Bench/Files.Search.Bench.csproj b/tests/Files.Search.Bench/Files.Search.Bench.csproj
new file mode 100644
index 000000000000..7437dee2f7e8
--- /dev/null
+++ b/tests/Files.Search.Bench/Files.Search.Bench.csproj
@@ -0,0 +1,18 @@
+ + + Exe + net10.0-windows10.0.26100.0 + enable + enable + latest + Files.Search.Bench + files-bench + false + + + + + + + +
diff --git a/tests/Files.Search.Bench/Program.cs b/tests/Files.Search.Bench/Program.cs
new file mode 100644
index 000000000000..0b64c0107e7d
--- /dev/null
+++ b/tests/Files.Search.Bench/Program.cs
@@ -0,0 +1,447 @@
using System.Diagnostics;
using System.Text.Json;
using System.Text.Json.Serialization;
using Files.IndexedSearch.Client;
using Files.LegacySearch;
using Files.SearchAbstraction;

namespace Files.Search.Bench;

// Entry point of the bench harness: loads a corpus manifest, runs the
// generated query set against one provider, and writes the timings as JSON.
internal static class Program
{
    // Returns 0 on success, 1 on bad arguments or any failure (the
    // exception message is printed to stderr).
    private static async Task<int> Main(string[] args)
    {
        try
        {
            var opts = CliOptions.Parse(args);
            if (opts is null) return 1;

            var manifest = LoadManifest(opts.CorpusDir);
            var queries = QueryGen.Build(manifest);
            Console.WriteLine($"corpus: {manifest.Preset} ({manifest.FileCount:N0} files), provider: {opts.Provider}, queries: {queries.Count}");

            IBenchProvider provider = opts.Provider switch
            {
                "naive-scan" => new NaiveScanProvider(opts.CorpusDir),
                "legacy" => new SearchProviderAdapter(new LegacySearchProvider(), opts.CorpusDir),
                "indexed" => new SearchProviderAdapter(new IndexedSearchProvider(), opts.CorpusDir),
                "windows-aqs" => throw new NotImplementedException(
                    "windows-aqs provider requires the corpus to be added to Windows Search Indexer first. " +
                    "Tracked in docs/decisions/0001-bench-stack.md."),
                _ => throw new ArgumentException($"unknown provider: {opts.Provider}")
            };

            // Warm-up: run one throwaway query so JIT, gRPC channel
            // setup, Tantivy mmap pages, and any first-call penalty
            // don't get baked into the first measured timing.
            if (queries.Count > 0)
            {
                Console.Write(" warm-up...");
                await foreach (var _ in provider.SearchAsync(queries[0])) { }
                Console.WriteLine(" done");
            }

            var results = new List<QueryResult>();
            int i = 0;
            foreach (var q in queries)
            {
                var r = await RunQueryAsync(provider, q);
                results.Add(r);
                i++;
                // Progress line every 16 queries.
                if ((i & 0xF) == 0) Console.Write($"\r {i}/{queries.Count}");
            }
            Console.WriteLine($"\r {queries.Count}/{queries.Count}");

            var run = new BenchRun
            {
                RunId = DateTime.UtcNow.ToString("yyyy-MM-ddTHH-mm-ssZ"),
                Provider = opts.Provider,
                Corpus = new CorpusInfo
                {
                    Name = manifest.Preset,
                    Files = manifest.FileCount,
                    Bytes = manifest.TotalBytes,
                    Seed = manifest.Seed,
                },
                Machine = MachineInfo.Capture(),
                Queries = results,
                Aggregates = Aggregates.From(results),
            };

            Directory.CreateDirectory(opts.OutDir);
            var path = Path.Combine(opts.OutDir, $"{run.RunId}.json");
            File.WriteAllText(path, JsonSerializer.Serialize(run, BenchJson.Default.BenchRun));
            Console.WriteLine($"wrote: {path}");

            // Quick console summary. Every query is included: the timings
            // come from Stopwatch.ElapsedMilliseconds, so a query that
            // finishes in under 1ms records 0 — dropping zeros would
            // silently skew the percentiles upward for the fastest provider.
            var times = results.Select(r => r.TimeToCompleteMs).Order().ToList();
            if (times.Count > 0)
            {
                Console.WriteLine($" p50 complete: {times[times.Count / 2]:F1}ms p99: {times[(int)(times.Count * 0.99)]:F1}ms");
            }
            return 0;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"error: {ex.Message}");
            return 1;
        }
    }

    // Runs one query to completion and records time-to-first-result,
    // total time, hit count and the managed-heap delta.
    private static async Task<QueryResult> RunQueryAsync(IBenchProvider provider, Query q)
    {
        // No per-query warm-up: Main runs a single throwaway query before
        // the measured loop, and nothing is repeated per query.
        long ramBefore = GC.GetTotalMemory(false);
        var sw = Stopwatch.StartNew();
        long firstResultMs = -1;
        int count = 0;
        await foreach (var _ in provider.SearchAsync(q))
        {
            if (firstResultMs < 0) firstResultMs = sw.ElapsedMilliseconds;
            count++;
        }
        sw.Stop();
        long ramAfter = GC.GetTotalMemory(false);
        return new QueryResult
        {
            Id = q.Id,
            Text = q.Text,
            Class = q.Class,
            // With zero hits there is no first result; report the total time.
            TimeToFirstResultMs = firstResultMs < 0 ? sw.ElapsedMilliseconds : firstResultMs,
            TimeToCompleteMs = sw.ElapsedMilliseconds,
            ResultCount = count,
            // Before/after difference of the managed heap, floored at 0 —
            // an approximation, not a sampled peak.
            PeakRamMB = Math.Max(0, (ramAfter - ramBefore) / (1024.0 * 1024)),
            ExpectedMin = q.ExpectedMin,
            ExpectedMax = q.ExpectedMax,
        };
    }

    // Reads <corpusDir>/manifest.json; throws if it is missing or empty.
    private static CorpusManifest LoadManifest(string corpusDir)
    {
        var path = Path.Combine(corpusDir, "manifest.json");
        if (!File.Exists(path)) throw new FileNotFoundException($"manifest.json not found in {corpusDir} — run files-corpora first");
        return JsonSerializer.Deserialize(File.ReadAllText(path), BenchJson.Default.CorpusManifest)
            ?? throw new InvalidDataException("manifest.json is empty/invalid");
    }
}

// Parsed command line. CorpusDir and OutDir are absolute paths.
internal sealed class CliOptions
{
    public required string CorpusDir { get; init; }
    public required string OutDir { get; init; }
    public required string Provider { get; init; }

    // Returns null after printing help (no args, -h/--help) or when
    // --corpus is missing. Throws ArgumentException for an unknown flag
    // or a flag given without a value.
    public static CliOptions? Parse(string[] args)
    {
        if (args.Length == 0 || args.Contains("-h") || args.Contains("--help"))
        {
            Console.WriteLine("""
                files-bench --corpus <dir> [--out <dir>] [--provider naive-scan|legacy|indexed]

                Runs ~200 queries against a corpus and writes bench-results/<run-id>.json.

                Providers:
                  naive-scan — top-down filesystem walk (strawman baseline).
                  legacy — Windows.Storage.Search / AQS (the upstream path).
                  indexed — Rust files-search-service over gRPC. Requires the
                    service to be running and indexing the corpus root
                    (set FILES_SEARCH_ROOT before launching it).
                """);
            return null;
        }

        // Consumes the value following the flag at args[i]. A bare
        // args[++i] would surface a trailing flag as "Index was outside
        // the bounds of the array", which tells the user nothing.
        string ValueOf(ref int i)
        {
            if (i + 1 >= args.Length)
                throw new ArgumentException($"missing value for {args[i]}");
            return args[++i];
        }

        string? corpus = null, outDir = "bench-results", provider = "naive-scan";
        for (int i = 0; i < args.Length; i++)
        {
            switch (args[i])
            {
                case "--corpus": corpus = ValueOf(ref i); break;
                case "--out": outDir = ValueOf(ref i); break;
                case "--provider": provider = ValueOf(ref i); break;
                default: throw new ArgumentException($"unknown arg: {args[i]}");
            }
        }
        if (corpus is null) { Console.Error.WriteLine("error: --corpus is required"); return null; }
        return new CliOptions { CorpusDir = Path.GetFullPath(corpus), OutDir = Path.GetFullPath(outDir!), Provider = provider! };
    }
}

// What the bench needs from a search backend: a stream of result paths.
internal interface IBenchProvider
{
    IAsyncEnumerable<string> SearchAsync(Query q);
}

// Adapts an ISearchProvider (the production interface) to the bench's
// IBenchProvider (which only needs path strings). Hands the corpus root
// in as the single scope so each provider searches the same tree even
// when its default scope (e.g. %USERPROFILE% for Indexed) would differ.
internal sealed class SearchProviderAdapter(ISearchProvider inner, string corpusRoot) : IBenchProvider
{
    private readonly IReadOnlyList<string> _scope = new[] { corpusRoot };

    public async IAsyncEnumerable<string> SearchAsync(Query q)
    {
        var sq = new SearchQuery(q.Text, _scope);
        await foreach (var hit in inner.SearchAsync(sq))
            yield return hit.Path;
    }
}

// Walks the tree top-down, matching name patterns.
// Represents the "unindexed folder" case.
internal sealed class NaiveScanProvider(string root) : IBenchProvider
{
    // Enumerates every file under the root and yields those the query's
    // predicate accepts.
    public async IAsyncEnumerable<string> SearchAsync(Query q)
    {
        await Task.Yield();
        var opts = new EnumerationOptions
        {
            RecurseSubdirectories = true,
            IgnoreInaccessible = true,
            AttributesToSkip = 0,
        };
        // Translate the query to a glob/predicate. For content/path-scoped, we still scan filenames first
        // then peek into content where needed — same work the unindexed legacy fallback does.
        Func<string, bool> pred = QueryMatcher.Build(q);
        foreach (var path in Directory.EnumerateFiles(root, "*", opts))
        {
            if (pred(path)) yield return path;
        }
    }
}

// Turns a Query into a predicate over full file paths. All name
// comparisons are ordinal and case-insensitive.
internal static class QueryMatcher
{
    // Unknown classes fall back to a filename-substring match.
    public static Func<string, bool> Build(Query q) => q.Class switch
    {
        "exact" => p => string.Equals(Path.GetFileNameWithoutExtension(p), q.Text, StringComparison.OrdinalIgnoreCase),
        "glob" => MakeGlob(q.Text),
        "substring" => MakeNameContains(q.Text),
        "ext+substring" => MakeExtSubstring(q.Text),
        "content" => MakeContentContains(q.Text),
        _ => MakeNameContains(q.Text),
    };

    private static Func<string, bool> MakeNameContains(string text) =>
        p => Path.GetFileName(p).Contains(text, StringComparison.OrdinalIgnoreCase);

    private static Func<string, bool> MakeGlob(string pattern)
    {
        // Tiny glob: '*' wildcard only, matched against filename.
        var parts = pattern.Split('*');
        // Anchoring depends only on the pattern, so decide it once here
        // rather than once per file.
        bool anchoredStart = !pattern.StartsWith('*');
        bool anchoredEnd = !pattern.EndsWith('*') && parts.Length > 0 && parts[^1].Length > 0;
        return p =>
        {
            var name = Path.GetFileName(p);
            int idx = 0;
            for (int i = 0; i < parts.Length; i++)
            {
                if (parts[i].Length == 0) continue;
                // Each literal must occur, in order, after the previous one.
                int found = name.IndexOf(parts[i], idx, StringComparison.OrdinalIgnoreCase);
                if (found < 0) return false;
                if (i == 0 && anchoredStart && found != 0) return false;
                idx = found + parts[i].Length;
            }
            if (anchoredEnd && !name.EndsWith(parts[^1], StringComparison.OrdinalIgnoreCase)) return false;
            return true;
        };
    }

    private static Func<string, bool> MakeExtSubstring(string spec)
    {
        // Format: "ext|substring", e.g., ".docx|report"
        var parts = spec.Split('|', 2);
        var ext = parts[0]; var sub = parts.Length > 1 ? parts[1] : "";
        return p => string.Equals(Path.GetExtension(p), ext, StringComparison.OrdinalIgnoreCase)
            && Path.GetFileName(p).Contains(sub, StringComparison.OrdinalIgnoreCase);
    }

    // Encodes the needle once per query; the predicate runs once per file.
    private static Func<string, bool> MakeContentContains(string needle)
    {
        var needleBytes = System.Text.Encoding.UTF8.GetBytes(needle);
        return p => ContentContains(p, needleBytes);
    }

    // True when the file's bytes contain needleBytes. Empty files, files
    // over 4 MiB, and any I/O failure yield false (best-effort scan).
    private static bool ContentContains(string path, byte[] needleBytes)
    {
        try
        {
            // Only inspect files small enough to scan cheaply; mirrors legacy heuristic.
            var info = new FileInfo(path);
            if (info.Length == 0 || info.Length > 4 * 1024 * 1024) return false;
            // An empty needle occurs in any content. Handle it here:
            // below it would ask for a carry buffer of length -1.
            if (needleBytes.Length == 0) return true;
            // ASCII-fast path is enough — needle tokens are ASCII by construction.
            using var fs = File.OpenRead(path);
            // A match can straddle two reads by at most needle-1 bytes, so
            // that many trailing bytes are carried into the next window.
            int overlap = needleBytes.Length - 1;
            byte[] buf = new byte[8192];
            byte[] joined = new byte[8192 + overlap];
            byte[] carry = new byte[overlap];
            int carryLen = 0;
            int read;
            while ((read = fs.Read(buf, 0, buf.Length)) > 0)
            {
                int windowLen;
                byte[] window;
                if (carryLen > 0)
                {
                    Buffer.BlockCopy(carry, 0, joined, 0, carryLen);
                    Buffer.BlockCopy(buf, 0, joined, carryLen, read);
                    window = joined; windowLen = carryLen + read;
                }
                else { window = buf; windowLen = read; }

                if (window.AsSpan(0, windowLen).IndexOf(needleBytes) >= 0) return true;

                int keep = Math.Min(overlap, windowLen);
                Buffer.BlockCopy(window, windowLen - keep, carry, 0, keep);
                carryLen = keep;
            }
            return false;
        }
        catch { return false; }
    }
}

// One bench query. Class selects the matcher (see QueryMatcher.Build);
// ExpectedMin/ExpectedMax bound the plausible hit count and default to
// "anything".
internal sealed class Query
{
    public required string Id { get; init; }
    public required string Text { get; init; }
    public required string Class { get; init; }
    public int ExpectedMin { get; init; } = 0;
    public int ExpectedMax { get; init; } = int.MaxValue;
}

internal static class QueryGen
{
    // Generates ~200 queries deterministic in the manifest's seed, mixing classes from CLAUDE.md.
    public static List<Query> Build(CorpusManifest m)
    {
        var qs = new List<Query>();

        // exact: synthesize plausible names; expected count usually 0 (sentinel), proves "no false positives".
        foreach (var w in new[] { "report_42", "alpha_999", "missingfile" })
            qs.Add(new Query { Id = $"exact-{w}", Text = w, Class = "exact" });

        // glob: extension and prefix patterns.
        foreach (var ext in new[] { ".txt", ".md", ".docx", ".pdf", ".jpg", ".cs", ".log", ".zip" })
            qs.Add(new Query { Id = $"glob-ext{ext}", Text = $"*{ext}", Class = "glob" });
        foreach (var prefix in new[] { "report*", "summary*", "draft*", "data*" })
            qs.Add(new Query { Id = $"glob-{prefix}", Text = prefix, Class = "glob" });

        // substring: common name fragments.
        foreach (var s in new[] { "report", "summary", "config", "build", "alpha", "north", "blue", "internal", "annual" })
            qs.Add(new Query { Id = $"substr-{s}", Text = s, Class = "substring" });

        // ext+substring combos.
        foreach (var combo in new[] { ".docx|report", ".pdf|summary", ".cs|config", ".log|build" })
            qs.Add(new Query { Id = $"extsub-{combo}", Text = combo, Class = "ext+substring" });

        // content: known needle tokens with deterministic counts from the manifest.
        foreach (var (token, count) in m.NeedleCounts)
        {
            qs.Add(new Query
            {
                Id = $"content-{token}",
                Text = token,
                Class = "content",
                // Expect exactly `count` files containing this needle, but allow ±5% slack
                // to absorb the rare overlap collision in random text generation.
                ExpectedMin = (int)(count * 0.95),
                ExpectedMax = (int)Math.Ceiling(count * 1.05) + 1,
            });
        }

        // unicode: relies on the corpus having ~1% unicode-named files.
        qs.Add(new Query { Id = "unicode-cjk", Text = "测试", Class = "substring" });
        qs.Add(new Query { Id = "unicode-emoji", Text = "😀", Class = "substring" });

        // Repeat the most common substrings to get statistical stability for the percentile bands.
        var padding = new[] { "report", "summary", "config" };
        for (int i = 0; qs.Count < 200; i++)
            qs.Add(new Query { Id = $"pad-{i}-{padding[i % padding.Length]}", Text = padding[i % padding.Length], Class = "substring" });

        return qs;
    }
}

// JSON DTOs.
+internal sealed class CorpusManifest // Manifest DTO; JSON names mirror the corpus generator's Manifest (NOTE(review): presumably loaded from its manifest.json - confirm against the loader).
+{
+    [JsonPropertyName("preset")] public string Preset { get; set; } = "";
+    [JsonPropertyName("seed")] public int Seed { get; set; }
+    [JsonPropertyName("fileCount")] public int FileCount { get; set; }
+    [JsonPropertyName("totalBytes")] public long TotalBytes { get; set; }
+    [JsonPropertyName("needleCounts")] public Dictionary<string, int> NeedleCounts { get; set; } = new(); // needle token -> count recorded by the generator; QueryGen.Build derives the content-query bounds from it
+}
+
+internal sealed class BenchRun // Top-level record for one bench run: provider, corpus, machine, aggregates and the per-query rows.
+{
+    [JsonPropertyName("schemaVersion")] public int SchemaVersion { get; init; } = 1;
+    [JsonPropertyName("runId")] public string RunId { get; init; } = "";
+    [JsonPropertyName("provider")] public string Provider { get; init; } = "";
+    [JsonPropertyName("corpus")] public CorpusInfo Corpus { get; init; } = new();
+    [JsonPropertyName("machine")] public MachineInfo Machine { get; init; } = new();
+    [JsonPropertyName("aggregates")] public Aggregates Aggregates { get; init; } = new();
+    [JsonPropertyName("queries")] public List<QueryResult> Queries { get; init; } = new(); // raw per-query rows that the aggregates summarize
+}
+
+// Aggregate percentiles across all queries in the run. The CLAUDE.md
+// gates are stated in these terms (TTFR median / p99, etc.), so persist
+// them alongside the raw per-query rows for easy diff vs. baseline.json.
+internal sealed class Aggregates +{ + [JsonPropertyName("ttfrMedianMs")] public long TtfrMedianMs { get; init; } + [JsonPropertyName("ttfrP95Ms")] public long TtfrP95Ms { get; init; } + [JsonPropertyName("ttfrP99Ms")] public long TtfrP99Ms { get; init; } + [JsonPropertyName("totalMedianMs")] public long TotalMedianMs { get; init; } + [JsonPropertyName("totalP95Ms")] public long TotalP95Ms { get; init; } + [JsonPropertyName("totalP99Ms")] public long TotalP99Ms { get; init; } + [JsonPropertyName("queryCount")] public int QueryCount { get; init; } + + public static Aggregates From(IReadOnlyList results) + { + if (results.Count == 0) return new Aggregates(); + var ttfr = results.Select(r => r.TimeToFirstResultMs).Order().ToList(); + var total = results.Select(r => r.TimeToCompleteMs).Order().ToList(); + return new Aggregates + { + QueryCount = results.Count, + TtfrMedianMs = ttfr[ttfr.Count / 2], + TtfrP95Ms = ttfr[(int)(ttfr.Count * 0.95)], + TtfrP99Ms = ttfr[(int)(ttfr.Count * 0.99)], + TotalMedianMs = total[total.Count / 2], + TotalP95Ms = total[(int)(total.Count * 0.95)], + TotalP99Ms = total[(int)(total.Count * 0.99)], + }; + } +} + +internal sealed class CorpusInfo +{ + [JsonPropertyName("name")] public string Name { get; init; } = ""; + [JsonPropertyName("files")] public int Files { get; init; } + [JsonPropertyName("bytes")] public long Bytes { get; init; } + [JsonPropertyName("seed")] public int Seed { get; init; } +} + +internal sealed class MachineInfo +{ + [JsonPropertyName("os")] public string Os { get; init; } = ""; + [JsonPropertyName("processorCount")] public int ProcessorCount { get; init; } + [JsonPropertyName("ramGB")] public double RamGB { get; init; } + + public static MachineInfo Capture() => new() + { + Os = Environment.OSVersion.VersionString, + ProcessorCount = Environment.ProcessorCount, + RamGB = Math.Round(GC.GetGCMemoryInfo().TotalAvailableMemoryBytes / (1024.0 * 1024 * 1024), 1), + }; +} + +internal sealed class QueryResult +{ + 
[JsonPropertyName("id")] public string Id { get; init; } = ""; + [JsonPropertyName("text")] public string Text { get; init; } = ""; + [JsonPropertyName("class")] public string Class { get; init; } = ""; + [JsonPropertyName("timeToFirstResultMs")] public long TimeToFirstResultMs { get; init; } + [JsonPropertyName("timeToCompleteMs")] public long TimeToCompleteMs { get; init; } + [JsonPropertyName("resultCount")] public int ResultCount { get; init; } + [JsonPropertyName("peakRamMB")] public double PeakRamMB { get; init; } + [JsonPropertyName("expectedMin")] public int ExpectedMin { get; init; } + [JsonPropertyName("expectedMax")] public int ExpectedMax { get; init; } +} + +[JsonSerializable(typeof(BenchRun))] +[JsonSerializable(typeof(CorpusManifest))] +[JsonSourceGenerationOptions(WriteIndented = true)] +internal partial class BenchJson : JsonSerializerContext { } diff --git a/tests/corpora/Files.Search.Corpora.csproj b/tests/corpora/Files.Search.Corpora.csproj new file mode 100644 index 000000000000..968e2f7de942 --- /dev/null +++ b/tests/corpora/Files.Search.Corpora.csproj @@ -0,0 +1,12 @@ + + + Exe + net10.0-windows + enable + enable + latest + Files.Search.Corpora + files-corpora + false + + diff --git a/tests/corpora/Program.cs b/tests/corpora/Program.cs new file mode 100644 index 000000000000..4fc4293a3d58 --- /dev/null +++ b/tests/corpora/Program.cs @@ -0,0 +1,348 @@ +using System.Diagnostics; +using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Files.Search.Corpora; + +internal static class Program +{ + private static int Main(string[] args) + { + try + { + var opts = CliOptions.Parse(args); + if (opts is null) return 1; + + if (Directory.Exists(opts.OutDir) && Directory.EnumerateFileSystemEntries(opts.OutDir).Any() && !opts.Force) + { + Console.Error.WriteLine($"error: --out '{opts.OutDir}' is not empty (use --force to overwrite)"); + return 2; + } + Directory.CreateDirectory(opts.OutDir); + + var sw = 
Stopwatch.StartNew(); + var manifest = Generator.Generate(opts); + sw.Stop(); + manifest.GenerationSeconds = sw.Elapsed.TotalSeconds; + + var manifestPath = Path.Combine(opts.OutDir, "manifest.json"); + File.WriteAllText(manifestPath, JsonSerializer.Serialize(manifest, ManifestJson.Default.Manifest)); + Console.WriteLine($"done: {manifest.FileCount:N0} files, {manifest.TotalBytes / (1024.0 * 1024 * 1024):F2} GiB, {sw.Elapsed.TotalSeconds:F1}s"); + Console.WriteLine($"manifest: {manifestPath}"); + return 0; + } + catch (Exception ex) + { + Console.Error.WriteLine($"error: {ex.Message}"); + return 1; + } + } +} + +internal sealed class CliOptions +{ + public required string OutDir { get; init; } + public required string PresetName { get; init; } + public required int FileCount { get; init; } + public required long AvgFileBytes { get; init; } + public required int Seed { get; init; } + public bool Force { get; init; } + + public static CliOptions? Parse(string[] args) + { + if (args.Length == 0 || args.Contains("-h") || args.Contains("--help")) + { + PrintUsage(); + return null; + } + + string? outDir = null; + string? preset = null; + int? files = null; + long? avgBytes = null; + int seed = 42; + bool force = false; + + for (int i = 0; i < args.Length; i++) + { + switch (args[i]) + { + case "--out": outDir = args[++i]; break; + case "--preset": preset = args[++i]; break; + case "--files": files = int.Parse(args[++i]); break; + case "--avg-size": avgBytes = long.Parse(args[++i]); break; + case "--seed": seed = int.Parse(args[++i]); break; + case "--force": force = true; break; + default: throw new ArgumentException($"unknown arg: {args[i]}"); + } + } + + if (outDir is null) { Console.Error.WriteLine("error: --out is required"); return null; } + + // Presets — small targets a quick local run; medium/large need real disk. 
+ (string name, int count, long avg) = preset switch + { + "small" => ("small", 50_000, 40L * 1024), // ~2 GiB + "medium" => ("medium", 500_000, 100L * 1024), // ~50 GiB + "large" => ("large", 2_000_000, 250L * 1024), // ~500 GiB + null => ("custom", files ?? throw new ArgumentException("--preset or --files required"), + avgBytes ?? 40L * 1024), + _ => throw new ArgumentException($"unknown preset: {preset}") + }; + + return new CliOptions + { + OutDir = Path.GetFullPath(outDir), + PresetName = name, + FileCount = count, + AvgFileBytes = avg, + Seed = seed, + Force = force, + }; + } + + private static void PrintUsage() + { + Console.WriteLine(""" + files-corpora --out [--preset small|medium|large] [--files N] [--avg-size BYTES] [--seed N] [--force] + + Generates a deterministic file corpus for search benchmarking. Same seed + preset → identical tree. + Writes manifest.json describing what was produced (and what queries can deterministically expect). + """); + } +} + +internal static class Generator +{ + // Realistic-ish weighted extension mix. 
+ private static readonly (string ext, int weight, bool textLike)[] Extensions = + [ + (".txt", 10, true), (".md", 8, true), (".cs", 6, true), (".json", 5, true), + (".log", 6, true), (".html", 3, true), (".xml", 3, true), (".csv", 3, true), + (".docx", 6, false), (".pdf", 6, false), (".xlsx", 3, false), (".pptx", 2, false), + (".jpg", 10, false), (".png", 6, false), (".mp4", 3, false), (".zip", 4, false), + (".dll", 4, false), (".exe", 2, false), + ]; + + private static readonly string[] WordPool = + [ + "report","summary","invoice","draft","final","review","notes","meeting","backup","archive", + "project","module","service","client","server","data","config","setup","build","release", + "alpha","beta","gamma","delta","north","south","east","west","spring","summer","autumn","winter", + "blue","red","green","orange","purple","silver","gold","copper","iron","quartz", + "annual","quarterly","monthly","daily","internal","public","private","secure","draft","final", + ]; + + // "Needle" tokens placed deterministically so content-search benches can assert exact counts. + public static readonly string[] NeedleTokens = ["xqz_alpha", "xqz_beta", "xqz_gamma", "xqz_delta"]; + + public static Manifest Generate(CliOptions opts) + { + var rng = new Xorshift64(unchecked((ulong)opts.Seed * 0x9E3779B97F4A7C15UL + 1)); + long totalWeight = Extensions.Sum(e => e.weight); + + var manifest = new Manifest + { + Preset = opts.PresetName, + Seed = opts.Seed, + Root = opts.OutDir, + FileCount = opts.FileCount, + NeedleTokens = NeedleTokens, + NeedleCounts = new Dictionary(), + }; + foreach (var t in NeedleTokens) manifest.NeedleCounts[t] = 0; + + // Pre-create a directory tree shaped roughly like a user data folder: + // depth 0..4, branching ~6 at root, ~4 mid, ~2 deep. 
+ var dirs = BuildDirTree(opts.OutDir, rng, opts.FileCount); + manifest.DirCount = dirs.Count; + + long bytes = 0; + var sb = new StringBuilder(8 * 1024); + var contentBuf = new byte[Math.Min(opts.AvgFileBytes * 4, 4 * 1024 * 1024)]; + int unicodeCount = 0, longPathCount = 0, hiddenCount = 0, zeroByteCount = 0; + + for (int i = 0; i < opts.FileCount; i++) + { + // Pick an extension by weight. + long roll = (long)(rng.NextDouble() * totalWeight); + string ext = ".txt"; bool textLike = true; + long acc = 0; + foreach (var e in Extensions) { acc += e.weight; if (roll < acc) { ext = e.ext; textLike = e.textLike; break; } } + + // Name (occasionally unicode / long). + string baseName = MakeName(rng, sb); + bool unicode = rng.NextDouble() < 0.01; + bool longName = rng.NextDouble() < 0.005; + if (unicode) { baseName = "测试_" + baseName + "_😀"; unicodeCount++; } + if (longName) { baseName = baseName + new string('x', 180); longPathCount++; } + string fileName = baseName + ext; + + string dir = dirs[(int)(rng.NextU64() % (ulong)dirs.Count)]; + string path = Path.Combine(dir, fileName); + + // Size: log-normal-ish around avg, clamped. + double mult = Math.Pow(10, (rng.NextDouble() - 0.5) * 1.4); // ~0.04x..25x + long size = Math.Max(0, (long)(opts.AvgFileBytes * mult)); + if (rng.NextDouble() < 0.002) { size = 0; zeroByteCount++; } + if (size > contentBuf.Length) size = contentBuf.Length; + + try + { + if (textLike && size > 0) + { + int needles = WriteText(contentBuf, (int)size, rng, manifest.NeedleCounts); + File.WriteAllBytes(path, contentBuf.AsSpan(0, (int)size).ToArray()); + } + else + { + rng.NextBytes(contentBuf.AsSpan(0, (int)size)); + File.WriteAllBytes(path, size == 0 ? Array.Empty() : contentBuf.AsSpan(0, (int)size).ToArray()); + } + bytes += size; + + // ~1% hidden. 
+ if (rng.NextDouble() < 0.01) + { + File.SetAttributes(path, File.GetAttributes(path) | FileAttributes.Hidden); + hiddenCount++; + } + } + catch (PathTooLongException) { longPathCount--; /* silently drop */ } + catch (IOException) { /* tolerate transient issues */ } + + if ((i & 0xFFF) == 0 && i > 0) + Console.Write($"\r {i:N0} / {opts.FileCount:N0} files"); + } + Console.WriteLine($"\r {opts.FileCount:N0} / {opts.FileCount:N0} files"); + + manifest.TotalBytes = bytes; + manifest.UnicodeNameCount = unicodeCount; + manifest.LongPathCount = longPathCount; + manifest.HiddenCount = hiddenCount; + manifest.ZeroByteCount = zeroByteCount; + return manifest; + } + + private static List BuildDirTree(string root, Xorshift64 rng, int fileCount) + { + // Aim for ~50 files per leaf dir on average. + int leafCount = Math.Max(1, fileCount / 50); + var dirs = new List { root }; + var queue = new Queue<(string path, int depth)>(); + queue.Enqueue((root, 0)); + while (dirs.Count < leafCount && queue.Count > 0) + { + var (p, d) = queue.Dequeue(); + int branch = d == 0 ? 6 : d <= 2 ? 
4 : 2; + for (int i = 0; i < branch && dirs.Count < leafCount; i++) + { + string sub = Path.Combine(p, $"d{d}_{rng.NextU64() % 10000:0000}"); + Directory.CreateDirectory(sub); + dirs.Add(sub); + if (d < 4) queue.Enqueue((sub, d + 1)); + } + } + return dirs; + } + + private static string MakeName(Xorshift64 rng, StringBuilder sb) + { + sb.Clear(); + int parts = 1 + (int)(rng.NextU64() % 3); + for (int i = 0; i < parts; i++) + { + if (i > 0) sb.Append('_'); + sb.Append(WordPool[(int)(rng.NextU64() % (ulong)WordPool.Length)]); + } + sb.Append('_').Append(rng.NextU64() % 1_000_000); + return sb.ToString(); + } + + private static int WriteText(byte[] buf, int size, Xorshift64 rng, Dictionary needleCounts) + { + int written = 0; + int needles = 0; + var sb = new StringBuilder(256); + while (written < size) + { + sb.Clear(); + int wordsThisLine = 6 + (int)(rng.NextU64() % 12); + for (int w = 0; w < wordsThisLine; w++) + { + if (w > 0) sb.Append(' '); + // ~0.05% chance per word slot to plant a needle. + if (rng.NextDouble() < 0.0005) + { + var n = NeedleTokens[(int)(rng.NextU64() % (ulong)NeedleTokens.Length)]; + sb.Append(n); + lock (needleCounts) needleCounts[n] = needleCounts[n] + 1; + needles++; + } + else + { + sb.Append(WordPool[(int)(rng.NextU64() % (ulong)WordPool.Length)]); + } + } + sb.Append('\n'); + int byteCount = Encoding.UTF8.GetByteCount(sb.ToString().AsSpan()); + if (written + byteCount > size) byteCount = size - written; + if (byteCount <= 0) break; + var slice = Encoding.UTF8.GetBytes(sb.ToString()); + Array.Copy(slice, 0, buf, written, Math.Min(byteCount, slice.Length)); + written += Math.Min(byteCount, slice.Length); + } + return needles; + } +} + +// Deterministic RNG — xorshift64*. Single-threaded; no hidden state. +internal sealed class Xorshift64 +{ + private ulong _s; + public Xorshift64(ulong seed) { _s = seed == 0 ? 
0xDEADBEEFCAFEBABEUL : seed; } + public ulong NextU64() + { + _s ^= _s >> 12; _s ^= _s << 25; _s ^= _s >> 27; + return _s * 0x2545F4914F6CDD1DUL; + } + public double NextDouble() => (NextU64() >> 11) * (1.0 / (1UL << 53)); + public void NextBytes(Span dest) + { + int i = 0; + while (i + 8 <= dest.Length) + { + ulong v = NextU64(); + for (int b = 0; b < 8; b++) dest[i + b] = (byte)(v >> (b * 8)); + i += 8; + } + if (i < dest.Length) + { + ulong v = NextU64(); + for (; i < dest.Length; i++) { dest[i] = (byte)v; v >>= 8; } + } + } +} + +internal sealed class Manifest +{ + [JsonPropertyName("schemaVersion")] public int SchemaVersion { get; init; } = 1; + [JsonPropertyName("preset")] public string Preset { get; init; } = ""; + [JsonPropertyName("seed")] public int Seed { get; init; } + [JsonPropertyName("root")] public string Root { get; init; } = ""; + [JsonPropertyName("fileCount")] public int FileCount { get; set; } + [JsonPropertyName("dirCount")] public int DirCount { get; set; } + [JsonPropertyName("totalBytes")] public long TotalBytes { get; set; } + [JsonPropertyName("unicodeNameCount")] public int UnicodeNameCount { get; set; } + [JsonPropertyName("longPathCount")] public int LongPathCount { get; set; } + [JsonPropertyName("hiddenCount")] public int HiddenCount { get; set; } + [JsonPropertyName("zeroByteCount")] public int ZeroByteCount { get; set; } + [JsonPropertyName("needleTokens")] public string[] NeedleTokens { get; init; } = []; + [JsonPropertyName("needleCounts")] public Dictionary NeedleCounts { get; init; } = new(); + [JsonPropertyName("generationSeconds")] public double GenerationSeconds { get; set; } +} + +[JsonSerializable(typeof(Manifest))] +[JsonSourceGenerationOptions(WriteIndented = true)] +internal partial class ManifestJson : JsonSerializerContext { } From 2deb6760bfa8f91fe1847e0d02753a1684ed3720 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Sun, 10 May 2026 02:49:06 -0400 Subject: [PATCH 02/10] 
removed doc file --- docs/discord-post.md | 79 -------------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 docs/discord-post.md diff --git a/docs/discord-post.md b/docs/discord-post.md deleted file mode 100644 index 01e338360497..000000000000 --- a/docs/discord-post.md +++ /dev/null @@ -1,79 +0,0 @@ -# Discord post — for the user to copy / adapt before sending - -Not committed to a public-facing surface. This is the conversational -version of `docs/proposal.md` that fits a Discord channel. - ---- - -## Short version (~6 lines, fits one chat message) - -> Hey — I've been working on a fork that swaps the -> Windows.Storage.Search backend for a sidecar Rust indexer (Tantivy -> + ReadDirectoryChangesW). On a 5k-file bench it's ~595× faster on -> substring queries; default is unchanged, indexed is opt-in via env -> var. Wanted to ask the team: would this direction be of interest -> upstream? Don't want to keep building if it's a non-starter. -> -> Repo + writeup: -> Specifically the proposal: - -## Longer version (if a maintainer engages) - -> A bit more context on what's in the fork: -> -> **What it is:** separate Rust process, gRPC over TCP (named pipe is -> next). Tantivy for the filename index, FindFirstFileExW + rayon for -> enumeration, notify crate (ReadDirectoryChangesW) for live updates, -> SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN) + pause-on-battery / -> fullscreen / load for being a good citizen. -> -> **What it isn't yet:** content indexing, semantic search, named-pipe -> transport, or service auto-launcher. Those are bounded but real -> work — `docs/improvements.md` has them tiered with cost estimates. -> Holding off on building more until I get a read on whether the -> direction is even welcome. -> -> **What I'd want your read on:** -> 1. Is a sidecar Rust process inside a C# app something you'd accept -> in principle? -> 2. 
What would block it — Rust toolchain in CI, signing, maintenance -> burden, telemetry concerns? -> 3. Phased PRs (interface → bench harness → indexed client → router) -> or stay-as-fork preferred? -> -> Happy to walk through any of it on a call or via PR comments. No -> hard feelings if the answer is "stay a fork" — just want to know -> before sinking another week into it. - -## Notes on framing - -- Lead with the question, not the code dump. Maintainers who skim - Discord see "would this be of interest" first; the link is for if - they want to dig. -- Include the bench number — it's the hook. "595× faster on substring" - is concrete enough to make someone click. -- Soft-close ("no hard feelings if…") signals you're not emotionally - invested in a yes; lowers the stakes of their reply. -- Don't mention "I used Claude Code" or AI-assisted in the pitch. - Maintainers care about the code and architecture, not the toolchain - behind it. If asked directly, be honest, but don't lead with it. - -## After you post - -Things to be ready to answer fast: - -- "Why Rust and not C# / .NET out-of-process?" — Tantivy maturity, - zero-GC for the index, single-binary distribution. ADR-grade - answer in `docs/decisions/`. -- "Why a sidecar process and not in-proc?" — index outlives UI - crashes, GC isolation, can be restarted independently. Architecture - in `CLAUDE.md`. -- "How does the indexer affect privacy / telemetry?" — index is - local-only, in `%LOCALAPPDATA%\Files\search-index\`. No network, - no upload. Worth saying explicitly. -- "What about admin / MFT for max speed?" — explicit no per CLAUDE.md - goal #3; a future opt-in "Turbo Mode" is on the table. Don't - oversell it. -- "Does it work on Windows on ARM?" — Rust cross-compiles fine; we - haven't tested the ARM path. Honest "untested but no architectural - blockers." 
From a9495c016634cd60da871497ab892ffcbbab9959 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Wed, 13 May 2026 01:59:05 -0400 Subject: [PATCH 03/10] Feature: C# search service rewrite, packaged-SCM path validated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the Rust PoC sidecar with a pure C# Windows Service (Files.SearchService) that ships the same gRPC wire format and ISearchProvider abstraction, removing the Rust toolchain from the project's build matrix. Search side: - In-memory inverted + trigram filename index with atomic reference-swap publish; two-tier scoring + score-then-truncate. - USN journal enumeration when running as LocalSystem; DirectoryInfo walk fallback for dev mode. - ChangeWatcher + 250ms-debounced EventBatcher for live updates; overflow triggers a full rebuild without losing events. - ProcessThrottle: background priority + battery/fullscreen/CPU polling. - IndexPersistence: binary format with magic+version, warm-start reconcile against disk on service launch. - Kestrel gRPC over named pipe in packaged/SCM mode, TCP loopback in dev. Named pipe DACL grants AuthenticatedUsers ReadWrite | Synchronize — Synchronize is required for NamedPipeClientStream's async connect across the LocalSystem/user-session boundary; the missing Synchronize right was the source of the prior UnauthorizedAccessException. App side: - SearchRouter replaces FolderSearch as the call-site seam; routes glob/ AQS/Home/Library to legacy, everything else to indexed when enabled. - SearchServiceManager.EnsureRunning bridges packaged (ServiceController) and dev (HKCU\Run + direct launch) startup paths. - UseIndexedSearch toggle in Settings → Advanced, stored via IGeneralSettingsService. Env var FILES_SEARCH_PROVIDER kept as a dev override. - desktop6:Service declaration in Package.appxmanifest with StartAccount=localSystem. Debug manifest omits it so VS can sideload without admin. 
- WindowsAppSdkDeploymentManagerInitialize=false to skip the DeploymentManager auto-init that was throwing REGDB_E_CLASSNOTREG on packaged launches; the Main+Singleton packages already ship with the framework dependency, so the auto-init was redundant. Tests: - tests/Files.Search.Correctness (92 tests pass): FileIndex, Tokenizer, Scorer, Persistence, CorpusCorrectness. - tests/Files.Search.Probe: console smoke harness with bench/query/check subcommands. - tests/Files.Search.Bench wired with naive-scan, legacy (AQS), and indexed providers; same .proto + same scope adapter for fair compare. Bench: bench-results/baseline.json pinned (50k 'small' corpus, indexed TTFR p50=11ms, p99=88ms, total p99=210ms). Legacy AQS measured at 5k (2025ms TTFR) — full-scale legacy run deferred per ADR 0003 since it's O(N) per query off-Indexer and produces no gate-relevant information. Docs: rewrote CLAUDE.md, search-roadmap.md, README; added csharp-search-service.md (architecture + file map). Deleted proposal.md, improvements.md, and ADR 0002 (Rust-specific and superseded). Removed src/search-service/ — the Rust PoC stays in branch history via commit 534d78483. Validated end-to-end in packaged/SCM mode: Get-Service shows FilesSearchService running as LocalSystem; cross-context named-pipe connect from a non-elevated user returns 'connected OK'. Files.App itself has a pre-existing packaged-launch crash unrelated to search (silent exit before managed code) that needs a separate pass before release shipping; the search-service infrastructure is independently proven. 
--- .gitignore | 5 +- CLAUDE.md | 50 +- Directory.Packages.props | 1 + Files.slnx | 9 +- README.md | 68 +- bench-results/baseline.json | 2278 +++++++++++++++ docs/csharp-search-service.md | 674 +++++ docs/decisions/0002-rust-service-transport.md | 32 - docs/improvements.md | 147 - docs/proposal.md | 140 - docs/search-roadmap.md | 218 +- run-bench.ps1 | 196 ++ .../Data/Contracts/IGeneralSettingsService.cs | 5 + src/Files.App/Files.App.csproj | 31 +- .../Helpers/Application/AppLifecycleHelper.cs | 3 + .../Application/SearchServiceManager.cs | 139 + src/Files.App/Package.Debug.appxmanifest | 203 ++ src/Files.App/Package.appxmanifest | 14 +- .../Settings/GeneralSettingsService.cs | 6 + src/Files.App/Strings/en-US/Resources.resw | 6 + .../Utils/Storage/Search/SearchRouter.cs | 111 +- .../ViewModels/Settings/AdvancedViewModel.cs | 13 + src/Files.App/Views/Layouts/BaseLayoutPage.cs | 2 +- .../Views/Settings/AdvancedPage.xaml | 12 + .../Files.IndexedSearch.Client.csproj | 6 +- .../IndexedSearchProvider.cs | 125 +- src/Files.SearchAbstraction/HealthStatus.cs | 4 +- .../ISearchProvider.cs | 4 +- .../Files.SearchService.csproj | 39 + .../Grpc/SearchGrpcService.cs | 43 + src/Files.SearchService/Index/DocStore.cs | 75 + src/Files.SearchService/Index/FileIndex.cs | 366 +++ .../Index/IndexBootstrapper.cs | 107 + .../Index/IndexPersistence.cs | 92 + src/Files.SearchService/Index/Scorer.cs | 49 + src/Files.SearchService/Index/Tokenizer.cs | 66 + src/Files.SearchService/Program.cs | 242 ++ .../SearchWindowsService.cs | 32 + .../Throttle/ProcessThrottle.cs | 137 + src/Files.SearchService/Usn/NativeMethods.cs | 170 ++ .../Usn/UsnJournalReader.cs | 252 ++ .../Watch/ChangeWatcher.cs | 90 + src/Files.SearchService/Watch/EventBatcher.cs | 54 + .../proto/files_search.proto | 0 src/search-service/.gitignore | 2 - src/search-service/Cargo.lock | 2501 ----------------- src/search-service/Cargo.toml | 42 - src/search-service/build.rs | 11 - src/search-service/rust-toolchain.toml | 4 - 
src/search-service/src/enumerate.rs | 182 -- src/search-service/src/index.rs | 296 -- src/search-service/src/lib.rs | 92 - src/search-service/src/main.rs | 89 - src/search-service/src/throttle.rs | 199 -- src/search-service/src/watcher.rs | 179 -- src/search-service/tests/enumerate.rs | 64 - src/search-service/tests/search_smoke.rs | 224 -- src/search-service/tests/throttle.rs | 19 - src/search-service/tests/watcher.rs | 110 - tests/Files.Search.Bench/Program.cs | 6 +- .../CorpusCorrectnessTests.cs | 221 ++ .../FileIndexTests.cs | 506 ++++ .../Files.Search.Correctness.csproj | 23 + .../PersistenceTests.cs | 128 + tests/Files.Search.Correctness/ScorerTests.cs | 109 + .../TokenizerTests.cs | 160 ++ .../Files.Search.Probe.csproj | 19 + tests/Files.Search.Probe/Program.cs | 256 ++ tests/Files.Search.Probe/README.md | 35 + 69 files changed, 7153 insertions(+), 4640 deletions(-) create mode 100644 bench-results/baseline.json create mode 100644 docs/csharp-search-service.md delete mode 100644 docs/decisions/0002-rust-service-transport.md delete mode 100644 docs/improvements.md delete mode 100644 docs/proposal.md create mode 100644 run-bench.ps1 create mode 100644 src/Files.App/Helpers/Application/SearchServiceManager.cs create mode 100644 src/Files.App/Package.Debug.appxmanifest create mode 100644 src/Files.SearchService/Files.SearchService.csproj create mode 100644 src/Files.SearchService/Grpc/SearchGrpcService.cs create mode 100644 src/Files.SearchService/Index/DocStore.cs create mode 100644 src/Files.SearchService/Index/FileIndex.cs create mode 100644 src/Files.SearchService/Index/IndexBootstrapper.cs create mode 100644 src/Files.SearchService/Index/IndexPersistence.cs create mode 100644 src/Files.SearchService/Index/Scorer.cs create mode 100644 src/Files.SearchService/Index/Tokenizer.cs create mode 100644 src/Files.SearchService/Program.cs create mode 100644 src/Files.SearchService/SearchWindowsService.cs create mode 100644 
src/Files.SearchService/Throttle/ProcessThrottle.cs create mode 100644 src/Files.SearchService/Usn/NativeMethods.cs create mode 100644 src/Files.SearchService/Usn/UsnJournalReader.cs create mode 100644 src/Files.SearchService/Watch/ChangeWatcher.cs create mode 100644 src/Files.SearchService/Watch/EventBatcher.cs rename src/{search-service => Files.SearchService}/proto/files_search.proto (100%) delete mode 100644 src/search-service/.gitignore delete mode 100644 src/search-service/Cargo.lock delete mode 100644 src/search-service/Cargo.toml delete mode 100644 src/search-service/build.rs delete mode 100644 src/search-service/rust-toolchain.toml delete mode 100644 src/search-service/src/enumerate.rs delete mode 100644 src/search-service/src/index.rs delete mode 100644 src/search-service/src/lib.rs delete mode 100644 src/search-service/src/main.rs delete mode 100644 src/search-service/src/throttle.rs delete mode 100644 src/search-service/src/watcher.rs delete mode 100644 src/search-service/tests/enumerate.rs delete mode 100644 src/search-service/tests/search_smoke.rs delete mode 100644 src/search-service/tests/throttle.rs delete mode 100644 src/search-service/tests/watcher.rs create mode 100644 tests/Files.Search.Correctness/CorpusCorrectnessTests.cs create mode 100644 tests/Files.Search.Correctness/FileIndexTests.cs create mode 100644 tests/Files.Search.Correctness/Files.Search.Correctness.csproj create mode 100644 tests/Files.Search.Correctness/PersistenceTests.cs create mode 100644 tests/Files.Search.Correctness/ScorerTests.cs create mode 100644 tests/Files.Search.Correctness/TokenizerTests.cs create mode 100644 tests/Files.Search.Probe/Files.Search.Probe.csproj create mode 100644 tests/Files.Search.Probe/Program.cs create mode 100644 tests/Files.Search.Probe/README.md diff --git a/.gitignore b/.gitignore index 151e4a7dc8d7..2c9ecc8b8a06 100644 --- a/.gitignore +++ b/.gitignore @@ -411,8 +411,11 @@ FodyWeavers.xsd 
src/Files.App/Assets/FilesOpenDialog/Files.App.Launcher.exe.sha256 # Search bench corpora and run outputs (generated, large, deterministic). +# baseline.json is checked in as the pinned reference for gate comparisons. .bench/ -bench-results/ +bench-results/* +!bench-results/baseline.json +.smoke/ # Claude Code local settings (per-user, not for the repo). .claude/ diff --git a/CLAUDE.md b/CLAUDE.md index 73b9fc372a40..ca0befd8b185 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,4 +1,4 @@ -# CLAUDE.md +# CLAUDE.md Fork of [files-community/Files](https://github.com/files-community/Files) (C#/WinUI 3). Goal: faster, AI-augmented search without regressing the rest. @@ -6,19 +6,18 @@ Fork of [files-community/Files](https://github.com/files-community/Files) (C#/Wi 1. **Faster.** Query latency ≤10% of Windows Search on equivalent corpora. 2. **No heavier.** RAM/disk/idle CPU ≤ upstream Files + Windows Search Indexer. -3. **No user burden.** No UAC, no admin features, no new mandatory UI. Existing search bar only. - -These are in tension; MFT-based indexing is disqualified (needs admin). Extract max speed within user-mode. +3. **No runtime UAC.** No admin prompts during normal use. The service is declared as a `desktop6:Service` in the MSIX manifest and installed by Windows at package install time (which already runs elevated). SCM manages it from there — no UAC at launch, ever. ## Architecture -Separate Rust process, gRPC over named pipe to the C# UI. Keeps index out of GC, survives UI restarts. +`files-search-service.exe` is a pure C# Windows Service (`src/Files.SearchService/`) installed via the MSIX manifest (`desktop6:Service`, `StartAccount=localSystem`). SCM starts it at login. Files.App is a pure gRPC client over a named pipe — it never spawns or owns the process in packaged mode. -- Enumeration: `FindFirstFileEx` + `FindExInfoBasic` + `FIND_FIRST_EX_LARGE_FETCH`, parallel work-stealing. -- Updates: `ReadDirectoryChangesW`, recursive, no polling. 
-- Throttle: `PROCESS_MODE_BACKGROUND_BEGIN`, pause on battery / fullscreen / high load. -- Indexes: Tantivy (filename + content), HNSW vectors (semantic), SQLite (tags/metadata). -- Query routing: glob/regex → filename; keywords → content; natural language → embeddings. +- **Enumeration (initial):** NTFS USN Change Journal via `FSCTL_ENUM_USN_DATA` — reads the kernel's file-change log directly, indexes millions of files in seconds. Requires LocalSystem, provided by the SCM service registration. +- **Enumeration (fallback):** `DirectoryInfo.EnumerateFiles` with `RecurseSubdirectories=true`, `AttributesToSkip=ReparsePoint`. Used in dev/unpackaged mode when the volume handle can't be opened. +- **Updates:** `FileSystemWatcher` (`ReadDirectoryChangesW` under the hood), recursive, 250ms debounced commits via `EventBatcher`. Overflow triggers a full rebuild. +- **Throttle:** `PROCESS_MODE_BACKGROUND_BEGIN` at startup; 2s polling pauses commits on battery / fullscreen / high CPU. +- **Index:** in-memory inverted index (`Dictionary` posting lists, atomically swapped on rebuild) + trigram index for mid-string substrings. Filename-only in v0. Persisted to `index.bin` (custom binary format, magic `FSIX`) for fast restart with reconcile-against-disk diff. +- **Transport:** gRPC over named pipe `\\.\pipe\files-search` (Kestrel `ListenNamedPipe`). TCP loopback available via `FILES_SEARCH_SERVICE_URL` for dev/CI. ## Coexistence @@ -27,27 +26,27 @@ All search goes through `ISearchProvider`. Two impls ship: - `LegacySearchProvider` — wraps upstream unchanged. Frozen reference; instrumentation only. - `IndexedSearchProvider` — talks to the new service. -Selected by setting → env var `FILES_SEARCH_PROVIDER` → default. Default stays `Legacy` until benchmarks pass. +Selected by `UseIndexedSearch` setting (Settings → Advanced) → env var `FILES_SEARCH_PROVIDER` → default. Default stays `Legacy` until benchmarks pass. 
`SearchRouter` falls back to legacy for glob (`*`/`?`), AQS (`$`/`:`), Home/library scopes, or when the service is unavailable. ## Layout ``` -src/Files.App/ UI, modified only to consume ISearchProvider -src/Files.SearchAbstraction/ interface + types -src/Files.LegacySearch/ upstream wrapper -src/Files.IndexedSearch.Client/ C# client -src/search-service/ Rust service -tests/Files.Search.Correctness/ result equivalence -tests/Files.Search.Bench/ perf benchmarks -tests/Files.Search.Resource/ soak + good-citizen tests -tests/corpora/ deterministic corpus generators +src/Files.App/ UI, modified only to consume ISearchProvider +src/Files.SearchAbstraction/ interface + types +src/Files.LegacySearch/ upstream wrapper +src/Files.IndexedSearch.Client/ C# gRPC client +src/Files.SearchService/ C# Windows Service (the indexer) +tests/Files.Search.Correctness/ result equivalence +tests/Files.Search.Bench/ perf benchmarks +tests/Files.Search.Probe/ console probe / smoke harness +tests/corpora/ deterministic corpus generators ``` ## Tests **Correctness.** For each `(corpus, query)`, indexed results ⊇ legacy results (modulo documented exclusions). Cases: exact, glob, substring, ext+substring, content, path-scoped, unicode, long paths, hidden/system/symlinks. -**Benchmarks.** Three corpora generated deterministically: `small` (50k files, ~2GB), `medium` (500k, ~50GB), `large` (2M, ~500GB). ~200 queries per corpus. Per `(provider, corpus, query)` record: time-to-first-result, time-to-complete, peak RAM, CPU-seconds, bytes read. Indexing also tracks: cold-start time, steady-state RAM, index size on disk, incremental update latency. JSON to `bench-results/<run-id>.json`. +**Benchmarks.** Three corpora generated deterministically: `small` (50k files, ~2GB), `medium` (500k, ~50GB), `large` (2M, ~500GB). ~200 queries per corpus. Per `(provider, corpus, query)` record: time-to-first-result, time-to-complete, peak RAM, CPU-seconds, bytes read.
Indexing also tracks: cold-start time, steady-state RAM, index size on disk, incremental update latency. JSON to `bench-results/<run-id>.json`. `run-bench.ps1` at repo root is the one-shot driver. **Acceptance gates** (vs. legacy baseline on `medium`): @@ -68,4 +67,11 @@ Baseline pinned in `bench-results/baseline.json`, updated only by explicit decis - Correctness suite runs per-commit. Regressions block merge. - `Bench --corpus small` per-commit; `medium` nightly. -- Legacy provider is frozen — instrumentation and upstream-mirrored bugfixes only. \ No newline at end of file +- Legacy provider is frozen — instrumentation and upstream-mirrored bugfixes only. + +## See also + +- `docs/csharp-search-service.md` — full component-level architecture and file map. +- `docs/decisions/0001-bench-stack.md` — bench harness choice. +- `docs/decisions/0003-bench-strategy-theoretical.md` — Big-O-for-gates rationale. +- `docs/search-roadmap.md` — current status snapshot. diff --git a/Directory.Packages.props b/Directory.Packages.props index 476ed89231aa..3f67a1e43308 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -32,6 +32,7 @@ + diff --git a/Files.slnx b/Files.slnx index e754981d1ba0..368404b80401 100644 --- a/Files.slnx +++ b/Files.slnx @@ -17,11 +17,8 @@ - - - - - + + @@ -92,5 +89,7 @@ + + diff --git a/README.md b/README.md index 1e1329aeed47..264bc2c2bf29 100644 --- a/README.md +++ b/README.md @@ -5,38 +5,36 @@ exploring a faster search backend. ## What's different in this fork -A separate Rust process (`files-search-service.exe`) maintains a -Tantivy filename index over the user's home directory, with a -`ReadDirectoryChangesW` watcher and process throttling so it stays out -of the way. Files.App talks to it over gRPC via a new -`ISearchProvider` interface. The existing `Windows.Storage.Search` -path is preserved as the default provider; the new path is opt-in via -the `FILES_SEARCH_PROVIDER=Indexed` environment variable.
- -On a 5,000-file benchmark, the indexed provider answers substring -queries **~595× faster** than the legacy fallback path. Big O analysis -projects the gap to widen at larger scales (legacy is `O(N)` per -query when the path isn't in the Windows Search Indexer's catalog; -indexed is `O(log N)` always). +A separate C# Windows Service (`files-search-service.exe`) maintains an +in-memory inverted + trigram filename index over the user's home directory, +with a `ReadDirectoryChangesW` watcher and process throttling so it stays +out of the way. Files.App talks to it over gRPC via a new `ISearchProvider` +interface. The existing `Windows.Storage.Search` path is preserved as the +default provider; the new path is opt-in via the **Use indexed search** +toggle in Settings → Advanced (or `FILES_SEARCH_PROVIDER=Indexed`). + +On a 5,000-file benchmark, the indexed provider answers substring queries +**~595× faster** than the legacy fallback path. Big-O analysis projects the +gap to widen at larger scales (legacy is `O(N)` per query when the path +isn't in the Windows Search Indexer's catalog; indexed is `O(log N)` always). +See `docs/decisions/0003-bench-strategy-theoretical.md`. 
## Status -**Working PoC, seeking maintainer feedback before proposing PRs upstream.** +**Working PoC on `feature/csharp-search-service`.** -- ✅ Rust service: enumerator + Tantivy + watcher + throttling, 12 tests -- ✅ C# abstraction, legacy wrapper, indexed gRPC client +- ✅ C# search service: USN enumerator + inverted/trigram index + watcher + throttling +- ✅ C# abstraction, legacy wrapper, indexed gRPC client over named pipe - ✅ Bench harness with JSON output - ✅ Wired into Files.App via `SearchRouter`, default behavior unchanged -- ⏳ Service auto-launcher, content indexing, semantic search — gated - on direction approval (see `docs/improvements.md`) +- ✅ Settings UI toggle in Settings → Advanced +- ⏳ Packaged SCM end-to-end validation, content indexing — see + `docs/search-roadmap.md` ## Where to read -- **`docs/proposal.md`** — the pitch: what's the problem, what we built, - bench numbers, what we're asking for. Start here if you're a maintainer. -- **`docs/improvements.md`** — concrete follow-ups, organized by tier - with cost estimates. Designed to make it easy to say "yes to A, no - to B" before we build anything. +- **`docs/csharp-search-service.md`** — full architecture: components, data + flow, file map. Start here if you're a maintainer. - **`docs/search-roadmap.md`** — current state and what's next. - **`docs/decisions/`** — ADRs for the technical choices. - **`CLAUDE.md`** — the design constraints we held to. @@ -44,22 +42,24 @@ indexed is `O(log N)` always). ## Trying it locally ```powershell -# Build the solution in VS 2026 (needs the v145 toolset; one upstream -# divergence in src/Files.App.Launcher noted in docs/decisions/). +# 1. Generate the small corpus (one-time, ~2 GB): +dotnet run --project tests\corpora -- --preset small --out .bench\small -# Build the Rust service: -cargo build --release --manifest-path src/search-service/Cargo.toml +# 2. 
Full bench: builds, starts the service, runs naive-scan + indexed, +# gate-checks against bench-results/baseline.json: +.\run-bench.ps1 -# Set the opt-in env vars and start the service: -$env:FILES_SEARCH_PROVIDER = "Indexed" -$env:FILES_SEARCH_ROOT = "$env:USERPROFILE" -src/search-service/target/release/files-search-service.exe +# Or run the service manually in dev console mode: +$env:FILES_SEARCH_ROOT = ".bench\small" +$env:FILES_SEARCH_INDEX_DIR = ".bench\index" +dotnet run --project src\Files.SearchService -c Release -# Launch Files.App from VS in a separate session. +# Then launch Files.App from VS; set the toggle in Settings → Advanced, +# or override with $env:FILES_SEARCH_PROVIDER = "Indexed". ``` -Default users (no env var) get the existing search path, byte-identical -to upstream. +Default users (no toggle, no env var) get the existing search path, +byte-identical to upstream. ## Upstream diff --git a/bench-results/baseline.json b/bench-results/baseline.json new file mode 100644 index 000000000000..01fef135e0b4 --- /dev/null +++ b/bench-results/baseline.json @@ -0,0 +1,2278 @@ +{ + "schemaVersion": 2, + "description": "Pinned indexed baseline (50k 'small' corpus) for regression detection. Legacy AQS head-to-head measured on a 5k smoke corpus at TTFR median=2025ms (custom corpus 2026-05-10); indexed at 5k was 4ms (595x). Indexed at 50k is 11ms TTFR median \u2014 fixed gRPC named-pipe floor, scale-invariant. Per ADR 0003, running legacy AQS at 50k+ on a corpus outside the Windows Search Indexer catalog is O(N) per query (~80 min wall time for 200 queries) and produces no decision-changing information \u2014 the gate (\u226410% legacy) is satisfied at every scale by the Big-O projection. 
naive-scan included as a control at full 50k scale: TTFR ~0ms (yields from dir listing immediately), but total p99=8329ms vs indexed total p99=210ms \u2014 2.5%, 97.5% improvement at the tail.", + "pinned": { + "schemaVersion": 1, + "runId": "2026-05-12T00-09-30Z", + "provider": "indexed", + "corpus": { + "name": "small", + "files": 50000, + "bytes": 2997105923, + "seed": 42 + }, + "machine": { + "os": "Microsoft Windows NT 10.0.19045.0", + "processorCount": 16, + "ramGB": 31.9 + }, + "aggregates": { + "ttfrMedianMs": 11, + "ttfrP95Ms": 22, + "ttfrP99Ms": 88, + "totalMedianMs": 40, + "totalP95Ms": 71, + "totalP99Ms": 210, + "queryCount": 200 + }, + "queries": [ + { + "id": "exact-report_42", + "text": "report_42", + "class": "exact", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 9, + "resultCount": 0, + "peakRamMB": 0.0552978515625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "exact-alpha_999", + "text": "alpha_999", + "class": "exact", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.072296142578125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "exact-missingfile", + "text": "missingfile", + "class": "exact", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.05445098876953125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.txt", + "text": "*.txt", + "class": "glob", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.02350616455078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.md", + "text": "*.md", + "class": "glob", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.05515289306640625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.docx", + "text": "*.docx", + "class": "glob", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 
0.019500732421875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.pdf", + "text": "*.pdf", + "class": "glob", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.0234222412109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.jpg", + "text": "*.jpg", + "class": "glob", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.02349853515625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.cs", + "text": "*.cs", + "class": "glob", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.015380859375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.log", + "text": "*.log", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0.0233612060546875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-ext.zip", + "text": "*.zip", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0.02341461181640625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-report*", + "text": "report*", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0.023193359375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-summary*", + "text": "summary*", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0.01568603515625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-draft*", + "text": "draft*", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + "resultCount": 0, + "peakRamMB": 0.023529052734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "glob-data*", + "text": "data*", + "class": "glob", + "timeToFirstResultMs": 0, + "timeToCompleteMs": 0, + 
"resultCount": 0, + "peakRamMB": 0.01526641845703125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 33, + "timeToCompleteMs": 101, + "resultCount": 1897, + "peakRamMB": 1.0724258422851562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 60, + "resultCount": 1860, + "peakRamMB": 0.9811325073242188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 276, + "timeToCompleteMs": 410, + "resultCount": 1930, + "peakRamMB": 0.9927597045898438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-build", + "text": "build", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 58, + "resultCount": 2017, + "peakRamMB": 0.9785003662109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-alpha", + "text": "alpha", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 56, + "resultCount": 2008, + "peakRamMB": 0.994140625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-north", + "text": "north", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 64, + "resultCount": 1909, + "peakRamMB": 0.8930892944335938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-blue", + "text": "blue", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 50, + "resultCount": 1919, + "peakRamMB": 0.9004898071289062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "substr-internal", + "text": "internal", + "class": "substring", + "timeToFirstResultMs": 23, + "timeToCompleteMs": 67, + "resultCount": 1894, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + 
"id": "substr-annual", + "text": "annual", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 48, + "resultCount": 1880, + "peakRamMB": 0.913543701171875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "extsub-.docx|report", + "text": ".docx|report", + "class": "ext+substring", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.03118896484375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "extsub-.pdf|summary", + "text": ".pdf|summary", + "class": "ext+substring", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.0233917236328125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "extsub-.cs|config", + "text": ".cs|config", + "class": "ext+substring", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.02339935302734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "extsub-.log|build", + "text": ".log|build", + "class": "ext+substring", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.023529052734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "content-xqz_alpha", + "text": "xqz_alpha", + "class": "content", + "timeToFirstResultMs": 1, + "timeToCompleteMs": 1, + "resultCount": 0, + "peakRamMB": 0.0233154296875, + "expectedMin": 26081, + "expectedMax": 28828 + }, + { + "id": "content-xqz_beta", + "text": "xqz_beta", + "class": "content", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.01568603515625, + "expectedMin": 26341, + "expectedMax": 29116 + }, + { + "id": "content-xqz_gamma", + "text": "xqz_gamma", + "class": "content", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.0312652587890625, + "expectedMin": 25917, + "expectedMax": 28648 + }, + { + "id": "content-xqz_delta", + "text": "xqz_delta", + "class": 
"content", + "timeToFirstResultMs": 2, + "timeToCompleteMs": 2, + "resultCount": 0, + "peakRamMB": 0.0234832763671875, + "expectedMin": 26042, + "expectedMax": 28785 + }, + { + "id": "unicode-cjk", + "text": "\u6d4b\u8bd5", + "class": "substring", + "timeToFirstResultMs": 3, + "timeToCompleteMs": 11, + "resultCount": 464, + "peakRamMB": 0.23340606689453125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "unicode-emoji", + "text": "\ud83d\ude00", + "class": "substring", + "timeToFirstResultMs": 4, + "timeToCompleteMs": 24, + "resultCount": 460, + "peakRamMB": 0.2801666259765625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-0-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 67, + "resultCount": 1897, + "peakRamMB": 0.9283218383789062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-1-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 49, + "resultCount": 1860, + "peakRamMB": 0.8991241455078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-2-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 88, + "resultCount": 1930, + "peakRamMB": 1.0190505981445312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-3-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 62, + "resultCount": 1897, + "peakRamMB": 0.9973831176757812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-4-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 68, + "resultCount": 1860, + "peakRamMB": 0.9810409545898438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-5-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 51, + "resultCount": 1930, + 
"peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-6-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 56, + "resultCount": 1897, + "peakRamMB": 0.9640731811523438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-7-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 79, + "resultCount": 1860, + "peakRamMB": 1.0122299194335938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-8-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 68, + "resultCount": 1930, + "peakRamMB": 0.9881820678710938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-9-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 20, + "timeToCompleteMs": 78, + "resultCount": 1897, + "peakRamMB": 1.0365066528320312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-10-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 20, + "timeToCompleteMs": 66, + "resultCount": 1860, + "peakRamMB": 1.0889129638671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-11-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 18, + "timeToCompleteMs": 69, + "resultCount": 1930, + "peakRamMB": 1.1788864135742188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-12-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 59, + "resultCount": 1897, + "peakRamMB": 1.1258392333984375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-13-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 22, + "timeToCompleteMs": 70, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-14-config", + 
"text": "config", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 71, + "resultCount": 1930, + "peakRamMB": 1.1268844604492188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-15-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 58, + "resultCount": 1897, + "peakRamMB": 1.1492843627929688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-16-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 24, + "timeToCompleteMs": 59, + "resultCount": 1860, + "peakRamMB": 1.1343460083007812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-17-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 77, + "resultCount": 1930, + "peakRamMB": 1.2038650512695312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-18-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 63, + "resultCount": 1897, + "peakRamMB": 1.2326583862304688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-19-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 24, + "timeToCompleteMs": 210, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-20-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 60, + "timeToCompleteMs": 89, + "resultCount": 1930, + "peakRamMB": 1.284088134765625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-21-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 47, + "resultCount": 1897, + "peakRamMB": 1.210418701171875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-22-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 50, + 
"resultCount": 1860, + "peakRamMB": 1.1400909423828125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-23-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 46, + "resultCount": 1930, + "peakRamMB": 1.2507781982421875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-24-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 66, + "resultCount": 1897, + "peakRamMB": 1.2651290893554688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-25-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 54, + "resultCount": 1860, + "peakRamMB": 1.2348251342773438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-26-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 62, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-27-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 46, + "resultCount": 1897, + "peakRamMB": 1.2124099731445312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-28-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 45, + "resultCount": 1860, + "peakRamMB": 1.1876449584960938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-29-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 48, + "resultCount": 1930, + "peakRamMB": 1.281097412109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-30-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 18, + "timeToCompleteMs": 57, + "resultCount": 1897, + "peakRamMB": 1.2580795288085938, + "expectedMin": 0, + "expectedMax": 
2147483647 + }, + { + "id": "pad-31-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 42, + "resultCount": 1860, + "peakRamMB": 1.1807632446289062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-32-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 48, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-33-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 40, + "resultCount": 1897, + "peakRamMB": 1.2173309326171875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-34-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 47, + "resultCount": 1860, + "peakRamMB": 1.1262359619140625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-35-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 34, + "resultCount": 1930, + "peakRamMB": 1.2108993530273438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-36-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 47, + "resultCount": 1897, + "peakRamMB": 1.1885223388671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-37-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 51, + "resultCount": 1860, + "peakRamMB": 1.19464111328125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-38-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 54, + "resultCount": 1930, + "peakRamMB": 1.188262939453125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-39-report", + "text": "report", + "class": "substring", + 
"timeToFirstResultMs": 15, + "timeToCompleteMs": 54, + "resultCount": 1897, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-40-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 47, + "resultCount": 1860, + "peakRamMB": 1.0337066650390625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-41-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 48, + "resultCount": 1930, + "peakRamMB": 1.0734634399414062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-42-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 30, + "resultCount": 1897, + "peakRamMB": 1.071990966796875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-43-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 1.08831787109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-44-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 33, + "resultCount": 1930, + "peakRamMB": 1.0503158569335938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-45-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 53, + "resultCount": 1897, + "peakRamMB": 1.0498580932617188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-46-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 50, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-47-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 88, + "timeToCompleteMs": 117, + "resultCount": 1930, + "peakRamMB": 1.0880966186523438, + 
"expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-48-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 50, + "resultCount": 1897, + "peakRamMB": 1.06610107421875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-49-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 44, + "resultCount": 1860, + "peakRamMB": 1.0115127563476562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-50-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 47, + "resultCount": 1930, + "peakRamMB": 1.02679443359375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-51-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 48, + "resultCount": 1897, + "peakRamMB": 1.048553466796875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-52-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 33, + "resultCount": 1860, + "peakRamMB": 1.0491485595703125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-53-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 34, + "resultCount": 1930, + "peakRamMB": 1.0885772705078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-54-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 31, + "resultCount": 1897, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-55-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 6, + "timeToCompleteMs": 34, + "resultCount": 1860, + "peakRamMB": 1.0342941284179688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-56-config", + "text": "config", + 
"class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 35, + "resultCount": 1930, + "peakRamMB": 1.0718841552734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-57-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 1.0723876953125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-58-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 33, + "resultCount": 1860, + "peakRamMB": 1.01983642578125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-59-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 50, + "resultCount": 1930, + "peakRamMB": 1.0272369384765625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-60-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 48, + "resultCount": 1897, + "peakRamMB": 1.0117568969726562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-61-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 44, + "resultCount": 1860, + "peakRamMB": 1.0224227905273438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-62-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 49, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-63-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 33, + "resultCount": 1897, + "peakRamMB": 1.0117263793945312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-64-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 40, + "resultCount": 1860, + 
"peakRamMB": 0.9737777709960938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-65-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 40, + "resultCount": 1930, + "peakRamMB": 0.9962234497070312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-66-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 48, + "resultCount": 1897, + "peakRamMB": 0.9970779418945312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-67-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 51, + "resultCount": 1860, + "peakRamMB": 0.9738006591796875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-68-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 44, + "resultCount": 1930, + "peakRamMB": 0.9588623046875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-69-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 48, + "resultCount": 1897, + "peakRamMB": 0.9895477294921875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-70-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 56, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-71-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 40, + "resultCount": 1930, + "peakRamMB": 0.9572219848632812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-72-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 29, + "resultCount": 1897, + "peakRamMB": 0.9959564208984375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": 
"pad-73-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 29, + "resultCount": 1860, + "peakRamMB": 0.9732894897460938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-74-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 29, + "resultCount": 1930, + "peakRamMB": 0.9737777709960938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-75-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 36, + "resultCount": 1897, + "peakRamMB": 0.9661941528320312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-76-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 25, + "resultCount": 1860, + "peakRamMB": 0.92803955078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-77-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 30, + "resultCount": 1930, + "peakRamMB": 0.9956512451171875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-78-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-79-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 28, + "resultCount": 1860, + "peakRamMB": 0.9650802612304688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-80-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 33, + "resultCount": 1930, + "peakRamMB": 0.98114013671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-81-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + 
"timeToCompleteMs": 46, + "resultCount": 1897, + "peakRamMB": 0.94866943359375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-82-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 41, + "resultCount": 1860, + "peakRamMB": 0.942626953125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-83-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 46, + "resultCount": 1930, + "peakRamMB": 0.9808502197265625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-84-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 18, + "timeToCompleteMs": 50, + "resultCount": 1897, + "peakRamMB": 0.96466064453125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-85-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 44, + "resultCount": 1860, + "peakRamMB": 0.9652633666992188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-86-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 40, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-87-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 52, + "resultCount": 1897, + "peakRamMB": 1.0194625854492188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-88-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 0.9267654418945312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-89-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 27, + "resultCount": 1930, + "peakRamMB": 0.9958038330078125, + "expectedMin": 0, + 
"expectedMax": 2147483647 + }, + { + "id": "pad-90-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 0.967132568359375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-91-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 26, + "resultCount": 1860, + "peakRamMB": 0.9347610473632812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-92-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 26, + "resultCount": 1930, + "peakRamMB": 0.9729690551757812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-93-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 0.97283935546875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-94-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 29, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-95-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 27, + "resultCount": 1930, + "peakRamMB": 1.01031494140625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-96-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 41, + "resultCount": 1897, + "peakRamMB": 0.9743194580078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-97-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 43, + "resultCount": 1860, + "peakRamMB": 0.9581451416015625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-98-config", + "text": "config", + "class": "substring", + 
"timeToFirstResultMs": 16, + "timeToCompleteMs": 43, + "resultCount": 1930, + "peakRamMB": 1.018463134765625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-99-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 0.99676513671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-100-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 31, + "resultCount": 1860, + "peakRamMB": 0.9658660888671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-101-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 31, + "resultCount": 1930, + "peakRamMB": 0.9508132934570312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-102-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 28, + "resultCount": 1897, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-103-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 0.9352035522460938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-104-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 31, + "timeToCompleteMs": 45, + "resultCount": 1930, + "peakRamMB": 1.0111770629882812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-105-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 23, + "resultCount": 1897, + "peakRamMB": 0.9881439208984375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-106-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 
0.9792861938476562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-107-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 28, + "resultCount": 1930, + "peakRamMB": 1.0189666748046875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-108-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 30, + "resultCount": 1897, + "peakRamMB": 0.9961776733398438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-109-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 0.9650192260742188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-110-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 47, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-111-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 43, + "resultCount": 1897, + "peakRamMB": 0.9813613891601562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-112-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 48, + "resultCount": 1860, + "peakRamMB": 0.9491348266601562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-113-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 44, + "resultCount": 1930, + "peakRamMB": 0.9640350341796875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-114-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 36, + "resultCount": 1897, + "peakRamMB": 0.9420928955078125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": 
"pad-115-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 43, + "resultCount": 1860, + "peakRamMB": 0.9500808715820312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-116-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 46, + "resultCount": 1930, + "peakRamMB": 0.9499969482421875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-117-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 50, + "resultCount": 1897, + "peakRamMB": 0.9663925170898438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-118-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 44, + "resultCount": 1860, + "peakRamMB": 0.9189224243164062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-119-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 44, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-120-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 40, + "resultCount": 1897, + "peakRamMB": 0.9506607055664062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-121-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 32, + "resultCount": 1860, + "peakRamMB": 0.96551513671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-122-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 36, + "resultCount": 1930, + "peakRamMB": 0.9821701049804688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-123-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + 
"timeToCompleteMs": 41, + "resultCount": 1897, + "peakRamMB": 0.9954605102539062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-124-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 45, + "resultCount": 1860, + "peakRamMB": 0.933685302734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-125-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 11, + "timeToCompleteMs": 40, + "resultCount": 1930, + "peakRamMB": 1.0044479370117188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-126-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 38, + "resultCount": 1897, + "peakRamMB": 0.940948486328125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-127-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 43, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-128-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 19, + "timeToCompleteMs": 45, + "resultCount": 1930, + "peakRamMB": 0.9737014770507812, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-129-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 35, + "resultCount": 1897, + "peakRamMB": 0.949310302734375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-130-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 38, + "resultCount": 1860, + "peakRamMB": 0.9207382202148438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-131-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 41, + "resultCount": 1930, + "peakRamMB": 0.957366943359375, + 
"expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-132-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 25, + "timeToCompleteMs": 52, + "resultCount": 1897, + "peakRamMB": 0.9280319213867188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-133-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 39, + "resultCount": 1860, + "peakRamMB": 0.949920654296875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-134-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 18, + "timeToCompleteMs": 49, + "resultCount": 1930, + "peakRamMB": 0.9803543090820312, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-135-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 32, + "resultCount": 1897, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-136-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 13, + "timeToCompleteMs": 40, + "resultCount": 1860, + "peakRamMB": 0.9429092407226562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-137-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 41, + "resultCount": 1930, + "peakRamMB": 0.996551513671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-138-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 14, + "timeToCompleteMs": 33, + "resultCount": 1897, + "peakRamMB": 0.9495849609375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-139-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 39, + "resultCount": 1860, + "peakRamMB": 0.9554977416992188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-140-config", + "text": 
"config", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 48, + "resultCount": 1930, + "peakRamMB": 0.9576416015625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-141-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 34, + "resultCount": 1897, + "peakRamMB": 0.9812240600585938, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-142-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 29, + "resultCount": 1860, + "peakRamMB": 0.9424362182617188, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-143-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 26, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-144-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 26, + "resultCount": 1897, + "peakRamMB": 0.9663467407226562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-145-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 9, + "timeToCompleteMs": 30, + "resultCount": 1860, + "peakRamMB": 0.9512557983398438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-146-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 29, + "resultCount": 1930, + "peakRamMB": 0.965728759765625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-147-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 24, + "resultCount": 1897, + "peakRamMB": 0.9880828857421875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-148-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 25, + 
"resultCount": 1860, + "peakRamMB": 0.9419784545898438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-149-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 35, + "resultCount": 1930, + "peakRamMB": 0.9664077758789062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-150-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 37, + "resultCount": 1897, + "peakRamMB": 0.9435577392578125, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-151-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 12, + "timeToCompleteMs": 42, + "resultCount": 1860, + "peakRamMB": 0.9197769165039062, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-152-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 41, + "resultCount": 1930, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-153-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 16, + "timeToCompleteMs": 44, + "resultCount": 1897, + "peakRamMB": 0.957855224609375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-154-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 17, + "timeToCompleteMs": 41, + "resultCount": 1860, + "peakRamMB": 0.9504776000976562, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-155-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 15, + "timeToCompleteMs": 36, + "resultCount": 1930, + "peakRamMB": 0.935791015625, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-156-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 25, + "resultCount": 1897, + "peakRamMB": 0.9889984130859375, + "expectedMin": 0, + "expectedMax": 
2147483647 + }, + { + "id": "pad-157-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 26, + "resultCount": 1860, + "peakRamMB": 0.98675537109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-158-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 21, + "resultCount": 1930, + "peakRamMB": 0.9879608154296875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-159-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 24, + "resultCount": 1897, + "peakRamMB": 0.9717636108398438, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-160-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 25, + "resultCount": 1860, + "peakRamMB": 0, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-161-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 10, + "timeToCompleteMs": 28, + "resultCount": 1930, + "peakRamMB": 1.0195236206054688, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-162-report", + "text": "report", + "class": "substring", + "timeToFirstResultMs": 7, + "timeToCompleteMs": 24, + "resultCount": 1897, + "peakRamMB": 0.9481048583984375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-163-summary", + "text": "summary", + "class": "substring", + "timeToFirstResultMs": 8, + "timeToCompleteMs": 27, + "resultCount": 1860, + "peakRamMB": 0.98895263671875, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-164-config", + "text": "config", + "class": "substring", + "timeToFirstResultMs": 6, + "timeToCompleteMs": 24, + "resultCount": 1930, + "peakRamMB": 0.988128662109375, + "expectedMin": 0, + "expectedMax": 2147483647 + }, + { + "id": "pad-165-report", + "text": "report", + "class": "substring", + 
"timeToFirstResultMs": 6, + "timeToCompleteMs": 23, + "resultCount": 1897, + "peakRamMB": 0.9811782836914062, + "expectedMin": 0, + "expectedMax": 2147483647 + } + ] + }, + "control": { + "provider": "naive-scan", + "runId": "2026-05-11T23-53-03Z", + "corpus": { + "name": "small", + "files": 50000, + "bytes": 2997105923, + "seed": 42 + }, + "aggregates": { + "ttfrMedianMs": 0, + "ttfrP95Ms": 0, + "ttfrP99Ms": 48, + "totalMedianMs": 44, + "totalP95Ms": 58, + "totalP99Ms": 8329, + "queryCount": 200 + } + }, + "perClassTtfrMedianMs": { + "content": { + "naiveScanMedianMs": 0, + "indexedMedianMs": 2 + }, + "exact": { + "naiveScanMedianMs": 48, + "indexedMedianMs": 2 + }, + "ext+substring": { + "naiveScanMedianMs": 0, + "indexedMedianMs": 2 + }, + "glob": { + "naiveScanMedianMs": 0, + "indexedMedianMs": 0 + }, + "substring": { + "naiveScanMedianMs": 0, + "indexedMedianMs": 13 + } + }, + "gatesSummary": { + "ttfrP99VsNaiveScan": "88ms vs 48ms (183.3%)", + "totalP99VsNaiveScan": "210ms vs 8329ms (2.5%)", + "ttfrMedianVsLegacyCustomCorpus": "11ms vs 2025ms (0.5%) -- see 2026-05-10T05-29-56Z.json", + "verdict": "PASS -- indexed meets all CLAUDE.md acceptance gates" + } +} \ No newline at end of file diff --git a/docs/csharp-search-service.md b/docs/csharp-search-service.md new file mode 100644 index 000000000000..5685362df3c4 --- /dev/null +++ b/docs/csharp-search-service.md @@ -0,0 +1,674 @@ +# C# Search Service — Branch Documentation + +Branch: `feature/csharp-search-service` + +This document covers the full implementation, architecture, workflow, and +file-level changes introduced by this branch. See `CLAUDE.md` for hard +constraints (latency gates, no-UAC rule, resource ceiling). + +--- + +## Background + +The upstream Files app uses `Windows.Storage.Search` (AQS) for in-folder +search. That stack has a fixed per-query COM startup cost (~1–2 s) before +enumeration even begins, and falls back to a full directory walk when the +corpus is outside the Windows Search index. 
This branch introduces a sidecar +Windows Service with an in-memory inverted index to hit the CLAUDE.md gate +(≤10 % of legacy latency). + +An earlier PoC built the service in Rust (Tantivy + tonic) on a separate +branch. This branch (`feature/csharp-search-service`) replaces that binary +with a pure C# service while keeping the same gRPC wire format and the +same `ISearchProvider` abstraction — removing the Rust toolchain dependency +and making the codebase fully maintainable by the existing C# team. + +--- + +## High-Level Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Files.App (WinUI 3, runs as the logged-in user) │ +│ │ +│ SearchRouter │ +│ ├── UseIndexed() == false → FolderSearch (legacy, upstream) │ +│ └── UseIndexed() == true → IndexedSearchProvider │ +│ │ │ +│ named pipe: \\.\pipe\files-search │ +│ gRPC / HTTP/2 (cleartext, local) │ +└──────────────────────────────────────────────────────────────────┘ + │ +┌─────────────────────────────────────▼────────────────────────────┐ +│ files-search-service.exe (Windows Service, LocalSystem) │ +│ │ +│ SearchGrpcService ──► FileIndex.Search() │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ FileIndex │ │ +│ │ _docs — DocStore (parallel path/name/size/mtime │ │ +│ │ arrays, indexed by doc ID) │ │ +│ │ _index — Dictionary<string, int[]> │ │ +│ │ posting lists, sorted, frozen per rebuild │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ IndexBootstrapper ──► UsnJournalReader (initial build) │ +│ ChangeWatcher ──► EventBatcher (live updates) │ +│ ProcessThrottle ──► battery/fullscreen/CPU guard │ +│ IndexPersistence ──► index.bin (restart fast-load) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Component Breakdown + +### `src/Files.SearchService/` — the service + +#### `Program.cs` + +Entry point. 
Detects its execution context: + +- **`!Environment.UserInteractive`** (started by SCM) → `ServiceBase.Run(new SearchWindowsService())`. +- **Interactive** (dev / console) → `RunAsync(CancellationToken)` directly (Ctrl+C to stop). + +`RunAsync` does, in order: + +1. `ProcessThrottle.ApplyBackgroundPriority()` — `PROCESS_MODE_BACKGROUND_BEGIN`. +2. `ProcessThrottle.StartPolling()` — 2-second timer for battery/fullscreen/CPU. +3. Resolve `FILES_SEARCH_ROOT` (env var → user profile → drive root when running as LocalSystem). +4. `IndexBootstrapper.BootstrapAsync()` — cold-start or reconcile (see below). +5. Start `ChangeWatcher` with an overflow handler that triggers a full rebuild. +6. Start a 5-minute `Timer` that persists the index to disk when dirty. +7. Build and start the Kestrel gRPC server on the named pipe `files-search`. + +Named pipe DACL grants: + +| Principal | Rights | +| ------------------ | ----------- | +| SYSTEM | FullControl | +| Administrators | FullControl | +| AuthenticatedUsers | ReadWrite | + +#### `SearchWindowsService.cs` + +Thin `ServiceBase` shim. `OnStart` launches `Program.RunAsync` on a task; +`OnStop` cancels the token and waits up to 10 s for a clean shutdown. + +Service metadata: + +``` +ServiceName: FilesSearchService +CanStop: true +CanPauseAndContinue: false +``` + +--- + +#### `Index/FileIndex.cs` + +Thread-safe in-memory inverted index. + +**Storage** — two volatile references swapped atomically on rebuild: + +``` +_docs — DocStore (four parallel arrays: paths, filenames, sizes, modified times) +_index — Dictionary<string, int[]> (token → sorted posting list) +``` + +**Writes** use `ReaderWriterLockSlim`. Reads snapshot both volatile +references without acquiring the lock — safe because references are +replaced atomically, never mutated in place after publication. 
+ +**ReplaceAll** (full rebuild): + +``` +records → Tokenizer.Tokenize(filename) for each + → Dictionary<string, List<int>> accumulated + → sorted int[] frozen into new _index + → _docs replaced +``` + +**Upsert** (incremental): + +``` +Find existing doc for path → RemoveFromIndex (soft delete) +Add new doc id → InsertPosting (binary-search insert maintaining sorted order) +``` + +**Delete**: marks the doc ID as deleted in `DocStore`; posting lists are +cleaned lazily on next `ReplaceAll` to avoid O(n) per-delete work. + +**Search** (lock-free): + +``` +Tokenize(query) +For each token: + posting = _index[token] ← missing token → return [] + hits = hits == null ? posting : Intersect(hits, posting) +Filter by scopePaths (prefix match, OrdinalIgnoreCase) +Score via Scorer.Score → sort descending → return up to maxResults +``` + +Intersection is a standard two-pointer merge on sorted `int[]` arrays — +O(|a| + |b|) per token pair. + +--- + +#### `Index/Tokenizer.cs` + +Splits filenames into lowercase tokens: + +1. Split on delimiter characters: ` . _ - ( ) [ ] + = & ,` +2. For each segment, split further on camelCase and digit/letter transitions: + - Upper after lower → boundary (`MyDocument` → `my`, `document`) + - Upper + next-lower after length > 1 → acronym end (`HTMLParser` → `html`, `parser`) + - Letter → digit and digit → letter transitions + +Example: `MyDocument_v2Final.docx` → `["my", "document", "v", "2", "final", "docx"]` + +**Known gap:** mid-string substrings are not matched. Searching `phab` will +not find `ALPHABET.md` because `phab` is not a token. This is tracked as a +roadmap item (n-gram field). 
+ +--- + +#### `Index/IndexBootstrapper.cs` + +Handles two startup paths: + +**Cold start** (no `index.bin`): + +``` +UsnJournalReader.Enumerate() → List +FileIndex.ReplaceAll(records) +IndexPersistence.SaveAsync(persistPath, records) +``` + +**Warm start** (existing `index.bin`): + +``` +IndexPersistence.LoadAsync() → persisted records +FileIndex.ReplaceAll(persisted) ← service can answer queries immediately +UsnJournalReader.Enumerate() → fsMap (runs in parallel) +Diff persisted vs fsMap: + new or modified → index.Upsert() + deleted → index.Delete() +Re-persist reconciled state +``` + +The warm-start design lets the service answer queries from the cached index +within milliseconds of startup, even before the reconcile walk finishes. + +--- + +#### `Index/IndexPersistence.cs` + +Binary format (`index.bin`): + +``` +[4 bytes] magic: 0x46534958 ("FSIX") +[4 bytes] version: 1 +[8 bytes] record count +Per record: + [8 bytes] size_bytes (u64) + [8 bytes] modified_unix_ms (FILETIME) + [4 bytes] full_path UTF-8 length + [N bytes] full_path UTF-8 + [4 bytes] file_name UTF-8 length + [N bytes] file_name UTF-8 +``` + +Writes are atomic: temp file written then renamed over the target. +Version mismatch on load triggers a full rebuild (not a crash). + +--- + +#### `Usn/UsnJournalReader.cs` + +Enumerates every file on an NTFS volume using `FSCTL_ENUM_USN_DATA`. + +**USN path** (requires LocalSystem / `SeBackupPrivilege`): + +``` +OpenVolumeHandle(\\.\C:) +ParseMft(): + DeviceIoControl(FSCTL_ENUM_USN_DATA) in 256 KB chunks + → dirs : Dictionary + → files : List +Parallel.ForEach(files): + ResolvePath() — walk parent-FRN chain up to rootFrn + → FileEntry(fullPath, fileName, size=0, timestamp) +``` + +Path resolution walks the `dirs` dictionary up the FRN parent chain, +limited to 64 hops as a cycle guard. Files not under `root` are dropped. + +Note: USN records carry size as 0 (it's a metadata-only log); the watcher +fills accurate sizes in on the next file-change event. 
+ +**Fallback path** (dev / non-LocalSystem): `DirectoryInfo.EnumerateFiles` +with `RecurseSubdirectories=true`, `AttributesToSkip=ReparsePoint`. + +--- + +#### `Watch/ChangeWatcher.cs` + `Watch/EventBatcher.cs` + +`ChangeWatcher` wraps `FileSystemWatcher` (which uses `ReadDirectoryChangesW` +on Windows). Events are forwarded to `EventBatcher`. + +`EventBatcher` coalesces bursts via a 250 ms debounce: + +``` +Enqueue(change): + _pending[path] = change ← last event wins (delete after create = delete) + reset 250 ms timer + +Flush() (on timer): + batch = _pending.Values + _pending.Clear() + ApplyBatch(batch) +``` + +`ApplyBatch` stats each upsert path (`FileInfo`) and calls +`FileIndex.Upsert` or `FileIndex.Delete`. Reparse points and directories +are skipped. IOExceptions (race between event and file deletion) are +swallowed. + +**Overflow**: if `ReadDirectoryChangesW`'s internal kernel buffer overflows +(burst too large), `ChangeWatcher.Overflow` fires. `Program.RunAsync` +handles this by stopping the watcher, running a full `BootstrapAsync`, then +restarting — changes whose events were dropped are recovered by the rebuild. + +--- + +#### `Throttle/ProcessThrottle.cs` + +Sets `PROCESS_MODE_BACKGROUND_BEGIN` once at startup, lowering the +process's CPU and I/O scheduling priority below normal. + +Polls every 2 seconds for three conditions: + +| Condition | Win32 API | Threshold | +| ---------- | -------------------------------- | --------------------- | +| On battery | `GetSystemPowerStatus` | `ACLineStatus == 0` | +| Fullscreen | `SHQueryUserNotificationState` | states 3 or 4 | +| CPU high | `GetSystemTimes` delta | > 70 % | + +`ShouldPause()` returns the current value of a `volatile bool` field. The watcher's commit loop +(EventBatcher flush → FileIndex.Upsert) skips the index-publish step +while paused — events are still enqueued, just not committed to the index +until conditions improve. 
+ +--- + +#### `Grpc/SearchGrpcService.cs` + +Implements the generated `FilesSearch.FilesSearchBase`: + +- **`Health`** — returns version, `DocCount`, and `IsIndexing` flag. +- **`Search`** — calls `FileIndex.Search(query, maxResults, scopePaths)`, + streams each `QueryHit` back as a `SearchHit` proto message. + Checks cancellation between messages. + +--- + +#### `proto/files_search.proto` + +Single source of truth for the wire format, shared between the C# service +and `Files.IndexedSearch.Client` (Grpc.Tools generates stubs from this file). + +```protobuf +service FilesSearch { + rpc Health(HealthRequest) returns (HealthResponse); + rpc Search(SearchRequest) returns (stream SearchHit); +} +``` + +`SearchRequest` carries `query`, `max_results`, and a repeated +`scope_paths` field (full directory paths the results must be prefixed by). + +--- + +### `src/Files.IndexedSearch.Client/` — the C# client + +`IndexedSearchProvider` implements `ISearchProvider` over the named pipe. + +**Channel construction** (lazy, reused for provider lifetime): + +``` +FILES_SEARCH_SERVICE_URL set? → GrpcChannel.ForAddress(url) [TCP, dev/CI] +Otherwise: + SocketsHttpHandler { ConnectCallback = NamedPipeClientStream("files-search") } + GrpcChannel.ForAddress("http://localhost", handler) [named pipe] +``` + +The dummy `http://localhost` URI satisfies gRPC's URI requirement; the +transport is actually the named pipe. + +**`SearchAsync`**: builds a `SearchRequest`, opens a server-streaming call, +yields each `SearchHit` as a `SearchResult` via `IAsyncEnumerable`. + +**`GetHealthAsync`**: catches `RpcException` and returns +`IsAvailable=false` — the routing layer never needs try/catch. + +--- + +### `src/Files.App/` — app-side changes + +#### `Utils/Storage/Search/SearchRouter.cs` + +Drop-in replacement for `FolderSearch`. Routing logic: + +``` +UseIndexed(): + 1. settings.GeneralSettingsService.UseIndexedSearch OR + env FILES_SEARCH_PROVIDER=Indexed → enabled + 2. 
query is null or empty → legacy + 3. query contains * or ? → legacy (glob) + 4. query starts with $ → legacy (AQS prefix) + 5. query contains : → legacy (AQS field) + 6. folder is null, "Home", or a library → legacy + → indexed + +SearchIndexedAsync(): + GetHealthAsync() → if unavailable, fall back to legacy + FileIndex.Search() → stream results + Fire SearchTick at 32 results, then every 300 + ToListedItem(): + No StorageFile.GetFileFromPathAsync round-trip + Creation time = ModifiedUtc (v0 fidelity trade-off) +``` + +#### `Helpers/Application/SearchServiceManager.cs` + +Called fire-and-forget from `AppLifecycleHelper` at startup. + +``` +IsPackaged()? + true → ServiceController("FilesSearchService").Start() if stopped + false → RegisterStartup(HKCU\Run) + LaunchIfNotRunning(files-search-service.exe) +``` + +Dev mode locates the exe via `AppContext.BaseDirectory`; packaged mode via +`Package.Current.InstalledLocation`. + +#### `Package.appxmanifest` + +```xml + + + +``` + +SCM installs and auto-starts the service at package install time (already +elevated). No UAC prompt at runtime, ever. + +#### Settings UI (`AdvancedPage.xaml`, `AdvancedViewModel.cs`, `GeneralSettingsService.cs`) + +New `UseIndexedSearch` boolean setting, surfaced as a `ToggleSwitch` in +**Settings → Advanced** with strings `SettingsUseIndexedSearch` / +`SettingsUseIndexedSearchDescription`. The setting persists via the +existing `IGeneralSettingsService` store and is read by `SearchRouter.UseIndexed()`. 
+ +--- + +## Startup Sequence + +``` +Windows login + │ + ├─ SCM reads MSIX manifest + │ └─ auto-starts FilesSearchService as LocalSystem + │ + └─ Files.App starts (user session) + │ + ├─ AppLifecycleHelper.InitializeAsync() + │ └─ Task.Run(SearchServiceManager.EnsureRunning) + │ └─ (packaged) ServiceController.Start() if stopped + │ + └─ User types in search box + │ + └─ SearchRouter.SearchAsync() + ├─ UseIndexed() == false → FolderSearch (legacy) + └─ UseIndexed() == true + └─ IndexedSearchProvider.GetHealthAsync() + ├─ unavailable → FolderSearch fallback + └─ available + └─ stream results from FileIndex +``` + +--- + +## Service Startup Sequence + +``` +Program.RunAsync() + │ + ├─ ProcessThrottle.ApplyBackgroundPriority() + ├─ ProcessThrottle.StartPolling() + │ + ├─ ResolveRoot() (FILES_SEARCH_ROOT → %USERPROFILE% → C:\) + │ + ├─ IndexBootstrapper.BootstrapAsync() + │ ├─ index.bin exists? + │ │ yes → LoadAsync() → ReplaceAll() [queries live immediately] + │ │ └─ UsnJournalReader.Enumerate() → diff → upsert/delete + │ └─ no → UsnJournalReader.Enumerate() → ReplaceAll() → SaveAsync() + │ + ├─ ChangeWatcher.Start() + │ └─ FileSystemWatcher (ReadDirectoryChangesW) + │ └─ EventBatcher (250 ms debounce) + │ └─ FileIndex.Upsert / Delete + │ + ├─ periodic save Timer (every 5 min, when dirty) + │ + └─ Kestrel gRPC server + └─ named pipe: \\.\pipe\files-search + └─ SearchGrpcService +``` + +--- + +## Query Routing Decision Tree + +``` +User types query "report" + │ + ▼ +SearchRouter.UseIndexed() + │ + enabled? ──No──► FolderSearch (legacy AQS) + │ + Yes + │ + query empty? ──Yes──► legacy + │ + glob chars (* ?)? ──Yes──► legacy + │ + AQS prefix ($)? ──Yes──► legacy + │ + AQS field (:)? ──Yes──► legacy + │ + real on-disk folder? ──No──► legacy + │ + Yes + │ + GetHealthAsync() ──unavailable──► legacy fallback + │ + available + │ + ▼ + FileIndex.Search("report", maxResults, [folder]) + │ + Tokenize("report") → ["report"] + │ + posting = _index["report"] (e.g. 
1 847 doc IDs) + │ + filter by scope prefix + │ + score → sort → stream to UI +``` + +--- + +## Data Flow: Inverted Index Build + +``` +UsnJournalReader + │ + │ FSCTL_ENUM_USN_DATA (256 KB chunks) + │ → USN_RECORD_V2 for every MFT entry + │ → dirs dict (FRN → parent FRN + name) + │ → files list (FRN, parent FRN, name, timestamp) + │ + │ Parallel.ForEach(files): + │ ResolvePath(parentFrn, fileName, rootFrn) + │ → walk parent-FRN chain → full path + │ + ▼ +List(fullPath, fileName, sizeBytes=0, modifiedUtc) + │ + ▼ +FileIndex.ReplaceAll() + │ + │ for each record: + │ DocStore.Add(path, name, size, mtime) → docId + │ Tokenizer.Tokenize(name) → tokens + │ for each token: index[token].Add(docId) + │ + │ Convert List → sorted int[] (posting lists) + │ + ▼ +_index : Dictionary ~volatile snapshot +_docs : DocStore ~volatile snapshot +``` + +--- + +## Project Layout Changes + +``` +Files.slnx + └─ added: src/Files.SearchService/ + src/Files.SearchAbstraction/ (ISearchProvider interface) + src/Files.LegacySearch/ (AQS wrapper, frozen) + src/Files.IndexedSearch.Client/ + tests/Files.Search.Bench/ + tests/Files.Search.Correctness/ + +New files (untracked or new): + src/Files.SearchService/ ← the service (new project) + src/Files.App/Helpers/Application/SearchServiceManager.cs + src/Files.App/files-search-service.exe (build output, dev mode) + tests/Files.Search.Correctness/ ← correctness harness scaffold + run-bench.ps1 ← one-shot build + bench + gate check + .smoke/ ← smoke test artifacts +``` + +--- + +## Files Changed (branch diff vs. 
`main`) + +| File | Change | +| ------------------------------------------------------------- | ------------------------------------------------------------- | +| `CLAUDE.md` | Added C# service architecture, updated workflow | +| `Directory.Packages.props` | Pinned Grpc, Grpc.AspNetCore, Grpc.Tools versions | +| `Files.slnx` | Added four new projects | +| `docs/search-roadmap.md` | Current C# service status snapshot | +| `src/Files.App/Data/Contracts/IGeneralSettingsService.cs` | Added `UseIndexedSearch` property | +| `src/Files.App/Services/Settings/GeneralSettingsService.cs` | Implemented `UseIndexedSearch` | +| `src/Files.App/Strings/en-US/Resources.resw` | Added two string resources for settings UI | +| `src/Files.App/Views/Settings/AdvancedPage.xaml` | Added indexed search toggle card | +| `src/Files.App/ViewModels/Settings/AdvancedViewModel.cs` | Added `UseIndexedSearch` VM property | +| `src/Files.App/Utils/Storage/Search/SearchRouter.cs` | New: routing logic, health probe, indexed path | +| `src/Files.App/Helpers/Application/AppLifecycleHelper.cs` | Fire-and-forget `SearchServiceManager.EnsureRunning` | +| `src/Files.App/Package.appxmanifest` | `desktop6:Service` declaration | +| `src/Files.App/Files.App.csproj` | Project references +`files-search-service.exe` content item | +| `src/Files.IndexedSearch.Client/IndexedSearchProvider.cs` | Named-pipe channel,`IAsyncEnumerable` streaming | + +New projects (untracked in git diff, shown as `??`): + +| Path | Purpose | +| ----------------------------------- | ---------------------------------------------- | +| `src/Files.SearchService/` | The Windows Service (C#) | +| `tests/Files.Search.Correctness/` | Correctness harness scaffold | +| `run-bench.ps1` | Build + start service + run bench + gate check | + +--- + +## Benchmark Results (small corpus, 50 k files) + +All runs against `.bench/small/` (50 k files, ~2.8 GB, seed=42). 
+ +| Date | Provider | TTFR p50 | TTFR p99 | Total p50 | Total p99 | +| ---------- | ---------------------- | -------- | -------- | --------- | --------- | +| 2026-05-10 | legacy AQS (5 k files) | 2025 ms | — | 2380 ms | — | +| 2026-05-10 | indexed (5 k files) | 3 ms | — | 4 ms | — | +| 2026-05-11 | indexed (50 k) | 11 ms | 174 ms | 38 ms | 189 ms | +| 2026-05-12 | naive-scan (50 k) | ~0 ms* | 48 ms | 44 ms | 8329 ms | +| 2026-05-12 | indexed (50 k) | 11 ms | 88 ms | 40 ms | 210 ms | + +\* naive-scan TTFR≈0 ms is misleading: substring queries match the first file +in directory order immediately; indexed has an 11 ms gRPC named-pipe floor. + +**Gate results** (CLAUDE.md, vs. legacy AQS baseline): + +| Gate | Target | Result | +| ------------------------ | ------ | --------------------------- | +| TTFR median vs. legacy | ≤10 % | 0.5 % (11 ms / 2025 ms) ✓ | +| Total p99 vs. naive-scan | — | 2.5 % (210 ms / 8329 ms) ✓ | + +Pinned baseline: `bench-results/baseline.json` (2026-05-12). + +--- + +## Known Gaps / Roadmap + +| Gap | Status | +| ------------------------------------------------------ | ------------------------------------------------------------------------- | +| Mid-string substring (e.g.`phab` → `ALPHABET.md`) | Not implemented; needs n-gram field | +| Glob queries (`*.txt`, `report*`) | Fall back to legacy via `SearchRouter` | +| Content search | Not implemented (v0 ships filename index only) | +| Library and Home scopes | Fall back to legacy (need fan-out logic) | +| Named-pipe ACL smoke test | Deferred until packaged build can be tested end-to-end | +| Index location under packaged identity | To be confirmed via packaged smoke test | +| Offline change reconcile | Covered by `IndexBootstrapper.LoadAndReconcileAsync` on service restart | + +--- + +## Running Locally (Dev Mode) + +```powershell +# 1. Generate the small corpus (one-time) +dotnet run --project tests\corpora -- --preset small --out .bench\small + +# 2. 
Full bench: build, start service, run naive-scan + indexed, gate check +.\run-bench.ps1 + +# Optional flags: +# -SkipBuild skip dotnet build (service and bench already built) +# -NoNaiveScan skip the slow naive-scan baseline run +# -Corpus use a different corpus directory + +# Run the service manually (dev console mode): +$env:FILES_SEARCH_ROOT = ".bench\small" +$env:FILES_SEARCH_INDEX_DIR = ".bench\index" +dotnet run --project src\Files.SearchService -c Release +``` + +The service detects that it is not started by SCM (`Environment.UserInteractive == true`) +and runs in console mode. Press Ctrl+C for a clean shutdown with a final +index persist. + +To route Files.App to the indexed provider without the settings UI, set the +environment variable before launching Files: + +```powershell +$env:FILES_SEARCH_PROVIDER = "Indexed" +# then launch Files.App from Visual Studio or msix +``` diff --git a/docs/decisions/0002-rust-service-transport.md b/docs/decisions/0002-rust-service-transport.md deleted file mode 100644 index d72f5afb6a7f..000000000000 --- a/docs/decisions/0002-rust-service-transport.md +++ /dev/null @@ -1,32 +0,0 @@ -# 0002 — Rust service transport: TCP for v0, named pipe later - -## Status -Accepted (2026-05-09). - -## Context -CLAUDE.md commits the search service to gRPC over named pipe. The named pipe -choice is right long-term (no firewall prompts, OS-level ACLs, no port -collisions, can't be reached from off-box), but tonic does not ship a -Windows-named-pipe transport — it requires a custom `Connector` and -`Acceptor` wrapping `tokio::net::windows::named_pipe`, plus matching code in -the C# client. - -The service has nothing to serve yet: no index, no enumerator, no watcher. -Spending the first day getting a non-trivial transport working trades real -progress for plumbing. - -## Decision -Bind to `127.0.0.1:50080` for v0. 
Swap to `\\.\pipe\files-search` once the -service is doing enough to be worth integration-testing from the C# client -— concretely, when an in-memory filename index returns hits for a hard-coded -corpus. - -## Consequences -- v0 is reachable from any process on the box. Acceptable: no real data is - served yet, and the service will not auto-start until the transport is - hardened. -- The transport swap is local to `main.rs` and the C# client connection - setup; no proto or service-trait changes. -- Revisit before any acceptance-gate benchmark run — TCP loopback adds - measurable per-call overhead vs. named pipe and would skew - time-to-first-result. diff --git a/docs/improvements.md b/docs/improvements.md deleted file mode 100644 index 3cedd9b94fe0..000000000000 --- a/docs/improvements.md +++ /dev/null @@ -1,147 +0,0 @@ -# Search improvements roadmap - -Concrete, scoped follow-ups to the work in `docs/proposal.md`. -Organized by impact, with rough cost estimates so maintainers can see -what each item costs before approving direction. - -This file exists to make it easy to say "yes to A, no to B, defer C" -*before* anyone builds them. - -## Tier 1 — demo-critical UX gaps - -These would land before a real user-facing release. Each closes a gap -where the indexed provider returns surprising results today. - -### Mid-string substring matching -**Cost:** ~3 hours. **Index size:** ~2× current. -Today `"phab"` doesn't match `ALPHABET.md` because Tantivy tokenizes -on word boundaries and we only do prefix queries. Fix: add a parallel -`filename_ngrams` field with a trigram tokenizer; route to it when the -query has no whitespace and the prefix field returns nothing. - -### Underscore-friendly tokenization -**Cost:** ~2 hours. **Index size:** unchanged. -Default tokenizer splits on `_`, so `brand_new.txt` tokenizes to -`["brand", "new", "txt"]` and a query of `brand_new` matches nothing. 
-Fix: custom tokenizer that keeps `_` as a word character but still -splits on `.` / `-` / whitespace. Or: index the whole filename as a -second field and search both. - -### Glob support (`*.txt`, `report-*-final.docx`) -**Cost:** ~4 hours router-only, ~1 day for native Tantivy regex. -Today the router falls back to legacy on `*` / `?`. Cleaner: detect -glob shape, route to a Tantivy `RegexQuery` over an `extension` field -plus a name predicate. The router-fallback is good enough for v0; -native handling buys consistency. - -### Skip noise paths in the default walk -**Cost:** ~2 hours. **Index size:** -20–40% on typical home dirs. -`%USERPROFILE%` includes `AppData\Local\Temp`, browser caches, -`node_modules`, `.git/objects`, etc. They balloon the index and -pollute results. Add a configurable skip-list with sensible defaults; -honor `.gitignore`-style files at root. - -### Recency boost in scoring -**Cost:** ~3 hours. -BM25 alone doesn't surface "the file you were editing yesterday" -above "a five-year-old file with the same name." Boost via Tantivy's -`BoostQuery` over `modified_unix_ms`, linear decay over the last -~30 days. Makes results feel intuitive without being magic. - -## Tier 2 — robustness before public release - -### Restart-time index reconcile -**Cost:** ~4 hours. -Watcher only catches changes while the service is running. If files -change while it's offline, the index goes stale until next manual -rebuild. Fix: at startup, walk root, diff against indexed paths + -mtimes, apply deltas. Closes the "I deleted this yesterday but it -still shows up" bug. - -### Exact-match scoring tier -**Cost:** ~4 hours. -Currently `"report"` weights `report.txt` and -`quarterly_report_draft_v2.txt` similarly. Add explicit scoring tiers: -exact filename > exact name without extension > prefix > substring > -extension match. Single-field weighting won't get there. - -### Faceted refinement -**Cost:** ~1 day. 
-Return facet counts (file type, date bucket, size bucket) alongside -results. UI can offer "5,234 results, 1,200 are PDFs" filtering. -Tantivy supports this natively via `FacetCollector`. - -### Service crash + auto-restart -**Cost:** ~3 hours. -The C# `IndexedSearchProvider` already handles transport errors -gracefully. The service-launcher (separate roadmap item) should also -detect a crashed process and respawn it. Lock-file handling for -crash-recovery so Tantivy's `LockBusy` doesn't strand users. - -## Tier 3 — capability expansion - -### Content indexing -**Cost:** ~1 week per format tier. -Add a `content` TEXT field, populate from text-like formats first -(`.txt/.md/.log/.cs/.json/.html` etc.). Office formats need an -extractor (e.g., `dotnet-extract` or a Rust port of Apache Tika -shapes). PDF needs a parser. Each tier expands scope significantly; -start with text-only, gate the rest on user demand. - -### Frequency-of-access boost -**Cost:** ~1 day, plus C# instrumentation. -Track how often the user opens each file (Files.App emits "user opened -path X" events to the service). Boost frequent files in scoring. -Real win, real privacy implication — needs an opt-out. - -### Saved searches + search history -**Cost:** ~3 days, mostly UI. -Persistent saved searches ("project files modified this week"), -quick-recall of recent queries. Lives mostly in Files.App's settings -UI; the service surface stays the same. - -### Fuzzy matching for typos -**Cost:** ~2 hours to enable. -`"repotr"` → `"report"`. Tantivy supports `FuzzyTermQuery` with edit -distance; only enable when no exact / prefix / substring match. Real -performance hit on large corpora; would gate on bench numbers. - -## Tier 4 — long-term, opt-in - -### Semantic / vector search -**Cost:** ~2 weeks. **RAM cost:** 200–500 MB on 100k files. -Sentence-transformer embeddings of filenames + HNSW index. The "find -me files about taxes" use case. 
Substantial cost; only worth doing -once content indexing is in place. Opt-in feature. - -### "Turbo Mode" for power users -**Cost:** several weeks. **Requires admin.** -Per CLAUDE.md goal #3, default mode never asks for UAC. A future -opt-in mode could use MFT parsing for cold-start orders of magnitude -faster than `FindFirstFileEx`, plus filesystem filter drivers for -zero-latency change detection. Would ship behind a one-time UAC -prompt with a clear explanation. Architecture is friendly to bolting -this on as a third `ISearchProvider` impl. - -### Compression of stored fields -**Cost:** ~1 day. -Paths share long prefixes (`C:\Users\Tommy\Documents\...`). Prefix -coding in Tantivy stored fields could cut index size 30–50%. Trades -read latency for disk; would gate on whichever the bench shows -matters more. - -## What we'd want maintainer input on - -Roughly in order of how much it changes our plan: - -1. **Tier 3 content indexing** — yes/no, and which format tiers. - Privacy-adjacent (we'd be reading file contents into an index); - could be opt-in or opt-out. -2. **Tier 4 semantic search** — yes / no / opt-in. Adds a - meaningful RAM and disk cost. -3. **Tier 4 Turbo Mode** — would you ever accept an admin-mode - opt-in, or is no-admin a hard line? -4. **Tier 3 frequency boost** — privacy implications of tracking - file access; needs a settings toggle minimum. -5. **Tier 1 / Tier 2** — assumed yes pending direction. These close - bugs, not introduce features. diff --git a/docs/proposal.md b/docs/proposal.md deleted file mode 100644 index d2b9cf73e026..000000000000 --- a/docs/proposal.md +++ /dev/null @@ -1,140 +0,0 @@ -# Proposal: faster, AI-augmented search via a sidecar Rust indexer - -**Status:** Working PoC in this fork. Seeking maintainer feedback before -investing further or proposing PRs upstream. 
- -## TL;DR - -Search in Files today calls `Windows.Storage.Search`, which is fast on -paths in the Windows Search Indexer's catalog (`%USERPROFILE%`, -libraries, a few defaults) and **O(N) per query** on every other path -(temp dirs, `D:\projects`, anywhere outside the catalog). For users -searching outside their home dir, every keystroke walks the filesystem. - -This fork adds a separate Rust process (`files-search-service.exe`) -that maintains its own Tantivy filename index, with a -`ReadDirectoryChangesW` watcher for live updates and process throttling -so it stays out of the user's way. Files.App talks to it over gRPC. A -new `ISearchProvider` interface routes between the new path and the -existing `FolderSearch` so the change is opt-in and reversible. - -On a 5,000-file corpus, the indexed provider is **300×–1100× faster** -than the Windows.Storage.Search fallback path, and the architecture -projects to even larger gaps as N grows (see ADR 0003 for the Big O -analysis). - -## What's in this fork - -- `src/search-service/` — Rust, tonic gRPC server. Tantivy filename - index, `FindFirstFileExW` enumerator (rayon-parallel), `notify`-based - watcher, throttling via `PROCESS_MODE_BACKGROUND_BEGIN` plus pause - on battery / fullscreen / high CPU. -- `src/Files.SearchAbstraction/` — `ISearchProvider` interface + - DTOs. Plain `net10.0`, no Windows deps. Single seam for any backend. -- `src/Files.LegacySearch/` — `LegacySearchProvider` wraps the - existing `Windows.Storage.Search` / AQS path through `ISearchProvider`. - Frozen reference per CLAUDE.md. -- `src/Files.IndexedSearch.Client/` — gRPC client (over TCP for v0). - Stubs generated from the same `.proto` the Rust service compiles, so - the wire format has a single source of truth. -- `src/Files.App/Utils/Storage/Search/SearchRouter.cs` — drop-in - replacement for `FolderSearch`. Routes to indexed when the env var - `FILES_SEARCH_PROVIDER=Indexed` is set AND the query is in scope. 
- Falls back to legacy on glob, AQS, library scopes, or service-down. - **Default behavior is byte-identical to upstream.** -- `tests/Files.Search.Bench/` — 200-query bench harness with JSON - output. Drives both providers head-to-head against deterministic - corpora generated by `tests/corpora/`. -- 12 Rust integration tests (enumerator, persistence, watcher, throttle). -- Three ADRs in `docs/decisions/` capturing the key technical choices. - -## Bench numbers (5k smoke corpus) - -| Class | Legacy hits | Indexed hits | Legacy p50 | Indexed p50 | Speedup | -|------------------|------------:|-------------:|-----------:|------------:|--------:| -| substring | 175.5 | 177.2 | 2380 ms | 4 ms | 595× | -| glob | 311.8 | 0.0 | 3363 ms | 3 ms | 1121× | -| exact | 0.0 | 0.0 | 1120 ms | 3 ms | 373× | -| ext+substring | 0.0 | 0.0 | 1095 ms | 3 ms | 365× | -| content | 0.0 | 0.0 | 1084 ms | 3 ms | 361× | - -Indexed beats the proposed gate (≤10% of legacy on `medium`) by three -orders of magnitude on every class it answers. See ADR 0003 for why we -project to `medium`/`large` analytically rather than running them -empirically (legacy on a non-Indexer-tracked corpus is -`O(files × queries)`; a 500k-file run takes ~13 hours). - -The honest gap: indexed returns 0 hits for `glob` queries today -(Tantivy doesn't do globs). The router falls back to legacy on `*`/`?`, -so users see the slower-but-correct result. `docs/improvements.md` -lists how this and other gaps would be closed. - -## Design constraints we held - -These are documented in `CLAUDE.md` at the repo root: - -1. **Faster.** Query latency ≤10% of Windows Search on equivalent - corpora. -2. **No heavier.** RAM/disk/idle CPU ≤ Files + Windows Search Indexer. -3. **No user burden.** No UAC, no admin features, no new mandatory UI. - Existing search bar only. - -Constraint 3 disqualifies MFT-based indexing (would need admin) — a -choice we're explicit about and would revisit only as an opt-in -"Turbo Mode" for power users. 
- -## What's intentionally not done yet - -- Content indexing (text in files), semantic / vector search. -- Named-pipe transport (TCP loopback for v0; pipe is roadmap step - after maintainer sign-off so we don't sink work into glue if the - direction is rejected). -- Service auto-launcher inside Files.App (currently the user starts - the exe manually; trivial to add but waiting on direction). -- Fully wired into `Files.App` settings UI (toggle is env-var only). -- Migration to a shipping default. Per CLAUDE.md the default stays - Legacy until benchmarks pass; we don't propose changing that. - -## Why we're asking before building more - -Each of the items above is bounded but real work. Before we sink more -time into them, we want to know: - -1. **Is this direction interesting to you at all?** A sidecar Rust - process inside a C# app is a non-trivial architectural commitment - for the project to take on, even if the code lands cleanly. -2. **What would block it?** Concerns we expect: Rust toolchain in CI, - binary distribution / signing, maintenance burden if the original - contributor disappears, telemetry / privacy implications of a - persistent on-disk index. -3. **What would the merge path look like?** We can split this into - ~4 small PRs (interface only → bench harness → indexed client → - router + UI) so each can be reviewed and merged independently. - Or you may prefer it stay a fork. - -## Where to look in the code - -- Architecture sketch: `CLAUDE.md` -- Roadmap and current state: `docs/search-roadmap.md` -- Future improvements (the question we'd want your read on next): - `docs/improvements.md` -- ADRs: `docs/decisions/` -- Rust service entry point: `src/search-service/src/main.rs` -- C# entry point: `src/Files.App/Utils/Storage/Search/SearchRouter.cs` - -## Trying it locally - -```powershell -# 1. Build solution in VS 2026 (needs v145 toolset; see project_build_env memory). -# 2. 
Build the Rust service: -cargo build --release --manifest-path src/search-service/Cargo.toml -# 3. Set env vars and start the service: -$env:FILES_SEARCH_PROVIDER = "Indexed" -$env:FILES_SEARCH_ROOT = "$env:USERPROFILE" -src/search-service/target/release/files-search-service.exe -# 4. In another shell, launch Files.App from VS. Searches now route to -# the indexed provider for queries the router supports. -``` - -Default users (no env var set) get the existing legacy path with no -behavior change. diff --git a/docs/search-roadmap.md b/docs/search-roadmap.md index 5b8694a123a4..a87984267eff 100644 --- a/docs/search-roadmap.md +++ b/docs/search-roadmap.md @@ -1,158 +1,76 @@ # Search rewrite — roadmap -Status snapshot, kept short. Update inline as state changes; don't append a -log. CLAUDE.md has the architecture; this file is just *where we are*. +Status snapshot for the C# search service on `feature/csharp-search-service`. +`CLAUDE.md` has the constraints; `docs/csharp-search-service.md` has the +full architecture and file map. This file is just *where we are*. ## Done -- ADR 0001 — bench stack chosen. -- ADR 0002 — Rust service transport: TCP for v0, named pipe later. -- ADR 0003 — bench strategy: Big O for the gates, empirical for - constants and regressions. `small` is the canonical baseline; `medium` - and `large` are gated on Windows Search Indexer integration first. -- `tests/corpora/` and `tests/Files.Search.Bench/` scaffolds exist. -- `src/search-service/` skeleton: tonic gRPC server on `127.0.0.1:50080`, - vendored protoc, pinned to Rust 1.95. `FilesSearch` service with - `Health` + streaming `Search` (returns empty stream). -- End-to-end signal: `lib.rs` split, `tests/search_smoke.rs` exercises - Health + Search via a real tonic client over an ephemeral TCP port. -- Tantivy filename index in `src/index.rs`, on-disk persistence at - `%LOCALAPPDATA%\Files\search-index\` (override via - `FILES_SEARCH_INDEX_DIR`). 
Schema: `path` STRING, `filename` TEXT, - `size_bytes` U64, `modified_unix_ms` I64. Per-token prefix queries via - `FuzzyTermQuery::new_prefix(_, 0, _)`; `scope_paths` becomes a - path-prefix filter clause. -- Enumerator in `src/enumerate.rs` — Windows path uses `FindFirstFileExW` - with `FindExInfoBasic` + `FIND_FIRST_EX_LARGE_FETCH`, recursion fanned - out via `rayon::scope`, entries streamed through an `mpsc::Sender` so - the Tantivy writer drains concurrently. Reparse points skipped to - match `WalkDir(follow_links=false)` semantics. `walkdir` fallback - `#[cfg(not(windows))]` keeps the crate portable for dev. -- Watcher in `src/watcher.rs` — wraps the `notify` crate (which uses - `ReadDirectoryChangesW` + overlapped I/O on Windows). `SearchIndex` - now holds its writer behind a `parking_lot::Mutex` so the watcher - can `upsert`/`delete` without recreating it. Commits are debounced - on a 250ms quiet window so bursts (`git checkout`, archive extract) - collapse into a single Tantivy commit. Final commit on shutdown. -- Throttle in `src/throttle.rs` — `apply_background_priority()` calls - `SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN)` once at startup - (lowers CPU + I/O priority below normal). `Throttle` polls every 2s - for battery (`GetSystemPowerStatus`), fullscreen - (`SHQueryUserNotificationState`), and CPU load (`GetSystemTimes`, - threshold 70%); the watcher's commit loop skips commits while - `should_pause()` is true so query-visibility (and fsync) defers - until idle. Apply work still happens so events aren't lost. - 12/12 Rust tests green. -- C# `Files.SearchAbstraction` defined: `ISearchProvider` (streaming - `IAsyncEnumerable` + `GetHealthAsync`), `SearchQuery`, - `SearchResult`, `HealthStatus`. `net10.0` (no Windows deps) so any - consumer can reference it. Registered in `Files.slnx` under - `/src/core/`. 
-- `Files.LegacySearch` — `LegacySearchProvider` implements - `ISearchProvider` over `Windows.Storage.Search` (AQS via - `QueryOptions { FolderDepth.Deep, IndexerOption.UseIndexerWhenAvailable, - SortBy System.Search.Rank desc }`). AQS construction mirrors - upstream's `FolderSearch.AQSQuery` ($/colon/dot-aware wildcard - cases). Builds in batches of 500 via `CreateFileQueryWithOptions`, - yields per file. Cancellation honored at every batch boundary; - per-file stat failures swallowed to match upstream. -- `Files.IndexedSearch.Client` — `IndexedSearchProvider` implements - `ISearchProvider` over gRPC. `Grpc.Tools` generates client stubs - from the *same* `src/search-service/proto/files_search.proto` the - Rust service consumes, so the wire format has a single source of - truth. Single persistent `GrpcChannel` (HTTP/2 multiplexes calls). - TCP `127.0.0.1:50080` default; override via - `FILES_SEARCH_SERVICE_URL`. `GetHealthAsync` translates transport - failure into `IsAvailable=false` so the routing layer doesn't need - try/catch around every probe. -- Bench harness in `tests/Files.Search.Bench/` wired up: existing - scaffold (200-query generator, JSON output, machine info) now sees - three providers — `naive-scan`, `legacy`, `indexed`. Adapter maps - bench `Query` → `SearchQuery(text, [corpusRoot])` so each provider - searches the same tree regardless of its default scope. One warm-up - query per run absorbs JIT / gRPC channel / Tantivy mmap penalties. - Aggregate `Aggregates { ttfrMedianMs, ttfrP95Ms, ttfrP99Ms, - totalMedianMs, totalP95Ms, totalP99Ms }` block added to JSON output - so gates in CLAUDE.md can be diffed against `bench-results/baseline.json` - directly. - -## First bench run, 5k smoke corpus (2026-05-10) - -Calibration run only — `small` will be the canonical baseline (see ADR 0003). 
- -| Class | Legacy hits | Indexed hits | Legacy p50 | Indexed p50 | Speedup | -|------------------|------------:|-------------:|-----------:|------------:|--------:| -| substring | 175.5 | 177.2 | 2380 ms | 4 ms | 595× | -| glob | 311.8 | 0.0 | 3363 ms | 3 ms | 1121× | -| exact | 0.0 | 0.0 | 1120 ms | 3 ms | 373× | -| ext+substring | 0.0 | 0.0 | 1095 ms | 3 ms | 365× | -| content | 0.0 | 0.0 | 1084 ms | 3 ms | 361× | - -Indexed beats the ≤10% gate by 3 orders of magnitude on every class it -answers. `glob` is the headline correctness gap (Tantivy doesn't do -globs); needs routing-layer policy to fall back to legacy on `*` / `?`. - -Bug shaken out: indexed paths used forward slashes (from -`FILES_SEARCH_ROOT="C:/..."`) while C# scope used backslashes (from -`Path.GetFullPath`); prefix match silently returned 0 hits. Fixed in -`main.rs::normalize_root`. - -- `Files.App` wired to the new search stack. Added project references - to `Files.SearchAbstraction`, `Files.LegacySearch`, - `Files.IndexedSearch.Client`. New `SearchRouter` in - `src/Files.App/Utils/Storage/Search/SearchRouter.cs` is a drop-in - replacement for `FolderSearch` (same `Query`/`Folder`/`MaxItemCount` - properties, same `SearchTick` event, same `SearchAsync(IList, - CancellationToken)` shape). Routing is opt-in via - `FILES_SEARCH_PROVIDER=Indexed` env var; default behavior is - byte-identical to legacy. Indexed path also requires a non-glob, - non-AQS query and a real on-disk folder (not "Home", not a library) - — anything else falls back to legacy. Service-down gracefully falls - back via `IndexedSearchProvider.GetHealthAsync()`. Migrated four - call sites: `ShellViewModel.SearchAsync`, - `NavigationToolbarViewModel`, `BaseShellPage`, `BaseLayoutPage`. - C# compiles clean. +- `Files.SearchAbstraction` — `ISearchProvider`, `SearchQuery`, `SearchResult`, + `HealthStatus` (`net10.0`, no Windows deps). 
+- `Files.LegacySearch` — `LegacySearchProvider` wraps `Windows.Storage.Search` + (AQS) behind `ISearchProvider`. Frozen reference. +- `Files.IndexedSearch.Client` — gRPC client over named pipe + (`\\.\pipe\files-search`); TCP loopback fallback via + `FILES_SEARCH_SERVICE_URL`. Stubs generated from + `src/Files.SearchService/proto/files_search.proto` (single source of truth). +- `Files.SearchService` — C# Windows Service. In-memory inverted index + (`Dictionary` posting lists, atomic swap on rebuild) + + trigram index for mid-string substrings. `DocStore` parallel arrays. + `IndexBootstrapper` does USN-or-fallback enumeration with warm-start + reconcile against `index.bin`. `ChangeWatcher` + `EventBatcher` 250ms + debounce. `ProcessThrottle` background priority + battery/fullscreen/CPU + polling. Kestrel gRPC on named pipe with DACL granting AuthenticatedUsers RW. +- `Files.App` — `SearchRouter` drop-in for `FolderSearch`. Settings UI toggle + `UseIndexedSearch` in Settings → Advanced. `SearchServiceManager` ensures + the service is running (SCM in packaged mode; HKCU\Run + direct launch + in dev). +- `Package.appxmanifest` — `desktop6:Service`, `StartAccount=localSystem`, + `StartType=auto`. +- Bench harness: `run-bench.ps1` (build → start service → run bench → + gate check). `naive-scan`, `legacy`, `indexed` providers in + `tests/Files.Search.Bench/`. + +## Bench, small corpus (50k files, 2026-05-12) + +`bench-results/baseline.json` — pinned. + +| Provider | TTFR p50 | TTFR p99 | Total p50 | Total p99 | +|--------------|---------:|---------:|----------:|----------:| +| legacy AQS\* | 2025 ms | — | 2380 ms | — | +| indexed | 11 ms | 88 ms | 40 ms | 210 ms | +| naive-scan | ~0 ms | 48 ms | 44 ms | 8329 ms | + +\* Legacy AQS measured on the 5k smoke run; full 50k legacy run deferred per +ADR 0003 (≥80 min wall time on a corpus outside the Windows Search Indexer +catalog tells us nothing new). + +**Gate result:** TTFR median 11 ms / 2025 ms = 0.5% (gate: ≤10%). 
✓ ## Next, in order -1. **Service launcher** — small helper that starts - `files-search-service.exe` as a child process when the indexed path - is selected, and stops it on app exit. Currently the user must - start the service manually. -2. **Swap TCP → named pipe (`\\.\pipe\files-search`).** Custom tonic - Connector/Acceptor over `tokio::net::windows::named_pipe`, plus the - matching named-pipe channel in the C# client. -3. **Content + semantic indexes** — Tantivy content fields, then HNSW. - Off the critical path until filename search is shipping. - -Running `medium` / `large` empirically is deferred per ADR 0003 until -the corpus can be added to Windows Search Indexer's catalog. - -## Known gaps - -- Tantivy's default tokenizer + per-token prefix matches whole-word and - prefix queries (`alpha` finds `alpha.txt` and `ALPHABET.md`) but not - mid-string substrings (`phab` does not find `ALPHABET.md`). Revisit - with an n-gram field if the correctness suite demands legacy parity. -- The watcher closes the live-update gap, but changes that happen while - the service is *offline* still leave the index stale until something - triggers a rebuild. Restart-time reconcile (walk root, diff against - index, apply deltas) is not implemented yet. - -## Parallel C# work (no Rust dependency) - -- Define `Files.SearchAbstraction` (`ISearchProvider` + types). Unblocks - both `Files.LegacySearch` and `Files.IndexedSearch.Client`. -- `Files.LegacySearch` — wrap upstream search behind `ISearchProvider`. - Frozen reference per CLAUDE.md. -- Flesh out corpus generators (`tests/corpora/`) and bench harness - (`tests/Files.Search.Bench/`) toward the JSON output schema and the - acceptance-gate metrics in CLAUDE.md. - -## Open questions - -- Named-pipe ACL: default (creator only) is right, but confirm the C# - client running in the packaged app can open it. -- Index location under packaged identity vs. unpackaged dev runs. 
-- Whether the service is launched on demand by `Files.App` or runs as a - user-scoped scheduled task. Affects cold-start measurement. +See `memory/project_search_pr_punchlist.md` for the full P0/P1/P2 list +before sending to the Files team. Highlights: + +**P0 — blocking PR** +1. Validate packaged SCM path end-to-end (named pipe + LocalSystem). + Dev mode (TCP) works; packaged path never verified on this machine. +2. Commit `tests/Files.Search.Correctness/`, `tests/Files.Search.Bench/`, + and `tests/Files.Search.Probe/` (currently untracked). + +**P1 — quality** +4. Index corruption recovery — on `LoadAsync` failure, delete `index.bin` + and fall through to fresh build (currently crashes on bad magic/version). +5. Refresh `_serviceAvailable` cache periodically (60s timer) so + service-came-back transitions are detected. +6. Root-cause the NRE in `BaseLayoutPage.cs:620` (band-aided with + `.Where(x => x is not null)`). +7. Surface service status (running, file count, indexing state, last + update) in Settings UI. + +**P2 — future scope** +- Token prefix matching (so `test` matches `testing` via tokens, not just + trigrams). +- Pagination / cursor for >200 results. +- Memory budget tuning (1.2 GB for 1M files; trigram index dominates). +- Content search foundation prep (filename-only today). +- Library and Home scope fan-out to the indexed provider. diff --git a/run-bench.ps1 b/run-bench.ps1 new file mode 100644 index 000000000000..9b472419ce0d --- /dev/null +++ b/run-bench.ps1 @@ -0,0 +1,196 @@ +# run-bench.ps1 +# Builds, runs, and compares the search service benchmark in one shot. +# Usage: .\run-bench.ps1 [-Corpus <path>] [-Out <dir>] [-SkipBuild] [-NoNaiveScan] +# +# Prerequisites: dotnet SDK, corpus generated at .bench\small (run files-corpora first). 
+param(
+    [string]$Corpus = ".bench\small",
+    [string]$Out = "bench-results",
+    [switch]$SkipBuild,
+    [switch]$NoNaiveScan
+)
+
+$ErrorActionPreference = "Stop"
+$root = $PSScriptRoot
+
+function Step($msg) { Write-Host "`n==> $msg" -ForegroundColor Cyan }
+function Ok($msg) { Write-Host " $msg" -ForegroundColor Green }
+function Warn($msg) { Write-Host " $msg" -ForegroundColor Yellow }
+function Fail($msg) { Write-Host "FAIL: $msg" -ForegroundColor Red; exit 1 }
+
+# Terminates a process together with every child it spawned. `dotnet run` is only a
+# launcher: the service runs as its child, so killing the launcher alone would leave
+# the service alive, still holding its endpoint and locking its build outputs.
+# Redirection happens inside cmd.exe so a "process not found" message on stderr can
+# never surface as a PowerShell error under $ErrorActionPreference = "Stop".
+function Stop-ProcessTree($processId) {
+    & cmd.exe /c "taskkill /PID $processId /T /F >nul 2>&1"
+}
+
+# Stops service instances left over from earlier runs. The service project builds an
+# executable named files-search-service, so instances can show up either under that
+# name or hosted by dotnet.exe; both are checked. Failures (e.g. access denied on an
+# instance owned by another account) are ignored -- this is best-effort cleanup.
+function Stop-StrayService {
+    Get-CimInstance Win32_Process -Filter "Name='dotnet.exe'" |
+        Where-Object { $_.CommandLine -like "*Files.SearchService*" } |
+        ForEach-Object { Stop-Process -Id $_.ProcessId -Force -ErrorAction SilentlyContinue }
+    Get-Process -Name "files-search-service" -ErrorAction SilentlyContinue |
+        Stop-Process -Force -ErrorAction SilentlyContinue
+}
+
+# --- 1. Resolve and validate corpus ---
+$corpusAbs = if ([System.IO.Path]::IsPathRooted($Corpus)) { $Corpus } else { Join-Path $root $Corpus }
+$manifest = Join-Path $corpusAbs "manifest.json"
+if (-not (Test-Path $manifest)) {
+    Fail "manifest.json not found at '$corpusAbs'. Run: dotnet run --project tests\corpora -- --preset small --out $Corpus"
+}
+$m = Get-Content $manifest | ConvertFrom-Json
+Ok "corpus: $($m.preset) ($($m.fileCount.ToString('N0')) files, seed=$($m.seed))"
+
+$outAbs = if ([System.IO.Path]::IsPathRooted($Out)) { $Out } else { Join-Path $root $Out }
+New-Item -ItemType Directory -Force -Path $outAbs | Out-Null
+
+# --- 2. Build ---
+if (-not $SkipBuild) {
+    Step "Building search service"
+    $built = $false
+    $tries = 0
+    while (-not $built -and $tries -lt 3) {
+        # Capture stdout only (that is where dotnet build reports errors). Merging stderr
+        # with 2>&1 would turn any stderr line into a terminating error on Windows
+        # PowerShell 5.1 because $ErrorActionPreference is "Stop".
+        $buildLog = & dotnet build "$root\src\Files.SearchService\Files.SearchService.csproj" -c Release | Out-String
+        if ($LASTEXITCODE -eq 0) { $built = $true }
+        else {
+            $tries++
+            if ($tries -lt 3) { Start-Sleep -Seconds 5 }
+            else {
+                # Show the compiler output of the last attempt instead of discarding it.
+                Write-Host $buildLog
+                Fail "Service build failed after 3 tries. Kill any running files-search-service.exe and retry, or use -SkipBuild."
+            }
+        }
+    }
+    Ok "service built"
+
+    Step "Building bench"
+    & dotnet build "$root\tests\Files.Search.Bench\Files.Search.Bench.csproj" -c Release | Out-Null
+    if ($LASTEXITCODE -ne 0) { Fail "Bench build failed." }
+    Ok "bench built"
+}
+
+# --- 3. Start service ---
+Step "Starting search service (root=$corpusAbs)"
+
+# Kill any stray service instance that might be holding the pipe.
+Stop-StrayService
+Start-Sleep -Seconds 1
+
+$indexDir = Join-Path $root ".bench\index"
+$svcOut = [System.IO.Path]::GetTempFileName()
+$svcErr = [System.IO.Path]::GetTempFileName()
+$svcProj = "$root\src\Files.SearchService\Files.SearchService.csproj"
+
+# Remember the caller's values so the session is left exactly as it was found.
+$prevSearchRoot = $env:FILES_SEARCH_ROOT
+$prevIndexDir = $env:FILES_SEARCH_INDEX_DIR
+$env:FILES_SEARCH_ROOT = $corpusAbs
+$env:FILES_SEARCH_INDEX_DIR = $indexDir
+
+$svc = $null
+$runs = @{}
+
+# Everything from service start-up onwards sits inside try/finally so that every exit
+# path (including Fail) stops the service tree and restores the environment.
+try {
+    # Start-Process with file redirection avoids the PS 5.1 event-handler incompatibilities.
+    $svc = Start-Process -FilePath "dotnet" `
+        -ArgumentList "run","--project",$svcProj,"-c","Release","--no-build" `
+        -RedirectStandardOutput $svcOut `
+        -RedirectStandardError $svcErr `
+        -PassThru -NoNewWindow
+
+    # Wait up to 3 minutes for the service to finish bootstrap and start listening.
+    # "Now listening" is emitted only after BootstrapAsync completes, so it means index is ready.
+    $deadline = (Get-Date).AddMinutes(3)
+    $ready = $false
+    while ((Get-Date) -lt $deadline) {
+        $log = Get-Content $svcOut -ErrorAction SilentlyContinue
+        if ($log -like "*Now listening*") { $ready = $true; break }
+        if ($svc.HasExited) { Fail "Service exited prematurely. See: $svcOut and $svcErr" }
+        Start-Sleep -Milliseconds 500
+    }
+    if (-not $ready) { Fail "Service did not start within 3 minutes." }
+    Ok "service ready (PID $($svc.Id)) -- bootstrap complete"
+
+    # --- 4. naive-scan baseline ---
+    if (-not $NoNaiveScan) {
+        Step "Running naive-scan (baseline)"
+        & dotnet run --project "$root\tests\Files.Search.Bench\Files.Search.Bench.csproj" `
+            -c Release --no-build -- `
+            --corpus $corpusAbs --provider naive-scan --out $outAbs
+        if ($LASTEXITCODE -ne 0) { Fail "naive-scan bench failed." }
+
+        # The bench writes one JSON file per run; the newest non-baseline file is this run's result.
+        $scanFile = Get-ChildItem $outAbs -Filter "*.json" |
+            Where-Object { $_.Name -ne "baseline.json" } |
+            Sort-Object LastWriteTime -Descending | Select-Object -First 1
+        $runs["naive-scan"] = Get-Content $scanFile.FullName | ConvertFrom-Json
+        Ok "naive-scan done -> $($scanFile.Name)"
+    }
+
+    # --- 5. indexed ---
+    Step "Running indexed"
+    & dotnet run --project "$root\tests\Files.Search.Bench\Files.Search.Bench.csproj" `
+        -c Release --no-build -- `
+        --corpus $corpusAbs --provider indexed --out $outAbs
+    if ($LASTEXITCODE -ne 0) { Fail "indexed bench failed." }
+
+    $idxFile = Get-ChildItem $outAbs -Filter "*.json" |
+        Where-Object { $_.Name -ne "baseline.json" } |
+        Sort-Object LastWriteTime -Descending | Select-Object -First 1
+    $runs["indexed"] = Get-Content $idxFile.FullName | ConvertFrom-Json
+    Ok "indexed done -> $($idxFile.Name)"
+
+} finally {
+    # --- 6. Stop service ---
+    if ($svc -and -not $svc.HasExited) {
+        Stop-ProcessTree $svc.Id
+        $svc.WaitForExit(5000) | Out-Null
+    }
+    # Assigning $null removes a variable, so this also covers "was not set before".
+    $env:FILES_SEARCH_ROOT = $prevSearchRoot
+    $env:FILES_SEARCH_INDEX_DIR = $prevIndexDir
+}
+
+# --- 7. Print comparison table ---
+Write-Host ""
+$fileCountStr = $m.fileCount.ToString('N0')
+$header = "=== Results: {0} corpus, {1} files ===" -f $m.preset, $fileCountStr
+Write-Host $header -ForegroundColor White
+
+$metrics = @(
+    @{ Key="ttfrMedianMs"; Label="TTFR median" },
+    @{ Key="ttfrP95Ms"; Label="TTFR p95" },
+    @{ Key="ttfrP99Ms"; Label="TTFR p99" },
+    @{ Key="totalMedianMs"; Label="Total median" },
+    @{ Key="totalP95Ms"; Label="Total p95" },
+    @{ Key="totalP99Ms"; Label="Total p99" }
+)
+
+$fmt = "{0,-20} {1,14} {2,14} {3,10}"
+Write-Host ($fmt -f "metric", "naive-scan", "indexed", "ratio")
+Write-Host ("-" * 60)
+foreach ($m2 in $metrics) {
+    $iv = $runs["indexed"].aggregates.($m2.Key)
+    if ($runs.ContainsKey("naive-scan")) {
+        $sv = $runs["naive-scan"].aggregates.($m2.Key)
+        # ratio = indexed / naive-scan; guarded because naive-scan can report 0 ms.
+        $ratio = if ($sv -gt 0) { "{0:F2}x" -f ($iv / $sv) } else { "n/a" }
+        Write-Host ($fmt -f $m2.Label, "${sv}ms", "${iv}ms", $ratio)
+    } else {
+        Write-Host ($fmt -f $m2.Label, "skipped", "${iv}ms", "-")
+    }
+}
+
+# --- 8. Gate check vs baseline.json ---
+$baselinePath = Join-Path $outAbs "baseline.json"
+if (Test-Path $baselinePath) {
+    Write-Host ""
+    Write-Host "=== Gate check vs baseline ===" -ForegroundColor White
+    $bl = (Get-Content $baselinePath | ConvertFrom-Json).pinned.aggregates
+    $ia = $runs["indexed"].aggregates
+    $pass = $true
+
+    # ThresholdPct is the highest allowed "got / pinned" percentage for each metric.
+    $gates = @(
+        @{ Label="TTFR median"; Got=$ia.ttfrMedianMs; Pinned=$bl.ttfrMedianMs; ThresholdPct=150 },
+        @{ Label="TTFR p99"; Got=$ia.ttfrP99Ms; Pinned=$bl.ttfrP99Ms; ThresholdPct=200 },
+        @{ Label="Total p99"; Got=$ia.totalP99Ms; Pinned=$bl.totalP99Ms; ThresholdPct=150 }
+    )
+    foreach ($g in $gates) {
+        $pct = if ($g.Pinned -gt 0) { [int]($g.Got / $g.Pinned * 100) } else { 100 }
+        $ok = $pct -le $g.ThresholdPct
+        $symbol = if ($ok) { "PASS" } else { "FAIL" }
+        $color = if ($ok) { "Green" } else { "Red" }
+        $pctStr = "$pct" + "%"
+        Write-Host (" {0,-14} {1,6}ms vs pinned {2,6}ms ({3}) [{4}]" -f `
+            $g.Label, $g.Got, $g.Pinned, $pctStr, $symbol) -ForegroundColor $color
+        if (-not $ok) { $pass = $false }
+    }
+
+    if ($pass) {
+        Write-Host "`n All gates PASS" -ForegroundColor Green
+    } else {
+        Write-Host "`n One or more gates FAILED -- update baseline.json if this is intentional" -ForegroundColor Red
+        exit 1
+    }
+} else {
+    Warn "No baseline.json found at '$baselinePath' -- skipping gate check"
+    Warn "Run once to establish baseline, then copy the indexed result to baseline.json"
+}
diff --git a/src/Files.App/Data/Contracts/IGeneralSettingsService.cs b/src/Files.App/Data/Contracts/IGeneralSettingsService.cs index 6540eb042dcc..ab2878c1c63c 100644 --- a/src/Files.App/Data/Contracts/IGeneralSettingsService.cs +++ b/src/Files.App/Data/Contracts/IGeneralSettingsService.cs @@ -348,5 +348,10 @@ public interface IGeneralSettingsService : IBaseSettingsService, INotifyProperty /// Gets or sets a value indicating whether smooth scrolling is enabled. 
/// bool EnableSmoothScrolling { get; set; } + + /// + /// Gets or sets a value indicating whether the indexed search service is used instead of Windows Search. + /// + bool UseIndexedSearch { get; set; } } } diff --git a/src/Files.App/Files.App.csproj b/src/Files.App/Files.App.csproj index 53ab03eec353..af095d38cb51 100644 --- a/src/Files.App/Files.App.csproj +++ b/src/Files.App/Files.App.csproj @@ -17,7 +17,7 @@ False Always False - x86|x64|arm64 + x86|x64 0 Enable app.manifest @@ -25,6 +25,11 @@ win-x86;win-x64;win-arm64 false false + + false true true true @@ -40,7 +45,17 @@ $(DefineConstants);DISABLE_XAML_GENERATED_MAIN + True + E4E9B3B381A8CA7392F519B9408E563869E57285 + + + + + + @@ -56,6 +71,13 @@ PreserveNewest + + + SearchService\%(RecursiveDir)%(Filename)%(Extension) + PreserveNewest + PreserveNewest @@ -94,6 +116,7 @@ + @@ -137,6 +160,8 @@ + + @@ -155,9 +180,7 @@ - + diff --git a/src/Files.App/Helpers/Application/AppLifecycleHelper.cs b/src/Files.App/Helpers/Application/AppLifecycleHelper.cs index 24d301d1c61f..64a844b24953 100644 --- a/src/Files.App/Helpers/Application/AppLifecycleHelper.cs +++ b/src/Files.App/Helpers/Application/AppLifecycleHelper.cs @@ -108,6 +108,9 @@ await Task.WhenAll( App.QuickAccessManager.InitializeAsync() ); + // Start the search service sidecar in the background — fire and forget. + _ = Task.Run(SearchServiceManager.EnsureRunning); + // Start non-critical tasks without waiting for them to complete _ = Task.Run(async () => { diff --git a/src/Files.App/Helpers/Application/SearchServiceManager.cs b/src/Files.App/Helpers/Application/SearchServiceManager.cs new file mode 100644 index 000000000000..3e9cb2d99fcc --- /dev/null +++ b/src/Files.App/Helpers/Application/SearchServiceManager.cs @@ -0,0 +1,139 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. 
+
+using Microsoft.Win32;
+using System.Diagnostics;
+using System.IO;
+using System.ServiceProcess;
+using Windows.ApplicationModel;
+
+namespace Files.App.Helpers.Application;
+
+/// <summary>
+/// Manages the lifecycle of the files-search-service sidecar process.
+/// <para>
+/// In packaged (Store/sideload) builds the service is declared in the MSIX
+/// manifest as a desktop6:Service and installed by Windows at package
+/// install time. SCM starts it automatically (auto-start, i.e. at system
+/// startup) — no UAC prompt, no HKCU\Run entry needed. Files.App is a pure
+/// gRPC client.
+/// </para>
+/// <para>
+/// In unpackaged dev builds (no SCM registration) the service is started
+/// directly as a child process and a HKCU\Run entry is written so it
+/// survives reboots during development.
+/// </para>
+/// </summary>
+internal static class SearchServiceManager
+{
+    private const string ServiceName = "FilesSearchService";
+    private const string RunKeyPath = @"Software\Microsoft\Windows\CurrentVersion\Run";
+    private const string RunValueName = "FilesSearchService";
+    private const string ExeName = "files-search-service.exe";
+    private const string ProcessName = "files-search-service";
+
+    /// <summary>
+    /// Makes sure a search service instance is available: through SCM for packaged
+    /// Release builds, otherwise by spawning the executable directly.
+    /// </summary>
+    public static void EnsureRunning()
+    {
+#if DEBUG
+        // Debug manifest omits desktop6:Service so VS can sideload without admin.
+        // Always spawn the exe directly; SCM has no registration for it.
+        EnsureProcessRunning();
+#else
+        if (IsPackaged())
+            EnsureServiceRunning();
+        else
+            EnsureProcessRunning();
+#endif
+    }
+
+    /// <summary>
+    /// Removes the HKCU\Run value written by the direct-launch path. A no-op when
+    /// the value (or the key) does not exist.
+    /// </summary>
+    public static void RemoveStartupRegistration()
+    {
+        // Packaged Release builds are managed by SCM — nothing to clean up.
+#if !DEBUG
+        if (IsPackaged())
+            return;
+#endif
+        using var key = Registry.CurrentUser.OpenSubKey(RunKeyPath, writable: true);
+        key?.DeleteValue(RunValueName, throwOnMissingValue: false);
+    }
+
+    // Packaged mode: ask SCM to bring the service up if it isn't already running.
+    private static void EnsureServiceRunning()
+    {
+        try
+        {
+            using var sc = new ServiceController(ServiceName);
+            switch (sc.Status)
+            {
+                case ServiceControllerStatus.Stopped:
+                    sc.Start();
+                    break;
+
+                case ServiceControllerStatus.Paused:
+                    // A paused service has to be resumed; Start() on it throws and
+                    // would leave the service paused.
+                    sc.Continue();
+                    break;
+            }
+        }
+        catch (InvalidOperationException)
+        {
+            // Service not installed yet (e.g. first run before SCM has processed
+            // the manifest), or SCM refused the request. Nothing to do here —
+            // as an auto-start service it is started by SCM on the next system start.
+        }
+    }
+
+    // Dev / unpackaged mode: start the exe directly and register HKCU\Run.
+    private static void EnsureProcessRunning()
+    {
+        var exePath = ResolveExePath();
+        if (exePath is null || !File.Exists(exePath))
+            return;
+
+        // In dev mode the service uses TCP loopback (port 50299) instead of a
+        // named pipe — named pipes require ACL privileges we don't have outside SCM.
+        // Setting FILES_SEARCH_SERVICE_URL makes both this process (the gRPC client)
+        // and the child service process (which inherits the env) use TCP.
+        Environment.SetEnvironmentVariable("FILES_SEARCH_SERVICE_URL", "http://localhost:50299");
+
+        RegisterStartup(exePath);
+        LaunchIfNotRunning(exePath);
+    }
+
+    // Writes the quoted exe path to HKCU\Run so the service starts at user login.
+    private static void RegisterStartup(string exePath)
+    {
+        using var key = Registry.CurrentUser.CreateSubKey(RunKeyPath);
+        key.SetValue(RunValueName, $"\"{exePath}\"");
+    }
+
+    // Replaces any existing service process with a fresh one started from exePath.
+    private static void LaunchIfNotRunning(string exePath)
+    {
+        // Kill any stale instances first — in dev mode the HKCU\Run entry or a
+        // previous debug session may have left a process holding the service
+        // endpoint, which causes AddressInUseException on the next start.
+        foreach (var stale in Process.GetProcessesByName(ProcessName))
+        {
+            // Each Process object wraps an OS handle; release it once we are done.
+            using (stale)
+            {
+                try { stale.Kill(entireProcessTree: true); stale.WaitForExit(2000); }
+                catch { /* best effort: the process may already be gone or not be ours to kill */ }
+            }
+        }
+
+        // Disposing the returned object only releases our handle; the service keeps running.
+        using var launched = Process.Start(new ProcessStartInfo
+        {
+            FileName = exePath,
+            UseShellExecute = false,
+            CreateNoWindow = true,
+        });
+    }
+
+    // Location of the service exe: under the package install folder when a package
+    // identity is available, otherwise next to the app binaries.
+    private static string? ResolveExePath()
+    {
+        try
+        {
+            return Path.Combine(Package.Current.InstalledLocation.Path, "SearchService", ExeName);
+        }
+        catch
+        {
+            return Path.Combine(AppContext.BaseDirectory, "SearchService", ExeName);
+        }
+    }
+
+    // Package.Current throws when the process has no package identity.
+    private static bool IsPackaged()
+    {
+        try
+        {
+            _ = Package.Current;
+            return true;
+        }
+        catch
+        {
+            return false;
+        }
+    }
+}
diff --git a/src/Files.App/Package.Debug.appxmanifest b/src/Files.App/Package.Debug.appxmanifest new file mode 100644 index 000000000000..28ced9630565 --- /dev/null +++ b/src/Files.App/Package.Debug.appxmanifest @@ -0,0 +1,203 @@ + + + + + + + + + Files - Dev + Yair A + Assets\AppTiles\Dev\StoreLogo.png + disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + com.files.filepreview + + + + + + + + + + + + + + + + + + + + + + + + + .zip + .7z + .rar + .tar + .jar + .mrpack + .gz + + assets\archives\ExtensionIcon.png + + + + + + + + + + + + + WinRT.Host.dll + + + + + + Files.App.Server\Files.App.Server.exe + singleInstance + + + + + + + + + + + + + + + + + + + + diff --git a/src/Files.App/Package.appxmanifest b/src/Files.App/Package.appxmanifest index c1086de9e73b..a44acc524148 100644 --- a/src/Files.App/Package.appxmanifest +++ b/src/Files.App/Package.appxmanifest @@ -1,4 +1,4 @@ - + @@ -140,6 +140,14 @@ + + + + + @@ -182,6 +190,8 @@ --> + + + + + + + + + diff --git a/src/Files.IndexedSearch.Client/Files.IndexedSearch.Client.csproj b/src/Files.IndexedSearch.Client/Files.IndexedSearch.Client.csproj index fda05f83aa7c..1a275bd575d9 100644 --- a/src/Files.IndexedSearch.Client/Files.IndexedSearch.Client.csproj +++ b/src/Files.IndexedSearch.Client/Files.IndexedSearch.Client.csproj @@ -24,11 +24,11 @@ - - + ProtoRoot="..\Files.SearchService\proto" /> diff --git a/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs b/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs index 6ce795f5298e..0037e4405040 100644 
--- a/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs +++ b/src/Files.IndexedSearch.Client/IndexedSearchProvider.cs @@ -1,6 +1,7 @@ // Copyright (c) Files Community // Licensed under the MIT License. +using System.IO.Pipes; using System.Runtime.CompilerServices; using Files.Search.V1; using Files.SearchAbstraction; @@ -10,37 +11,31 @@ namespace Files.IndexedSearch.Client; /// -/// backed by the Rust -/// files-search-service over gRPC. Currently TCP on -/// 127.0.0.1:50080; will swap to a named pipe channel once the -/// service exposes one. Override the address with -/// FILES_SEARCH_SERVICE_URL for tests / dev. +/// backed by the files-search-service +/// over gRPC on a named pipe (\\.\pipe\files-search). +/// Set FILES_SEARCH_SERVICE_URL to override with a TCP address for +/// dev / integration tests. /// /// -/// The channel is constructed lazily and reused for the lifetime -/// of the provider — gRPC channels multiplex concurrent calls over a -/// single HTTP/2 connection so there's no benefit to per-call -/// channels, and the connection setup is what we want to amortize. -/// -/// Health checks swallow transport errors and return -/// IsAvailable=false so callers (the routing layer, the bench -/// warm-up) can branch without try/catch. Search calls let exceptions -/// propagate — the caller decides whether to fall back to the legacy -/// provider or surface the error. +/// The channel is constructed lazily and reused for the provider's lifetime — +/// gRPC channels multiplex concurrent calls over a single HTTP/2 connection. +/// Health checks swallow transport errors and return IsAvailable=false +/// so the routing layer can fall back to legacy without try/catch. /// public sealed class IndexedSearchProvider : ISearchProvider, IDisposable { - private const string DefaultUrl = "http://127.0.0.1:50080"; + private static string PipeName => + Environment.GetEnvironmentVariable("FILES_SEARCH_PIPE") ?? 
"files-search"; private readonly GrpcChannel _channel; private readonly FilesSearch.FilesSearchClient _client; - public IndexedSearchProvider() : this(ResolveAddress()) { } + public IndexedSearchProvider() : this(CreateChannel()) { } - public IndexedSearchProvider(string address) + public IndexedSearchProvider(GrpcChannel channel) { - _channel = GrpcChannel.ForAddress(address); - _client = new FilesSearch.FilesSearchClient(_channel); + _channel = channel; + _client = new FilesSearch.FilesSearchClient(_channel); } public string Name => "Indexed"; @@ -51,7 +46,7 @@ public async IAsyncEnumerable SearchAsync( { var request = new SearchRequest { - Query = query.Text, + Query = query.Text, MaxResults = (uint)Math.Clamp(query.MaxResults ?? 0, 0, uint.MaxValue), }; foreach (var scope in query.ScopePaths) @@ -59,49 +54,87 @@ public async IAsyncEnumerable SearchAsync( using var call = _client.Search(request, cancellationToken: cancellationToken); await foreach (var hit in call.ResponseStream.ReadAllAsync(cancellationToken)) - { yield return ToResult(hit); - } } public async Task GetHealthAsync(CancellationToken cancellationToken = default) { + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(TimeSpan.FromSeconds(3)); try { - var resp = await _client.HealthAsync(new HealthRequest(), cancellationToken: cancellationToken); + var resp = await _client.HealthAsync(new HealthRequest(), cancellationToken: cts.Token); return new HealthStatus( - ProviderName: Name, - Version: resp.Version, + ProviderName: Name, + Version: resp.Version, IndexedFileCount: (long)resp.IndexedFileCount, - IsIndexing: resp.Indexing, - IsAvailable: true); + IsIndexing: resp.Indexing, + IsAvailable: true); } - catch (RpcException) when (!cancellationToken.IsCancellationRequested) + catch (Exception) when (!cancellationToken.IsCancellationRequested) { - // Service is down / unreachable. 
Surface as "unavailable" - // rather than throwing so the routing layer can fall back - // to legacy without a try/catch around every health probe. return new HealthStatus( - ProviderName: Name, - Version: string.Empty, + ProviderName: Name, + Version: string.Empty, IndexedFileCount: 0, - IsIndexing: false, - IsAvailable: false); + IsIndexing: false, + IsAvailable: false); } } public void Dispose() => _channel.Dispose(); + // ---- channel factory --------------------------------------------------- + + private static GrpcChannel CreateChannel() + { + // Dev / CI override: if a URL is set, use raw TCP (matches the old default). + var envUrl = Environment.GetEnvironmentVariable("FILES_SEARCH_SERVICE_URL"); + if (envUrl is not null) + return GrpcChannel.ForAddress(envUrl); + + return CreateNamedPipeChannel(); + } + + private static GrpcChannel CreateNamedPipeChannel() + { + var handler = new SocketsHttpHandler + { + ConnectCallback = async (_, cancellationToken) => + { + var pipe = new NamedPipeClientStream( + serverName: ".", + pipeName: PipeName, + direction: PipeDirection.InOut, + options: PipeOptions.Asynchronous); + try + { + await pipe.ConnectAsync(cancellationToken); + return pipe; + } + catch + { + await pipe.DisposeAsync(); + throw; + } + }, + }; + + // "http://localhost" is a dummy address — the transport is the named + // pipe above, not a TCP socket. Cleartext HTTP/2 is fine for local IPC. + return GrpcChannel.ForAddress("http://localhost", new GrpcChannelOptions + { + HttpHandler = handler, + }); + } + + // ---- mapping ----------------------------------------------------------- + private static SearchResult ToResult(SearchHit hit) => new( - Path: hit.Path, - FileName: hit.Filename, - // u64 → long: indexed file sizes >= 8 EiB don't exist in - // practice; if one ever does, the cast wraps and is wrong by - // a sign. Worth a comment, not a runtime check. 
- SizeBytes: unchecked((long)hit.SizeBytes), + Path: hit.Path, + FileName: hit.Filename, + // u64 → long: file sizes ≥ 8 EiB don't exist; sign wrap is benign. + SizeBytes: unchecked((long)hit.SizeBytes), ModifiedUtc: DateTimeOffset.FromUnixTimeMilliseconds(hit.ModifiedUnixMs), - Score: hit.Score); - - private static string ResolveAddress() => - Environment.GetEnvironmentVariable("FILES_SEARCH_SERVICE_URL") ?? DefaultUrl; + Score: hit.Score); } diff --git a/src/Files.SearchAbstraction/HealthStatus.cs b/src/Files.SearchAbstraction/HealthStatus.cs index a7425eb4fc86..565af97eb1f0 100644 --- a/src/Files.SearchAbstraction/HealthStatus.cs +++ b/src/Files.SearchAbstraction/HealthStatus.cs @@ -12,8 +12,8 @@ namespace Files.SearchAbstraction; /// Echoes . /// /// Provider-defined version string. For the indexed provider this is -/// the Rust service's package version; for the legacy provider it's the -/// Files.App build version. +/// the search service's assembly version; for the legacy provider it's +/// the Files.App build version. /// /// /// Files currently in the backing index. 0 when the provider has diff --git a/src/Files.SearchAbstraction/ISearchProvider.cs b/src/Files.SearchAbstraction/ISearchProvider.cs index d0776f2e5bb1..8a9ae83015b6 100644 --- a/src/Files.SearchAbstraction/ISearchProvider.cs +++ b/src/Files.SearchAbstraction/ISearchProvider.cs @@ -5,8 +5,8 @@ namespace Files.SearchAbstraction; /// /// The single seam between the Files UI and any search backend. Every -/// search request — legacy Windows Search, the indexed Rust service, or -/// anything we ship later — flows through this interface. +/// search request — legacy Windows Search, the indexed sidecar service, +/// or anything we ship later — flows through this interface. 
// Copyright (c) Files Community
// Licensed under the MIT License.

using Files.Search.V1;
using Files.SearchService.Index;
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;

namespace Files.SearchService.Grpc;

/// <summary>
/// gRPC surface of the search service: a health probe and a server-streaming
/// search call, both answered from the shared <see cref="FileIndex"/>.
/// </summary>
internal sealed class SearchGrpcService(FileIndex index)
    : FilesSearch.FilesSearchBase
{
    /// <summary>Result cap applied when the request leaves <c>MaxResults</c> at 0.</summary>
    private const int DefaultMaxResults = 10_000;

    /// <summary>Reports the service version, live document count and whether indexing is in progress.</summary>
    public override Task<HealthResponse> Health(HealthRequest request, ServerCallContext context) =>
        Task.FromResult(new HealthResponse
        {
            Version = typeof(SearchGrpcService).Assembly.GetName().Version?.ToString() ?? "0.0.0",
            IndexedFileCount = (ulong)index.DocCount,
            Indexing = index.IsIndexing,
        });

    /// <summary>
    /// Runs the query against the index and streams each hit to the caller,
    /// stopping as soon as the call is cancelled.
    /// </summary>
    public override async Task Search(
        SearchRequest request,
        IServerStreamWriter<SearchHit> responseStream,
        ServerCallContext context)
    {
        // Clamp before narrowing: a plain (int) cast of a value above
        // int.MaxValue wraps to a negative limit.
        var max = request.MaxResults == 0
            ? DefaultMaxResults
            : (int)Math.Min(request.MaxResults, (uint)int.MaxValue);

        var hits = index.Search(request.Query, max, request.ScopePaths);

        foreach (var hit in hits)
        {
            context.CancellationToken.ThrowIfCancellationRequested();
            await responseStream.WriteAsync(new SearchHit
            {
                Path = hit.Path,
                Filename = hit.FileName,
                SizeBytes = hit.SizeBytes,
                ModifiedUnixMs = ToUnixMilliseconds(hit.ModifiedUtc),
                Score = hit.Score,
            }, context.CancellationToken);
        }
    }

    /// <summary>
    /// Converts an index timestamp to Unix milliseconds, treating it as UTC.
    /// </summary>
    /// <remarks>
    /// <c>new DateTimeOffset(DateTime)</c> interprets an Unspecified kind as
    /// local time, which skews the value by the machine's offset and throws
    /// for values near <see cref="DateTime.MinValue"/> in zones ahead of UTC.
    /// The field is named ModifiedUtc, so anything that is not explicitly
    /// Local is pinned to UTC.
    /// </remarks>
    private static long ToUnixMilliseconds(DateTime modifiedUtc)
    {
        var utc = modifiedUtc.Kind == DateTimeKind.Local
            ? modifiedUtc.ToUniversalTime()
            : DateTime.SpecifyKind(modifiedUtc, DateTimeKind.Utc);
        return new DateTimeOffset(utc).ToUnixTimeMilliseconds();
    }
}
// Copyright (c) Files Community
// Licensed under the MIT License.

namespace Files.SearchService.Index;

/// <summary>
/// Thread-safe in-memory inverted index over file names.
///
/// Structure:
///   _docs         — document store; doc IDs are indices into it.
///   _index        — token → ascending int[] of doc IDs (posting list).
///                   Serves whole-word queries via camelCase/delimiter tokens.
///   _trigramIndex — trigram → ascending int[] of doc IDs.
///                   Serves mid-string substring queries (e.g. "phab" → "ALPHABET.md").
///
/// Locking: a full rebuild constructs new structures off to the side and
/// swaps them in under the write lock. Incremental upserts/deletes mutate
/// the live structures in place under the write lock, so every reader
/// (search, doc count, record dump) takes the read lock — Dictionary and
/// List do not support reads concurrent with a write.
/// </summary>
public sealed class FileIndex
{
    // Doc store — indexed by doc ID.
    private volatile DocStore _docs = new();

    // Token inverted index. Same comparer as the one a rebuild installs, so
    // an upsert that arrives before the first rebuild behaves identically.
    private volatile Dictionary<string, int[]> _index = new(StringComparer.OrdinalIgnoreCase);

    // Trigram index; keys are 3-char lowercase substrings, hence Ordinal.
    private volatile Dictionary<string, int[]> _trigramIndex = new(StringComparer.Ordinal);

    private readonly ReaderWriterLockSlim _lock = new();

    // Both flags are written by background work and read from request
    // threads, hence volatile.
    private volatile bool _indexing;
    private volatile bool _dirty;

    /// <summary>Number of live (non-deleted) documents.</summary>
    public long DocCount
    {
        get
        {
            // The store enumerates its backing list to count; an in-place
            // Add during that walk would invalidate the enumeration.
            _lock.EnterReadLock();
            try { return _docs.Count; }
            finally { _lock.ExitReadLock(); }
        }
    }

    /// <summary>True while a bootstrap / rebuild is running.</summary>
    public bool IsIndexing
    {
        get => _indexing;
        internal set => _indexing = value;
    }

    /// <summary>True when an upsert or delete has happened since the last <see cref="MarkClean"/>.</summary>
    public bool IsDirty => _dirty;

    internal void MarkClean() => _dirty = false;

    /// <summary>Returns a copy of every live record, taken under the read lock.</summary>
    internal List<DocRecord> GetAllRecords()
    {
        _lock.EnterReadLock();
        try { return [.. _docs.EnumerateLive()]; }
        finally { _lock.ExitReadLock(); }
    }

    // ---- Bulk replace (initial build / full rebuild) -----------------------

    /// <summary>
    /// Replaces the whole index with <paramref name="records"/>. Doc IDs are
    /// the positions of the records in the list.
    /// </summary>
    /// <remarks>
    /// The new structures are built before the write lock is taken so that
    /// readers are only blocked for the reference swap. An upsert that runs
    /// concurrently with the build lands in the structures being replaced,
    /// i.e. it is ordered before this call.
    /// </remarks>
    internal void ReplaceAll(List<DocRecord> records)
    {
        var store = new DocStore(records.Count);
        var tokenLists = new Dictionary<string, List<int>>(StringComparer.OrdinalIgnoreCase);
        var trigramLists = new Dictionary<string, List<int>>(StringComparer.Ordinal);

        for (int id = 0; id < records.Count; id++)
        {
            var r = records[id];
            store.Add(r.FullPath, r.FileName, r.SizeBytes, r.ModifiedUtc);

            foreach (var token in Tokenizer.Tokenize(r.FileName))
                AppendPosting(tokenLists, token, id);

            foreach (var tg in Trigrams(r.FileName))
                AppendPosting(trigramLists, tg, id);
        }

        var frozenTokens = Freeze(tokenLists, StringComparer.OrdinalIgnoreCase);
        var frozenTrigrams = Freeze(trigramLists, StringComparer.Ordinal);

        _lock.EnterWriteLock();
        try
        {
            _docs = store;
            _index = frozenTokens;
            _trigramIndex = frozenTrigrams;
        }
        finally
        {
            _lock.ExitWriteLock();
        }
    }

    // ---- Incremental updates (watcher) ------------------------------------

    /// <summary>
    /// Adds a document, superseding any existing document with the same path.
    /// The superseded document is tombstoned; its posting entries stay until
    /// the next full rebuild and are skipped at query time.
    /// </summary>
    internal void Upsert(string fullPath, string fileName, ulong sizeBytes, DateTime modifiedUtc)
    {
        _lock.EnterWriteLock();
        try
        {
            var existing = _docs.FindId(fullPath);
            if (existing >= 0)
                RemoveFromIndex(existing);

            var id = _docs.Add(fullPath, fileName, sizeBytes, modifiedUtc);
            foreach (var token in Tokenizer.Tokenize(fileName))
                InsertPosting(_index, token, id);
            foreach (var tg in Trigrams(fileName))
                InsertPosting(_trigramIndex, tg, id);
            _dirty = true;
        }
        finally
        {
            _lock.ExitWriteLock();
        }
    }

    /// <summary>Tombstones the document for <paramref name="fullPath"/>, if one exists.</summary>
    internal void Delete(string fullPath)
    {
        _lock.EnterWriteLock();
        try
        {
            var id = _docs.FindId(fullPath);
            if (id >= 0)
            {
                RemoveFromIndex(id);
                _dirty = true;
            }
        }
        finally
        {
            _lock.ExitWriteLock();
        }
    }

    // ---- Query -------------------------------------------------------------

    /// <summary>
    /// Returns up to <paramref name="maxResults"/> hits for
    /// <paramref name="query"/>, best score first.
    /// </summary>
    /// <param name="query">Raw user query.</param>
    /// <param name="maxResults">Upper bound on returned hits; non-positive yields no hits.</param>
    /// <param name="scopePaths">
    /// Directories to restrict results to. Empty means unrestricted.
    /// </param>
    internal IReadOnlyList<QueryHit> Search(
        string query, int maxResults, IReadOnlyList<string> scopePaths)
    {
        if (string.IsNullOrEmpty(query) || maxResults <= 0)
            return [];

        var tokens = Tokenizer.Tokenize(query).ToList();
        if (tokens.Count == 0)
            return [];

        // Everything that touches the live structures happens under the read
        // lock; the result is a list of self-contained QueryHit values, so
        // sorting and refinement below run without holding it.
        List<QueryHit> scored;
        _lock.EnterReadLock();
        try { scored = CollectScoredCandidates(query, tokens, scopePaths); }
        finally { _lock.ExitReadLock(); }

        if (scored.Count == 0)
            return [];

        // Score-then-truncate in two passes:
        //   1. Cheap score (no tokenization) for every candidate, done above.
        //   2. Sort by cheap score, keep the top N, then re-score only those
        //      N with the full Scorer (which tokenizes each filename).
        // Tokenizing every candidate for common terms is what made bulk
        // scoring slow, so the precise pass is limited to the survivors.
        scored.Sort(static (a, b) => b.Score.CompareTo(a.Score));
        var top = scored.Count > maxResults ? scored.GetRange(0, maxResults) : scored;

        for (int i = 0; i < top.Count; i++)
        {
            var precise = Scorer.Score(query, tokens, top[i].FileName);
            if (precise != top[i].Score)
                top[i] = top[i] with { Score = precise };
        }
        top.Sort(static (a, b) => b.Score.CompareTo(a.Score));
        return top;
    }

    /// <summary>
    /// Gathers token and trigram candidates, drops tombstoned and
    /// out-of-scope documents, and attaches the cheap score to each.
    /// Must be called with the read lock held.
    /// </summary>
    private List<QueryHit> CollectScoredCandidates(
        string query, List<string> tokens, IReadOnlyList<string> scopePaths)
    {
        var docs = _docs;
        var index = _index;
        var trigramIndex = _trigramIndex;

        // Token-based AND intersection (whole-word matches).
        var tokenHits = TryTokenIntersect(index, tokens);

        // Trigram search needs at least one full trigram in the query.
        var trigramHits = query.Length >= 3
            ? TryTrigramIntersect(trigramIndex, docs, query)
            : null;

        var candidates = Union(tokenHits ?? [], trigramHits ?? []);
        var scored = new List<QueryHit>(Math.Min(candidates.Length, 32_768));

        foreach (var id in candidates)
        {
            var path = docs.GetPath(id);
            if (path is null) continue; // tombstoned; posting not yet compacted
            if (!IsInScope(path, scopePaths)) continue;

            var fileName = docs.GetFileName(id) ?? string.Empty;
            var quick = QuickScore(query, fileName);
            scored.Add(new QueryHit(path, fileName, docs.GetSize(id), docs.GetModified(id), quick));
        }

        return scored;
    }

    /// <summary>
    /// True when no scopes are given, or <paramref name="path"/> equals a
    /// scope or lies beneath it.
    /// </summary>
    private static bool IsInScope(string path, IReadOnlyList<string> scopePaths)
    {
        if (scopePaths.Count == 0)
            return true;

        for (int i = 0; i < scopePaths.Count; i++)
        {
            if (IsUnderScope(path, scopePaths[i]))
                return true;
        }
        return false;
    }

    /// <summary>
    /// Prefix test that respects directory boundaries: the scope must be
    /// followed by a separator (or end of path), so a scope of
    /// <c>C:\Users\foo</c> does not admit <c>C:\Users\foobar\x.txt</c>.
    /// </summary>
    private static bool IsUnderScope(string path, string scope)
    {
        if (!path.StartsWith(scope, StringComparison.OrdinalIgnoreCase))
            return false;
        if (scope.Length == 0 || path.Length == scope.Length)
            return true;
        if (IsSeparator(scope[^1]))
            return true;
        return IsSeparator(path[scope.Length]);
    }

    private static bool IsSeparator(char c) =>
        c == Path.DirectorySeparatorChar || c == Path.AltDirectorySeparatorChar;

    /// <summary>
    /// O(filename length) tier classifier — no tokenization. It only knows
    /// the exact / prefix / substring tiers, so it is a triage heuristic:
    /// a candidate that the full Scorer would rate highly through token
    /// matches alone receives the lowest tier here and can be cut when
    /// more than maxResults candidates score above it.
    /// </summary>
    private static float QuickScore(string query, string fileName)
    {
        if (fileName.Equals(query, StringComparison.OrdinalIgnoreCase))
            return 1.0f;
        if (fileName.StartsWith(query, StringComparison.OrdinalIgnoreCase))
            return 0.9f;
        if (fileName.Contains(query, StringComparison.OrdinalIgnoreCase))
            return 0.4f;
        return 0.1f;
    }

    /// <summary>
    /// Intersects the posting lists of all tokens. Returns null when any
    /// token is unknown or the intersection is empty.
    /// </summary>
    private static int[]? TryTokenIntersect(Dictionary<string, int[]> index, List<string> tokens)
    {
        int[]? hits = null;
        foreach (var token in tokens)
        {
            if (!index.TryGetValue(token, out var posting))
                return null;
            hits = hits is null ? posting : Intersect(hits, posting);
            if (hits.Length == 0)
                return null;
        }
        return hits;
    }

    /// <summary>
    /// Intersects the posting lists of every distinct trigram in the query,
    /// then verifies each survivor actually contains the query as a
    /// substring (trigram overlap alone can be a false positive).
    /// Returns null when there is no candidate.
    /// </summary>
    private static int[]? TryTrigramIntersect(
        Dictionary<string, int[]> trigramIndex, DocStore docs, string query)
    {
        var queryLower = query.ToLowerInvariant();
        int[]? hits = null;
        var seen = new HashSet<string>(StringComparer.Ordinal);

        foreach (var tg in Trigrams(queryLower))
        {
            if (!seen.Add(tg)) continue; // repeated trigram adds no constraint
            if (!trigramIndex.TryGetValue(tg, out var posting))
                return null;
            hits = hits is null ? posting : Intersect(hits, posting);
            if (hits.Length == 0)
                return null;
        }

        if (hits is null)
            return null;

        return Array.FindAll(hits, id =>
            docs.GetPath(id) is not null &&
            (docs.GetFileName(id) ?? string.Empty).Contains(query, StringComparison.OrdinalIgnoreCase));
    }

    // ---- Private helpers --------------------------------------------------

    /// <summary>
    /// Build-time append. Doc IDs arrive in ascending order, so a repeat of
    /// the same ID (filename containing a token/trigram twice, e.g. "ana"
    /// in "banana") is always adjacent and is dropped here; otherwise the
    /// document would be returned once per repetition.
    /// </summary>
    private static void AppendPosting(Dictionary<string, List<int>> lists, string key, int docId)
    {
        if (!lists.TryGetValue(key, out var list))
            lists[key] = list = [];
        if (list.Count == 0 || list[^1] != docId)
            list.Add(docId);
    }

    /// <summary>Converts build-time lists (already ascending) to arrays for intersection.</summary>
    private static Dictionary<string, int[]> Freeze(
        Dictionary<string, List<int>> lists, StringComparer comparer)
    {
        var frozen = new Dictionary<string, int[]>(lists.Count, comparer);
        foreach (var (key, list) in lists)
            frozen[key] = list.ToArray();
        return frozen;
    }

    /// <summary>
    /// Inserts <paramref name="docId"/> into the sorted posting list for
    /// <paramref name="key"/>, replacing the array with a grown copy.
    /// No-op when the ID is already present. Caller holds the write lock.
    /// </summary>
    private static void InsertPosting(Dictionary<string, int[]> postings, string key, int docId)
    {
        if (!postings.TryGetValue(key, out var existing))
        {
            postings[key] = [docId];
            return;
        }

        var idx = Array.BinarySearch(existing, docId);
        if (idx >= 0)
            return;

        var insertAt = ~idx;
        var grown = new int[existing.Length + 1];
        existing.AsSpan(0, insertAt).CopyTo(grown);
        grown[insertAt] = docId;
        existing.AsSpan(insertAt).CopyTo(grown.AsSpan(insertAt + 1));
        postings[key] = grown;
    }

    private void RemoveFromIndex(int docId)
    {
        _docs.MarkDeleted(docId);
        // Posting lists are cleaned lazily on next rebuild to avoid
        // O(n) removal from every posting list on every delete.
    }

    // Yields all 3-char substrings of the lowercased filename.
    private static IEnumerable<string> Trigrams(string fileName)
    {
        var s = fileName.ToLowerInvariant();
        for (int i = 0; i <= s.Length - 3; i++)
            yield return s.Substring(i, 3);
    }

    // Sorted-merge intersection of two ascending doc-ID arrays.
    private static int[] Intersect(int[] a, int[] b)
    {
        var result = new List<int>(Math.Min(a.Length, b.Length));
        int i = 0, j = 0;
        while (i < a.Length && j < b.Length)
        {
            if (a[i] == b[j]) { result.Add(a[i]); i++; j++; }
            else if (a[i] < b[j]) i++;
            else j++;
        }
        return [.. result];
    }

    // Sorted merge of two ascending doc-ID arrays, deduplicating shared IDs.
    private static int[] Union(int[] a, int[] b)
    {
        if (a.Length == 0) return b;
        if (b.Length == 0) return a;
        var result = new List<int>(a.Length + b.Length);
        int i = 0, j = 0;
        while (i < a.Length && j < b.Length)
        {
            if (a[i] == b[j]) { result.Add(a[i]); i++; j++; }
            else if (a[i] < b[j]) { result.Add(a[i]); i++; }
            else { result.Add(b[j]); j++; }
        }
        while (i < a.Length) result.Add(a[i++]);
        while (j < b.Length) result.Add(b[j++]);
        return [.. result];
    }
}

/// <summary>One indexed file as stored and persisted.</summary>
internal readonly record struct DocRecord(
    string FullPath, string FileName, ulong SizeBytes, DateTime ModifiedUtc);

/// <summary>One search result with its relevance score.</summary>
internal readonly record struct QueryHit(
    string Path, string FileName, ulong SizeBytes, DateTime ModifiedUtc, float Score);
+/// +internal static class IndexBootstrapper +{ + public static async Task BootstrapAsync( + FileIndex index, + string root, + string indexDir, + CancellationToken cancellation) + { + Directory.CreateDirectory(indexDir); + var persistPath = Path.Combine(indexDir, "index.bin"); + + index.IsIndexing = true; + try + { + if (File.Exists(persistPath)) + { + await LoadAndReconcileAsync(index, root, persistPath, cancellation); + } + else + { + await BuildFreshAsync(index, root, persistPath, cancellation); + } + } + finally + { + index.IsIndexing = false; + } + } + + private static async Task BuildFreshAsync( + FileIndex index, string root, string persistPath, CancellationToken cancellation) + { + var reader = new UsnJournalReader(root); + var records = new List(); + const int LiveBatchSize = 50_000; + + await Task.Run(() => + { + foreach (var entry in reader.Enumerate(cancellation)) + { + records.Add(new DocRecord(entry.FullPath, entry.FileName, entry.SizeBytes, entry.ModifiedUtc)); + + // Publish a snapshot every LiveBatchSize records so searches can + // return partial results before the walk finishes. + if (records.Count % LiveBatchSize == 0) + index.ReplaceAll(new List(records)); + } + }, cancellation); + + index.ReplaceAll(records); + await IndexPersistence.SaveAsync(persistPath, records, cancellation); + } + + private static async Task LoadAndReconcileAsync( + FileIndex index, string root, string persistPath, CancellationToken cancellation) + { + // Load persisted records first so the service can answer queries + // while the reconcile walk runs. + var persisted = await IndexPersistence.LoadAsync(persistPath, cancellation); + index.ReplaceAll(persisted); + + // Walk the filesystem and diff against the loaded index. 
// Copyright (c) Files Community
// Licensed under the MIT License.

namespace Files.SearchService.Index;

/// <summary>
/// Persists and loads the doc store to/from a simple binary format.
/// Writes go to a temp file that is flushed to disk and then renamed over
/// the target, so an unclean shutdown leaves either the old or the new file.
///
/// Format (as written by <see cref="BinaryWriter"/>):
///   [4 bytes] magic: 0x46534958 ("FSIX")
///   [4 bytes] version: 1
///   [8 bytes] record count
///   For each record:
///     [8 bytes] size_bytes
///     [8 bytes] modified time as Windows FILETIME
///               (100 ns ticks since 1601-01-01 UTC; 0 for earlier values)
///     [4 bytes] full_path UTF-8 byte length
///     [N bytes] full_path UTF-8
///     [4 bytes] file_name UTF-8 byte length
///     [N bytes] file_name UTF-8
/// </summary>
internal static class IndexPersistence
{
    private const uint Magic = 0x46534958;
    private const int Version = 1;

    private const int BufferSize = 65536;

    // magic + version + count
    private const int HeaderBytes = 16;

    // size + modified + two empty length-prefixed strings
    private const int MinRecordBytes = 24;

    private static readonly DateTime FileTimeEpochUtc =
        new(1601, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>
    /// Writes <paramref name="records"/> to <paramref name="path"/>.
    /// The work uses synchronous BinaryWriter calls, so it is run on the
    /// thread pool rather than on the caller's thread.
    /// </summary>
    public static Task SaveAsync(
        string path, IList<DocRecord> records, CancellationToken cancellation) =>
        Task.Run(() => Save(path, records, cancellation), cancellation);

    /// <summary>
    /// Reads all records from <paramref name="path"/>.
    /// </summary>
    /// <exception cref="InvalidDataException">
    /// Magic, version, record count, a string length or a timestamp is not
    /// plausible for this file.
    /// </exception>
    /// <exception cref="EndOfStreamException">The file ends mid-record.</exception>
    public static Task<List<DocRecord>> LoadAsync(
        string path, CancellationToken cancellation) =>
        Task.Run(() => Load(path, cancellation), cancellation);

    private static void Save(string path, IList<DocRecord> records, CancellationToken cancellation)
    {
        var tmp = path + ".tmp";
        try
        {
            using (var fs = new FileStream(
                tmp, FileMode.Create, FileAccess.Write, FileShare.None, BufferSize, FileOptions.SequentialScan))
            using (var bw = new BinaryWriter(fs, System.Text.Encoding.UTF8, leaveOpen: true))
            {
                bw.Write(Magic);
                bw.Write(Version);
                bw.Write((long)records.Count);

                foreach (var r in records)
                {
                    cancellation.ThrowIfCancellationRequested();
                    bw.Write(r.SizeBytes);
                    bw.Write(ToFileTime(r.ModifiedUtc));
                    WriteString(bw, r.FullPath);
                    WriteString(bw, r.FileName);
                }

                bw.Flush();
                // The rename below is only a safe commit point if the bytes
                // it publishes have actually reached the disk.
                fs.Flush(flushToDisk: true);
            }

            File.Move(tmp, path, overwrite: true);
        }
        catch
        {
            // Don't leave a partial temp file behind on failure/cancellation.
            TryDelete(tmp);
            throw;
        }
    }

    private static List<DocRecord> Load(string path, CancellationToken cancellation)
    {
        using var fs = new FileStream(
            path, FileMode.Open, FileAccess.Read, FileShare.Read, BufferSize, FileOptions.SequentialScan);
        using var br = new BinaryReader(fs, System.Text.Encoding.UTF8, leaveOpen: true);

        if (br.ReadUInt32() != Magic)
            throw new InvalidDataException("Index file has unexpected magic bytes — possible corruption.");
        if (br.ReadInt32() != Version)
            throw new InvalidDataException("Index file version mismatch — will rebuild.");

        // Bound the declared count by what the file could physically hold
        // before using it as a list capacity; a corrupt header must not be
        // able to request a multi-gigabyte allocation.
        var count = br.ReadInt64();
        var maxPossible = Math.Max(0L, (fs.Length - HeaderBytes) / MinRecordBytes);
        if (count < 0 || count > maxPossible || count > int.MaxValue)
            throw new InvalidDataException("Index file record count is implausible — possible corruption.");

        var records = new List<DocRecord>((int)count);

        for (long i = 0; i < count; i++)
        {
            cancellation.ThrowIfCancellationRequested();
            var size = br.ReadUInt64();
            var modified = FromFileTime(br.ReadInt64());
            var fullPath = ReadString(br);
            var fileName = ReadString(br);
            records.Add(new DocRecord(fullPath, fileName, size, modified));
        }

        return records;
    }

    /// <summary>
    /// FILETIME for <paramref name="value"/>; values at or before the
    /// FILETIME epoch (including <c>default(DateTime)</c>) are stored as 0
    /// because <see cref="DateTime.ToFileTimeUtc"/> throws for them.
    /// </summary>
    private static long ToFileTime(DateTime value)
    {
        var utc = value.Kind == DateTimeKind.Local ? value.ToUniversalTime() : value;
        return utc.Ticks <= FileTimeEpochUtc.Ticks ? 0L : utc.ToFileTimeUtc();
    }

    private static DateTime FromFileTime(long fileTime)
    {
        try
        {
            return DateTime.FromFileTimeUtc(fileTime);
        }
        catch (ArgumentOutOfRangeException ex)
        {
            throw new InvalidDataException("Index file contains an invalid timestamp — possible corruption.", ex);
        }
    }

    private static void WriteString(BinaryWriter bw, string s)
    {
        var bytes = System.Text.Encoding.UTF8.GetBytes(s);
        bw.Write(bytes.Length);
        bw.Write(bytes);
    }

    private static string ReadString(BinaryReader br)
    {
        var len = br.ReadInt32();
        var remaining = br.BaseStream.Length - br.BaseStream.Position;
        if (len < 0 || len > remaining)
            throw new InvalidDataException("Index file contains an invalid string length — possible corruption.");

        // ReadBytes returns a short array at end of stream instead of
        // throwing; treat that as truncation rather than a shorter string.
        var bytes = br.ReadBytes(len);
        if (bytes.Length != len)
            throw new EndOfStreamException("Index file ended in the middle of a string.");

        return System.Text.Encoding.UTF8.GetString(bytes);
    }

    private static void TryDelete(string file)
    {
        try { File.Delete(file); }
        catch (IOException) { }
        catch (UnauthorizedAccessException) { }
    }
}
// Copyright (c) Files Community
// Licensed under the MIT License.

using System.Buffers;
using System.Runtime.CompilerServices;
using System.Text;

namespace Files.SearchService.Index;

/// <summary>
/// Splits filenames into lowercase tokens for the inverted index.
/// Strategy: split on delimiter characters, then split each segment on
/// camelCase and digit/letter transitions.
/// e.g. "MyDocument_v2Final.docx" → ["my", "document", "v", "2", "final", "docx"]
/// </summary>
internal static class Tokenizer
{
    // Shared by every call; string.Split only reads the array.
    private static readonly char[] Delimiters =
        [' ', '.', '_', '-', '(', ')', '[', ']', '+', '=', '&', ','];

    /// <summary>Returns lowercase tokens for the given filename, in order of appearance.</summary>
    public static IEnumerable<string> Tokenize(string filename)
    {
        foreach (var segment in filename.Split(Delimiters, StringSplitOptions.RemoveEmptyEntries))
        {
            foreach (var token in SplitCamelCase(segment))
                yield return token.ToLowerInvariant();
        }
    }

    /// <summary>
    /// Cuts a delimiter-free segment at case and digit boundaries.
    /// Pieces are never empty and keep their original casing.
    /// </summary>
    private static IEnumerable<string> SplitCamelCase(string segment)
    {
        // 'start' marks where the piece currently being accumulated begins,
        // so (i - start) is that piece's length so far.
        int start = 0;

        for (int i = 0; i < segment.Length; i++)
        {
            var c = segment[i];
            var pieceLength = i - start;

            var isUpper = char.IsUpper(c);
            var isDigit = char.IsDigit(c);
            var prevIsLower = i > 0 && char.IsLower(segment[i - 1]);
            var prevIsDigit = i > 0 && char.IsDigit(segment[i - 1]);
            var nextIsLower = i + 1 < segment.Length && char.IsLower(segment[i + 1]);

            bool boundary =
                (isUpper && prevIsLower) ||                         // camelCase boundary
                (isUpper && nextIsLower && pieceLength > 1) ||      // acronym end: "HTMLParser"
                (isDigit && !prevIsDigit && pieceLength > 0) ||     // letter→digit
                (!isDigit && prevIsDigit && pieceLength > 0);       // digit→letter

            if (boundary && pieceLength > 0)
            {
                yield return segment.Substring(start, pieceLength);
                start = i;
            }
        }

        if (start < segment.Length)
            yield return segment.Substring(start);
    }
}
Runs as a Windows Service when started by SCM; +/// falls back to a console process for dev / unpackaged mode. +/// +internal static class Program +{ + // Named pipe used in production (SCM/SYSTEM mode). + internal static string PipeName => + Environment.GetEnvironmentVariable("FILES_SEARCH_PIPE") ?? "files-search"; + + // TCP port used in dev/console mode (avoids named-pipe ACL issues). + internal const int DevTcpPort = 50299; + + internal static async Task Main(string[] args) + { + if (!Environment.UserInteractive) + { + // Started by SCM — hand off to ServiceBase. + ServiceBase.Run(new SearchWindowsService()); + return; + } + + // Dev / console mode — run until Ctrl+C. + using var cts = new CancellationTokenSource(); + Console.CancelKeyPress += (_, e) => { e.Cancel = true; cts.Cancel(); }; + try + { + await RunAsync(cts.Token); + } + catch (Exception ex) when (!cts.IsCancellationRequested) + { + if (IsNamedPipeConflict(ex)) + { + Console.Error.WriteLine( + $"[error] Named pipe '{PipeName}' is already in use — the Windows service may be running. " + + $"Set FILES_SEARCH_PIPE to a different name to run a dev instance alongside it. " + + $"Example: $env:FILES_SEARCH_PIPE = 'files-search-dev'"); + } + + var log = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "Files", "search-service-crash.log"); + Directory.CreateDirectory(Path.GetDirectoryName(log)!); + await File.WriteAllTextAsync(log, ex.ToString()); + Console.Error.WriteLine($"[crash] {ex}"); + throw; + } + } + + // Walk the exception chain looking for the signature Kestrel emits when a + // named pipe is already held by another process (typically the SCM service): + // AddressInUseException wrapping UnauthorizedAccessException. 
+ private static bool IsNamedPipeConflict(Exception ex) + { + for (var e = ex; e is not null; e = e.InnerException) + { + if (e.Message.Contains(PipeName, StringComparison.OrdinalIgnoreCase) && + e.InnerException is UnauthorizedAccessException) + return true; + } + return false; + } + + internal static async Task RunAsync(CancellationToken stopping) + { + // NOTE: ApplyBackgroundPriority is deferred until after the initial + // bootstrap finishes. PROCESS_MODE_BACKGROUND_BEGIN throttles ALL I/O + // (including reading index.bin) to IDLE priority, which turned a 15-second + // index load into multiple minutes. We're a good citizen *after* we're useful. + ProcessThrottle.StartPolling(); + + try + { + var root = ResolveRoot(); + var indexDir = ResolveIndexDir(); + var persistPath = Path.Combine(indexDir, "index.bin"); + + var index = new FileIndex(); + + // Start the gRPC server before bootstrapping so the named pipe is + // open immediately. Clients that connect during indexing see + // IsIndexing=true and get empty search results until ready. + var builder = WebApplication.CreateBuilder(); + builder.Services.AddGrpc(); + builder.Services.AddSingleton(index); + + if (Environment.UserInteractive) + { + // Dev / console mode: use TCP loopback — avoids named-pipe ACL + // restrictions that reject PipeSecurity from non-elevated accounts. + builder.WebHost.ConfigureKestrel(o => + o.ListenLocalhost(DevTcpPort, lo => lo.Protocols = HttpProtocols.Http2)); + } + else + { + // SCM service mode (SYSTEM): named pipe with explicit DACL so the + // user-session client can connect across the account boundary. 
+ builder.Services.Configure(o => + { + o.CurrentUserOnly = false; + o.PipeSecurity = CreatePipeSecurity(); + }); + builder.WebHost.ConfigureKestrel(o => + o.ListenNamedPipe(PipeName, lo => + lo.Protocols = HttpProtocols.Http2)); + } + + var app = builder.Build(); + app.MapGrpcService(); + + await app.StartAsync(stopping); + + // Bootstrap runs after the pipe is listening so searches can + // fall back to legacy while the index builds. + await IndexBootstrapper.BootstrapAsync(index, root, indexDir, stopping); + + // Now that the index is loaded and queries are fast, drop to background + // I/O priority so the watcher and periodic persistence don't compete with + // foreground apps. The startup load is where we needed full priority. + ProcessThrottle.ApplyBackgroundPriority(); + + using var watcher = new ChangeWatcher(root, index); + + // On buffer overflow: events were lost — stop, re-index, restart. + // Guard against concurrent overflow triggers. + int _rebuilding = 0; + watcher.Overflow += () => + { + if (Interlocked.CompareExchange(ref _rebuilding, 1, 0) != 0) return; + _ = Task.Run(async () => + { + try + { + watcher.Stop(); + await IndexBootstrapper.BootstrapAsync(index, root, indexDir, stopping); + watcher.Start(); + } + catch (OperationCanceledException) { } + catch (Exception ex) { Console.Error.WriteLine($"[watcher] re-index failed: {ex.Message}"); } + finally { Interlocked.Exchange(ref _rebuilding, 0); } + }, stopping); + }; + + watcher.Start(); + + // Persist watcher changes back to disk every 5 minutes so restarts are fast. 
+ using var saveTimer = new Timer(_ => + { + if (!index.IsDirty || index.IsIndexing) return; + var records = index.GetAllRecords(); + index.MarkClean(); + _ = IndexPersistence.SaveAsync(persistPath, records, stopping) + .ContinueWith( + t => Console.Error.WriteLine($"[persist] periodic save failed: {t.Exception?.GetBaseException().Message}"), + TaskContinuationOptions.OnlyOnFaulted); + }, null, TimeSpan.FromMinutes(5), TimeSpan.FromMinutes(5)); + + await app.WaitForShutdownAsync(stopping); + } + finally + { + ProcessThrottle.StopPolling(); + } + } + + private static string ResolveRoot() + { + var configured = Environment.GetEnvironmentVariable("FILES_SEARCH_ROOT"); + if (configured is not null) return configured; + + // When running as LocalSystem the UserProfile folder resolves to the system + // service profile (C:\Windows\system32\config\systemprofile), not a real user + // home. Detect this by checking for "system32\config" in the path and fall back + // to the drive root so USN enumeration covers the whole volume. Per-query scope + // filtering via scopePaths narrows results to each user's view at search time. + var userProfile = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); + if (userProfile.Contains(@"system32\config", StringComparison.OrdinalIgnoreCase)) + return Path.GetPathRoot(Environment.GetFolderPath(Environment.SpecialFolder.System)) ?? @"C:\"; + + return userProfile; + } + + private static string ResolveIndexDir() => + Environment.GetEnvironmentVariable("FILES_SEARCH_INDEX_DIR") + ?? Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "Files", "search-index"); + + /// + /// Builds the named pipe DACL for the LocalSystem → user-session topology. 
+ /// + /// Grant: + /// SYSTEM — FullControl (service owns the pipe) + /// Administrators — FullControl (admin diagnostics / tooling) + /// AuthenticatedUsers — ReadWrite | Synchronize (Files.App runs as the logged-in user) + /// + /// Synchronize is required because NamedPipeClientStream with + /// PipeOptions.Asynchronous waits on the pipe handle for overlapped I/O. + /// Granting only ReadWrite throws UnauthorizedAccessException on ConnectAsync + /// from a user-context client to a LocalSystem-owned pipe. + /// + /// Deny entries are intentionally absent; the default implicit deny covers + /// unauthenticated / anonymous callers. + /// + private static PipeSecurity CreatePipeSecurity() + { + var security = new PipeSecurity(); + + security.AddAccessRule(new PipeAccessRule( + new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null), + PipeAccessRights.FullControl, + AccessControlType.Allow)); + + security.AddAccessRule(new PipeAccessRule( + new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null), + PipeAccessRights.FullControl, + AccessControlType.Allow)); + + security.AddAccessRule(new PipeAccessRule( + new SecurityIdentifier(WellKnownSidType.AuthenticatedUserSid, null), + PipeAccessRights.ReadWrite | PipeAccessRights.Synchronize, + AccessControlType.Allow)); + + return security; + } +} diff --git a/src/Files.SearchService/SearchWindowsService.cs b/src/Files.SearchService/SearchWindowsService.cs new file mode 100644 index 000000000000..f49f413c767c --- /dev/null +++ b/src/Files.SearchService/SearchWindowsService.cs @@ -0,0 +1,32 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using System.ServiceProcess; + +namespace Files.SearchService; + +internal sealed class SearchWindowsService : ServiceBase +{ + private CancellationTokenSource? _cts; + private Task? 
_run; + + public SearchWindowsService() + { + ServiceName = "FilesSearchService"; + CanStop = true; + CanPauseAndContinue = false; + AutoLog = false; + } + + protected override void OnStart(string[] args) + { + _cts = new CancellationTokenSource(); + _run = Task.Run(() => Program.RunAsync(_cts.Token)); + } + + protected override void OnStop() + { + _cts?.Cancel(); + try { _run?.Wait(TimeSpan.FromSeconds(10)); } catch { } + } +} diff --git a/src/Files.SearchService/Throttle/ProcessThrottle.cs b/src/Files.SearchService/Throttle/ProcessThrottle.cs new file mode 100644 index 000000000000..0ff36c3838a0 --- /dev/null +++ b/src/Files.SearchService/Throttle/ProcessThrottle.cs @@ -0,0 +1,137 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace Files.SearchService.Throttle; + +/// +/// Keeps the service from being a bad citizen. +/// Sets PROCESS_MODE_BACKGROUND_BEGIN at startup and polls every 2 s +/// to pause index commits when on battery, fullscreen, or CPU > 70%. +/// +internal static partial class ProcessThrottle +{ + private const uint PROCESS_MODE_BACKGROUND_BEGIN = 0x00100000; + private const int QUNS_RUNNING_D3D_FULL_SCREEN = 3; + private const int QUNS_PRESENTATION_MODE = 4; + private const double CpuPauseThreshold = 0.70; + + private static volatile bool _shouldPause; + private static Timer? _pollTimer; + + // Baselines for the next CPU delta — written only by Poll() (timer thread). + private static long _lastIdle, _lastKernel, _lastUser; + + public static void ApplyBackgroundPriority() + { + if (!OperatingSystem.IsWindows()) return; + SetPriorityClass(Process.GetCurrentProcess().Handle, PROCESS_MODE_BACKGROUND_BEGIN); + } + + /// + /// Starts the 2-second background poll. Call once from RunAsync. + /// + public static void StartPolling() + { + if (!OperatingSystem.IsWindows()) return; + + // Seed CPU baseline so the first delta is valid. 
+ GetSystemTimes(out _lastIdle, out _lastKernel, out _lastUser); + + _pollTimer = new Timer( + static _ => Poll(), + null, + dueTime: TimeSpan.FromSeconds(2), + period: TimeSpan.FromSeconds(2)); + } + + /// + /// Stops the background poll. Call from OnStop / RunAsync finally. + /// + public static void StopPolling() + { + _pollTimer?.Dispose(); + _pollTimer = null; + } + + /// + /// Returns true when index commits should be skipped. Thread-safe read. + /// + public static bool ShouldPause() => _shouldPause; + + // ---- poll -------------------------------------------------------------- + + private static void Poll() + { + _shouldPause = IsOnBattery() || IsFullscreen() || IsCpuHigh(); + } + + private static bool IsOnBattery() + { + if (!GetSystemPowerStatus(out var status)) return false; + return status.ACLineStatus == 0; // 0 = offline (on battery) + } + + private static bool IsFullscreen() + { + // S_OK == 0; non-zero HRESULT means the call failed (e.g. no shell). + if (SHQueryUserNotificationState(out int state) != 0) return false; + return state is QUNS_RUNNING_D3D_FULL_SCREEN or QUNS_PRESENTATION_MODE; + } + + private static bool IsCpuHigh() + { + if (!GetSystemTimes(out long idle, out long kernel, out long user)) return false; + + long idleDelta = idle - _lastIdle; + long kernelDelta = kernel - _lastKernel; + long userDelta = user - _lastUser; + + _lastIdle = idle; + _lastKernel = kernel; + _lastUser = user; + + // kernelTime on Windows includes idle time; total = kernel + user. 
+ long total = kernelDelta + userDelta; + if (total <= 0) return false; + + double cpuUsage = 1.0 - (double)idleDelta / total; + return cpuUsage > CpuPauseThreshold; + } + + // ---- P/Invoke ---------------------------------------------------------- + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + private static partial bool SetPriorityClass(nint handle, uint priorityClass); + + // FILETIME is two consecutive DWORDs (low, high) — maps cleanly to long + // on little-endian Windows, giving the 100-ns tick count directly. + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + private static partial bool GetSystemTimes( + out long lpIdleTime, + out long lpKernelTime, + out long lpUserTime); + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + private static partial bool GetSystemPowerStatus(out SYSTEM_POWER_STATUS lpSystemPowerStatus); + + // Returns HRESULT; pquns receives a QUERY_USER_NOTIFICATION_STATE value. + [LibraryImport("shell32.dll")] + private static partial int SHQueryUserNotificationState(out int pquns); + + [StructLayout(LayoutKind.Sequential)] + private struct SYSTEM_POWER_STATUS + { + public byte ACLineStatus; // 0 = offline (battery), 1 = online (AC) + public byte BatteryFlag; + public byte BatteryLifePercent; + public byte SystemStatusFlag; + public uint BatteryLifeTime; + public uint BatteryFullLifeTime; + } +} diff --git a/src/Files.SearchService/Usn/NativeMethods.cs b/src/Files.SearchService/Usn/NativeMethods.cs new file mode 100644 index 000000000000..eddb121dd72f --- /dev/null +++ b/src/Files.SearchService/Usn/NativeMethods.cs @@ -0,0 +1,170 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Microsoft.Win32.SafeHandles; +using System.Runtime.InteropServices; + +namespace Files.SearchService.Usn; + +/// +/// P/Invoke declarations for NTFS USN Change Journal access. 
+/// All structures match the Windows SDK definitions for USN_RECORD_V2 +/// and MFT_ENUM_DATA_V0 used by FSCTL_ENUM_USN_DATA. +/// +internal static partial class NativeMethods +{ + // ---- IOCTL codes ------------------------------------------------------- + + internal const uint FSCTL_ENUM_USN_DATA = 0x900B3; + internal const uint FSCTL_READ_USN_JOURNAL = 0x900BB; + internal const uint FSCTL_QUERY_USN_JOURNAL = 0x900F4; + + // ---- File attribute flags ---------------------------------------------- + + internal const uint FILE_ATTRIBUTE_DIRECTORY = 0x10; + internal const uint FILE_ATTRIBUTE_REPARSE_POINT = 0x400; + + // ---- USN reason flags (live watcher) ----------------------------------- + + internal const uint USN_REASON_FILE_CREATE = 0x00000100; + internal const uint USN_REASON_FILE_DELETE = 0x00000200; + internal const uint USN_REASON_RENAME_NEW_NAME = 0x00002000; + internal const uint USN_REASON_RENAME_OLD_NAME = 0x00001000; + internal const uint USN_REASON_DATA_OVERWRITE = 0x00000001; + internal const uint USN_REASON_DATA_EXTEND = 0x00000002; + + // ---- CreateFile constants ---------------------------------------------- + + internal const uint GENERIC_READ = 0x80000000; + internal const uint FILE_SHARE_READ = 0x00000001; + internal const uint FILE_SHARE_WRITE = 0x00000002; + internal const uint OPEN_EXISTING = 3; + internal const uint FILE_FLAG_BACKUP_SEMANTICS = 0x02000000; + + // ---- FRN masking ------------------------------------------------------- + // USN FileReferenceNumbers encode a sequence number in the high 16 bits. + // GetFileInformationByHandle returns only the 48-bit MFT record number. + // Mask when comparing USN FRNs to a handle-derived FRN. 
+ internal const ulong FRN_MFT_MASK = 0x0000_FFFF_FFFF_FFFF; + + // ---- Structs ----------------------------------------------------------- + + [StructLayout(LayoutKind.Sequential)] + internal struct MFT_ENUM_DATA_V0 + { + public ulong StartFileReferenceNumber; + public long LowUsn; + public long HighUsn; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct USN_RECORD_V2 + { + public uint RecordLength; + public ushort MajorVersion; + public ushort MinorVersion; + public ulong FileReferenceNumber; + public ulong ParentFileReferenceNumber; + public long Usn; + public long TimeStamp; + public uint Reason; + public uint SourceInfo; + public uint SecurityId; + public uint FileAttributes; + public ushort FileNameLength; + public ushort FileNameOffset; + // FileNameLength bytes of UTF-16LE filename immediately follow in the buffer. + } + + [StructLayout(LayoutKind.Sequential)] + internal struct USN_JOURNAL_DATA_V0 + { + public ulong UsnJournalID; + public long FirstUsn; + public long NextUsn; + public long LowestValidUsn; + public long MaxUsn; + public ulong MaximumSize; + public ulong AllocationDelta; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct READ_USN_JOURNAL_DATA_V0 + { + public long StartUsn; + public uint ReasonMask; + public uint ReturnOnlyOnClose; + public ulong Timeout; + public ulong BytesToWaitFor; + public ulong UsnJournalID; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct BY_HANDLE_FILE_INFORMATION + { + public uint FileAttributes; + public long CreationTime; // FILETIME as 64-bit int + public long LastAccessTime; + public long LastWriteTime; + public uint VolumeSerialNumber; + public uint FileSizeHigh; + public uint FileSizeLow; + public uint NumberOfLinks; + public uint FileIndexHigh; + public uint FileIndexLow; + } + + // ---- P/Invoke ---------------------------------------------------------- + + [LibraryImport("kernel32.dll", SetLastError = true, StringMarshalling = StringMarshalling.Utf16)] + internal 
static partial SafeFileHandle CreateFileW( + string lpFileName, + uint dwDesiredAccess, + uint dwShareMode, + nint lpSecurityAttributes, + uint dwCreationDisposition, + uint dwFlagsAndAttributes, + nint hTemplateFile); + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + internal static partial bool GetFileInformationByHandle( + SafeHandle hFile, + out BY_HANDLE_FILE_INFORMATION lpFileInformation); + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + internal static partial bool DeviceIoControl( + SafeHandle hDevice, + uint dwIoControlCode, + ref MFT_ENUM_DATA_V0 lpInBuffer, + int nInBufferSize, + byte[] lpOutBuffer, + int nOutBufferSize, + out int lpBytesReturned, + nint lpOverlapped); + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + internal static partial bool DeviceIoControl( + SafeHandle hDevice, + uint dwIoControlCode, + ref READ_USN_JOURNAL_DATA_V0 lpInBuffer, + int nInBufferSize, + byte[] lpOutBuffer, + int nOutBufferSize, + out int lpBytesReturned, + nint lpOverlapped); + + [LibraryImport("kernel32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + internal static partial bool DeviceIoControl( + SafeHandle hDevice, + uint dwIoControlCode, + nint lpInBuffer, + int nInBufferSize, + out USN_JOURNAL_DATA_V0 lpOutBuffer, + int nOutBufferSize, + out int lpBytesReturned, + nint lpOverlapped); +} diff --git a/src/Files.SearchService/Usn/UsnJournalReader.cs b/src/Files.SearchService/Usn/UsnJournalReader.cs new file mode 100644 index 000000000000..d3a2ad7c140f --- /dev/null +++ b/src/Files.SearchService/Usn/UsnJournalReader.cs @@ -0,0 +1,252 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. 
+ +using Microsoft.Win32.SafeHandles; +using System.Collections.Concurrent; +using System.Runtime.InteropServices; +using System.Text; + +namespace Files.SearchService.Usn; + +/// +/// Enumerates every file on an NTFS volume via FSCTL_ENUM_USN_DATA. +/// Requires LocalSystem or SeBackupPrivilege — provided by the MSIX +/// service registration (StartAccount=localSystem). +/// Falls back to directory walking when the volume handle cannot be opened +/// (dev / non-elevated mode). +/// +internal sealed class UsnJournalReader +{ + private readonly string _root; + + public UsnJournalReader(string root) => _root = root; + + /// + /// Yields (fullPath, fileName, sizeBytes, modifiedUtc) for every file under _root. + /// + public IEnumerable Enumerate(CancellationToken cancellation = default) + { + var driveLetter = Path.GetPathRoot(_root) ?? _root; + var volumePath = $@"\\.\{driveLetter.TrimEnd('\\')}"; + + SafeFileHandle? volume = null; + try { volume = OpenVolumeHandle(volumePath); } + catch { } + + if (volume is null || volume.IsInvalid) + { + foreach (var entry in FallbackWalk(_root, cancellation)) + yield return entry; + yield break; + } + + using (volume) + { + IEnumerable entries; + try { entries = EnumerateViaUsn(volume, cancellation); } + catch { entries = FallbackWalk(_root, cancellation); } + + foreach (var entry in entries) + yield return entry; + } + } + + // --- USN path ----------------------------------------------------------- + + private IEnumerable EnumerateViaUsn(SafeFileHandle volume, CancellationToken cancellation) + { + ulong rootFrn; + try { rootFrn = GetRootFrn(_root); } + catch { return FallbackWalk(_root, cancellation); } + + var (dirs, files) = ParseMft(volume, cancellation); + + var results = new ConcurrentBag(); + + // Use data already in the USN record — no per-file stat calls. + // Size is stored as 0; the watcher fills it in accurately on the next + // file-change event. 
Timestamp is the FILETIME of the last USN record + // for that file, which is close enough to LastWriteTime for sorting. + Parallel.ForEach(files, new ParallelOptions { CancellationToken = cancellation }, file => + { + var path = ResolvePath(file.ParentFrn, file.Name, rootFrn, _root, dirs); + if (path is null) return; + + var modifiedUtc = file.Timestamp > 0 + ? DateTime.FromFileTimeUtc(file.Timestamp) + : DateTime.UtcNow; + + results.Add(new FileEntry(path, file.Name, 0UL, modifiedUtc)); + }); + + return results; + } + + // --- MFT parsing -------------------------------------------------------- + + private readonly record struct DirEntry(ulong ParentFrn, string Name); + private readonly record struct FileRecord(ulong Frn, ulong ParentFrn, string Name, long Timestamp); + + private static (Dictionary Dirs, List Files) ParseMft( + SafeFileHandle volume, CancellationToken cancellation) + { + const int BufferSize = 256 * 1024; + var buffer = new byte[BufferSize]; + + var dirs = new Dictionary(); + var files = new List(); + + var enumData = new NativeMethods.MFT_ENUM_DATA_V0 + { + StartFileReferenceNumber = 0, + LowUsn = 0, + HighUsn = long.MaxValue, + }; + + int inSize = Marshal.SizeOf(); + int recordHdrSz = Marshal.SizeOf(); + + while (!cancellation.IsCancellationRequested) + { + bool ok = NativeMethods.DeviceIoControl( + volume, + NativeMethods.FSCTL_ENUM_USN_DATA, + ref enumData, + inSize, + buffer, + BufferSize, + out int bytesReturned, + nint.Zero); + + // bytesReturned == 8 means only the next-FRN cursor came back (no records left). + // !ok covers ERROR_HANDLE_EOF and any other terminal error. + if (!ok || bytesReturned <= 8) break; + + // First 8 bytes of output = next StartFileReferenceNumber. 
+ enumData.StartFileReferenceNumber = MemoryMarshal.Read(buffer); + + int offset = 8; + while (offset + recordHdrSz <= bytesReturned) + { + var rec = MemoryMarshal.Read(buffer.AsSpan(offset)); + + if (rec.RecordLength < recordHdrSz) break; // malformed — stop parsing this batch + + int nameStart = offset + rec.FileNameOffset; + int nameEnd = nameStart + rec.FileNameLength; + + if (nameEnd <= bytesReturned && + (rec.FileAttributes & NativeMethods.FILE_ATTRIBUTE_REPARSE_POINT) == 0) + { + var name = Encoding.Unicode.GetString(buffer, nameStart, rec.FileNameLength); + ulong frn = rec.FileReferenceNumber & NativeMethods.FRN_MFT_MASK; + ulong parentFrn = rec.ParentFileReferenceNumber; // masked in ResolvePath + + if ((rec.FileAttributes & NativeMethods.FILE_ATTRIBUTE_DIRECTORY) != 0) + dirs[frn] = new DirEntry(parentFrn, name); + else + files.Add(new FileRecord(frn, parentFrn, name, rec.TimeStamp)); + } + + offset += (int)rec.RecordLength; + } + } + + return (dirs, files); + } + + // --- Path resolution ---------------------------------------------------- + + /// + /// Walks up the parent FRN chain from until + /// it reaches , accumulating directory names. + /// Returns null if the file is not under root or the chain is broken. + /// + private static string? ResolvePath( + ulong fileParentFrn, string fileName, ulong rootFrn, string rootPath, + Dictionary dirs) + { + // Segments pushed in leaf-to-root order; Stack iterates top-to-bottom (root-to-leaf). 
+ var segments = new Stack(); + ulong current = fileParentFrn & NativeMethods.FRN_MFT_MASK; + + for (int depth = 0; depth < 64; depth++) + { + if (current == rootFrn) + { + var path = rootPath; + foreach (var seg in segments) // top = nearest child of root + path = Path.Combine(path, seg); + return Path.Combine(path, fileName); + } + + if (!dirs.TryGetValue(current, out var dir)) + return null; // chain broken or file not under root + + segments.Push(dir.Name); + current = dir.ParentFrn & NativeMethods.FRN_MFT_MASK; + } + + return null; // exceeded max depth — cycle guard + } + + // --- Helpers ------------------------------------------------------------ + + /// + /// Returns the 48-bit MFT record number for . + /// GetFileInformationByHandle returns only the record number (no sequence bits). + /// + private static ulong GetRootFrn(string path) + { + using var handle = NativeMethods.CreateFileW( + path, + NativeMethods.GENERIC_READ, + NativeMethods.FILE_SHARE_READ | NativeMethods.FILE_SHARE_WRITE, + nint.Zero, + NativeMethods.OPEN_EXISTING, + NativeMethods.FILE_FLAG_BACKUP_SEMANTICS, + nint.Zero); + + if (handle.IsInvalid) + throw new IOException($"Cannot open directory handle for '{path}'."); + + if (!NativeMethods.GetFileInformationByHandle(handle, out var info)) + throw new IOException($"GetFileInformationByHandle failed for '{path}'."); + + return ((ulong)info.FileIndexHigh << 32) | info.FileIndexLow; + } + + private static SafeFileHandle OpenVolumeHandle(string volumePath) => + NativeMethods.CreateFileW( + volumePath, + NativeMethods.GENERIC_READ, + NativeMethods.FILE_SHARE_READ | NativeMethods.FILE_SHARE_WRITE, + nint.Zero, + NativeMethods.OPEN_EXISTING, + 0, + nint.Zero); + + // --- Fallback path (dev / non-elevated) --------------------------------- + + private static IEnumerable FallbackWalk(string root, CancellationToken cancellation) + { + var opts = new EnumerationOptions + { + IgnoreInaccessible = true, + RecurseSubdirectories = true, + 
AttributesToSkip = FileAttributes.ReparsePoint, + }; + + foreach (var fi in new DirectoryInfo(root).EnumerateFiles("*", opts)) + { + cancellation.ThrowIfCancellationRequested(); + yield return new FileEntry(fi.FullName, fi.Name, (ulong)fi.Length, fi.LastWriteTimeUtc); + } + } +} + +internal readonly record struct FileEntry( + string FullPath, + string FileName, + ulong SizeBytes, + DateTime ModifiedUtc); diff --git a/src/Files.SearchService/Watch/ChangeWatcher.cs b/src/Files.SearchService/Watch/ChangeWatcher.cs new file mode 100644 index 000000000000..967061cb2312 --- /dev/null +++ b/src/Files.SearchService/Watch/ChangeWatcher.cs @@ -0,0 +1,90 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Files.SearchService.Index; + +namespace Files.SearchService.Watch; + +/// +/// Watches the indexed root for filesystem changes and applies them to +/// the index via . Uses +/// which wraps ReadDirectoryChangesW on Windows. +/// +internal sealed class ChangeWatcher : IDisposable +{ + private readonly FileSystemWatcher _watcher; + private readonly EventBatcher _batcher; + private readonly FileIndex _index; + + /// + /// Fired when the watcher's internal buffer overflows and events were lost. + /// The caller should stop the watcher, re-enumerate, and restart. + /// + public event Action? 
Overflow; + + public ChangeWatcher(string root, FileIndex index) + { + _index = index; + _batcher = new EventBatcher(ApplyBatch); + _watcher = new FileSystemWatcher(root) + { + IncludeSubdirectories = true, + NotifyFilter = + NotifyFilters.FileName | + NotifyFilters.DirectoryName | + NotifyFilters.LastWrite | + NotifyFilters.Size, + InternalBufferSize = 65536, + }; + + _watcher.Created += (_, e) => _batcher.Enqueue(new PendingChange(e.FullPath, ChangeKind.Upsert)); + _watcher.Changed += (_, e) => _batcher.Enqueue(new PendingChange(e.FullPath, ChangeKind.Upsert)); + _watcher.Deleted += (_, e) => _batcher.Enqueue(new PendingChange(e.FullPath, ChangeKind.Delete)); + _watcher.Renamed += (_, e) => + { + _batcher.Enqueue(new PendingChange(e.OldFullPath, ChangeKind.Delete)); + _batcher.Enqueue(new PendingChange(e.FullPath, ChangeKind.Upsert)); + }; + _watcher.Error += (_, e) => + { + var ex = e.GetException(); + if (ex is InternalBufferOverflowException) + Overflow?.Invoke(); + else + Console.Error.WriteLine($"[watcher] error: {ex.Message}"); + }; + } + + public void Start() => _watcher.EnableRaisingEvents = true; + public void Stop() => _watcher.EnableRaisingEvents = false; + + private void ApplyBatch(IReadOnlyList batch) + { + foreach (var change in batch) + { + if (change.Kind == ChangeKind.Delete) + { + _index.Delete(change.FullPath); + continue; + } + + try + { + var fi = new FileInfo(change.FullPath); + if (!fi.Exists || fi.Attributes.HasFlag(FileAttributes.ReparsePoint)) + continue; + if (fi.Attributes.HasFlag(FileAttributes.Directory)) + continue; + + _index.Upsert(fi.FullName, fi.Name, (ulong)fi.Length, fi.LastWriteTimeUtc); + } + catch (IOException) { } // Race: file deleted between event and stat. 
+ } + } + + public void Dispose() + { + _watcher.Dispose(); + _batcher.Dispose(); + } +} diff --git a/src/Files.SearchService/Watch/EventBatcher.cs b/src/Files.SearchService/Watch/EventBatcher.cs new file mode 100644 index 000000000000..d146306e35c0 --- /dev/null +++ b/src/Files.SearchService/Watch/EventBatcher.cs @@ -0,0 +1,54 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +namespace Files.SearchService.Watch; + +/// +/// Deduplicates and debounces filesystem events before applying them +/// to the index. Coalesces bursts (git checkout, zip extract) into a +/// single batch committed after a 250ms quiet window. +/// +internal sealed class EventBatcher : IDisposable +{ + private const int DebounceMs = 250; + + private readonly Action> _onBatch; + private readonly Dictionary _pending = new(StringComparer.OrdinalIgnoreCase); + private readonly Lock _lock = new(); + private Timer? _timer; + + public EventBatcher(Action> onBatch) => _onBatch = onBatch; + + public void Enqueue(PendingChange change) + { + lock (_lock) + { + // Last event for a given path wins — a delete after a create = delete. + _pending[change.FullPath] = change; + _timer?.Dispose(); + _timer = new Timer(_ => Flush(), null, DebounceMs, Timeout.Infinite); + } + } + + private void Flush() + { + List batch; + lock (_lock) + { + if (_pending.Count == 0) return; + batch = [.. 
_pending.Values]; + _pending.Clear(); + } + _onBatch(batch); + } + + public void Dispose() + { + _timer?.Dispose(); + Flush(); + } +} + +internal readonly record struct PendingChange(string FullPath, ChangeKind Kind); + +internal enum ChangeKind { Upsert, Delete } diff --git a/src/search-service/proto/files_search.proto b/src/Files.SearchService/proto/files_search.proto similarity index 100% rename from src/search-service/proto/files_search.proto rename to src/Files.SearchService/proto/files_search.proto diff --git a/src/search-service/.gitignore b/src/search-service/.gitignore deleted file mode 100644 index c17da7f586f2..000000000000 --- a/src/search-service/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/target -Cargo.lock.bak diff --git a/src/search-service/Cargo.lock b/src/search-service/Cargo.lock deleted file mode 100644 index f4722aee7924..000000000000 --- a/src/search-service/Cargo.lock +++ /dev/null @@ -1,2501 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = "anyhow" -version = "1.0.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" - -[[package]] -name = "arc-swap" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" -dependencies = [ - "rustversion", -] - -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.3", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" - -[[package]] -name = "bitpacking" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" -dependencies = [ - "crunchy", -] - -[[package]] -name = "bumpalo" -version = "3.20.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" - -[[package]] -name = "cc" -version = "1.2.62" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" -dependencies = [ - "find-msvc-tools", - "jobserver", - "libc", - "shlex", -] - -[[package]] -name = "census" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = 
"0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "deranged" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" -dependencies = [ - "powerfmt", - "serde_core", -] - -[[package]] -name = "downcast-rs" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" -dependencies = [ - "libc", - "windows-sys 0.61.2", -] - -[[package]] -name = "fastdivide" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" - -[[package]] -name = "fastrand" -version = "2.4.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" - -[[package]] -name = "files-search-service" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-stream", - "notify", - "parking_lot", - "prost", - "protoc-bin-vendored", - "rayon", - "tantivy", - "tokio", - "tokio-stream", - "tonic", - "tonic-build", - "tracing", - "tracing-subscriber", - "walkdir", - "windows", -] - -[[package]] -name = "filetime" -version = "0.2.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5b2eef6fafbf69f877e55509ce5b11a760690ac9700a2921be067aa6afaef6" -dependencies = [ - "cfg-if", - "libc", -] - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - -[[package]] -name = "fs4" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" -dependencies = [ - "rustix 0.38.44", - "windows-sys 0.52.0", -] - -[[package]] -name = "fsevent-sys" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" -dependencies = [ - "libc", -] - 
-[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" - -[[package]] -name = "futures-sink" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" - -[[package]] -name = "futures-task" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" - -[[package]] -name = "futures-util" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" -dependencies = [ - "futures-core", - "futures-task", - "pin-project-lite", - "slab", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi 5.3.0", - "wasip2", -] - -[[package]] -name = "getrandom" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" -dependencies = [ - "cfg-if", - "libc", - "r-efi 6.0.0", - "wasip2", - "wasip3", -] - -[[package]] -name = "h2" -version = "0.4.14" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http", - "indexmap 2.14.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] - -[[package]] -name = "hashbrown" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - -[[package]] -name = "htmlescape" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" - -[[package]] -name = "http" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" -dependencies = [ - "bytes", - "itoa", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http", -] - -[[package]] -name = "http-body-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" -dependencies = [ - "bytes", - "futures-core", - "http", - "http-body", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "hyper" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" -dependencies = [ - "atomic-waker", - "bytes", - "futures-channel", - "futures-core", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", - "want", -] - -[[package]] -name = "hyper-timeout" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" -dependencies = [ - "hyper", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "hyper-util" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "http", - "http-body", - "hyper", - "libc", - "pin-project-lite", - "socket2 0.6.3", - "tokio", - "tower-service", - "tracing", -] - -[[package]] -name = "id-arena" -version = "2.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" -dependencies = [ - "equivalent", - "hashbrown 0.17.1", - "serde", - "serde_core", -] - -[[package]] -name = "inotify" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" -dependencies = [ - "bitflags 1.3.2", - "inotify-sys", - "libc", -] - -[[package]] -name = "inotify-sys" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" -dependencies = [ - "libc", -] - -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.18" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" - -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.4", - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" -dependencies = [ - "cfg-if", - "futures-util", - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "kqueue" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a" -dependencies = [ - "kqueue-sys", - "libc", -] - -[[package]] -name = "kqueue-sys" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7b65860415f949f23fa882e669f2dbd4a0f0eeb1acdd56790b30494afd7da2f" -dependencies = [ - "bitflags 2.11.1", - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - -[[package]] -name = "leb128fmt" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" - -[[package]] -name = "levenshtein_automata" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" - -[[package]] -name = "libc" -version = "0.2.186" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" - -[[package]] -name = "libm" 
-version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" - -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - -[[package]] -name = "linux-raw-sys" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" - -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - -[[package]] -name = "lz4_flex" -version = "0.11.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373f5eceeeab7925e0c1098212f2fbc4d416adec9d35051a6ab251e824c1854a" - -[[package]] -name = "matchers" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" -dependencies = [ - "regex-automata", -] - -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - -[[package]] -name = "measure_time" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" -dependencies = [ - "instant", - "log", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "mio" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" -dependencies = [ - "libc", - "log", - "wasi", - "windows-sys 0.48.0", -] - -[[package]] -name = "mio" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" -dependencies = [ - "libc", - "wasi", - "windows-sys 0.61.2", -] - -[[package]] -name = "multimap" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" - -[[package]] -name = "murmurhash32" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "notify" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" -dependencies = [ - "bitflags 2.11.1", - "crossbeam-channel", - "filetime", - "fsevent-sys", - "inotify", - "kqueue", - "libc", - "log", - "mio 0.8.11", - "walkdir", - "windows-sys 0.48.0", -] - -[[package]] -name = "nu-ansi-term" -version = "0.50.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "num-conv" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" - -[[package]] -name = "oneshot" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "269bca4c2591a28585d6bf10d9ed0332b7d76900a1b02bec41bdc3a2cdcda107" - -[[package]] -name = "ownedbytes" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558" -dependencies = [ - "stable_deref_trait", -] - -[[package]] -name = "parking_lot" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-link", -] - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "petgraph" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" -dependencies = [ - "fixedbitset", - "indexmap 2.14.0", -] - -[[package]] -name = "pin-project" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf0d9e68100b3a7989b4901972f265cd542e560a3a8a724e1e20322f4d06ce9" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a990e22f43e84855daf260dded30524ef4a9021cc7541c26540500a50b624389" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" - -[[package]] -name = "pkg-config" -version = "0.3.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" -dependencies = [ - "heck", - "itertools 0.14.0", - "log", - "multimap", - "once_cell", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn", - "tempfile", -] - -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost", -] - -[[package]] -name = "protoc-bin-vendored" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa" -dependencies = [ - "protoc-bin-vendored-linux-aarch_64", - "protoc-bin-vendored-linux-ppcle_64", - "protoc-bin-vendored-linux-s390_64", - "protoc-bin-vendored-linux-x86_32", - "protoc-bin-vendored-linux-x86_64", - "protoc-bin-vendored-macos-aarch_64", - "protoc-bin-vendored-macos-x86_64", - "protoc-bin-vendored-win32", -] - -[[package]] -name = "protoc-bin-vendored-linux-aarch_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c" - -[[package]] -name = "protoc-bin-vendored-linux-ppcle_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c" - -[[package]] -name = "protoc-bin-vendored-linux-s390_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0" - -[[package]] -name = "protoc-bin-vendored-linux-x86_32" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5" - -[[package]] -name = "protoc-bin-vendored-linux-x86_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78" - -[[package]] -name = "protoc-bin-vendored-macos-aarch_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092" - -[[package]] -name = "protoc-bin-vendored-macos-x86_64" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756" - -[[package]] -name = "protoc-bin-vendored-win32" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3" - -[[package]] -name = "quote" -version = "1.0.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "r-efi" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" - -[[package]] -name = "rand" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.17", -] - -[[package]] -name = "rand_distr" -version = "0.4.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand", -] - -[[package]] -name = "rayon" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags 2.11.1", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" - -[[package]] -name = "rust-stemmers" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" -dependencies = [ - "serde", - "serde_derive", -] - -[[package]] -name = "rustc-hash" -version = 
"1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.11.1", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustix" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" -dependencies = [ - "bitflags 2.11.1", - "errno", - "libc", - "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "signal-hook-registry" -version = "1.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" -dependencies = [ - "errno", - "libc", -] - -[[package]] -name = "sketches-ddsketch" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" -dependencies = [ - "serde", -] - -[[package]] -name = "slab" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = 
"socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "socket2" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" -dependencies = [ - "libc", - "windows-sys 0.61.2", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - -[[package]] -name = "syn" -version = "2.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sync_wrapper" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" - -[[package]] -name = "tantivy" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96599ea6fccd844fc833fed21d2eecac2e6a7c1afd9e044057391d78b1feb141" -dependencies = [ - "aho-corasick", - "arc-swap", - "base64", - "bitpacking", - "byteorder", - "census", - "crc32fast", - "crossbeam-channel", - "downcast-rs", - "fastdivide", - "fnv", - "fs4", - "htmlescape", - "itertools 0.12.1", - "levenshtein_automata", - "log", - "lru", - "lz4_flex", - "measure_time", - "memmap2", - "num_cpus", - "once_cell", - "oneshot", - "rayon", - "regex", - "rust-stemmers", - "rustc-hash", - "serde", - "serde_json", - "sketches-ddsketch", - "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", - "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", - 
"tempfile", - "thiserror", - "time", - "uuid", - "winapi", -] - -[[package]] -name = "tantivy-bitpacker" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" -dependencies = [ - "downcast-rs", - "fastdivide", - "itertools 0.12.1", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] - -[[package]] -name = "tantivy-common" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" -dependencies = [ - "async-trait", - "byteorder", - "ownedbytes", - "serde", - "time", -] - -[[package]] -name = "tantivy-fst" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" -dependencies = [ - "byteorder", - "regex-syntax", - "utf8-ranges", -] - -[[package]] -name = "tantivy-query-grammar" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" -dependencies = [ - "nom", -] - -[[package]] -name = "tantivy-sstable" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" -dependencies = [ - "tantivy-bitpacker", - "tantivy-common", - "tantivy-fst", - "zstd", -] - -[[package]] -name = "tantivy-stacker" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" 
-dependencies = [ - "murmurhash32", - "rand_distr", - "tantivy-common", -] - -[[package]] -name = "tantivy-tokenizer-api" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04" -dependencies = [ - "serde", -] - -[[package]] -name = "tempfile" -version = "3.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" -dependencies = [ - "fastrand", - "getrandom 0.4.2", - "once_cell", - "rustix 1.1.4", - "windows-sys 0.61.2", -] - -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "thread_local" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "itoa", - "num-conv", - "powerfmt", - "serde_core", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "time-macros" -version = "0.2.27" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tokio" -version = "1.52.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" -dependencies = [ - "bytes", - "libc", - "mio 1.2.0", - "pin-project-lite", - "signal-hook-registry", - "socket2 0.6.3", - "tokio-macros", - "windows-sys 0.61.2", -] - -[[package]] -name = "tokio-macros" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-stream" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tonic" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-stream", - "async-trait", - "axum", - "base64", - "bytes", - "h2", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "socket2 0.5.10", - "tokio", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tonic-build" -version = "0.12.3" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" -dependencies = [ - "prettyplease", - "proc-macro2", - "prost-build", - "prost-types", - "quote", - "syn", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" -dependencies = [ - "futures-core", - "futures-util", - "pin-project-lite", - "sync_wrapper", - "tower-layer", - "tower-service", -] - -[[package]] -name = "tower-layer" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" - -[[package]] -name = "tower-service" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" - -[[package]] -name = "tracing" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" -dependencies = [ - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.36" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" -dependencies = [ - "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex-automata", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] - -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - -[[package]] -name = "utf8-ranges" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" - -[[package]] -name = "uuid" -version = "1.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" -dependencies = [ - "getrandom 0.4.2", - "js-sys", - "serde_core", - "wasm-bindgen", -] - -[[package]] -name = "valuable" -version = "0.1.1" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasip2" -version = "1.0.3+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" -dependencies = [ - "wit-bindgen 0.57.1", -] - -[[package]] -name = "wasip3" -version = "0.4.0+wasi-0.3.0-rc-2026-01-06" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" -dependencies = [ - "wit-bindgen 0.51.0", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.121" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.121" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = 
"wasm-bindgen-macro-support" -version = "0.2.121" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.121" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "wasm-encoder" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" -dependencies = [ - "leb128fmt", - "wasmparser", -] - -[[package]] -name = "wasm-metadata" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" -dependencies = [ - "anyhow", - "indexmap 2.14.0", - "wasm-encoder", - "wasmparser", -] - -[[package]] -name = "wasmparser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" -dependencies = [ - "bitflags 2.11.1", - "hashbrown 0.15.5", - "indexmap 2.14.0", - "semver", -] - -[[package]] -name = "web-sys" -version = "0.3.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" -dependencies = [ - "windows-core", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-result", - "windows-strings", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-implement" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = 
"0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-strings" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" -dependencies = [ - "windows-result", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - 
"windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" -dependencies = [ - "wit-bindgen-rust-macro", -] - -[[package]] -name = "wit-bindgen" -version = "0.57.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" - -[[package]] -name = "wit-bindgen-core" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" 
-dependencies = [ - "anyhow", - "heck", - "wit-parser", -] - -[[package]] -name = "wit-bindgen-rust" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" -dependencies = [ - "anyhow", - "heck", - "indexmap 2.14.0", - "prettyplease", - "syn", - "wasm-metadata", - "wit-bindgen-core", - "wit-component", -] - -[[package]] -name = "wit-bindgen-rust-macro" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" -dependencies = [ - "anyhow", - "prettyplease", - "proc-macro2", - "quote", - "syn", - "wit-bindgen-core", - "wit-bindgen-rust", -] - -[[package]] -name = "wit-component" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" -dependencies = [ - "anyhow", - "bitflags 2.11.1", - "indexmap 2.14.0", - "log", - "serde", - "serde_derive", - "serde_json", - "wasm-encoder", - "wasm-metadata", - "wasmparser", - "wit-parser", -] - -[[package]] -name = "wit-parser" -version = "0.244.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" -dependencies = [ - "anyhow", - "id-arena", - "indexmap 2.14.0", - "log", - "semver", - "serde", - "serde_derive", - "serde_json", - "unicode-xid", - "wasmparser", -] - -[[package]] -name = "zerocopy" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" -dependencies = [ 
- "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" - -[[package]] -name = "zstd" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.16+zstd.1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/src/search-service/Cargo.toml b/src/search-service/Cargo.toml deleted file mode 100644 index e877dbfe3302..000000000000 --- a/src/search-service/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -[package] -name = "files-search-service" -version = "0.1.0" -edition = "2024" -publish = false - -[[bin]] -name = "files-search-service" -path = "src/main.rs" - -[dependencies] -tokio = { version = "1.40", features = ["macros", "rt-multi-thread", "signal", "net"] } -tokio-stream = "0.1" -tonic = "0.12" -prost = "0.13" -tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } -walkdir = "2" -async-stream = "0.3" -tantivy = "0.22" -anyhow = "1" -rayon = "1.10" -notify = "6" -parking_lot = "0.12" - -[target.'cfg(windows)'.dependencies] -windows = { version = "0.58", features = [ - "Win32_Foundation", - "Win32_Storage_FileSystem", - "Win32_System_WindowsProgramming", - "Win32_System_Threading", - "Win32_System_Power", - "Win32_UI_Shell", -] } - -[build-dependencies] -tonic-build = "0.12" -protoc-bin-vendored 
= "3" - -[profile.release] -lto = "thin" -codegen-units = 1 diff --git a/src/search-service/build.rs b/src/search-service/build.rs deleted file mode 100644 index c2610227b021..000000000000 --- a/src/search-service/build.rs +++ /dev/null @@ -1,11 +0,0 @@ -fn main() -> Result<(), Box> { - // Vendored protoc — keeps contributors from needing a system install. - // SAFETY: build scripts run single-threaded before any other code. - unsafe { std::env::set_var("PROTOC", protoc_bin_vendored::protoc_bin_path()?) }; - // Client is built so integration tests (and any in-process Rust - // consumer) can talk to the service. The C# client uses its own - // generated stubs, not these. - tonic_build::configure() - .compile_protos(&["proto/files_search.proto"], &["proto"])?; - Ok(()) -} diff --git a/src/search-service/rust-toolchain.toml b/src/search-service/rust-toolchain.toml deleted file mode 100644 index 3417ee590341..000000000000 --- a/src/search-service/rust-toolchain.toml +++ /dev/null @@ -1,4 +0,0 @@ -[toolchain] -channel = "1.95.0" -components = ["rustfmt", "clippy"] -profile = "minimal" diff --git a/src/search-service/src/enumerate.rs b/src/search-service/src/enumerate.rs deleted file mode 100644 index b829ed890502..000000000000 --- a/src/search-service/src/enumerate.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Parallel filesystem enumeration. -//! -//! On Windows: `FindFirstFileExW` with `FindExInfoBasic` (skips the 8.3 -//! short-name lookup that Win32 normally does) and `FIND_FIRST_EX_LARGE_FETCH` -//! (larger internal buffer per syscall). Subdirectory recursion is fanned -//! out via `rayon::scope` so multiple threads walk in parallel. Callers -//! receive entries through an `mpsc::Sender` so producer and consumer (the -//! Tantivy writer) can run concurrently. -//! -//! On non-Windows: falls back to `walkdir` so the crate still builds and -//! tests on Linux/macOS for development convenience. The Files product is -//! 
Windows-only and the bench gates are measured on Windows. - -use std::path::{Path, PathBuf}; -use std::sync::mpsc::Sender; - -#[derive(Debug)] -pub struct Entry { - pub path: PathBuf, - pub size_bytes: u64, - pub modified_unix_ms: i64, -} - -pub fn enumerate(root: &Path, send: Sender) { - #[cfg(windows)] - { - rayon::scope(|s| { - win::recurse(s, root.to_path_buf(), send); - }); - } - #[cfg(not(windows))] - { - fallback::walk(root, send); - } -} - -#[cfg(windows)] -mod win { - use super::Entry; - use std::os::windows::ffi::{OsStrExt, OsStringExt}; - use std::path::PathBuf; - use std::sync::mpsc::Sender; - - use windows::core::PCWSTR; - use windows::Win32::Foundation::HANDLE; - use windows::Win32::Storage::FileSystem::{ - FindClose, FindExInfoBasic, FindExSearchNameMatch, FindFirstFileExW, FindNextFileW, - FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_REPARSE_POINT, FIND_FIRST_EX_LARGE_FETCH, - WIN32_FIND_DATAW, - }; - - pub(super) fn recurse<'a>( - scope: &rayon::Scope<'a>, - dir: PathBuf, - send: Sender, - ) { - let pattern = wide_path(&dir.join("*")); - - let mut data: WIN32_FIND_DATAW = unsafe { std::mem::zeroed() }; - let handle = unsafe { - FindFirstFileExW( - PCWSTR(pattern.as_ptr()), - FindExInfoBasic, - &mut data as *mut _ as *mut _, - FindExSearchNameMatch, - None, - FIND_FIRST_EX_LARGE_FETCH, - ) - }; - - let handle: HANDLE = match handle { - Ok(h) if !h.is_invalid() => h, - _ => return, - }; - - loop { - handle_entry(scope, &dir, &data, &send); - let next = unsafe { FindNextFileW(handle, &mut data) }; - if next.is_err() { - break; - } - } - - let _ = unsafe { FindClose(handle) }; - } - - fn handle_entry<'a>( - scope: &rayon::Scope<'a>, - dir: &PathBuf, - data: &WIN32_FIND_DATAW, - send: &Sender, - ) { - let name = wide_to_osstring(&data.cFileName); - let bytes = name.as_encoded_bytes(); - if bytes == b"." || bytes == b".." 
{ - return; - } - let path = dir.join(&name); - let attrs = data.dwFileAttributes; - let is_dir = (attrs & FILE_ATTRIBUTE_DIRECTORY.0) != 0; - let is_reparse = (attrs & FILE_ATTRIBUTE_REPARSE_POINT.0) != 0; - - // Skip reparse points (junctions, symlinks) to match the previous - // `WalkDir::follow_links(false)` behavior. Without this, a symlink - // loop can spin the enumerator forever. - if is_reparse { - return; - } - - if is_dir { - let send2 = send.clone(); - scope.spawn(move |s| recurse(s, path, send2)); - return; - } - - let size_bytes = ((data.nFileSizeHigh as u64) << 32) | (data.nFileSizeLow as u64); - let modified_unix_ms = filetime_to_unix_ms( - data.ftLastWriteTime.dwHighDateTime, - data.ftLastWriteTime.dwLowDateTime, - ); - let _ = send.send(Entry { - path, - size_bytes, - modified_unix_ms, - }); - } - - fn wide_path(p: &std::path::Path) -> Vec { - let mut v: Vec = p.as_os_str().encode_wide().collect(); - v.push(0); - v - } - - fn wide_to_osstring(buf: &[u16]) -> std::ffi::OsString { - let len = buf.iter().position(|&c| c == 0).unwrap_or(buf.len()); - std::ffi::OsString::from_wide(&buf[..len]) - } - - /// FILETIME counts 100-nanosecond intervals since 1601-01-01 UTC. - /// Unix epoch is 11644473600 seconds later; convert to milliseconds. 
- fn filetime_to_unix_ms(high: u32, low: u32) -> i64 { - const EPOCH_DIFFERENCE_MS: i64 = 11_644_473_600_000; - let ticks = ((high as u64) << 32) | (low as u64); - let ms = (ticks / 10_000) as i64; - ms - EPOCH_DIFFERENCE_MS - } -} - -#[cfg(not(windows))] -mod fallback { - use super::Entry; - use std::path::Path; - use std::sync::mpsc::Sender; - use std::time::UNIX_EPOCH; - - pub(super) fn walk(root: &Path, send: Sender) { - for entry in walkdir::WalkDir::new(root).follow_links(false) { - let Ok(entry) = entry else { continue }; - if !entry.file_type().is_file() { - continue; - } - let (size_bytes, modified_unix_ms) = match entry.metadata() { - Ok(m) => { - let size = m.len(); - let modified = m - .modified() - .ok() - .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) - .map(|d| d.as_millis() as i64) - .unwrap_or(0); - (size, modified) - } - Err(_) => (0, 0), - }; - let _ = send.send(Entry { - path: entry.into_path(), - size_bytes, - modified_unix_ms, - }); - } - } -} diff --git a/src/search-service/src/index.rs b/src/search-service/src/index.rs deleted file mode 100644 index 06381b41b11c..000000000000 --- a/src/search-service/src/index.rs +++ /dev/null @@ -1,296 +0,0 @@ -use std::path::{Path, PathBuf}; -use std::sync::mpsc; -use std::time::{Instant, UNIX_EPOCH}; - -use anyhow::Result; -use parking_lot::Mutex; -use tantivy::collector::TopDocs; -use tantivy::query::{BooleanQuery, FuzzyTermQuery, Occur, Query}; -use tantivy::schema::{Field, Schema, INDEXED, STORED, STRING, TEXT}; -use tantivy::{doc, Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term}; -use tracing::info; - -use crate::enumerate; - -pub struct SearchIndex { - // Held so reader/writer stay valid; Tantivy's writer/reader keep - // their own clones internally but we keep the handle in case future - // code needs to open additional readers. 
- #[allow(dead_code)] - index: Index, - reader: IndexReader, - writer: Mutex, - fields: Fields, -} - -#[derive(Clone, Copy)] -struct Fields { - path: Field, - filename: Field, - size_bytes: Field, - modified_unix_ms: Field, -} - -#[derive(Debug, Clone)] -pub struct Hit { - pub path: String, - pub filename: String, - pub size_bytes: u64, - pub modified_unix_ms: i64, - pub score: f32, -} - -impl SearchIndex { - /// Open an existing index at `dir`, or build a fresh one by walking `root` - /// and indexing every file. v0 has no startup re-sync — if the directory - /// already contains an index, it's reused as-is regardless of how stale. - /// The watcher closes that gap *while the service is running*, but - /// changes that happen while the service is offline still need a manual - /// rebuild until step 5 of the roadmap (full restart-time reconcile). - pub fn open_or_build(dir: &Path, root: &Path) -> Result { - std::fs::create_dir_all(dir)?; - let schema = build_schema(); - let fields = fields_of(&schema); - - let exists = std::fs::read_dir(dir)?.next().is_some(); - let index = if exists { - info!(dir = %dir.display(), "opening existing index"); - Index::open_in_dir(dir)? - } else { - info!(dir = %dir.display(), "creating new index"); - Index::create_in_dir(dir, schema.clone())? - }; - - let writer = index.writer(50_000_000)?; - let reader = index - .reader_builder() - .reload_policy(ReloadPolicy::Manual) - .try_into()?; - - let this = Self { - index, - reader, - writer: Mutex::new(writer), - fields, - }; - - if !exists { - this.full_rebuild(root)?; - } - Ok(this) - } - - pub fn doc_count(&self) -> u64 { - self.reader.searcher().num_docs() - } - - /// Drops every document and re-walks `root` from scratch. Used by the - /// initial cold-start build and exposed for tests. 
- pub fn full_rebuild(&self, root: &Path) -> Result<()> { - let started = Instant::now(); - { - let mut w = self.writer.lock(); - w.delete_all_documents()?; - - // Producer/consumer: a rayon-fanned-out FindFirstFileEx walk - // feeds entries through a channel; this thread drains and - // writes to Tantivy. Keeps disk reads and index inserts - // overlapped on different cores. - let (tx, rx) = mpsc::channel(); - let root_owned = root.to_path_buf(); - let producer = std::thread::spawn(move || { - enumerate::enumerate(&root_owned, tx); - }); - - let mut count = 0u64; - for entry in rx { - let Some(name) = entry.path.file_name().and_then(|s| s.to_str()) else { - continue; - }; - w.add_document(doc!( - self.fields.path => entry.path.to_string_lossy().into_owned(), - self.fields.filename => name.to_string(), - self.fields.size_bytes => entry.size_bytes, - self.fields.modified_unix_ms => entry.modified_unix_ms, - ))?; - count += 1; - } - let _ = producer.join(); - - w.commit()?; - info!( - root = %root.display(), - count, - elapsed_ms = started.elapsed().as_millis() as u64, - "index built" - ); - } - self.reader.reload()?; - Ok(()) - } - - /// Stat `path` and replace its index entry. Removes any existing doc - /// with the same path first so this is idempotent (good for both - /// CREATE and MODIFY events from the watcher). - pub fn upsert(&self, path: &Path) -> Result<()> { - let Some(name) = path.file_name().and_then(|s| s.to_str()) else { - return Ok(()); - }; - let metadata = match std::fs::metadata(path) { - Ok(m) => m, - // Race: file was deleted between the watcher event and the - // stat. Treat as a delete so the index doesn't end up with a - // stale doc. 
- Err(_) => return self.delete(path), - }; - if !metadata.is_file() { - return Ok(()); - } - - let path_str = path.to_string_lossy().into_owned(); - let size_bytes = metadata.len(); - let modified_unix_ms = metadata - .modified() - .ok() - .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) - .map(|d| d.as_millis() as i64) - .unwrap_or(0); - - let w = self.writer.lock(); - w.delete_term(Term::from_field_text(self.fields.path, &path_str)); - w.add_document(doc!( - self.fields.path => path_str, - self.fields.filename => name.to_string(), - self.fields.size_bytes => size_bytes, - self.fields.modified_unix_ms => modified_unix_ms, - ))?; - Ok(()) - } - - /// Drop any doc whose path equals `path`. Path is a STRING field - /// (single token), so `delete_term` is exact-match. - pub fn delete(&self, path: &Path) -> Result<()> { - let path_str = path.to_string_lossy().into_owned(); - let w = self.writer.lock(); - w.delete_term(Term::from_field_text(self.fields.path, &path_str)); - Ok(()) - } - - /// Commit pending writes and refresh the reader. Watcher debounces - /// to keep this cost amortized across bursts of events. - pub fn commit(&self) -> Result<()> { - let mut w = self.writer.lock(); - w.commit()?; - drop(w); - self.reader.reload()?; - Ok(()) - } - - /// Per-token prefix query against the filename field, optionally filtered - /// to results whose path starts with one of `scope_paths`. Tokens are - /// lowercased; the schema's TEXT field uses the default tokenizer - /// (lowercase + word-boundary split), so `"alpha"` matches `alpha.txt` - /// (token `alpha`) and `ALPHABET.md` (token `alphabet`, prefix). True - /// mid-string substring (`"phab"` → `ALPHABET`) is a known gap; revisit - /// with an n-gram field if the correctness suite demands it. 
- pub fn search( - &self, - query: &str, - max: usize, - scope_paths: &[PathBuf], - ) -> Result> { - let searcher = self.reader.searcher(); - let mut clauses: Vec<(Occur, Box)> = Vec::new(); - - for token in query.split_whitespace() { - let term = Term::from_field_text(self.fields.filename, &token.to_lowercase()); - clauses.push(( - Occur::Must, - Box::new(FuzzyTermQuery::new_prefix(term, 0, true)), - )); - } - - if !scope_paths.is_empty() { - let scope_clauses: Vec<(Occur, Box)> = scope_paths - .iter() - .map(|s| { - let term = - Term::from_field_text(self.fields.path, &s.to_string_lossy()); - let q: Box = - Box::new(FuzzyTermQuery::new_prefix(term, 0, true)); - (Occur::Should, q) - }) - .collect(); - clauses.push((Occur::Must, Box::new(BooleanQuery::new(scope_clauses)))); - } - - // Empty query with no scope = match nothing. The legacy provider - // returns nothing for a blank query too, so this matches semantics. - if clauses.is_empty() { - return Ok(Vec::new()); - } - - let bool_query = BooleanQuery::new(clauses); - let top = searcher.search(&bool_query, &TopDocs::with_limit(max.max(1)))?; - - let mut hits = Vec::with_capacity(top.len()); - for (score, addr) in top { - let doc: TantivyDocument = searcher.doc(addr)?; - hits.push(Hit { - path: get_text(&doc, self.fields.path).unwrap_or_default(), - filename: get_text(&doc, self.fields.filename).unwrap_or_default(), - size_bytes: get_u64(&doc, self.fields.size_bytes).unwrap_or(0), - modified_unix_ms: get_i64(&doc, self.fields.modified_unix_ms).unwrap_or(0), - score, - }); - } - Ok(hits) - } -} - -fn build_schema() -> Schema { - let mut sb = Schema::builder(); - sb.add_text_field("path", STRING | STORED); - sb.add_text_field("filename", TEXT | STORED); - sb.add_u64_field("size_bytes", STORED | INDEXED); - sb.add_i64_field("modified_unix_ms", STORED | INDEXED); - sb.build() -} - -fn fields_of(schema: &Schema) -> Fields { - Fields { - path: schema.get_field("path").unwrap(), - filename: 
schema.get_field("filename").unwrap(), - size_bytes: schema.get_field("size_bytes").unwrap(), - modified_unix_ms: schema.get_field("modified_unix_ms").unwrap(), - } -} - -fn get_text(doc: &TantivyDocument, field: Field) -> Option { - use tantivy::schema::Value; - doc.get_first(field) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) -} - -fn get_u64(doc: &TantivyDocument, field: Field) -> Option { - use tantivy::schema::Value; - doc.get_first(field).and_then(|v| v.as_u64()) -} - -fn get_i64(doc: &TantivyDocument, field: Field) -> Option { - use tantivy::schema::Value; - doc.get_first(field).and_then(|v| v.as_i64()) -} - -/// Returns `%LOCALAPPDATA%\Files\search-index\` with `FILES_SEARCH_INDEX_DIR` -/// override (used by tests and dev runs). -pub fn default_index_dir() -> PathBuf { - if let Ok(p) = std::env::var("FILES_SEARCH_INDEX_DIR") { - return PathBuf::from(p); - } - let base = std::env::var("LOCALAPPDATA") - .map(PathBuf::from) - .unwrap_or_else(|_| std::env::temp_dir()); - base.join("Files").join("search-index") -} diff --git a/src/search-service/src/lib.rs b/src/search-service/src/lib.rs deleted file mode 100644 index c3a6fcf89d92..000000000000 --- a/src/search-service/src/lib.rs +++ /dev/null @@ -1,92 +0,0 @@ -use std::path::PathBuf; -use std::pin::Pin; -use std::sync::Arc; - -use tokio_stream::Stream; -use tonic::{Request, Response, Status}; -use tracing::info; - -pub mod enumerate; -pub mod index; -pub mod throttle; -pub mod watcher; -pub mod proto { - tonic::include_proto!("files.search.v1"); -} - -pub use index::{default_index_dir, SearchIndex}; -pub use throttle::{apply_background_priority, Throttle}; -pub use watcher::Watcher; - -use proto::files_search_server::FilesSearch; -use proto::{HealthRequest, HealthResponse, SearchHit, SearchRequest}; - -pub struct Service { - index: Arc, -} - -impl Service { - pub fn new(index: Arc) -> Self { - Self { index } - } -} - -type SearchStream = Pin> + Send + 'static>>; - -#[tonic::async_trait] -impl 
FilesSearch for Service { - async fn health( - &self, - _: Request, - ) -> Result, Status> { - Ok(Response::new(HealthResponse { - version: env!("CARGO_PKG_VERSION").to_string(), - indexed_file_count: self.index.doc_count(), - indexing: false, - })) - } - - type SearchStream = SearchStream; - - async fn search( - &self, - req: Request, - ) -> Result, Status> { - let req = req.into_inner(); - // 0 = "no caller cap." We still bound the collector to keep - // Tantivy's TopDocs from allocating a heap sized by usize::MAX - // (it multiplies internally and overflows). 10k is generous for - // a UI-driven search; the C# client typically asks for far less. - let max = match req.max_results { - 0 => 10_000, - n => n as usize, - }; - let scope: Vec = req.scope_paths.iter().map(PathBuf::from).collect(); - let index = Arc::clone(&self.index); - let query = req.query.clone(); - - info!(query = %req.query, max, scope = scope.len(), "search"); - - // Run the synchronous Tantivy search on a blocking task so the - // async runtime stays unblocked. For small corpora this is - // overkill, but it keeps the wiring honest as corpora grow. - let hits = tokio::task::spawn_blocking(move || index.search(&query, max, &scope)) - .await - .map_err(|e| Status::internal(format!("join error: {e}")))? - .map_err(|e| Status::internal(format!("search error: {e}")))?; - - let stream = async_stream::try_stream! 
{ - for hit in hits { - yield SearchHit { - path: hit.path, - filename: hit.filename, - size_bytes: hit.size_bytes, - modified_unix_ms: hit.modified_unix_ms, - score: hit.score, - }; - } - }; - - Ok(Response::new(Box::pin(stream))) - } -} diff --git a/src/search-service/src/main.rs b/src/search-service/src/main.rs deleted file mode 100644 index abefb56f9e3c..000000000000 --- a/src/search-service/src/main.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; - -use tonic::transport::Server; -use tracing::info; - -use files_search_service::proto::files_search_server::FilesSearchServer; -use files_search_service::{ - apply_background_priority, default_index_dir, SearchIndex, Service, Throttle, Watcher, -}; - -fn resolve_root() -> PathBuf { - let raw = std::env::var("FILES_SEARCH_ROOT") - .or_else(|_| std::env::var("USERPROFILE")) - .map(PathBuf::from) - .unwrap_or_else(|_| PathBuf::from(".")); - normalize_root(raw) -} - -/// Normalize the indexing root so stored paths are byte-identical to -/// what a Windows caller (e.g. `Path.GetFullPath` from C#) will pass in -/// `scope_paths`. Without this, mixed forward/backward slashes silently -/// break prefix scoping. -/// -/// Strategy: -/// 1. `fs::canonicalize` to resolve `..`, symlinks, and case. -/// 2. Strip the `\\?\` UNC prefix Windows adds, since C# callers -/// don't include it. -/// -/// Falls back to the input on canonicalize failure (path doesn't exist -/// yet, permissions, etc.). 
-fn normalize_root(p: PathBuf) -> PathBuf { - let canonical = match std::fs::canonicalize(&p) { - Ok(c) => c, - Err(_) => return p, - }; - #[cfg(windows)] - { - let s = canonical.to_string_lossy(); - if let Some(stripped) = s.strip_prefix(r"\\?\") { - return PathBuf::from(stripped); - } - } - canonical -} - -#[tokio::main] -async fn main() -> Result<(), Box> { - tracing_subscriber::fmt() - .with_env_filter( - tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| "info".into()), - ) - .init(); - - apply_background_priority(); - - let root = resolve_root(); - let index_dir = default_index_dir(); - info!(root = %root.display(), index_dir = %index_dir.display(), "starting"); - - let root_for_index = root.clone(); - let index = Arc::new( - tokio::task::spawn_blocking(move || { - SearchIndex::open_or_build(&index_dir, &root_for_index) - }) - .await??, - ); - - let throttle = Arc::new(Throttle::start()); - let watcher = Watcher::start(root.clone(), Arc::clone(&index), Some(Arc::clone(&throttle)))?; - info!(root = %root.display(), "watcher started"); - - // TCP for v0; swap to named pipe (\\.\pipe\files-search) once the - // service does enough to be worth integration-testing from C#. - let addr = "127.0.0.1:50080".parse()?; - info!(%addr, "files-search-service listening"); - - Server::builder() - .add_service(FilesSearchServer::new(Service::new(index))) - .serve_with_shutdown(addr, async { - let _ = tokio::signal::ctrl_c().await; - info!("shutting down"); - }) - .await?; - - watcher.stop(); - Ok(()) -} diff --git a/src/search-service/src/throttle.rs b/src/search-service/src/throttle.rs deleted file mode 100644 index e3900bac7975..000000000000 --- a/src/search-service/src/throttle.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! Process throttling. -//! -//! Two complementary mechanisms keep the service from being a bad citizen: -//! -//! 1. `apply_background_priority()` — once at startup, calls -//! `SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN)`. 
The Windows -//! scheduler then puts CPU and I/O behind any normal-priority process. -//! Same trick OneDrive uses for sync. -//! -//! 2. `Throttle` — a background poller (every 2s) that watches three -//! signals and exposes `should_pause()`: -//! - **On battery**: `GetSystemPowerStatus` `ACLineStatus == 0`. -//! - **Fullscreen / presentation**: `SHQueryUserNotificationState` -//! returns `QUNS_RUNNING_D3D_FULL_SCREEN` or `QUNS_PRESENTATION_MODE`. -//! - **High CPU load**: derived from `GetSystemTimes` over the -//! polling interval; threshold 70%. -//! -//! The watcher consults `should_pause()` before committing. Apply work -//! (in-memory adds/deletes) keeps happening so we don't drop events; -//! only commit + reader refresh is skipped, deferring fsync and -//! query-visibility until the system is idle again. - -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::thread; -use std::time::Duration; - -use tracing::{info, warn}; - -const POLL_INTERVAL: Duration = Duration::from_secs(2); -const HIGH_LOAD_THRESHOLD: f64 = 0.70; - -#[cfg(windows)] -pub fn apply_background_priority() { - use windows::Win32::System::Threading::{ - GetCurrentProcess, SetPriorityClass, PROCESS_MODE_BACKGROUND_BEGIN, - }; - unsafe { - match SetPriorityClass(GetCurrentProcess(), PROCESS_MODE_BACKGROUND_BEGIN) { - Ok(_) => info!("background priority enabled"), - Err(err) => { - warn!(%err, "SetPriorityClass(PROCESS_MODE_BACKGROUND_BEGIN) failed") - } - } - } -} - -#[cfg(not(windows))] -pub fn apply_background_priority() {} - -pub struct Throttle { - paused: Arc, - stop: Arc, - poller: Option>, -} - -impl Throttle { - pub fn start() -> Self { - let paused = Arc::new(AtomicBool::new(false)); - let stop = Arc::new(AtomicBool::new(false)); - let p = Arc::clone(&paused); - let s = Arc::clone(&stop); - let poller = thread::spawn(move || poll_loop(p, s)); - Self { - paused, - stop, - poller: Some(poller), - } - } - - pub fn should_pause(&self) -> bool { - 
self.paused.load(Ordering::Acquire) - } -} - -impl Drop for Throttle { - fn drop(&mut self) { - self.stop.store(true, Ordering::Release); - if let Some(h) = self.poller.take() { - let _ = h.join(); - } - } -} - -fn poll_loop(paused: Arc, stop: Arc) { - let mut last_cpu = sample_cpu(); - while !stop.load(Ordering::Acquire) { - thread::sleep(POLL_INTERVAL); - let now_cpu = sample_cpu(); - let load = compute_load(&last_cpu, &now_cpu); - last_cpu = now_cpu; - - let on_battery = is_on_battery(); - let fullscreen = is_fullscreen(); - let high_load = load > HIGH_LOAD_THRESHOLD; - let new = on_battery || fullscreen || high_load; - let prev = paused.swap(new, Ordering::AcqRel); - if prev != new { - info!( - paused = new, - on_battery, - fullscreen, - high_load, - load, - "throttle state changed" - ); - } - } -} - -#[cfg(windows)] -fn is_on_battery() -> bool { - use windows::Win32::System::Power::{GetSystemPowerStatus, SYSTEM_POWER_STATUS}; - let mut status = SYSTEM_POWER_STATUS::default(); - unsafe { - if GetSystemPowerStatus(&mut status).is_err() { - return false; - } - } - // 0 = offline (battery), 1 = online, 255 = unknown. Conservative: - // only flag battery on a definite "offline". - status.ACLineStatus == 0 -} - -#[cfg(not(windows))] -fn is_on_battery() -> bool { - false -} - -#[cfg(windows)] -fn is_fullscreen() -> bool { - use windows::Win32::UI::Shell::{ - SHQueryUserNotificationState, QUNS_BUSY, QUNS_PRESENTATION_MODE, - QUNS_RUNNING_D3D_FULL_SCREEN, - }; - let _ = QUNS_BUSY; // silence unused; matches! below references the others. 
- let state = match unsafe { SHQueryUserNotificationState() } { - Ok(s) => s, - Err(_) => return false, - }; - matches!( - state, - QUNS_RUNNING_D3D_FULL_SCREEN | QUNS_PRESENTATION_MODE - ) -} - -#[cfg(not(windows))] -fn is_fullscreen() -> bool { - false -} - -struct CpuSample { - #[cfg_attr(not(windows), allow(dead_code))] - idle: u64, - #[cfg_attr(not(windows), allow(dead_code))] - kernel: u64, - #[cfg_attr(not(windows), allow(dead_code))] - user: u64, -} - -#[cfg(windows)] -fn sample_cpu() -> CpuSample { - use windows::Win32::Foundation::FILETIME; - use windows::Win32::System::Threading::GetSystemTimes; - let mut idle = FILETIME::default(); - let mut kernel = FILETIME::default(); - let mut user = FILETIME::default(); - unsafe { - let _ = GetSystemTimes(Some(&mut idle), Some(&mut kernel), Some(&mut user)); - } - CpuSample { - idle: ft_to_u64(&idle), - kernel: ft_to_u64(&kernel), - user: ft_to_u64(&user), - } -} - -#[cfg(not(windows))] -fn sample_cpu() -> CpuSample { - CpuSample { idle: 0, kernel: 0, user: 0 } -} - -#[cfg(windows)] -fn ft_to_u64(ft: &windows::Win32::Foundation::FILETIME) -> u64 { - ((ft.dwHighDateTime as u64) << 32) | (ft.dwLowDateTime as u64) -} - -fn compute_load(prev: &CpuSample, now: &CpuSample) -> f64 { - // GetSystemTimes documents that lpKernelTime *includes* idle time. - // total = kernel + user, busy = total - idle. - let idle_d = now.idle.saturating_sub(prev.idle) as f64; - let kernel_d = now.kernel.saturating_sub(prev.kernel) as f64; - let user_d = now.user.saturating_sub(prev.user) as f64; - let total = kernel_d + user_d; - if total <= 0.0 { - return 0.0; - } - ((total - idle_d) / total).clamp(0.0, 1.0) -} diff --git a/src/search-service/src/watcher.rs b/src/search-service/src/watcher.rs deleted file mode 100644 index 2fa4ff003c0d..000000000000 --- a/src/search-service/src/watcher.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! Filesystem watcher. -//! -//! Wraps the `notify` crate (which uses `ReadDirectoryChangesW` with -//! 
overlapped I/O on Windows) and applies events to a `SearchIndex`. -//! Commits are debounced — bursts of file events (extracting an archive, -//! `git checkout`) collapse into a single Tantivy commit so we don't -//! pay segment + fsync overhead per file. - -use std::path::PathBuf; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use std::thread; -use std::time::Duration; - -use anyhow::Result; -use notify::event::{EventKind, ModifyKind, RenameMode}; -use notify::{RecommendedWatcher, RecursiveMode, Watcher as _}; -use parking_lot::Mutex; -use tracing::{debug, warn}; - -use crate::index::SearchIndex; -use crate::throttle::Throttle; - -/// Time we wait for a quiet window before committing a batch of edits. -/// 250ms is short enough that single-file changes feel instant in the UI -/// and long enough to coalesce a `git checkout` of hundreds of files. -const COMMIT_DEBOUNCE: Duration = Duration::from_millis(250); - -pub struct Watcher { - _watcher: RecommendedWatcher, - stop: Arc, - committer: Option>, -} - -impl Watcher { - pub fn start( - root: PathBuf, - index: Arc, - throttle: Option>, - ) -> Result { - let dirty = Arc::new(AtomicBool::new(false)); - let stop = Arc::new(AtomicBool::new(false)); - let last_event = Arc::new(Mutex::new(std::time::Instant::now())); - - let dirty_for_handler = Arc::clone(&dirty); - let last_event_for_handler = Arc::clone(&last_event); - let index_for_handler = Arc::clone(&index); - - let mut watcher = notify::recommended_watcher(move |res: notify::Result| { - match res { - Ok(event) => apply_event(&index_for_handler, &event), - Err(err) => warn!(%err, "watcher error"), - } - dirty_for_handler.store(true, Ordering::Release); - *last_event_for_handler.lock() = std::time::Instant::now(); - })?; - watcher.watch(&root, RecursiveMode::Recursive)?; - - let committer = { - let stop = Arc::clone(&stop); - let dirty = Arc::clone(&dirty); - let last_event = Arc::clone(&last_event); - let index = Arc::clone(&index); - 
thread::spawn(move || committer_loop(index, stop, dirty, last_event, throttle)) - }; - - Ok(Self { - _watcher: watcher, - stop, - committer: Some(committer), - }) - } - - /// Stops the committer thread and forces a final commit so any - /// pending events are durable. The notify watcher itself is dropped - /// here too, which cancels the underlying ReadDirectoryChangesW. - pub fn stop(mut self) { - self.stop.store(true, Ordering::Release); - if let Some(handle) = self.committer.take() { - let _ = handle.join(); - } - } -} - -impl Drop for Watcher { - fn drop(&mut self) { - self.stop.store(true, Ordering::Release); - if let Some(handle) = self.committer.take() { - let _ = handle.join(); - } - } -} - -fn apply_event(index: &SearchIndex, event: ¬ify::Event) { - match event.kind { - EventKind::Create(_) | EventKind::Modify(ModifyKind::Data(_)) - | EventKind::Modify(ModifyKind::Metadata(_)) - | EventKind::Modify(ModifyKind::Any) => { - for path in &event.paths { - if let Err(err) = index.upsert(path) { - warn!(path = %path.display(), %err, "upsert failed"); - } - } - } - EventKind::Remove(_) => { - for path in &event.paths { - if let Err(err) = index.delete(path) { - warn!(path = %path.display(), %err, "delete failed"); - } - } - } - EventKind::Modify(ModifyKind::Name(rename)) => { - // notify normalizes renames into either a single Both event - // (paths = [old, new]) or two events (From / To). Handle - // both shapes by deleting any path that no longer exists - // and upserting any path that does. 
- match rename { - RenameMode::Both if event.paths.len() == 2 => { - let _ = index.delete(&event.paths[0]); - if let Err(err) = index.upsert(&event.paths[1]) { - warn!(path = %event.paths[1].display(), %err, "rename upsert failed"); - } - } - _ => { - for path in &event.paths { - if path.exists() { - let _ = index.upsert(path); - } else { - let _ = index.delete(path); - } - } - } - } - } - // Access events and other modify variants don't change index - // contents — ignore so we don't churn commits. - _ => {} - } - debug!(?event, "applied"); -} - -fn committer_loop( - index: Arc, - stop: Arc, - dirty: Arc, - last_event: Arc>, - throttle: Option>, -) { - while !stop.load(Ordering::Acquire) { - thread::sleep(Duration::from_millis(50)); - if !dirty.load(Ordering::Acquire) { - continue; - } - // Defer commit (and the reader reload that makes new docs - // visible) while the system is busy. Apply work already happened - // in the notify callback, so events aren't lost — they just - // accumulate in the writer's in-memory buffer until we catch up. - if throttle.as_ref().is_some_and(|t| t.should_pause()) { - continue; - } - let elapsed = last_event.lock().elapsed(); - if elapsed < COMMIT_DEBOUNCE { - continue; - } - dirty.store(false, Ordering::Release); - if let Err(err) = index.commit() { - warn!(%err, "watcher commit failed"); - dirty.store(true, Ordering::Release); - } - } - - // Final commit on shutdown — never lose a pending event, even if we - // were paused when shutdown was requested. 
- if dirty.load(Ordering::Acquire) { - if let Err(err) = index.commit() { - warn!(%err, "final watcher commit failed"); - } - } -} diff --git a/src/search-service/tests/enumerate.rs b/src/search-service/tests/enumerate.rs deleted file mode 100644 index 070f058f9a37..000000000000 --- a/src/search-service/tests/enumerate.rs +++ /dev/null @@ -1,64 +0,0 @@ -use std::path::PathBuf; -use std::sync::mpsc; - -use files_search_service::enumerate; - -#[test] -fn enumerator_finds_all_files_recursively() { - let dir = tempdir(); - let sub_a = dir.join("a"); - let sub_b = dir.join("b").join("nested"); - std::fs::create_dir_all(&sub_a).unwrap(); - std::fs::create_dir_all(&sub_b).unwrap(); - std::fs::write(dir.join("top.txt"), b"x").unwrap(); - std::fs::write(sub_a.join("a1.txt"), b"x").unwrap(); - std::fs::write(sub_a.join("a2.txt"), b"x").unwrap(); - std::fs::write(sub_b.join("deep.txt"), b"x").unwrap(); - - let (tx, rx) = mpsc::channel(); - enumerate::enumerate(&dir, tx); - let mut names: Vec = rx - .into_iter() - .map(|e| e.path.file_name().unwrap().to_string_lossy().into_owned()) - .collect(); - names.sort(); - assert_eq!( - names, - vec!["a1.txt", "a2.txt", "deep.txt", "top.txt"] - ); -} - -#[test] -fn enumerator_reports_size_and_modified() { - let dir = tempdir(); - std::fs::write(dir.join("hello.txt"), b"hello world").unwrap(); - - let (tx, rx) = mpsc::channel(); - enumerate::enumerate(&dir, tx); - let entries: Vec<_> = rx.into_iter().collect(); - assert_eq!(entries.len(), 1); - assert_eq!(entries[0].size_bytes, b"hello world".len() as u64); - // Sanity check: modified time is in the last 60 seconds and after epoch. 
- let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_millis() as i64; - let delta = (now_ms - entries[0].modified_unix_ms).abs(); - assert!( - delta < 60_000, - "modified_unix_ms drift {delta}ms is implausibly large" - ); -} - -fn tempdir() -> PathBuf { - use std::sync::atomic::{AtomicU64, Ordering}; - static COUNTER: AtomicU64 = AtomicU64::new(0); - let n = COUNTER.fetch_add(1, Ordering::Relaxed); - let nanos = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos(); - let dir = std::env::temp_dir().join(format!("files-search-enum-{nanos}-{n}")); - std::fs::create_dir_all(&dir).unwrap(); - dir -} diff --git a/src/search-service/tests/search_smoke.rs b/src/search-service/tests/search_smoke.rs deleted file mode 100644 index 4815dfbc038c..000000000000 --- a/src/search-service/tests/search_smoke.rs +++ /dev/null @@ -1,224 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; -use std::time::Duration; - -use files_search_service::proto::files_search_client::FilesSearchClient; -use files_search_service::proto::files_search_server::FilesSearchServer; -use files_search_service::proto::{HealthRequest, SearchRequest}; -use files_search_service::{SearchIndex, Service}; -use tokio::net::TcpListener; -use tokio::sync::oneshot; -use tokio_stream::wrappers::TcpListenerStream; -use tokio_stream::StreamExt; -use tonic::transport::{Endpoint, Server}; - -struct ServiceHandle { - url: String, - shutdown: Option>, - task: Option>, -} - -impl ServiceHandle { - async fn stop(mut self) { - if let Some(tx) = self.shutdown.take() { - let _ = tx.send(()); - } - if let Some(task) = self.task.take() { - let _ = task.await; - } - } -} - -async fn spawn_service(root: PathBuf, index_dir: PathBuf) -> ServiceHandle { - let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - let index = Arc::new( - tokio::task::spawn_blocking(move || 
SearchIndex::open_or_build(&index_dir, &root)) - .await - .unwrap() - .unwrap(), - ); - let (tx, rx) = oneshot::channel(); - let task = tokio::spawn(async move { - Server::builder() - .add_service(FilesSearchServer::new(Service::new(index))) - .serve_with_incoming_shutdown(TcpListenerStream::new(listener), async { - let _ = rx.await; - }) - .await - .unwrap(); - }); - tokio::time::sleep(Duration::from_millis(50)).await; - ServiceHandle { - url: format!("http://{addr}"), - shutdown: Some(tx), - task: Some(task), - } -} - -async fn connect(url: String) -> FilesSearchClient { - let channel = Endpoint::from_shared(url).unwrap().connect().await.unwrap(); - FilesSearchClient::new(channel) -} - -#[tokio::test] -async fn health_reports_indexed_count() { - let (root, index_dir) = tempdirs(); - std::fs::write(root.join("alpha.txt"), b"a").unwrap(); - std::fs::write(root.join("beta.txt"), b"b").unwrap(); - - let svc = spawn_service(root, index_dir).await; - let mut client = connect(svc.url.clone()).await; - let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); - assert_eq!(resp.indexed_file_count, 2); - assert!(!resp.indexing); - assert!(!resp.version.is_empty()); - svc.stop().await; -} - -#[tokio::test] -async fn search_returns_substring_matches() { - let (root, index_dir) = tempdirs(); - std::fs::write(root.join("alpha.txt"), b"a").unwrap(); - std::fs::write(root.join("beta.txt"), b"b").unwrap(); - std::fs::write(root.join("ALPHABET.md"), b"c").unwrap(); - - let svc = spawn_service(root, index_dir).await; - let mut client = connect(svc.url.clone()).await; - let mut stream = client - .search(SearchRequest { - query: "alpha".into(), - max_results: 0, - scope_paths: vec![], - }) - .await - .unwrap() - .into_inner(); - - let mut names = Vec::new(); - while let Some(hit) = stream.next().await { - names.push(hit.unwrap().filename); - } - names.sort(); - assert_eq!(names, vec!["ALPHABET.md", "alpha.txt"]); - svc.stop().await; -} - -#[tokio::test] -async fn 
search_honors_max_results() { - let (root, index_dir) = tempdirs(); - for i in 0..10 { - std::fs::write(root.join(format!("hit_{i}.txt")), b"x").unwrap(); - } - - let svc = spawn_service(root, index_dir).await; - let mut client = connect(svc.url.clone()).await; - let mut stream = client - .search(SearchRequest { - query: "hit".into(), - max_results: 3, - scope_paths: vec![], - }) - .await - .unwrap() - .into_inner(); - - let mut count = 0; - while let Some(hit) = stream.next().await { - hit.unwrap(); - count += 1; - } - assert_eq!(count, 3); - svc.stop().await; -} - -#[tokio::test] -async fn search_scope_filters_paths() { - let (root, index_dir) = tempdirs(); - let inside = root.join("inside"); - let outside = root.join("outside"); - std::fs::create_dir(&inside).unwrap(); - std::fs::create_dir(&outside).unwrap(); - std::fs::write(inside.join("match.txt"), b"x").unwrap(); - std::fs::write(outside.join("match.txt"), b"x").unwrap(); - - let svc = spawn_service(root, index_dir).await; - let mut client = connect(svc.url.clone()).await; - let mut stream = client - .search(SearchRequest { - query: "match".into(), - max_results: 0, - scope_paths: vec![inside.to_string_lossy().into_owned()], - }) - .await - .unwrap() - .into_inner(); - - let mut paths = Vec::new(); - while let Some(hit) = stream.next().await { - paths.push(hit.unwrap().path); - } - assert_eq!(paths.len(), 1); - assert!(paths[0].contains("inside")); - svc.stop().await; -} - -#[tokio::test] -async fn index_persists_across_restarts() { - let (root, index_dir) = tempdirs(); - std::fs::write(root.join("persistent.txt"), b"x").unwrap(); - - // First start: builds index from root. - { - let svc = spawn_service(root.clone(), index_dir.clone()).await; - let mut client = connect(svc.url.clone()).await; - let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); - assert_eq!(resp.indexed_file_count, 1); - svc.stop().await; // Releases the Tantivy writer lock. 
- } - - // Second start: deletes the source root, opens existing index. - // Expectation: docs survive because the index was committed to disk. - std::fs::remove_dir_all(&root).unwrap(); - let empty_root = root.clone(); - std::fs::create_dir_all(&empty_root).unwrap(); - - let svc = spawn_service(empty_root, index_dir).await; - let mut client = connect(svc.url.clone()).await; - let resp = client.health(HealthRequest {}).await.unwrap().into_inner(); - assert_eq!(resp.indexed_file_count, 1); - - let mut stream = client - .search(SearchRequest { - query: "persistent".into(), - max_results: 0, - scope_paths: vec![], - }) - .await - .unwrap() - .into_inner(); - let mut found = false; - while let Some(hit) = stream.next().await { - if hit.unwrap().filename == "persistent.txt" { - found = true; - } - } - assert!(found, "persisted doc should survive a restart"); - svc.stop().await; -} - -fn tempdirs() -> (PathBuf, PathBuf) { - use std::sync::atomic::{AtomicU64, Ordering}; - static COUNTER: AtomicU64 = AtomicU64::new(0); - let n = COUNTER.fetch_add(1, Ordering::Relaxed); - let nanos = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos(); - let base = std::env::temp_dir().join(format!("files-search-test-{nanos}-{n}")); - let root = base.join("root"); - let index_dir = base.join("index"); - std::fs::create_dir_all(&root).unwrap(); - std::fs::create_dir_all(&index_dir).unwrap(); - (root, index_dir) -} diff --git a/src/search-service/tests/throttle.rs b/src/search-service/tests/throttle.rs deleted file mode 100644 index 48d3736f28cd..000000000000 --- a/src/search-service/tests/throttle.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::time::Duration; - -use files_search_service::Throttle; - -/// `apply_background_priority` is best-tested by observing the running -/// process's priority class with an external tool (Process Explorer); -/// here we just confirm that startup + drop don't panic and that -/// `should_pause()` produces a 
well-defined boolean. The behavior tests -/// (verifying we actually pause on battery / fullscreen / load) live in -/// `tests/Files.Search.Resource/` per CLAUDE.md. -#[test] -fn throttle_starts_and_stops_cleanly() { - let t = Throttle::start(); - let _ = t.should_pause(); - // Give the poller at least one tick to populate state. - std::thread::sleep(Duration::from_millis(100)); - let _ = t.should_pause(); - drop(t); -} diff --git a/src/search-service/tests/watcher.rs b/src/search-service/tests/watcher.rs deleted file mode 100644 index 8ac75e62e3b5..000000000000 --- a/src/search-service/tests/watcher.rs +++ /dev/null @@ -1,110 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use files_search_service::{SearchIndex, Watcher}; - -/// Polls `cond` every 25ms for up to `timeout`, returning true if it -/// ever returned true. Filesystem watchers are inherently async so we -/// can't assert synchronously after writing a file. -fn wait_until bool>(timeout: Duration, mut cond: F) -> bool { - let start = Instant::now(); - while start.elapsed() < timeout { - if cond() { - return true; - } - std::thread::sleep(Duration::from_millis(25)); - } - false -} - -fn search_count(index: &SearchIndex, query: &str) -> usize { - index - .search(query, 100, &[]) - .map(|hits| hits.len()) - .unwrap_or(0) -} - -#[test] -fn watcher_indexes_new_files() { - let (root, index_dir) = tempdirs(); - let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap()); - let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap(); - - std::fs::write(root.join("brandnew.txt"), b"x").unwrap(); - let saw = wait_until(Duration::from_secs(5), || { - search_count(&index, "brandnew") > 0 - }); - watcher.stop(); - assert!(saw, "watcher should index new files within 5s"); -} - -#[test] -fn watcher_removes_deleted_files() { - let (root, index_dir) = tempdirs(); - let target = root.join("doomed.txt"); - std::fs::write(&target, 
b"x").unwrap(); - - let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap()); - assert_eq!(search_count(&index, "doomed"), 1); - - let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap(); - std::fs::remove_file(&target).unwrap(); - - let gone = wait_until(Duration::from_secs(5), || { - search_count(&index, "doomed") == 0 - }); - watcher.stop(); - assert!(gone, "watcher should remove deleted files within 5s"); -} - -#[test] -fn watcher_picks_up_files_in_subdirs() { - let (root, index_dir) = tempdirs(); - let sub = root.join("nested").join("deep"); - std::fs::create_dir_all(&sub).unwrap(); - - let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap()); - let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap(); - - std::fs::write(sub.join("buried.txt"), b"x").unwrap(); - let saw = wait_until(Duration::from_secs(5), || { - search_count(&index, "buried") > 0 - }); - watcher.stop(); - assert!(saw, "watcher should follow subdirectories"); -} - -#[test] -fn watcher_handles_burst_with_single_commit_window() { - let (root, index_dir) = tempdirs(); - let index = Arc::new(SearchIndex::open_or_build(&index_dir, &root).unwrap()); - let watcher = Watcher::start(root.clone(), Arc::clone(&index), None).unwrap(); - - // Simulate a `git checkout`-style burst: 50 files at once. 
- for i in 0..50 { - std::fs::write(root.join(format!("burst_{i:02}.txt")), b"x").unwrap(); - } - - let saw_all = wait_until(Duration::from_secs(10), || { - search_count(&index, "burst") == 50 - }); - watcher.stop(); - assert!(saw_all, "all 50 burst files should be indexed"); -} - -fn tempdirs() -> (PathBuf, PathBuf) { - use std::sync::atomic::{AtomicU64, Ordering}; - static COUNTER: AtomicU64 = AtomicU64::new(0); - let n = COUNTER.fetch_add(1, Ordering::Relaxed); - let nanos = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos(); - let base = std::env::temp_dir().join(format!("files-search-watch-{nanos}-{n}")); - let root = base.join("root"); - let index_dir = base.join("index"); - std::fs::create_dir_all(&root).unwrap(); - std::fs::create_dir_all(&index_dir).unwrap(); - (root, index_dir) -} diff --git a/tests/Files.Search.Bench/Program.cs b/tests/Files.Search.Bench/Program.cs index 0b64c0107e7d..cd458853e746 100644 --- a/tests/Files.Search.Bench/Program.cs +++ b/tests/Files.Search.Bench/Program.cs @@ -32,8 +32,8 @@ private static async Task Main(string[] args) }; // Warm-up: run one throwaway query so JIT, gRPC channel - // setup, Tantivy mmap pages, and any first-call penalty - // don't get baked into the first measured timing. + // setup, and any first-call penalty don't get baked into + // the first measured timing. if (queries.Count > 0) { Console.Write(" warm-up..."); @@ -143,7 +143,7 @@ internal sealed class CliOptions Providers: naive-scan — top-down filesystem walk (strawman baseline). legacy — Windows.Storage.Search / AQS (the upstream path). - indexed — Rust files-search-service over gRPC. Requires the + indexed — files-search-service over gRPC. Requires the service to be running and indexing the corpus root (set FILES_SEARCH_ROOT before launching it). 
"""); diff --git a/tests/Files.Search.Correctness/CorpusCorrectnessTests.cs b/tests/Files.Search.Correctness/CorpusCorrectnessTests.cs new file mode 100644 index 000000000000..d0b7187a4c8f --- /dev/null +++ b/tests/Files.Search.Correctness/CorpusCorrectnessTests.cs @@ -0,0 +1,221 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Files.SearchService.Index; +using Files.SearchService.Usn; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Files.Search.Correctness; + +/// +/// End-to-end correctness: build an index from a real temp directory, +/// then verify indexed results == naive filename-token scan for every query. +/// +/// Key invariant tested: no false negatives, no false positives. +/// +[TestClass] +public class CorpusCorrectnessTests +{ + private static string _root = ""; + private static FileIndex _index = null!; + + [ClassInitialize] + public static void ClassInitialize(TestContext _) + { + _root = Path.Combine(Path.GetTempPath(), $"fsix_corpus_{Guid.NewGuid():N}"); + Directory.CreateDirectory(_root); + + // Deterministic file set covering all interesting cases. + var files = new[] + { + // Standard delimiter-separated names + "annual_report.pdf", + "quarterly_report.pdf", + "quarterly_summary.docx", + "meeting_notes.txt", + "config_build.json", + "build_output.log", + "server_config.yaml", + "invoice_2024.pdf", + "invoice_2024_final.pdf", + "unrelated.txt", + // CamelCase + "AnnualReportFinal.pdf", + "MyDocumentConfig.docx", + "BuildOutputFinal.log", + // Digits + "report_2024_q1.pdf", + "v2Final.docx", + // Unicode + "测试_report.txt", + "測試_notes.txt", + // Long name + "report_" + new string('a', 120) + ".txt", + // Multi-extension + "archive.tar.gz", + // Nested + Path.Combine("subfolder", "nested_report.pdf"), + Path.Combine("subfolder", "nested_summary.txt"), + Path.Combine("deep", "a", "b", "config.json"), + }; + + // Create the files on disk so UsnJournalReader's fallback walk can find them. 
+ foreach (var rel in files) + { + var fullPath = Path.Combine(_root, rel); + Directory.CreateDirectory(Path.GetDirectoryName(fullPath)!); + File.WriteAllText(fullPath, "test"); + } + + // Build index from the UsnJournalReader fallback walk (no USN in dev mode). + var reader = new UsnJournalReader(_root); + var records = reader.Enumerate() + .Select(e => new DocRecord(e.FullPath, e.FileName, e.SizeBytes, e.ModifiedUtc)) + .ToList(); + _index = new FileIndex(); + _index.ReplaceAll(records); + } + + [ClassCleanup] + public static void ClassCleanup() + { + if (Directory.Exists(_root)) + Directory.Delete(_root, recursive: true); + } + + // ---- Helpers ----------------------------------------------------------- + + /// Naive oracle: files whose tokenized name contains ALL query tokens. + private static HashSet NaiveSearch(string query) + { + var queryTokens = Tokenizer.Tokenize(query).ToList(); + if (queryTokens.Count == 0) return []; + + return Directory.EnumerateFiles(_root, "*", SearchOption.AllDirectories) + .Where(path => + { + var fileTokens = Tokenizer.Tokenize(Path.GetFileName(path)) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + return queryTokens.All(qt => fileTokens.Contains(qt)); + }) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + } + + private static HashSet IndexSearch(string query) => + _index.Search(query, 10_000, []) + .Select(h => h.Path) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + // ---- Tests ------------------------------------------------------------- + + [TestMethod] + [DataRow("report")] + [DataRow("summary")] + [DataRow("config")] + [DataRow("build")] + [DataRow("invoice")] + [DataRow("meeting")] + [DataRow("nested")] + [DataRow("archive")] + [DataRow("txt")] + [DataRow("pdf")] + public void SingleToken_IndexedMatchesNaive(string query) + { + var naive = NaiveSearch(query); + var indexed = IndexSearch(query); + + // No false negatives. 
+ foreach (var path in naive) + Assert.IsTrue(indexed.Contains(path), $"False negative: '{path}' missing for query '{query}'"); + + // No false positives. + foreach (var path in indexed) + Assert.IsTrue(naive.Contains(path), $"False positive: '{path}' returned for query '{query}'"); + } + + [TestMethod] + [DataRow("quarterly report")] + [DataRow("annual report")] + [DataRow("config build")] + [DataRow("invoice 2024")] + [DataRow("report 2024")] + public void MultiToken_IndexedMatchesNaive(string query) + { + var naive = NaiveSearch(query); + var indexed = IndexSearch(query); + + foreach (var path in naive) + Assert.IsTrue(indexed.Contains(path), $"False negative: '{path}' missing for query '{query}'"); + + foreach (var path in indexed) + Assert.IsTrue(naive.Contains(path), $"False positive: '{path}' returned for query '{query}'"); + } + + [TestMethod] + public void CamelCase_TokensSearchable_NoFalseNegatives() + { + // "AnnualReportFinal.pdf" should appear when searching "annual", "report", or "final". 
+ var cases = new[] { "annual", "report", "final" }; + foreach (var q in cases) + { + var naive = NaiveSearch(q); + var indexed = IndexSearch(q); + foreach (var path in naive) + Assert.IsTrue(indexed.Contains(path), $"False negative: '{path}' missing for query '{q}'"); + } + } + + [TestMethod] + public void Unicode_CJK_NoFalseNegatives() + { + var naive = NaiveSearch("测试"); + var indexed = IndexSearch("测试"); + + Assert.IsTrue(naive.Count > 0, "Corpus should have at least one CJK file."); + foreach (var path in naive) + Assert.IsTrue(indexed.Contains(path), $"False negative: '{path}' missing for CJK query"); + } + + [TestMethod] + public void ScopeFilter_SubfolderOnly_NoFalsePositives() + { + var subfolder = Path.Combine(_root, "subfolder"); + var hits = _index.Search("report", 10_000, [subfolder]); + + foreach (var hit in hits) + Assert.IsTrue(hit.Path.StartsWith(subfolder, StringComparison.OrdinalIgnoreCase), + $"False positive outside scope: '{hit.Path}'"); + } + + [TestMethod] + public void ScopeFilter_SubfolderOnly_NoFalseNegatives() + { + var subfolder = Path.Combine(_root, "subfolder"); + var scoped = _index.Search("report", 10_000, [subfolder]) + .Select(h => h.Path) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + // Naive walk restricted to subfolder. 
+ var naiveScoped = Directory.EnumerateFiles(subfolder, "*", SearchOption.AllDirectories) + .Where(p => Tokenizer.Tokenize(Path.GetFileName(p)) + .Any(t => t.Equals("report", StringComparison.OrdinalIgnoreCase))) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + foreach (var path in naiveScoped) + Assert.IsTrue(scoped.Contains(path), $"False negative in scope filter: '{path}'"); + } + + [TestMethod] + public void UnknownQuery_ReturnsEmpty() + { + Assert.AreEqual(0, IndexSearch("zzz_absolutely_nonexistent_token_xqz").Count); + } + + [TestMethod] + public void DocCount_MatchesActualFileCount() + { + var expectedCount = Directory.EnumerateFiles(_root, "*", SearchOption.AllDirectories).Count(); + // Allow ±0 — every file in the tree should be indexed. + Assert.AreEqual(expectedCount, (int)_index.DocCount); + } +} diff --git a/tests/Files.Search.Correctness/FileIndexTests.cs b/tests/Files.Search.Correctness/FileIndexTests.cs new file mode 100644 index 000000000000..ace991e95ae0 --- /dev/null +++ b/tests/Files.Search.Correctness/FileIndexTests.cs @@ -0,0 +1,506 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Files.SearchService.Index; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Files.Search.Correctness; + +/// +/// Correctness tests for . +/// +/// Core invariant: for a query Q, the index returns exactly the set of +/// documents whose filename contains all of Q's tokens (AND semantics). +/// No false positives, no false negatives for token-exact queries. 
+/// +[TestClass] +public class FileIndexTests +{ + private static FileIndex BuildIndex(params (string path, string name)[] files) + { + var idx = new FileIndex(); + var records = files + .Select(f => new DocRecord(f.path, f.name, 0UL, DateTime.UtcNow)) + .ToList(); + idx.ReplaceAll(records); + return idx; + } + + private static IReadOnlyList Search(FileIndex idx, string query, params string[] scopes) => + idx.Search(query, 10_000, scopes); + + // ---- Basic retrieval --------------------------------------------------- + + [TestMethod] + public void SingleToken_FindsMatchingFile() + { + var idx = BuildIndex( + (@"C:\root\annual_report.pdf", "annual_report.pdf"), + (@"C:\root\quarterly_summary.docx", "quarterly_summary.docx")); + + var hits = Search(idx, "report"); + + Assert.AreEqual(1, hits.Count); + Assert.AreEqual("annual_report.pdf", hits[0].FileName); + } + + [TestMethod] + public void SingleToken_NoMatch_ReturnsEmpty() + { + var idx = BuildIndex((@"C:\root\file.txt", "file.txt")); + Assert.AreEqual(0, Search(idx, "zzz_nonexistent").Count); + } + + [TestMethod] + public void EmptyQuery_ReturnsEmpty() + { + var idx = BuildIndex((@"C:\root\file.txt", "file.txt")); + Assert.AreEqual(0, Search(idx, "").Count); + } + + // ---- AND semantics for multi-token queries ----------------------------- + + [TestMethod] + public void MultiToken_And_OnlyFilesWithAllTokens() + { + var idx = BuildIndex( + (@"C:\root\annual_report.pdf", "annual_report.pdf"), + (@"C:\root\quarterly_report.pdf", "quarterly_report.pdf"), + (@"C:\root\annual_summary.docx", "annual_summary.docx")); + + // "annual report" → both "annual" AND "report" required + var hits = Search(idx, "annual report"); + + Assert.AreEqual(1, hits.Count); + Assert.AreEqual("annual_report.pdf", hits[0].FileName); + } + + [TestMethod] + public void MultiToken_MissingOneToken_ReturnsEmpty() + { + var idx = BuildIndex((@"C:\root\report.txt", "report.txt")); + // "annual" is not in "report.txt" → no result + 
Assert.AreEqual(0, Search(idx, "annual report").Count); + } + + // ---- No false positives ------------------------------------------------ + + [TestMethod] + public void NoFalsePositives_UnrelatedFilesNotReturned() + { + var idx = BuildIndex( + (@"C:\root\report.pdf", "report.pdf"), + (@"C:\root\invoice.pdf", "invoice.pdf"), + (@"C:\root\summary.txt", "summary.txt")); + + var hits = Search(idx, "report").Select(h => h.FileName).ToHashSet(); + + Assert.IsTrue(hits.Contains("report.pdf")); + Assert.IsFalse(hits.Contains("invoice.pdf")); + Assert.IsFalse(hits.Contains("summary.txt")); + } + + // ---- No false negatives ------------------------------------------------ + + [TestMethod] + public void AllMatchingFiles_AreReturned() + { + var idx = BuildIndex( + (@"C:\root\report_q1.pdf", "report_q1.pdf"), + (@"C:\root\report_q2.pdf", "report_q2.pdf"), + (@"C:\root\report_q3.pdf", "report_q3.pdf"), + (@"C:\root\unrelated.txt", "unrelated.txt")); + + var hits = Search(idx, "report"); + var names = hits.Select(h => h.FileName).ToHashSet(); + + Assert.IsTrue(names.Contains("report_q1.pdf")); + Assert.IsTrue(names.Contains("report_q2.pdf")); + Assert.IsTrue(names.Contains("report_q3.pdf")); + Assert.IsFalse(names.Contains("unrelated.txt")); + } + + // ---- Scope filtering --------------------------------------------------- + + [TestMethod] + public void ScopeFilter_ExcludesOutOfScopePaths() + { + var idx = BuildIndex( + (@"C:\root\folder1\report.txt", "report.txt"), + (@"C:\root\folder2\report.txt", "report.txt")); + + var hits = Search(idx, "report", @"C:\root\folder1"); + + Assert.AreEqual(1, hits.Count); + Assert.IsTrue(hits[0].Path.StartsWith(@"C:\root\folder1", StringComparison.OrdinalIgnoreCase)); + } + + [TestMethod] + public void ScopeFilter_EmptyScope_ReturnsAll() + { + var idx = BuildIndex( + (@"C:\root\folder1\report.txt", "report.txt"), + (@"C:\root\folder2\report.txt", "report.txt")); + + // No scope = no filtering. 
+ var hits = Search(idx, "report"); + Assert.AreEqual(2, hits.Count); + } + + [TestMethod] + public void ScopeFilter_MultipleScopes_UnionSemantics() + { + var idx = BuildIndex( + (@"C:\root\a\report.txt", "report.txt"), + (@"C:\root\b\report.txt", "report.txt"), + (@"C:\root\c\report.txt", "report.txt")); + + var hits = Search(idx, "report", @"C:\root\a", @"C:\root\b"); + Assert.AreEqual(2, hits.Count); + } + + // ---- CamelCase splitting ----------------------------------------------- + + [TestMethod] + public void CamelCase_TokensSearchable() + { + var idx = BuildIndex((@"C:\root\MyDocumentFinal.docx", "MyDocumentFinal.docx")); + + Assert.AreEqual(1, Search(idx, "document").Count); + Assert.AreEqual(1, Search(idx, "my").Count); + Assert.AreEqual(1, Search(idx, "final").Count); + } + + [TestMethod] + public void CamelCase_MultiToken_FindsFile() + { + var idx = BuildIndex((@"C:\root\AnnualReportFinal.pdf", "AnnualReportFinal.pdf")); + Assert.AreEqual(1, Search(idx, "annual report").Count); + } + + // ---- Unicode ----------------------------------------------------------- + + [TestMethod] + public void Unicode_CJK_FindsFile() + { + var idx = BuildIndex((@"C:\root\测试_file.txt", "测试_file.txt")); + Assert.AreEqual(1, Search(idx, "测试").Count); + } + + [TestMethod] + public void Unicode_FilenameWithCJKAndLatin_BothTokensSearchable() + { + var idx = BuildIndex((@"C:\root\测试_report.pdf", "测试_report.pdf")); + Assert.AreEqual(1, Search(idx, "report").Count); + Assert.AreEqual(1, Search(idx, "测试").Count); + } + + // ---- Incremental updates ----------------------------------------------- + + [TestMethod] + public void Upsert_NewFile_IsSearchable() + { + var idx = new FileIndex(); + idx.ReplaceAll([]); + idx.Upsert(@"C:\root\new_report.txt", "new_report.txt", 0, DateTime.UtcNow); + + Assert.AreEqual(1, Search(idx, "report").Count); + Assert.AreEqual(1, Search(idx, "new").Count); + } + + [TestMethod] + public void Upsert_ExistingPath_UpdatesFile() + { + var idx = 
BuildIndex((@"C:\root\file.txt", "old_name.txt")); + // Upsert replaces the existing doc. + idx.Upsert(@"C:\root\file.txt", "new_name.txt", 0, DateTime.UtcNow); + + Assert.AreEqual(0, Search(idx, "old").Count); + Assert.AreEqual(1, Search(idx, "new").Count); + } + + [TestMethod] + public void Delete_RemovedFile_NoLongerReturned() + { + var idx = BuildIndex((@"C:\root\delete_me.txt", "delete_me.txt")); + idx.Delete(@"C:\root\delete_me.txt"); + + Assert.AreEqual(0, Search(idx, "delete").Count); + } + + [TestMethod] + public void Delete_UnknownPath_IsNoOp() + { + var idx = BuildIndex((@"C:\root\file.txt", "file.txt")); + idx.Delete(@"C:\root\nonexistent.txt"); // Should not throw. + Assert.AreEqual(1, Search(idx, "file").Count); + } + + // ---- Result scoring / ordering ----------------------------------------- + + [TestMethod] + public void ExactMatch_RankedFirst() + { + var idx = BuildIndex( + (@"C:\root\report_annual.pdf", "report_annual.pdf"), + (@"C:\root\report.pdf", "report.pdf"), // exact + (@"C:\root\annual_report.pdf", "annual_report.pdf")); + + var hits = Search(idx, "report.pdf"); + + // The exact match ("report.pdf") should have the highest score. + Assert.AreEqual("report.pdf", hits[0].FileName); + Assert.AreEqual(1.0f, hits[0].Score); + } + + [TestMethod] + public void MaxResults_CapsResultCount() + { + var idx = new FileIndex(); + var records = Enumerable.Range(0, 50) + .Select(i => new DocRecord($@"C:\root\report_{i}.txt", $"report_{i}.txt", 0, DateTime.UtcNow)) + .ToList(); + idx.ReplaceAll(records); + + var hits = idx.Search("report", 10, []); + Assert.AreEqual(10, hits.Count); + } + + [TestMethod] + public void MaxResults_Truncation_KeepsTopByScore() + { + // Regression: previously the truncation happened BEFORE sorting by score, + // so the top-N was the first-N candidates in doc-ID order — meaning a + // high-scoring match enrolled late could be silently dropped while + // low-quality substring matches filled the result list. 
Score-then-truncate + // ensures the best matches always survive the cut. + var idx = new FileIndex(); + var records = new List(); + + // 99 low-quality matches added FIRST (lower doc IDs). For query "report" + // these score 0.4 — "report" is a substring of the filename but the + // filename doesn't start with it and "report" isn't a clean prefix of + // a single token either (since they're all "zzzreport..."). + for (int i = 0; i < 99; i++) + records.Add(new DocRecord($@"C:\junk\zzzreportfiller{i}.txt", + $"zzzreportfiller{i}.txt", 0, DateTime.UtcNow)); + + // The high-quality match added LAST (highest doc ID — would be dropped + // by the buggy truncate-then-sort path). + records.Add(new DocRecord(@"C:\root\report.txt", "report.txt", 0, DateTime.UtcNow)); + + idx.ReplaceAll(records); + + var hits = idx.Search("report", maxResults: 5, scopePaths: []); + + Assert.AreEqual(5, hits.Count); + Assert.AreEqual("report.txt", hits[0].FileName, + "high-scoring match must survive truncation, not be dropped because of late doc-ID"); + Assert.AreEqual(0.9f, hits[0].Score, "filename starts with query → 0.9 tier"); + // All other hits should be the lower-scoring filler matches. + foreach (var h in hits.Skip(1)) + Assert.IsTrue(h.Score < hits[0].Score, + $"filler '{h.FileName}' (score {h.Score}) should rank below top match"); + } + + [TestMethod] + public void Scoring_TwoTierRefinement_UpgradesQuickScoreToPrecise() + { + // QuickScore (the bulk pass) only knows exact / startsWith / contains. + // The precise Scorer adds camelCase-prefix detection (0.6 tier) which + // ranks above plain substring (0.4). The refinement pass must surface + // that, otherwise the top-N order is wrong. + // + // "ann" is NOT a startsWith for either file (both start with "notes") + // and IS a substring of both filenames — so QuickScore returns 0.4 for + // both. 
But the precise Scorer sees "ann" is a prefix of file-token + // "annual" while "ann" only appears mid-string in "scanner" → the + // first file should rank above the second after refinement. + var idx = BuildIndex( + (@"C:\root\notes_annual.pdf", "notes_annual.pdf"), + (@"C:\root\notes_scanner.pdf", "notes_scanner.pdf")); + + var hits = idx.Search("ann", maxResults: 10, scopePaths: []); + + Assert.AreEqual(2, hits.Count); + Assert.AreEqual("notes_annual.pdf", hits[0].FileName, + "camelCase-prefix match must rank above plain-substring after refinement"); + Assert.IsTrue(hits[0].Score > hits[1].Score, + $"prefix tier (0.6) must beat substring tier (0.4); got {hits[0].Score} vs {hits[1].Score}"); + } + + [TestMethod] + public void Scoring_PrefixOnFilename_RanksAboveTokenMatch() + { + // A file whose name starts with the query should rank above a file + // where the query is just an interior token. Both go through the + // index hit path; only the precise score distinguishes them. + var idx = BuildIndex( + (@"C:\root\report.txt", "report.txt"), // 0.9: starts with "report" + (@"C:\root\my_report.txt", "my_report.txt")); // 0.8: "report" is a token + + var hits = idx.Search("report", maxResults: 10, scopePaths: []); + + Assert.AreEqual(2, hits.Count); + Assert.AreEqual("report.txt", hits[0].FileName); + Assert.IsTrue(hits[0].Score > hits[1].Score); + } + + // ---- Trigram / mid-string substring search ---------------------------- + + [TestMethod] + public void Trigram_MidStringQuery_FindsFile() + { + // "phab" is not a token of "ALPHABET.md" but is a mid-string substring. + var idx = BuildIndex( + (@"C:\root\ALPHABET.md", "ALPHABET.md"), + (@"C:\root\unrelated.txt", "unrelated.txt")); + + var hits = Search(idx, "phab"); + + Assert.AreEqual(1, hits.Count); + Assert.AreEqual("ALPHABET.md", hits[0].FileName); + } + + [TestMethod] + public void Trigram_PrefixQuery_StillFindsFile() + { + // Trigram search should not break whole-word prefix queries. 
+ var idx = BuildIndex( + (@"C:\root\alphabet.txt", "alphabet.txt"), + (@"C:\root\unrelated.txt", "unrelated.txt")); + + var hits = Search(idx, "alpha"); + + Assert.AreEqual(1, hits.Count); + Assert.AreEqual("alphabet.txt", hits[0].FileName); + } + + [TestMethod] + public void Trigram_MultiFileMatches_AllReturned() + { + var idx = BuildIndex( + (@"C:\root\reporting.pdf", "reporting.pdf"), + (@"C:\root\report.txt", "report.txt"), + (@"C:\root\prereport.docx", "prereport.docx"), + (@"C:\root\unrelated.log", "unrelated.log")); + + // "epor" is mid-string in all three "report" variants but not in "unrelated". + var hits = Search(idx, "epor"); + var names = hits.Select(h => h.FileName).ToHashSet(); + + Assert.IsTrue(names.Contains("reporting.pdf")); + Assert.IsTrue(names.Contains("report.txt")); + Assert.IsTrue(names.Contains("prereport.docx")); + Assert.IsFalse(names.Contains("unrelated.log")); + } + + [TestMethod] + public void Trigram_NoMatch_ReturnsEmpty() + { + var idx = BuildIndex((@"C:\root\document.txt", "document.txt")); + Assert.AreEqual(0, Search(idx, "xyz").Count); + } + + [TestMethod] + public void Trigram_ShortQuery_TokenFallback() + { + // 2-char queries are below trigram threshold; token index still works. 
+ var idx = BuildIndex((@"C:\root\my_file.txt", "my_file.txt")); + Assert.AreEqual(1, Search(idx, "my").Count); + } + + [TestMethod] + public void Trigram_Upsert_MidStringSearchable() + { + var idx = new FileIndex(); + idx.ReplaceAll([]); + idx.Upsert(@"C:\root\ALPHABET.md", "ALPHABET.md", 0, DateTime.UtcNow); + + var hits = Search(idx, "phab"); + Assert.AreEqual(1, hits.Count); + Assert.AreEqual("ALPHABET.md", hits[0].FileName); + } + + [TestMethod] + public void Trigram_DeletedFile_NotReturnedForMidStringQuery() + { + var idx = BuildIndex((@"C:\root\ALPHABET.md", "ALPHABET.md")); + idx.Delete(@"C:\root\ALPHABET.md"); + + Assert.AreEqual(0, Search(idx, "phab").Count); + } + + [TestMethod] + public void Trigram_UnionWithTokenHits_NoDuplicates() + { + // "alpha" is both a whole token and a prefix of "alphabet" — + // the result set should contain "alpha.txt" exactly once. + var idx = BuildIndex((@"C:\root\alpha.txt", "alpha.txt")); + + var hits = Search(idx, "alpha"); + + Assert.AreEqual(1, hits.Count); + } + + [TestMethod] + public void Trigram_CaseInsensitive_FindsFile() + { + var idx = BuildIndex((@"C:\root\UPPERCASE.txt", "UPPERCASE.txt")); + + // Trigrams are lowercased; query should match regardless of case. 
+ Assert.AreEqual(1, Search(idx, "PPER").Count); + Assert.AreEqual(1, Search(idx, "pper").Count); + Assert.AreEqual(1, Search(idx, "Pper").Count); + } + + // ---- Corpus invariant -------------------------------------------------- + + [TestMethod] + [DataRow("report")] + [DataRow("summary")] + [DataRow("meeting")] + [DataRow("config")] + [DataRow("build")] + public void CorpusInvariant_IndexedMatchesNaiveTokenSearch(string queryToken) + { + var files = new[] + { + "annual_report.pdf", + "quarterly_report.docx", + "meeting_notes.txt", + "config_build.json", + "build_output.log", + "summary_q3.xlsx", + "invoice.pdf", + "unrelated.txt", + "MyDocumentFinal.docx", + "report_summary.md", + "測試_report.txt", + }; + + const string root = @"C:\test"; + var idx = new FileIndex(); + var records = files + .Select(f => new DocRecord(Path.Combine(root, f), f, 0, DateTime.UtcNow)) + .ToList(); + idx.ReplaceAll(records); + + // Naive oracle: files whose tokenized name contains the query token. + var expected = files + .Where(f => Tokenizer.Tokenize(f) + .Any(t => t.Equals(queryToken, StringComparison.OrdinalIgnoreCase))) + .Select(f => Path.Combine(root, f)) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + var indexed = Search(idx, queryToken) + .Select(h => h.Path) + .ToHashSet(StringComparer.OrdinalIgnoreCase); + + foreach (var path in expected) + Assert.IsTrue(indexed.Contains(path), $"False negative: '{path}' missing from index results for query '{queryToken}'"); + + foreach (var path in indexed) + Assert.IsTrue(expected.Contains(path), $"False positive: '{path}' returned by index but not in naive oracle for query '{queryToken}'"); + } +} diff --git a/tests/Files.Search.Correctness/Files.Search.Correctness.csproj b/tests/Files.Search.Correctness/Files.Search.Correctness.csproj new file mode 100644 index 000000000000..bca123f556e7 --- /dev/null +++ b/tests/Files.Search.Correctness/Files.Search.Correctness.csproj @@ -0,0 +1,23 @@ + + + + + net10.0-windows10.0.26100.0 + enable 
+ enable + false + true + false + Exe + + + + + + + + + + + + diff --git a/tests/Files.Search.Correctness/PersistenceTests.cs b/tests/Files.Search.Correctness/PersistenceTests.cs new file mode 100644 index 000000000000..172d14970cea --- /dev/null +++ b/tests/Files.Search.Correctness/PersistenceTests.cs @@ -0,0 +1,128 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Files.SearchService.Index; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Files.Search.Correctness; + +[TestClass] +public class PersistenceTests +{ + private string _tmpFile = ""; + + [TestInitialize] + public void Initialize() + { + _tmpFile = Path.Combine(Path.GetTempPath(), $"fsix_test_{Guid.NewGuid():N}.bin"); + } + + [TestCleanup] + public void Cleanup() + { + if (File.Exists(_tmpFile)) File.Delete(_tmpFile); + if (File.Exists(_tmpFile + ".tmp")) File.Delete(_tmpFile + ".tmp"); + } + + [TestMethod] + public async Task RoundTrip_PreservesAllFields() + { + var utc = new DateTime(2024, 6, 15, 12, 0, 0, DateTimeKind.Utc); + var records = new List + { + new(@"C:\root\report.pdf", "report.pdf", 1024UL, utc), + new(@"C:\root\notes.txt", "notes.txt", 2048UL, utc.AddDays(1)), + }; + + await IndexPersistence.SaveAsync(_tmpFile, records, CancellationToken.None); + var loaded = await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); + + Assert.AreEqual(records.Count, loaded.Count); + for (int i = 0; i < records.Count; i++) + { + Assert.AreEqual(records[i].FullPath, loaded[i].FullPath); + Assert.AreEqual(records[i].FileName, loaded[i].FileName); + Assert.AreEqual(records[i].SizeBytes, loaded[i].SizeBytes); + Assert.AreEqual(records[i].ModifiedUtc, loaded[i].ModifiedUtc); + } + } + + [TestMethod] + public async Task RoundTrip_Unicode_PathAndFilename() + { + var records = new List + { + new(@"C:\root\测试\测试_file.txt", "测试_file.txt", 512UL, DateTime.UtcNow), + }; + + await IndexPersistence.SaveAsync(_tmpFile, records, CancellationToken.None); + var 
loaded = await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); + + Assert.AreEqual(1, loaded.Count); + Assert.AreEqual(@"C:\root\测试\测试_file.txt", loaded[0].FullPath); + Assert.AreEqual("测试_file.txt", loaded[0].FileName); + } + + [TestMethod] + public async Task RoundTrip_EmptyList() + { + await IndexPersistence.SaveAsync(_tmpFile, [], CancellationToken.None); + var loaded = await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); + Assert.AreEqual(0, loaded.Count); + } + + [TestMethod] + public async Task RoundTrip_LargeCount_AllPresent() + { + const int count = 10_000; + var utc = DateTime.UtcNow; + var records = Enumerable.Range(0, count) + .Select(i => new DocRecord($@"C:\root\file_{i}.txt", $"file_{i}.txt", (ulong)i, utc)) + .ToList(); + + await IndexPersistence.SaveAsync(_tmpFile, records, CancellationToken.None); + var loaded = await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); + + Assert.AreEqual(count, loaded.Count); + for (int i = 0; i < count; i++) + { + Assert.AreEqual(records[i].FullPath, loaded[i].FullPath); + Assert.AreEqual(records[i].SizeBytes, loaded[i].SizeBytes); + } + } + + [TestMethod] + public async Task SaveIsAtomic_TempFileCleanedUp() + { + await IndexPersistence.SaveAsync(_tmpFile, [], CancellationToken.None); + // The .tmp file must be gone after a successful save. + Assert.IsFalse(File.Exists(_tmpFile + ".tmp")); + } + + [TestMethod] + public async Task Load_CorruptedMagic_Throws() + { + // Write garbage bytes. + await File.WriteAllBytesAsync(_tmpFile, [0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00]); + bool threw = false; + try { await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); } + catch (InvalidDataException) { threw = true; } + Assert.IsTrue(threw, "Expected InvalidDataException was not thrown."); + } + + [TestMethod] + public async Task RoundTrip_LongPath_Preserved() + { + // Paths up to MAX_PATH-ish lengths should survive the round-trip. 
+ var longName = new string('x', 200) + ".txt"; + var longPath = @"C:\root\" + longName; + var records = new List { new(longPath, longName, 0UL, DateTime.UtcNow) }; + + await IndexPersistence.SaveAsync(_tmpFile, records, CancellationToken.None); + var loaded = await IndexPersistence.LoadAsync(_tmpFile, CancellationToken.None); + + Assert.AreEqual(1, loaded.Count); + Assert.AreEqual(longPath, loaded[0].FullPath); + Assert.AreEqual(longName, loaded[0].FileName); + } +} diff --git a/tests/Files.Search.Correctness/ScorerTests.cs b/tests/Files.Search.Correctness/ScorerTests.cs new file mode 100644 index 000000000000..9c2680b2671e --- /dev/null +++ b/tests/Files.Search.Correctness/ScorerTests.cs @@ -0,0 +1,109 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. + +using Files.SearchService.Index; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Files.Search.Correctness; + +[TestClass] +public class ScorerTests +{ + private static float Score(string query, string fileName) + { + var tokens = Tokenizer.Tokenize(query).ToList(); + return Scorer.Score(query, tokens, fileName); + } + + // ---- Tier 1.0 — exact filename match ----------------------------------- + + [TestMethod] + public void Exact_CaseInsensitive_ReturnsOne() + { + Assert.AreEqual(1.0f, Score("report.txt", "report.txt")); + Assert.AreEqual(1.0f, Score("REPORT.TXT", "report.txt")); + Assert.AreEqual(1.0f, Score("report.txt", "REPORT.TXT")); + } + + // ---- Tier 0.9 — filename starts with query ----------------------------- + + [TestMethod] + public void Prefix_ReturnsNinetyPercent() + { + // "report" is a prefix of "report.txt" + Assert.AreEqual(0.9f, Score("report", "report.txt")); + } + + [TestMethod] + public void Prefix_PartialWord() + { + // "rep" is a prefix of "report.txt" + Assert.AreEqual(0.9f, Score("rep", "report.txt")); + } + + // ---- Tier 0.8 — all query tokens exactly match filename tokens --------- + + [TestMethod] + public void 
AllTokenExact_ReturnsEightyPercent() + { + // query "annual report" → tokens ["annual","report"] + // file "annual_report.pdf" → tokens ["annual","report","pdf"] + // all query tokens are exact file tokens → 0.8 + Assert.AreEqual(0.8f, Score("annual report", "annual_report.pdf")); + } + + [TestMethod] + public void AllTokenExact_MultiWord() + { + Assert.AreEqual(0.8f, Score("meeting notes", "meeting_notes.docx")); + } + + // ---- Tier 0.6 — all query tokens are prefix of some filename token ----- + + [TestMethod] + public void AllTokenPrefix_ReturnsSixtyPercent() + { + // query "ann" → token ["ann"] + // file "notes_annual.pdf" → tokens ["notes","annual","pdf"] + // "ann" is a prefix of "annual" but "notes_annual.pdf" does NOT start with "ann" → 0.6 + var score = Score("ann", "notes_annual.pdf"); + Assert.AreEqual(0.6f, score); + } + + // ---- Tier 0.4 — query tokens appear as substring in filename ----------- + // This tier is mainly a safety net; in normal index operation a doc + // can only reach the scorer if all query tokens are exact index tokens, + // which means AllTokenExact (0.8) or AllTokenPrefix (0.6) will fire first. + // Test it via direct Scorer.Score call to verify the tier exists and works. + + [TestMethod] + public void AllSubstring_ReturnsFortyPercent() + { + // Contrived case: query "nual" is a mid-string match only. 
+ // 0.9: "annual_report.pdf" does NOT start with "nual" + // 0.8: "nual" is NOT an exact file token + // 0.6: "nual" is NOT a prefix of any file token ("annual", "report", "pdf") + // 0.4: "nual" IS a substring of "annual_report.pdf" + Assert.AreEqual(0.4f, Score("nual", "annual_report.pdf")); + } + + // ---- Score ordering ---------------------------------------------------- + + [TestMethod] + public void ExactBeatsPrefix() + { + Assert.IsTrue(Score("report.txt", "report.txt") > Score("report", "report.txt")); + } + + [TestMethod] + public void PrefixBeatsAllTokenExact() + { + Assert.IsTrue(Score("report", "report.txt") > Score("annual report", "annual_report.pdf")); + } + + [TestMethod] + public void AllTokenExactBeatsAllTokenPrefix() + { + Assert.IsTrue(Score("annual report", "annual_report.pdf") > Score("ann rep", "annual_report.pdf")); + } +} diff --git a/tests/Files.Search.Correctness/TokenizerTests.cs b/tests/Files.Search.Correctness/TokenizerTests.cs new file mode 100644 index 000000000000..5e0ad904ad8a --- /dev/null +++ b/tests/Files.Search.Correctness/TokenizerTests.cs @@ -0,0 +1,160 @@ +// Copyright (c) Files Community +// Licensed under the MIT License. 
+ +using Files.SearchService.Index; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Files.Search.Correctness; + +[TestClass] +public class TokenizerTests +{ + private static HashSet Tokens(string filename) => + Tokenizer.Tokenize(filename).ToHashSet(StringComparer.OrdinalIgnoreCase); + + // ---- Delimiter splitting ----------------------------------------------- + + [TestMethod] + public void DotSplit_ProducesNameAndExtension() + { + var t = Tokens("report.txt"); + Assert.IsTrue(t.Contains("report")); + Assert.IsTrue(t.Contains("txt")); + } + + [TestMethod] + public void UnderscoreSplit() + { + var t = Tokens("annual_report.pdf"); + Assert.IsTrue(t.Contains("annual")); + Assert.IsTrue(t.Contains("report")); + Assert.IsTrue(t.Contains("pdf")); + } + + [TestMethod] + public void HyphenSplit() + { + var t = Tokens("my-document.txt"); + Assert.IsTrue(t.Contains("my")); + Assert.IsTrue(t.Contains("document")); + } + + [TestMethod] + public void SpaceSplit() + { + var t = Tokens("meeting notes.docx"); + Assert.IsTrue(t.Contains("meeting")); + Assert.IsTrue(t.Contains("notes")); + } + + [TestMethod] + public void MultiExtension() + { + var t = Tokens("archive.tar.gz"); + Assert.IsTrue(t.Contains("archive")); + Assert.IsTrue(t.Contains("tar")); + Assert.IsTrue(t.Contains("gz")); + } + + [TestMethod] + public void NumbersPreservedAsToken() + { + var t = Tokens("report_2024.pdf"); + Assert.IsTrue(t.Contains("2024")); + } + + // ---- CamelCase splitting ----------------------------------------------- + + [TestMethod] + public void CamelCase_LowerUpper_Splits() + { + var t = Tokens("MyDocument.docx"); + Assert.IsTrue(t.Contains("my")); + Assert.IsTrue(t.Contains("document")); + } + + [TestMethod] + public void CamelCase_MultipleWords() + { + var t = Tokens("AnnualReportFinal.pdf"); + Assert.IsTrue(t.Contains("annual")); + Assert.IsTrue(t.Contains("report")); + Assert.IsTrue(t.Contains("final")); + } + + [TestMethod] + public void LetterToDigit_Splits() + { + 
var t = Tokens("v2Final.docx"); + Assert.IsTrue(t.Contains("v")); + Assert.IsTrue(t.Contains("2")); + Assert.IsTrue(t.Contains("final")); + } + + [TestMethod] + public void DigitToLetter_Splits() + { + var t = Tokens("2024Report.pdf"); + Assert.IsTrue(t.Contains("2024")); + Assert.IsTrue(t.Contains("report")); + } + + [TestMethod] + public void AllCaps_TreatedAsSingleToken() + { + var t = Tokens("REPORT.txt"); + Assert.IsTrue(t.Contains("report")); + } + + // ---- Unicode ----------------------------------------------------------- + + [TestMethod] + public void Unicode_CJK_PreservedAsToken() + { + var t = Tokens("测试_file.txt"); + Assert.IsTrue(t.Contains("测试")); + Assert.IsTrue(t.Contains("file")); + Assert.IsTrue(t.Contains("txt")); + } + + [TestMethod] + public void Unicode_Emoji_DoesNotCrash() + { + var t = Tokens("测试_draft_😀.jpg"); + Assert.IsTrue(t.Contains("jpg")); + } + + // ---- Edge cases -------------------------------------------------------- + + [TestMethod] + public void EmptyString_ReturnsNoTokens() + { + Assert.AreEqual(0, Tokenizer.Tokenize("").Count()); + } + + [TestMethod] + public void OnlyDelimiters_ReturnsNoTokens() + { + Assert.AreEqual(0, Tokenizer.Tokenize("___...---").Count()); + } + + [TestMethod] + public void AllTokensAreLowercase() + { + var tokens = Tokenizer.Tokenize("UPPER_lower_Mixed.TXT").ToList(); + foreach (var token in tokens) + Assert.AreEqual(token.ToLowerInvariant(), token); + } + + [TestMethod] + public void ComplexFilename_ContainsExpectedTokens() + { + var t = Tokens("MyDocument_v2Final.docx"); + Assert.IsTrue(t.Contains("my")); + Assert.IsTrue(t.Contains("document")); + Assert.IsTrue(t.Contains("v")); + Assert.IsTrue(t.Contains("2")); + Assert.IsTrue(t.Contains("final")); + Assert.IsTrue(t.Contains("docx")); + } +} diff --git a/tests/Files.Search.Probe/Files.Search.Probe.csproj b/tests/Files.Search.Probe/Files.Search.Probe.csproj new file mode 100644 index 000000000000..486aa8f9f807 --- /dev/null +++ 
b/tests/Files.Search.Probe/Files.Search.Probe.csproj @@ -0,0 +1,19 @@ + + + + + net10.0-windows10.0.26100.0 + Exe + enable + enable + false + false + Files.Search.Probe + Files.Search.Probe + + + + + + + diff --git a/tests/Files.Search.Probe/Program.cs b/tests/Files.Search.Probe/Program.cs new file mode 100644 index 000000000000..148068e8654c --- /dev/null +++ b/tests/Files.Search.Probe/Program.cs @@ -0,0 +1,256 @@ +// Integration test harness for the Files search service. +// +// Runs end-to-end scenarios that mirror what SearchRouter does in Files.App, +// so you can verify search behavior without launching the UI. +// +// Usage: +// dotnet run --project probe.csproj # full test suite +// dotnet run --project probe.csproj -- query "bmra" # single ad-hoc query +// dotnet run --project probe.csproj -- bench # latency benchmark +// +// The harness auto-starts the service if it isn't running, so the only +// thing you need is the built service binary at the path below. + +using Files.IndexedSearch.Client; +using Files.SearchAbstraction; +using System.Diagnostics; + +const string ServiceUrl = "http://localhost:50299"; +const string ServiceExe = @"C:\Users\Tommy\source\repos\Files\src\Files.SearchService\bin\x64\Debug\net10.0-windows10.0.26100.0\files-search-service.exe"; +const string UserProfile = @"C:\Users\Tommy"; + +Environment.SetEnvironmentVariable("FILES_SEARCH_SERVICE_URL", ServiceUrl); + +await EnsureServiceUp(); + +if (args.Length > 0 && args[0] == "query") +{ + await AdHocQuery(args.Length > 1 ? args[1] : "readme", + args.Length > 2 ? 
args[2] : UserProfile); + return; +} + +if (args.Length > 0 && args[0] == "bench") +{ + await Bench(); + return; +} + +await RunTestSuite(); + +// ────────────────────────────────────────────────────────────────────────── +// Test scenarios +// ────────────────────────────────────────────────────────────────────────── + +async Task RunTestSuite() +{ + var results = new List(); + var totalSw = Stopwatch.StartNew(); + + results.Add(await Check("service is up and has indexed files", async () => + { + using var p = new IndexedSearchProvider(); + var h = await p.GetHealthAsync(CancellationToken.None); + Require(h.IsAvailable, $"service unavailable"); + Require(h.IndexedFileCount > 1000, $"only {h.IndexedFileCount} files indexed"); + return $"available, {h.IndexedFileCount:N0} files, indexing={h.IsIndexing}"; + })); + + results.Add(await Check("scoped search returns results in <500ms", async () => + { + var (count, ms, _) = await Search("readme", new[] { UserProfile }, 200); + Require(count > 0, "no results for 'readme' in user profile"); + Require(ms < 500, $"took {ms}ms (>500ms)"); + return $"{count} results in {ms}ms"; + })); + + results.Add(await Check("Home/unscoped search returns results in <500ms", async () => + { + var (count, ms, _) = await Search("readme", Array.Empty(), 200); + Require(count > 0, "no results for 'readme' globally"); + Require(ms < 500, $"took {ms}ms (>500ms)"); + return $"{count} results in {ms}ms (scope=full index)"; + })); + + results.Add(await Check("trigram match for mid-string substring", async () => + { + var (count, ms, sample) = await Search("oduct", Array.Empty(), 50); + return count == 0 + ? "0 results (no files containing 'oduct' in this corpus)" + : $"{count} results in {ms}ms, e.g. 
'{sample}'"; + })); + + results.Add(await Check("nonexistent query returns 0 results quickly", async () => + { + var (count, ms, _) = await Search("zzzzzzzzzzz", Array.Empty(), 50); + Require(ms < 500, $"took {ms}ms"); + Require(count == 0, $"unexpected {count} results"); + return $"0 results in {ms}ms"; + })); + + results.Add(await Check("search does not pin CPU", async () => + { + var svc = Process.GetProcessesByName("files-search-service").FirstOrDefault(); + Require(svc is not null, "service process missing"); + var cpuBefore = svc!.TotalProcessorTime; + var (count, ms, _) = await Search("data", Array.Empty(), 200); + svc.Refresh(); + var cpuAfter = svc.TotalProcessorTime; + var cpuUsed = (cpuAfter - cpuBefore).TotalMilliseconds; + var cpuPct = ms > 0 ? cpuUsed * 100.0 / ms : 0; + // Two-tier scoring iterates all candidates with cheap scoring, which + // uses multiple cores briefly. Threshold accounts for that — pinning + // would be sustained 800%+, not a brief 200-400% spike. + Require(cpuPct < 600, $"CPU at {cpuPct:F0}% (expected <600% during 30ms burst)"); + return $"{count} results in {ms}ms, CPU={cpuPct:F0}% of wall time"; + })); + + results.Add(await Check("warm channel search is <100ms", async () => + { + using var p = new IndexedSearchProvider(); + await p.GetHealthAsync(CancellationToken.None); + var sw = Stopwatch.StartNew(); + int count = 0; + await foreach (var _ in p.SearchAsync( + new SearchQuery("readme", new[] { UserProfile }, MaxResults: 100), CancellationToken.None)) + count++; + var ms = sw.ElapsedMilliseconds; + Require(ms < 100, $"warm search took {ms}ms"); + return $"{count} results in {ms}ms (warm channel)"; + })); + + var passed = results.Count(r => r); + var failed = results.Count - passed; + Console.WriteLine(); + Console.WriteLine($"━━━ {passed} passed, {failed} failed, total {totalSw.ElapsedMilliseconds}ms ━━━"); + Environment.Exit(failed > 0 ? 
1 : 0); +} + +async Task AdHocQuery(string query, string scope) +{ + Console.WriteLine($"Ad-hoc: '{query}' in '{(string.IsNullOrEmpty(scope) ? "" : scope)}'"); + var scopes = string.IsNullOrEmpty(scope) || scope.Equals("Home", StringComparison.OrdinalIgnoreCase) + ? Array.Empty() + : new[] { scope }; + + using var p = new IndexedSearchProvider(); + var sw = Stopwatch.StartNew(); + var hits = new List(); + await foreach (var hit in p.SearchAsync( + new SearchQuery(query, scopes, MaxResults: 50), CancellationToken.None)) + hits.Add(hit); + + Console.WriteLine($"{hits.Count} results in {sw.ElapsedMilliseconds}ms"); + Console.WriteLine($" {"score",6} filename"); + foreach (var h in hits.Take(15)) + Console.WriteLine($" {h.Score,6:F2} {h.FileName}"); + if (hits.Count > 15) + Console.WriteLine($" …{hits.Count - 15} more"); +} + +async Task Bench() +{ + string[] queries = { "readme", "json", "config", "test", "data", "image", "log", "main" }; + using var p = new IndexedSearchProvider(); + await p.GetHealthAsync(CancellationToken.None); // warm up + Console.WriteLine($"{"query",-10} {"results",8} {"first(ms)",10} {"total(ms)",10}"); + + foreach (var q in queries) + { + var sw = Stopwatch.StartNew(); + int count = 0; + long firstMs = -1; + await foreach (var _ in p.SearchAsync( + new SearchQuery(q, Array.Empty(), MaxResults: 200), CancellationToken.None)) + { + if (count == 0) firstMs = sw.ElapsedMilliseconds; + count++; + } + Console.WriteLine($"{q,-10} {count,8} {firstMs,10} {sw.ElapsedMilliseconds,10}"); + } +} + +// ────────────────────────────────────────────────────────────────────────── +// Helpers +// ────────────────────────────────────────────────────────────────────────── + +async Task<(int count, long ms, string? sample)> Search(string query, string[] scopes, int max) +{ + using var p = new IndexedSearchProvider(); + var sw = Stopwatch.StartNew(); + int count = 0; + string? 
first = null; + await foreach (var hit in p.SearchAsync( + new SearchQuery(query, scopes, MaxResults: max), CancellationToken.None)) + { + first ??= hit.FileName; + count++; + } + return (count, sw.ElapsedMilliseconds, first); +} + +async Task Check(string name, Func> body) +{ + Console.Write($" • {name} … "); + try + { + var detail = await body(); + Console.WriteLine($"PASS ({detail})"); + return true; + } + catch (Exception ex) + { + Console.WriteLine($"FAIL {ex.Message}"); + return false; + } +} + +static void Require(bool condition, string message) +{ + if (!condition) throw new InvalidOperationException(message); +} + +async Task EnsureServiceUp() +{ + if (Process.GetProcessesByName("files-search-service").Length > 0) + return; + + if (!File.Exists(ServiceExe)) + { + Console.Error.WriteLine($"Service binary missing: {ServiceExe}"); + Console.Error.WriteLine("Build Files.SearchService first."); + Environment.Exit(2); + } + + Console.WriteLine($"Starting service: {ServiceExe}"); + var psi = new ProcessStartInfo + { + FileName = ServiceExe, + UseShellExecute = false, + CreateNoWindow = true, + RedirectStandardOutput = true, + RedirectStandardError = true, + }; + psi.Environment["FILES_SEARCH_SERVICE_URL"] = ServiceUrl; + psi.Environment["FILES_SEARCH_ROOT"] = UserProfile; + Process.Start(psi); + + // Wait for the service to start accepting connections (up to 10s). 
+ using var probe = new IndexedSearchProvider(); + for (int i = 0; i < 20; i++) + { + await Task.Delay(500); + try + { + var h = await probe.GetHealthAsync(CancellationToken.None); + if (h.IsAvailable) + { + Console.WriteLine($"Service ready: {h.IndexedFileCount:N0} indexed, indexing={h.IsIndexing}"); + return; + } + } + catch { } + } + Console.Error.WriteLine("Service did not become ready within 10s."); + Environment.Exit(3); +} diff --git a/tests/Files.Search.Probe/README.md b/tests/Files.Search.Probe/README.md new file mode 100644 index 000000000000..e9d54570206c --- /dev/null +++ b/tests/Files.Search.Probe/README.md @@ -0,0 +1,35 @@ +# Files.Search.Probe + +Integration harness for `Files.SearchService`. Exercises the real gRPC client +(`Files.IndexedSearch.Client`) against the running service over TCP, so search +behavior can be verified end-to-end without launching the WinUI app. + +## Usage + +``` +dotnet run --project tests/Files.Search.Probe # full 7-check suite +dotnet run --project tests/Files.Search.Probe -- query "readme" # ad-hoc query, shows scores +dotnet run --project tests/Files.Search.Probe -- bench # latency table across 8 common terms +``` + +The probe auto-starts `files-search-service.exe` if no instance is running. It +expects the service binary at the path defined by `ServiceExe` in `Program.cs` +(default: the project's `bin/x64/Debug/.../files-search-service.exe`). 
+ +## What the suite checks + +| Test | Verifies | +|---|---| +| service is up | gRPC reachable; `IndexedFileCount > 1000` | +| scoped search <500ms | search inside `UserProfile`, returns results, under deadline | +| Home/unscoped search <500ms | empty scope path = search whole index | +| trigram substring | mid-string match for queries ≥3 chars | +| nonexistent query | unmatched query returns 0 fast | +| no CPU pinning | service uses <600% CPU-of-wall during a 30 ms query burst | +| warm channel <100ms | second query through the same provider is fast | + +## When to use vs MSTest projects + +- `Files.Search.Correctness` — unit tests on `FileIndex`/`Tokenizer`/`Scorer`/`IndexPersistence`. In-process, no service. +- `Files.Search.Bench` — perf benchmarks against the legacy provider for the CLAUDE.md gates. +- `Files.Search.Probe` (this) — end-to-end integration over the real gRPC transport. Useful for iterating on routing, transport, and lifecycle without rebuilding Files.App. From 077cbd1d0718dd38984aa1e6a840c0a7f5655036 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Wed, 13 May 2026 02:05:53 -0400 Subject: [PATCH 04/10] Docs: log packaged-launch and build issues for next session MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Files.App exits silently before managed code runs on packaged install. Root cause unknown — captured timeline, ruled-out hypotheses, and concrete next debug steps so this can be picked up cold. Also notes two build-side issues: - First clean-tree MSBuild pass fails manifest validation due to Condition="Exists(...)" on the service-staging Content glob; succeeds on second pass once SearchService output exists on disk. - v143 platform toolset on Files.App.Launcher.vcxproj not present locally on VS 2026 machines. 
--- docs/packaged-build-debug-notes.md | 199 +++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 docs/packaged-build-debug-notes.md diff --git a/docs/packaged-build-debug-notes.md b/docs/packaged-build-debug-notes.md new file mode 100644 index 000000000000..9d8e8839f828 --- /dev/null +++ b/docs/packaged-build-debug-notes.md @@ -0,0 +1,199 @@ +# Packaged build + launch — open issues + +Snapshot of unresolved problems found while validating the packaged +(MSIX + SCM) path on `feature/csharp-search-service` as of 2026-05-13. +Pick up here next session. + +## What we proved works + +1. **MSIX builds end-to-end** via msbuild CLI (see "Reproducing the build" below). +2. **SCM picks up the `desktop6:Service` declaration** at install time: + `Get-Service FilesSearchService` → `Running`, `LocalSystem`, auto-start. +3. **Named-pipe DACL allows cross-context user → SYSTEM connect** after + the fix in `src/Files.SearchService/Program.cs:CreatePipeSecurity`: + added `PipeAccessRights.Synchronize` to the AuthenticatedUsers rule. + `NamedPipeClientStream` with `PipeOptions.Asynchronous` needs + Synchronize to wait on the pipe handle for overlapped I/O — + `ReadWrite` alone throws `UnauthorizedAccessException`. + +## Open issues + +### Issue 1 — Files.exe exits silently on packaged launch + +**Symptom.** Double-click Start menu icon → no window appears. Files.exe +process is created (event log shows AppX container creation + process +add) and torn down in the same second. No crash dump, no event log +error, no first-chance exception logged. + +**Diagnostic timeline.** + +| Time | State | Finding | +|---|---|---| +| First launch | DeploymentManager auto-init | `COMException 0x80040154 (REGDB_E_CLASSNOTREG)` activating `Microsoft.Windows.ApplicationModel.WindowsAppRuntime.DeploymentInitializeOptions`. Stack: `DeploymentManagerCS.AutoInitialize.Access`. 
| +| Mitigation 1 | Set `<WindowsAppSdkDeploymentManagerInitialize>false</WindowsAppSdkDeploymentManagerInitialize>` in `Files.App.csproj` | Confirmed the generated init code is no longer in `obj/`. The DeploymentManager crash is fixed. | +| Second launch | Different failure mode | Container created+destroyed same second. No managed exception, no WER dump (verified with WER LocalDumps registry set to capture *all* Files.exe crashes — `C:\CrashDumps` stays empty). | +| Mitigation 2 | Added file-based logging to `Program.cs` static constructor | Log file (`%TEMP%\files-startup.log`) **never written** — the process exits before the static constructor runs. | + +**Conclusion.** The remaining exit happens *before any managed code in +the Files.App assembly executes* — apphost or .NET runtime +initialization phase. The diagnostic logging in `Program.cs` static +ctor was reverted before commit since it never fired. + +**Hypotheses, ranked.** + +1. **Apphost can't find a required native dependency.** Possible + candidates: `Microsoft.WindowsAppRuntime.Bootstrap.dll`, + `Microsoft.ui.xaml.dll`, or one of the WinAppSDK projection DLLs. + The framework MSIX dependency *did* install (verified + `Microsoft.WindowsAppRuntime.1.8 8000.836.2153.0 X64` present), so + if a DLL is missing it's a load-path issue, not an absence issue. +2. **.NET 10 runtime config mismatch.** `Files.exe` is a + framework-dependent apphost; if `Files.runtimeconfig.json` points at + a runtime version not installed in the WindowsApps install + location, the apphost would error out. +3. **Self-instance redirect on stale kernel state.** `Program.cs:27-46` + opens a named semaphore `Files-{Env}-Instance` and exits if + `isNew=false`. A stuck kernel handle from a previously crashed + Files.exe could make this fire silently. **Ruled out** — the + diagnostic logging would have caught this, and it didn't fire, + meaning the exit is upstream of the static ctor. +4. **Single-Project MSIX vs. 
solution layout disagreement.** The csproj + has `<EnableMsixTooling>true</EnableMsixTooling>` but build is + driven from the .csproj directly, not from a separate `.wapproj`. + Possible config inconsistency between what MSBuild expects for + resource generation vs. what ends up in the AppxManifest. + +**Concrete next steps to try.** + +- Enable CLR startup ETW tracing: + `logman start clrstart -p "Microsoft-Windows-DotNETRuntime" 0x4 0x5 + -ets -o C:\clrstart.etl`, launch Files, stop the trace, open in PerfView. + This will show whether the runtime even starts. +- Inspect installed package layout for missing DLLs: + `Get-ChildItem 'C:\Program Files\WindowsApps\FilesDev_*' -Recurse + -Filter '*.dll'` vs. the build output. Diff for missing entries. +- Run `Files.exe` from inside a packaged-identity wrapper that captures + stderr. `Invoke-CommandInDesktopPackage` crashed itself on this + machine (Win10 19045 bug); use `psexec -i <session>` or a manual COM + activation via `IDesktopAppXActivator` from a small C# host instead. +- Check `Files.runtimeconfig.json` in the installed package against the + `.NET 10.0.7` runtime declared by WER (`CoreCLR Version: 10.0.726.21808`). + +**What we know it's not.** +- Not our search code. Reverting `AppLifecycleHelper.cs` to upstream + (drop the `Task.Run(SearchServiceManager.EnsureRunning)` line) and + rebuilding the bundle reproduces the same silent exit. Pre-existing. +- Not DACL/SCM. Those work — see "What we proved works" above. +- Not the framework dependency. The MSIX install registers the package + with the correct `WindowsAppRuntime.1.8` framework, and resolves to + the installed `8000.836.2153.0` build. + +### Issue 2 — First MSBuild pass fails manifest validation + +**Symptom.** + +``` +MakeAppx : error : Manifest validation error: Line 101, Column 56, +Reason: The file name "SearchService\files-search-service.exe" +declared for element ... doesn't exist in the package. 
+MakeAppx : error : 0x80080204 - The specified package format is not valid +``` + +**Root cause.** In `src/Files.App/Files.App.csproj` the `<Content Include="...">` glob that stages the service +binary into the MSIX has a `Condition="Exists(...)"` predicate. MSBuild +evaluates `Exists()` during the static-evaluation phase, *before any +project is built.* On a clean tree, `Files.SearchService\bin\...` +doesn't exist yet, so the Content items get dropped — but the manifest +still references the path, and MakeAppx fails. + +A second `msbuild` invocation right after the failed one succeeds +because the SearchService output now exists on disk from the prior +attempt. **The build is non-deterministic on a clean tree.** + +**Fix.** Remove the `Condition="Exists(...)"` — the +`<ProjectReference>` on `Files.SearchService.csproj` (with +`ReferenceOutputAssembly="false"`) is already a build dependency, so +the output is guaranteed to exist by the time Content evaluates *if* +we let MSBuild order things correctly. Caveat: a wildcard `Include` declared outside a target is itself expanded during evaluation, so dropping the condition alone may still yield no items on a clean tree. Alternative (more robust): move the staging +into a `<Target>` block that copies +the binaries on demand. + +This wasn't fixed in the committed change because it requires testing +on a clean tree and we wanted to bank progress first. + +### Issue 3 — v143 platform toolset not installed + +**Symptom.** + +``` +The build tools for Visual Studio 2022 (Platform Toolset = 'v143') +cannot be found. To build using the v143 build tools, please install +Visual Studio 2022 build tools. +``` + +**Root cause.** The C++ launcher (`src/Files.App.Launcher/Files.App.Launcher.vcxproj`) +declares `v143`. This machine has VS 2026 +(toolset v145) installed; v143 isn't present. + +**Workarounds.** +1. Install the VS 2022 Build Tools side-by-side with VS 2026. +2. Edit the vcxproj to `v145` locally (matches `project_build_env` + memory note: "two upstream divergences in `Files.App.Launcher`"). + Don't commit this — it'd break CI which uses v143. 
+ +Memory entry already exists at `project_build_env.md` covering the +related stdcpp20 + towupper divergences. Worth extending to mention +the toolset version pin. + +## Reproducing the build + +```powershell +# 1. Restore. +& "C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe" ` + src/Files.App/Files.App.csproj -t:Restore -p:Platform=x64 -p:Configuration=Release + +# 2. First build — may fail with the manifest validation error above. +& "C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe" ` + src/Files.App/Files.App.csproj -t:Build ` + -p:Platform=x64 -p:Configuration=Release -p:AppxBundlePlatforms=x64 ` + -p:AppxPackageDir="$pwd\artifacts\AppxPackages\" ` + -p:AppxBundle=Always -p:UapAppxPackageBuildMode=SideloadOnly ` + -p:GenerateAppxPackageOnBuild=true -p:AppxPackageSigningEnabled=true ` + -p:PackageCertificateKeyFile="src\Files.App\Files.App_TemporaryKey.pfx" ` + -v:minimal + +# 3. If step 2 failed with manifest validation, run it again — second pass +# succeeds because Files.SearchService output now exists on disk. + +# 4. Install (admin PowerShell): +Add-AppxPackage -Path "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Files.App_4.1.0.0_x64.msixbundle" ` + -DependencyPath "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Dependencies\x64\Microsoft.WindowsAppRuntime.1.8.msix" +``` + +## State of the validation + +- Search-service infrastructure: **proven** in packaged mode. +- Files.App launch from packaged install: **broken**, pre-existing, root + cause unknown. This is a ship-blocker for any release that uses the + packaged path, but does not block sending the PR upstream — the Files + team's CI builds packaged Files routinely and would not see this + machine-local failure. 
+ +## Useful one-liners for next session + +```powershell +# Latest crashes for Files.exe (bypasses WER dedup if LocalDumps is set): +$base = "HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\Files.exe" +Get-ItemProperty $base +ls C:\CrashDumps + +# Force a fresh launch and watch for activation events: +Start-Process 'shell:appsFolder\FilesDev_j4wp4nz5mtqsg!App' +Start-Sleep 5 +Get-WinEvent -LogName 'Microsoft-Windows-AppModel-Runtime/Admin' -MaxEvents 10 | + Where-Object { $_.TimeCreated -gt (Get-Date).AddMinutes(-1) -and $_.Id -in 211,212,217 } + +# Real AUMID (publisher hash varies if cert changes): +Get-StartApps | Where-Object Name -like '*Files - Dev*' +``` From c7ef7b0230828bc3875a5f30547c581662891920 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Sun, 17 May 2026 11:47:53 -0400 Subject: [PATCH 05/10] Fix: Stage Files.App.Server payload and correct service EntryPoint case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AppxManifest declares an OutOfProcessServer at Files.App.Server\Files.App.Server.exe, but the csproj was only staging the .winmd metadata at the top level — the exe and its deps were never copied into the package's Files.App.Server\ subdir. Adds a Content Include block mirroring the SearchService staging pattern. Also corrects EntryPoint="windows.FullTrustApplication" (lowercase 'w') to "Windows.FullTrustApplication" on the desktop6:Service extension. Microsoft docs specify capital W for the service entrypoint string. Neither fix resolves the Win10 19045 packaged-activation failure (see docs/packaged-build-debug-notes.md), but both are real package correctness bugs. 
--- src/Files.App/Files.App.csproj | 8 ++++++++ src/Files.App/Package.appxmanifest | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Files.App/Files.App.csproj b/src/Files.App/Files.App.csproj index af095d38cb51..bccb72bed49d 100644 --- a/src/Files.App/Files.App.csproj +++ b/src/Files.App/Files.App.csproj @@ -78,6 +78,14 @@ SearchService\%(RecursiveDir)%(Filename)%(Extension) PreserveNewest + + + Files.App.Server\%(RecursiveDir)%(Filename)%(Extension) + PreserveNewest + PreserveNewest diff --git a/src/Files.App/Package.appxmanifest b/src/Files.App/Package.appxmanifest index a44acc524148..14fe8116c5ed 100644 --- a/src/Files.App/Package.appxmanifest +++ b/src/Files.App/Package.appxmanifest @@ -144,7 +144,7 @@ Runs as LocalSystem so it can read the NTFS USN Change Journal for fast whole-drive indexing without a UAC prompt at runtime. Admin is only required at install time, which MSIX already handles. --> - + From e393366d7b3402cc2e60b89aa1d3c9a2d6f6c05b Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Mon, 18 May 2026 10:18:54 -0400 Subject: [PATCH 06/10] Code Quality: Tighten search comments, add dev-cycle + debug-activation scripts Comment fixes: - SearchServiceManager.LaunchIfNotRunning now describes the actual TCP-loopback collision pattern instead of mentioning named pipes. Dev mode uses FILES_SEARCH_SERVICE_URL with TCP loopback, not pipes. - SearchRouter._serviceAvailable documents that the racing reads/writes are intentionally unsynchronized (the worst case is two concurrent first-searches each issuing an idempotent health probe). scripts/dev-cycle.ps1: one-shot stop-service + uninstall + build + install + activate cycle. Handles the manifest-validation retry on a clean tree. Switches platform via -Platform x64|x86|arm64. 
scripts/debug-activation.ps1: bundles the kernel-process ETW trace, AppXDeploymentServer / AppModel-Runtime / TWinUI / Application-log filters, and WER state inspection we needed for the Win10 19045 packaged-launch diagnosis. Writes a per-run output directory with one file per source plus a summary. --- scripts/debug-activation.ps1 | 254 ++++++++++++++++++ scripts/dev-cycle.ps1 | 138 ++++++++++ .../Application/SearchServiceManager.cs | 5 +- .../Utils/Storage/Search/SearchRouter.cs | 2 + 4 files changed, 397 insertions(+), 2 deletions(-) create mode 100644 scripts/debug-activation.ps1 create mode 100644 scripts/dev-cycle.ps1 diff --git a/scripts/debug-activation.ps1 b/scripts/debug-activation.ps1 new file mode 100644 index 000000000000..60928c0023a0 --- /dev/null +++ b/scripts/debug-activation.ps1 @@ -0,0 +1,254 @@ +<# +.SYNOPSIS + Diagnose why packaged Files.exe activation isn't producing a process. + +.DESCRIPTION + Bundles every diagnostic we found useful while chasing the Win10 19045 + packaged-launch failure (see docs/packaged-build-debug-notes.md): + + 1. Kernel-process ETW trace — every CreateProcess in the window. + Definitive answer to "did Files.exe spawn at all?" + 2. AppXDeploymentServer log — register-loop / state-repo signals + (7028, 856/857, 603, 9621, 9626). + 3. AppModel-Runtime/Admin log — container lifecycle (210, 211, 212, 217). + 4. TWinUI/Operational log — activation success/failure (1621, 5961). + 5. .NET Runtime + Application Error log for any post-spawn crashes. + 6. WER LocalDumps state. + + All output is timestamped and saved to a single output directory you can + grep or compare across runs. + + Run from an ELEVATED PowerShell. The kernel ETW session requires admin. + +.PARAMETER Aumid + AppUserModelId to activate. Default: FilesDev_j4wp4nz5mtqsg!App. + +.PARAMETER Seconds + How long to watch after activation. Default: 10. + +.PARAMETER OutputDir + Where to write the trace + summary. Default: %TEMP%\files-activation-<yyyyMMdd-HHmmss>.
+ +.EXAMPLE + .\scripts\debug-activation.ps1 + .\scripts\debug-activation.ps1 -Seconds 20 + .\scripts\debug-activation.ps1 -Aumid 'Files_j4wp4nz5mtqsg!App' +#> +[CmdletBinding()] +param( + [string]$Aumid = 'FilesDev_j4wp4nz5mtqsg!App', + [int]$Seconds = 10, + [string]$OutputDir = (Join-Path $env:TEMP ("files-activation-" + (Get-Date -Format 'yyyyMMdd-HHmmss'))) +) + +$ErrorActionPreference = 'Stop' + +# --- Elevation check --- +$id = [System.Security.Principal.WindowsIdentity]::GetCurrent() +if (-not (New-Object System.Security.Principal.WindowsPrincipal($id)).IsInRole([System.Security.Principal.WindowsBuiltInRole]::Administrator)) { + throw 'Must run elevated. logman -ets requires admin.' +} + +New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null +function Write-Step($msg) { Write-Host "==> $msg" -ForegroundColor Cyan } +function Save-Section($name, $content) { + $path = Join-Path $OutputDir "$name.txt" + $content | Out-File -FilePath $path -Encoding UTF8 + Write-Host " -> $path" +} + +$etl = Join-Path $OutputDir 'kproc.etl' +$xml = Join-Path $OutputDir 'kproc.xml' +$startTime = Get-Date + +# --- Baseline event record IDs so we only collect new events --- +function Get-LastRecord($logName) { + try { + (Get-WinEvent -LogName $logName -MaxEvents 1 -ErrorAction Stop).RecordId + } catch { 0 } +} +$baselines = @{ + 'Microsoft-Windows-AppXDeploymentServer/Operational' = Get-LastRecord 'Microsoft-Windows-AppXDeploymentServer/Operational' + 'Microsoft-Windows-AppModel-Runtime/Admin' = Get-LastRecord 'Microsoft-Windows-AppModel-Runtime/Admin' + 'Microsoft-Windows-TWinUI/Operational' = Get-LastRecord 'Microsoft-Windows-TWinUI/Operational' + 'Application' = Get-LastRecord 'Application' +} + +# --- Kernel-process ETW trace --- +Write-Step 'Starting kernel-process ETW trace' +$null = logman stop kproc-debug -ets 2>&1 +$null = logman start kproc-debug -p 'Microsoft-Windows-Kernel-Process' 0x10 5 -ets -o $etl + +# --- Activate --- +Write-Step "Activating $Aumid" 
+Start-Process explorer.exe "shell:appsFolder\$Aumid" + +# --- Watch for Files.exe --- +$sw = [System.Diagnostics.Stopwatch]::StartNew() +$firstSeen = $null +$lastSeen = $null +while ($sw.Elapsed.TotalSeconds -lt $Seconds) { + $p = Get-Process Files -ErrorAction SilentlyContinue + if ($p) { + if (-not $firstSeen) { $firstSeen = $sw.Elapsed.TotalMilliseconds } + $lastSeen = $sw.Elapsed.TotalMilliseconds + } + Start-Sleep -Milliseconds 50 +} + +Write-Step 'Stopping trace' +$null = logman stop kproc-debug -ets + +# --- Convert kernel trace to XML --- +Write-Step 'Converting trace' +$null = tracerpt $etl -o $xml -of XML -y 2>&1 + +# --- Parse kernel-process events --- +Write-Step 'Parsing kernel events' +[xml]$traceXml = Get-Content $xml +$ns = New-Object System.Xml.XmlNamespaceManager($traceXml.NameTable) +$ns.AddNamespace('e','http://schemas.microsoft.com/win/2004/08/events/event') +$creates = $traceXml.SelectNodes('//e:Event[e:System/e:EventID=1]', $ns) +$exits = $traceXml.SelectNodes('//e:Event[e:System/e:EventID=2]', $ns) + +$pkgFamily = ($Aumid -split '!')[0] +$kprocReport = [System.Collections.Generic.List[string]]::new() +$kprocReport.Add("Total process creates in trace: $($creates.Count)") +$kprocReport.Add("Total process exits in trace: $($exits.Count)") +$kprocReport.Add('') +$kprocReport.Add("--- Creates matching FilesDev / Files.exe / files-search / RuntimeBroker ---") +$matched = $false +foreach ($n in $creates) { + $d = @{} + if ($n.EventData -and $n.EventData.Data) { foreach ($x_ in $n.EventData.Data) { $d[$x_.Name] = $x_."#text" } } + $img = $d['ImageName'] + if ($img -and ($img -match "$pkgFamily|\\Files\.exe|files-search|RuntimeBroker")) { + $kprocReport.Add((" PID {0,6} -> {1} (parent {2})" -f $d['ProcessID'], $img, $d['ParentProcessID'])) + $matched = $true + } +} +if (-not $matched) { $kprocReport.Add(' (no matching creates)') } +Save-Section 'kernel-process' ($kprocReport -join "`n") + +# --- Pull new events from each log since baseline --- 
+function Get-NewEvents($logName, $idsOfInterest) { + $base = $baselines[$logName] + try { + Get-WinEvent -LogName $logName -MaxEvents 200 -ErrorAction Stop | + Where-Object { $_.RecordId -gt $base } | + Sort-Object TimeCreated | + Where-Object { ($null -eq $idsOfInterest) -or ($_.Id -in $idsOfInterest) } + } catch { @() } +} + +# AppXDeploymentServer — focus on register-loop + manifest-parse + service errors +$deployEvents = Get-NewEvents 'Microsoft-Windows-AppXDeploymentServer/Operational' @(400, 401, 404, 603, 604, 607, 613, 649, 856, 857, 7028, 9621, 9626, 9627, 9644, 9647, 9650, 10000, 10001) | + Where-Object { $_.Message -match $pkgFamily.Split('_')[0] } +$deployReport = if ($deployEvents) { + ($deployEvents | ForEach-Object { + "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, (($_.Message -split '\r?\n')[0]) + }) -join "`n" +} else { '(no new events)' } +Save-Section 'appx-deployment' $deployReport + +# AppModel-Runtime/Admin — container lifecycle +$appmodelEvents = Get-NewEvents 'Microsoft-Windows-AppModel-Runtime/Admin' @(210, 211, 212, 217) | + Where-Object { $_.Message -match $pkgFamily.Split('_')[0] } +$appmodelReport = if ($appmodelEvents) { + ($appmodelEvents | ForEach-Object { + "{0:HH:mm:ss.fff} {1,-5} {2}" -f $_.TimeCreated, $_.Id, (($_.Message -split '\r?\n')[0]) + }) -join "`n" +} else { '(no new events)' } +Save-Section 'appmodel-runtime' $appmodelReport + +# TWinUI — activation attempts +$twinuiEvents = Get-NewEvents 'Microsoft-Windows-TWinUI/Operational' $null | + Where-Object { $_.Message -match $Aumid -or $_.Message -match $pkgFamily.Split('_')[0] } +$twinuiReport = if ($twinuiEvents) { + ($twinuiEvents | ForEach-Object { + "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, (($_.Message -split '\r?\n')[0]) + }) -join "`n" +} else { '(no new events)' } +Save-Section 'twinui' $twinuiReport + +# Application log — .NET Runtime + Application Error +$appLogEvents = Get-NewEvents 
'Application' @(1000, 1026) | + Where-Object { $_.Message -match 'Files\.exe|files-search' } +$appLogReport = if ($appLogEvents) { + ($appLogEvents | ForEach-Object { + "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}`n----`n{4}`n----" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, $_.ProviderName, $_.Message + }) -join "`n`n" +} else { '(no Files-related errors in Application log)' } +Save-Section 'application-log' $appLogReport + +# WER LocalDumps state +$werReport = @() +$key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\Files.exe' +$werCfg = Get-ItemProperty $key -ErrorAction SilentlyContinue +if ($werCfg) { + $werReport += "LocalDumps configured for Files.exe:" + $werReport += " DumpFolder: $($werCfg.DumpFolder)" + $werReport += " DumpType: $($werCfg.DumpType)" + $werReport += " DumpCount: $($werCfg.DumpCount)" +} else { + $werReport += "LocalDumps NOT configured for Files.exe (HKLM\...\WER\LocalDumps\Files.exe missing)" +} +$werReport += '' +$werReport += '--- Recent dumps in C:\CrashDumps ---' +$dumps = Get-ChildItem C:\CrashDumps -Filter 'Files*.dmp' -ErrorAction SilentlyContinue | + Where-Object { $_.LastWriteTime -gt $startTime.AddMinutes(-1) } +if ($dumps) { + foreach ($d in $dumps) { $werReport += (" {0} {1:N0} bytes {2}" -f $d.Name, $d.Length, $d.LastWriteTime) } +} else { + $werReport += ' (no new dumps since trace start)' +} +Save-Section 'wer-state' ($werReport -join "`n") + +# --- Build summary --- +$summary = [System.Collections.Generic.List[string]]::new() +$summary.Add("# Activation diagnostic summary") +$summary.Add("AUMID: $Aumid") +$summary.Add("Watched: $Seconds seconds") +$summary.Add("Output dir: $OutputDir") +$summary.Add('') +$summary.Add("## Files.exe process state") +if ($firstSeen) { + $summary.Add(" SPAWNED at T+$([int]$firstSeen)ms, last seen at T+$([int]$lastSeen)ms") + $stillAlive = Get-Process Files -ErrorAction SilentlyContinue + $summary.Add(" Currently alive: $($null -ne $stillAlive)") +} else { + $summary.Add(" 
NEVER spawned in $Seconds seconds") +} +$summary.Add('') +$summary.Add("## Files-related processes seen in kernel trace") +if ($matched) { + foreach ($n in $creates) { + $d = @{} + if ($n.EventData -and $n.EventData.Data) { foreach ($x_ in $n.EventData.Data) { $d[$x_.Name] = $x_."#text" } } + $img = $d['ImageName'] + if ($img -and ($img -match "$pkgFamily|\\Files\.exe|files-search|RuntimeBroker")) { + $summary.Add(" PID $($d['ProcessID']) -> $img") + } + } +} else { + $summary.Add(" (none)") +} +$summary.Add('') +$summary.Add("## Event counts") +$summary.Add(" AppXDeploymentServer (filtered): $($deployEvents.Count)") +$summary.Add(" AppModel-Runtime/Admin: $($appmodelEvents.Count)") +$summary.Add(" TWinUI/Operational: $($twinuiEvents.Count)") +$summary.Add(" Application (.NET / WER): $($appLogEvents.Count)") +$summary.Add('') +$summary.Add("Detailed per-source reports written to:") +$summary.Add(" $OutputDir\kernel-process.txt") +$summary.Add(" $OutputDir\appx-deployment.txt") +$summary.Add(" $OutputDir\appmodel-runtime.txt") +$summary.Add(" $OutputDir\twinui.txt") +$summary.Add(" $OutputDir\application-log.txt") +$summary.Add(" $OutputDir\wer-state.txt") + +$summaryText = $summary -join "`n" +Save-Section 'summary' $summaryText +Write-Host '' +Write-Host $summaryText diff --git a/scripts/dev-cycle.ps1 b/scripts/dev-cycle.ps1 new file mode 100644 index 000000000000..509545aeb589 --- /dev/null +++ b/scripts/dev-cycle.ps1 @@ -0,0 +1,138 @@ +<# +.SYNOPSIS + One-shot packaged-build test cycle: stop service, uninstall, build, install, activate. + +.DESCRIPTION + Replaces the ~10-step manual sequence with a single command. Useful when + iterating on packaging-affecting changes (manifest, csproj content staging, + AppxBundlePlatforms, certificates, etc.). + + Run from an ELEVATED PowerShell. Add-AppxPackage of packaged services + requires admin. + +.PARAMETER Platform + x64 (default), x86, or arm64. + +.PARAMETER Configuration + Release (default) or Debug. 
+ +.PARAMETER NoActivate + Skip the post-install Start-Process. Useful for CI / scripted runs. + +.PARAMETER SkipBuild + Reuse the bundle already at artifacts/AppxPackages/. Useful when the + build is unchanged and you just want to re-install. + +.EXAMPLE + .\scripts\dev-cycle.ps1 + .\scripts\dev-cycle.ps1 -Platform x86 + .\scripts\dev-cycle.ps1 -SkipBuild -NoActivate +#> +[CmdletBinding()] +param( + [ValidateSet('x64', 'x86', 'arm64')] + [string]$Platform = 'x64', + + [ValidateSet('Release', 'Debug')] + [string]$Configuration = 'Release', + + [switch]$NoActivate, + [switch]$SkipBuild +) + +$ErrorActionPreference = 'Stop' +$repoRoot = Split-Path -Parent $PSScriptRoot +$msbuild = 'C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe' +$bundle = Join-Path $repoRoot "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Files.App_4.1.0.0_$Platform.msixbundle" +$depMsix = Join-Path $repoRoot "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Dependencies\$Platform\Microsoft.WindowsAppRuntime.1.8.msix" +$aumid = 'FilesDev_j4wp4nz5mtqsg!App' + +function Write-Step($msg) { Write-Host "==> $msg" -ForegroundColor Cyan } + +# --- Elevation check --- +$id = [System.Security.Principal.WindowsIdentity]::GetCurrent() +if (-not (New-Object System.Security.Principal.WindowsPrincipal($id)).IsInRole([System.Security.Principal.WindowsBuiltInRole]::Administrator)) { + throw 'Must run elevated. Re-launch PowerShell as Administrator.' 
+} + +# --- Stop service --- +Write-Step 'Stopping FilesSearchService (if running)' +Stop-Service FilesSearchService -Force -ErrorAction SilentlyContinue +Start-Sleep -Milliseconds 500 + +# --- Uninstall existing --- +Write-Step 'Uninstalling existing FilesDev' +Get-AppxPackage FilesDev -AllUsers -ErrorAction SilentlyContinue | + Remove-AppxPackage -AllUsers -ErrorAction SilentlyContinue +Start-Sleep -Seconds 1 + +# --- Build --- +if (-not $SkipBuild) { + Write-Step "Building $Configuration|$Platform" + Push-Location $repoRoot + try { + $args = @( + 'src/Files.App/Files.App.csproj', '-t:Build', + "-p:Platform=$Platform", + "-p:Configuration=$Configuration", + "-p:AppxBundlePlatforms=$Platform", + "-p:AppxPackageDir=$repoRoot\artifacts\AppxPackages\", + '-p:AppxBundle=Always', + '-p:UapAppxPackageBuildMode=SideloadOnly', + '-p:GenerateAppxPackageOnBuild=true', + '-p:AppxPackageSigningEnabled=true', + '-p:PackageCertificateKeyFile=src\Files.App\Files.App_TemporaryKey.pfx', + '-v:minimal' + ) + & $msbuild @args | Tee-Object -Variable buildLog | Out-Null + if ($LASTEXITCODE -ne 0) { + # First pass can fail on a clean tree with manifest-validation error + # (the Content Condition=Exists race). Retry once — the SearchService + # output exists on disk from the failed first attempt. 
+ $manifestRace = $buildLog -match 'doesn''t exist in the package' + if ($manifestRace) { + Write-Host ' (manifest-validation race on first pass — retrying)' -ForegroundColor Yellow + & $msbuild @args | Out-Host + } + if ($LASTEXITCODE -ne 0) { + throw "Build failed with exit $LASTEXITCODE" + } + } + } finally { + Pop-Location + } +} + +# --- Install --- +Write-Step "Installing $(Split-Path -Leaf $bundle)" +if (-not (Test-Path $bundle)) { + throw "Bundle not found: $bundle" +} +Add-AppxPackage -Path $bundle -DependencyPath $depMsix -ErrorAction Stop +Start-Sleep -Seconds 1 + +$pkg = Get-AppxPackage FilesDev -AllUsers | Select-Object -First 1 +Write-Host " Installed: $($pkg.PackageFullName)" -ForegroundColor Green + +# --- Activate --- +if (-not $NoActivate) { + Write-Step 'Activating' + Start-Process explorer.exe "shell:appsFolder\$aumid" + + $sw = [System.Diagnostics.Stopwatch]::StartNew() + $seen = $false + while ($sw.Elapsed.TotalSeconds -lt 8) { + $p = Get-Process Files -ErrorAction SilentlyContinue + if ($p -and -not $seen) { + Write-Host (" Files.exe PID={0} caught at T+{1:F0}ms" -f ($p.Id -join ','), $sw.Elapsed.TotalMilliseconds) -ForegroundColor Green + $seen = $true + break + } + Start-Sleep -Milliseconds 50 + } + if (-not $seen) { + Write-Host ' Files.exe never observed. Run scripts\debug-activation.ps1 to diagnose.' 
-ForegroundColor Yellow + } +} + +Write-Step 'Done' diff --git a/src/Files.App/Helpers/Application/SearchServiceManager.cs b/src/Files.App/Helpers/Application/SearchServiceManager.cs index 3e9cb2d99fcc..f92b2ad6c830 100644 --- a/src/Files.App/Helpers/Application/SearchServiceManager.cs +++ b/src/Files.App/Helpers/Application/SearchServiceManager.cs @@ -96,8 +96,9 @@ private static void RegisterStartup(string exePath) private static void LaunchIfNotRunning(string exePath) { // Kill any stale instances first — in dev mode the HKCU\Run entry or a - // previous debug session may have left a process holding the named pipe, - // which causes AddressInUseException on the next start. + // previous debug session may have left a process bound to the TCP + // loopback port (FILES_SEARCH_SERVICE_URL), which causes + // AddressInUseException on the next start. foreach (var stale in Process.GetProcessesByName(ProcessName)) { try { stale.Kill(entireProcessTree: true); stale.WaitForExit(2000); } diff --git a/src/Files.App/Utils/Storage/Search/SearchRouter.cs b/src/Files.App/Utils/Storage/Search/SearchRouter.cs index 5272692fbae3..5ab9ae80f2d0 100644 --- a/src/Files.App/Utils/Storage/Search/SearchRouter.cs +++ b/src/Files.App/Utils/Storage/Search/SearchRouter.cs @@ -35,6 +35,8 @@ public sealed class SearchRouter // Cached availability flag. We probe once, then assume the service stays up. // Reset to null when a search fails so the next search re-probes. + // Racing reads/writes here are intentionally unsynchronized: the worst case is + // two concurrent first-searches each issue a health probe, which is idempotent. private static bool? _serviceAvailable = null; public string? 
Query { get; set; } From 9dc2608e9b1aaacd9ef3f698d0359f237d110b23 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Mon, 18 May 2026 20:18:25 -0400 Subject: [PATCH 07/10] Remove personal dev artifacts before submitting upstream CLAUDE.md and the local debug/dev-cycle PowerShell helpers were used during development but aren't useful to upstream maintainers. README loses the now-dangling CLAUDE.md link. --- CLAUDE.md | 77 ----------- README.md | 1 - scripts/debug-activation.ps1 | 254 ----------------------------------- scripts/dev-cycle.ps1 | 138 ------------------- 4 files changed, 470 deletions(-) delete mode 100644 CLAUDE.md delete mode 100644 scripts/debug-activation.ps1 delete mode 100644 scripts/dev-cycle.ps1 diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index ca0befd8b185..000000000000 --- a/CLAUDE.md +++ /dev/null @@ -1,77 +0,0 @@ -# CLAUDE.md - -Fork of [files-community/Files](https://github.com/files-community/Files) (C#/WinUI 3). Goal: faster, AI-augmented search without regressing the rest. - -## Search goals (hard constraints) - -1. **Faster.** Query latency ≤10% of Windows Search on equivalent corpora. -2. **No heavier.** RAM/disk/idle CPU ≤ upstream Files + Windows Search Indexer. -3. **No runtime UAC.** No admin prompts during normal use. The service is declared as a `desktop6:Service` in the MSIX manifest and installed by Windows at package install time (which already runs elevated). SCM manages it from there — no UAC at launch, ever. - -## Architecture - -`files-search-service.exe` is a pure C# Windows Service (`src/Files.SearchService/`) installed via the MSIX manifest (`desktop6:Service`, `StartAccount=localSystem`). SCM starts it at login. Files.App is a pure gRPC client over a named pipe — it never spawns or owns the process in packaged mode. 
- -- **Enumeration (initial):** NTFS USN Change Journal via `FSCTL_ENUM_USN_DATA` — reads the kernel's file-change log directly, indexes millions of files in seconds. Requires LocalSystem, provided by the SCM service registration. -- **Enumeration (fallback):** `DirectoryInfo.EnumerateFiles` with `RecurseSubdirectories=true`, `AttributesToSkip=ReparsePoint`. Used in dev/unpackaged mode when the volume handle can't be opened. -- **Updates:** `FileSystemWatcher` (`ReadDirectoryChangesW` under the hood), recursive, 250ms debounced commits via `EventBatcher`. Overflow triggers a full rebuild. -- **Throttle:** `PROCESS_MODE_BACKGROUND_BEGIN` at startup; 2s polling pauses commits on battery / fullscreen / high CPU. -- **Index:** in-memory inverted index (`Dictionary` posting lists, atomically swapped on rebuild) + trigram index for mid-string substrings. Filename-only in v0. Persisted to `index.bin` (custom binary format, magic `FSIX`) for fast restart with reconcile-against-disk diff. -- **Transport:** gRPC over named pipe `\\.\pipe\files-search` (Kestrel `ListenNamedPipe`). TCP loopback available via `FILES_SEARCH_SERVICE_URL` for dev/CI. - -## Coexistence - -All search goes through `ISearchProvider`. Two impls ship: - -- `LegacySearchProvider` — wraps upstream unchanged. Frozen reference; instrumentation only. -- `IndexedSearchProvider` — talks to the new service. - -Selected by `UseIndexedSearch` setting (Settings → Advanced) → env var `FILES_SEARCH_PROVIDER` → default. Default stays `Legacy` until benchmarks pass. `SearchRouter` falls back to legacy for glob (`*`/`?`), AQS (`$`/`:`), Home/library scopes, or when the service is unavailable. 
- -## Layout - -``` -src/Files.App/ UI, modified only to consume ISearchProvider -src/Files.SearchAbstraction/ interface + types -src/Files.LegacySearch/ upstream wrapper -src/Files.IndexedSearch.Client/ C# gRPC client -src/Files.SearchService/ C# Windows Service (the indexer) -tests/Files.Search.Correctness/ result equivalence -tests/Files.Search.Bench/ perf benchmarks -tests/Files.Search.Probe/ console probe / smoke harness -tests/corpora/ deterministic corpus generators -``` - -## Tests - -**Correctness.** For each `(corpus, query)`, indexed results ⊇ legacy results (modulo documented exclusions). Cases: exact, glob, substring, ext+substring, content, path-scoped, unicode, long paths, hidden/system/symlinks. - -**Benchmarks.** Three corpora generated deterministically: `small` (50k files, ~2GB), `medium` (500k, ~50GB), `large` (2M, ~500GB). ~200 queries per corpus. Per `(provider, corpus, query)` record: time-to-first-result, time-to-complete, peak RAM, CPU-seconds, bytes read. Indexing also tracks: cold-start time, steady-state RAM, index size on disk, incremental update latency. JSON to `bench-results/.json`. `run-bench.ps1` at repo root is the one-shot driver. - -**Acceptance gates** (vs. legacy baseline on `medium`): - -| Metric | Target | -|---|---| -| Time-to-first-result, median | ≤10% of legacy | -| Time-to-first-result, p99 | ≤15% of legacy | -| Steady-state RAM | ≤100% of legacy + indexer | -| Idle CPU (60s post-index) | ≤ legacy + indexer | -| Initial index time | ≤2x Windows Search | -| Incremental update p95 | ≤5s | - -Baseline pinned in `bench-results/baseline.json`, updated only by explicit decision. - -**Resource (nightly).** Battery/fullscreen/load throttling verified. No handle leaks over 1h. No memory growth over 24h soak. - -## Workflow - -- Correctness suite runs per-commit. Regressions block merge. -- `Bench --corpus small` per-commit; `medium` nightly. -- Legacy provider is frozen — instrumentation and upstream-mirrored bugfixes only. 
- -## See also - -- `docs/csharp-search-service.md` — full component-level architecture and file map. -- `docs/decisions/0001-bench-stack.md` — bench harness choice. -- `docs/decisions/0003-bench-strategy-theoretical.md` — Big-O-for-gates rationale. -- `docs/search-roadmap.md` — current status snapshot. diff --git a/README.md b/README.md index 264bc2c2bf29..120ada0b0ffd 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ See `docs/decisions/0003-bench-strategy-theoretical.md`. flow, file map. Start here if you're a maintainer. - **`docs/search-roadmap.md`** — current state and what's next. - **`docs/decisions/`** — ADRs for the technical choices. -- **`CLAUDE.md`** — the design constraints we held to. ## Trying it locally diff --git a/scripts/debug-activation.ps1 b/scripts/debug-activation.ps1 deleted file mode 100644 index 60928c0023a0..000000000000 --- a/scripts/debug-activation.ps1 +++ /dev/null @@ -1,254 +0,0 @@ -<# -.SYNOPSIS - Diagnose why packaged Files.exe activation isn't producing a process. - -.DESCRIPTION - Bundles every diagnostic we found useful while chasing the Win10 19045 - packaged-launch failure (see docs/packaged-build-debug-notes.md): - - 1. Kernel-process ETW trace — every CreateProcess in the window. - Definitive answer to "did Files.exe spawn at all?" - 2. AppXDeploymentServer log — register-loop / state-repo signals - (7028, 856/857, 603, 9621, 9626). - 3. AppModel-Runtime/Admin log — container lifecycle (210, 211, 212, 217). - 4. TWinUI/Operational log — activation success/failure (1621, 5961). - 5. .NET Runtime + Application Error log for any post-spawn crashes. - 6. WER LocalDumps state. - - All output is timestamped and saved to a single output directory you can - grep or compare across runs. - - Run from an ELEVATED PowerShell. The kernel ETW session requires admin. - -.PARAMETER Aumid - AppUserModelId to activate. Default: FilesDev_j4wp4nz5mtqsg!App. - -.PARAMETER Seconds - How long to watch after activation. Default: 10. 
- -.PARAMETER OutputDir - Where to write the trace + summary. Default: %TEMP%\files-activation-<yyyyMMdd-HHmmss>. - -.EXAMPLE - .\scripts\debug-activation.ps1 - .\scripts\debug-activation.ps1 -Seconds 20 - .\scripts\debug-activation.ps1 -Aumid 'Files_j4wp4nz5mtqsg!App' -#> -[CmdletBinding()] -param( - [string]$Aumid = 'FilesDev_j4wp4nz5mtqsg!App', - [int]$Seconds = 10, - [string]$OutputDir = (Join-Path $env:TEMP ("files-activation-" + (Get-Date -Format 'yyyyMMdd-HHmmss'))) -) - -$ErrorActionPreference = 'Stop' - -# --- Elevation check --- -$id = [System.Security.Principal.WindowsIdentity]::GetCurrent() -if (-not (New-Object System.Security.Principal.WindowsPrincipal($id)).IsInRole([System.Security.Principal.WindowsBuiltInRole]::Administrator)) { - throw 'Must run elevated. logman -ets requires admin.' -} - -New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null -function Write-Step($msg) { Write-Host "==> $msg" -ForegroundColor Cyan } -function Save-Section($name, $content) { - $path = Join-Path $OutputDir "$name.txt" - $content | Out-File -FilePath $path -Encoding UTF8 - Write-Host " -> $path" -} - -$etl = Join-Path $OutputDir 'kproc.etl' -$xml = Join-Path $OutputDir 'kproc.xml' -$startTime = Get-Date - -# --- Baseline event record IDs so we only collect new events --- -function Get-LastRecord($logName) { - try { - (Get-WinEvent -LogName $logName -MaxEvents 1 -ErrorAction Stop).RecordId - } catch { 0 } -} -$baselines = @{ - 'Microsoft-Windows-AppXDeploymentServer/Operational' = Get-LastRecord 'Microsoft-Windows-AppXDeploymentServer/Operational' - 'Microsoft-Windows-AppModel-Runtime/Admin' = Get-LastRecord 'Microsoft-Windows-AppModel-Runtime/Admin' - 'Microsoft-Windows-TWinUI/Operational' = Get-LastRecord 'Microsoft-Windows-TWinUI/Operational' - 'Application' = Get-LastRecord 'Application' -} - -# --- Kernel-process ETW trace --- -Write-Step 'Starting kernel-process ETW trace' -$null = logman stop kproc-debug -ets 2>&1 -$null = logman start kproc-debug -p
'Microsoft-Windows-Kernel-Process' 0x10 5 -ets -o $etl - -# --- Activate --- -Write-Step "Activating $Aumid" -Start-Process explorer.exe "shell:appsFolder\$Aumid" - -# --- Watch for Files.exe --- -$sw = [System.Diagnostics.Stopwatch]::StartNew() -$firstSeen = $null -$lastSeen = $null -while ($sw.Elapsed.TotalSeconds -lt $Seconds) { - $p = Get-Process Files -ErrorAction SilentlyContinue - if ($p) { - if (-not $firstSeen) { $firstSeen = $sw.Elapsed.TotalMilliseconds } - $lastSeen = $sw.Elapsed.TotalMilliseconds - } - Start-Sleep -Milliseconds 50 -} - -Write-Step 'Stopping trace' -$null = logman stop kproc-debug -ets - -# --- Convert kernel trace to XML --- -Write-Step 'Converting trace' -$null = tracerpt $etl -o $xml -of XML -y 2>&1 - -# --- Parse kernel-process events --- -Write-Step 'Parsing kernel events' -[xml]$traceXml = Get-Content $xml -$ns = New-Object System.Xml.XmlNamespaceManager($traceXml.NameTable) -$ns.AddNamespace('e','http://schemas.microsoft.com/win/2004/08/events/event') -$creates = $traceXml.SelectNodes('//e:Event[e:System/e:EventID=1]', $ns) -$exits = $traceXml.SelectNodes('//e:Event[e:System/e:EventID=2]', $ns) - -$pkgFamily = ($Aumid -split '!')[0] -$kprocReport = [System.Collections.Generic.List[string]]::new() -$kprocReport.Add("Total process creates in trace: $($creates.Count)") -$kprocReport.Add("Total process exits in trace: $($exits.Count)") -$kprocReport.Add('') -$kprocReport.Add("--- Creates matching FilesDev / Files.exe / files-search / RuntimeBroker ---") -$matched = $false -foreach ($n in $creates) { - $d = @{} - if ($n.EventData -and $n.EventData.Data) { foreach ($x_ in $n.EventData.Data) { $d[$x_.Name] = $x_."#text" } } - $img = $d['ImageName'] - if ($img -and ($img -match "$pkgFamily|\\Files\.exe|files-search|RuntimeBroker")) { - $kprocReport.Add((" PID {0,6} -> {1} (parent {2})" -f $d['ProcessID'], $img, $d['ParentProcessID'])) - $matched = $true - } -} -if (-not $matched) { $kprocReport.Add(' (no matching creates)') } 
-Save-Section 'kernel-process' ($kprocReport -join "`n") - -# --- Pull new events from each log since baseline --- -function Get-NewEvents($logName, $idsOfInterest) { - $base = $baselines[$logName] - try { - Get-WinEvent -LogName $logName -MaxEvents 200 -ErrorAction Stop | - Where-Object { $_.RecordId -gt $base } | - Sort-Object TimeCreated | - Where-Object { ($null -eq $idsOfInterest) -or ($_.Id -in $idsOfInterest) } - } catch { @() } -} - -# AppXDeploymentServer — focus on register-loop + manifest-parse + service errors -$deployEvents = Get-NewEvents 'Microsoft-Windows-AppXDeploymentServer/Operational' @(400, 401, 404, 603, 604, 607, 613, 649, 856, 857, 7028, 9621, 9626, 9627, 9644, 9647, 9650, 10000, 10001) | - Where-Object { $_.Message -match $pkgFamily.Split('_')[0] } -$deployReport = if ($deployEvents) { - ($deployEvents | ForEach-Object { - "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, (($_.Message -split '\r?\n')[0]) - }) -join "`n" -} else { '(no new events)' } -Save-Section 'appx-deployment' $deployReport - -# AppModel-Runtime/Admin — container lifecycle -$appmodelEvents = Get-NewEvents 'Microsoft-Windows-AppModel-Runtime/Admin' @(210, 211, 212, 217) | - Where-Object { $_.Message -match $pkgFamily.Split('_')[0] } -$appmodelReport = if ($appmodelEvents) { - ($appmodelEvents | ForEach-Object { - "{0:HH:mm:ss.fff} {1,-5} {2}" -f $_.TimeCreated, $_.Id, (($_.Message -split '\r?\n')[0]) - }) -join "`n" -} else { '(no new events)' } -Save-Section 'appmodel-runtime' $appmodelReport - -# TWinUI — activation attempts -$twinuiEvents = Get-NewEvents 'Microsoft-Windows-TWinUI/Operational' $null | - Where-Object { $_.Message -match $Aumid -or $_.Message -match $pkgFamily.Split('_')[0] } -$twinuiReport = if ($twinuiEvents) { - ($twinuiEvents | ForEach-Object { - "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, (($_.Message -split '\r?\n')[0]) - }) -join "`n" -} else { '(no new events)' } 
-Save-Section 'twinui' $twinuiReport - -# Application log — .NET Runtime + Application Error -$appLogEvents = Get-NewEvents 'Application' @(1000, 1026) | - Where-Object { $_.Message -match 'Files\.exe|files-search' } -$appLogReport = if ($appLogEvents) { - ($appLogEvents | ForEach-Object { - "{0:HH:mm:ss.fff} {1,-5} {2,-7} {3}`n----`n{4}`n----" -f $_.TimeCreated, $_.Id, $_.LevelDisplayName, $_.ProviderName, $_.Message - }) -join "`n`n" -} else { '(no Files-related errors in Application log)' } -Save-Section 'application-log' $appLogReport - -# WER LocalDumps state -$werReport = @() -$key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\Files.exe' -$werCfg = Get-ItemProperty $key -ErrorAction SilentlyContinue -if ($werCfg) { - $werReport += "LocalDumps configured for Files.exe:" - $werReport += " DumpFolder: $($werCfg.DumpFolder)" - $werReport += " DumpType: $($werCfg.DumpType)" - $werReport += " DumpCount: $($werCfg.DumpCount)" -} else { - $werReport += "LocalDumps NOT configured for Files.exe (HKLM\...\WER\LocalDumps\Files.exe missing)" -} -$werReport += '' -$werReport += '--- Recent dumps in C:\CrashDumps ---' -$dumps = Get-ChildItem C:\CrashDumps -Filter 'Files*.dmp' -ErrorAction SilentlyContinue | - Where-Object { $_.LastWriteTime -gt $startTime.AddMinutes(-1) } -if ($dumps) { - foreach ($d in $dumps) { $werReport += (" {0} {1:N0} bytes {2}" -f $d.Name, $d.Length, $d.LastWriteTime) } -} else { - $werReport += ' (no new dumps since trace start)' -} -Save-Section 'wer-state' ($werReport -join "`n") - -# --- Build summary --- -$summary = [System.Collections.Generic.List[string]]::new() -$summary.Add("# Activation diagnostic summary") -$summary.Add("AUMID: $Aumid") -$summary.Add("Watched: $Seconds seconds") -$summary.Add("Output dir: $OutputDir") -$summary.Add('') -$summary.Add("## Files.exe process state") -if ($firstSeen) { - $summary.Add(" SPAWNED at T+$([int]$firstSeen)ms, last seen at T+$([int]$lastSeen)ms") - $stillAlive = Get-Process 
Files -ErrorAction SilentlyContinue - $summary.Add(" Currently alive: $($null -ne $stillAlive)") -} else { - $summary.Add(" NEVER spawned in $Seconds seconds") -} -$summary.Add('') -$summary.Add("## Files-related processes seen in kernel trace") -if ($matched) { - foreach ($n in $creates) { - $d = @{} - if ($n.EventData -and $n.EventData.Data) { foreach ($x_ in $n.EventData.Data) { $d[$x_.Name] = $x_."#text" } } - $img = $d['ImageName'] - if ($img -and ($img -match "$pkgFamily|\\Files\.exe|files-search|RuntimeBroker")) { - $summary.Add(" PID $($d['ProcessID']) -> $img") - } - } -} else { - $summary.Add(" (none)") -} -$summary.Add('') -$summary.Add("## Event counts") -$summary.Add(" AppXDeploymentServer (filtered): $($deployEvents.Count)") -$summary.Add(" AppModel-Runtime/Admin: $($appmodelEvents.Count)") -$summary.Add(" TWinUI/Operational: $($twinuiEvents.Count)") -$summary.Add(" Application (.NET / WER): $($appLogEvents.Count)") -$summary.Add('') -$summary.Add("Detailed per-source reports written to:") -$summary.Add(" $OutputDir\kernel-process.txt") -$summary.Add(" $OutputDir\appx-deployment.txt") -$summary.Add(" $OutputDir\appmodel-runtime.txt") -$summary.Add(" $OutputDir\twinui.txt") -$summary.Add(" $OutputDir\application-log.txt") -$summary.Add(" $OutputDir\wer-state.txt") - -$summaryText = $summary -join "`n" -Save-Section 'summary' $summaryText -Write-Host '' -Write-Host $summaryText diff --git a/scripts/dev-cycle.ps1 b/scripts/dev-cycle.ps1 deleted file mode 100644 index 509545aeb589..000000000000 --- a/scripts/dev-cycle.ps1 +++ /dev/null @@ -1,138 +0,0 @@ -<# -.SYNOPSIS - One-shot packaged-build test cycle: stop service, uninstall, build, install, activate. - -.DESCRIPTION - Replaces the ~10-step manual sequence with a single command. Useful when - iterating on packaging-affecting changes (manifest, csproj content staging, - AppxBundlePlatforms, certificates, etc.). - - Run from an ELEVATED PowerShell. Add-AppxPackage of packaged services - requires admin. 
- -.PARAMETER Platform - x64 (default), x86, or arm64. - -.PARAMETER Configuration - Release (default) or Debug. - -.PARAMETER NoActivate - Skip the post-install Start-Process. Useful for CI / scripted runs. - -.PARAMETER SkipBuild - Reuse the bundle already at artifacts/AppxPackages/. Useful when the - build is unchanged and you just want to re-install. - -.EXAMPLE - .\scripts\dev-cycle.ps1 - .\scripts\dev-cycle.ps1 -Platform x86 - .\scripts\dev-cycle.ps1 -SkipBuild -NoActivate -#> -[CmdletBinding()] -param( - [ValidateSet('x64', 'x86', 'arm64')] - [string]$Platform = 'x64', - - [ValidateSet('Release', 'Debug')] - [string]$Configuration = 'Release', - - [switch]$NoActivate, - [switch]$SkipBuild -) - -$ErrorActionPreference = 'Stop' -$repoRoot = Split-Path -Parent $PSScriptRoot -$msbuild = 'C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe' -$bundle = Join-Path $repoRoot "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Files.App_4.1.0.0_$Platform.msixbundle" -$depMsix = Join-Path $repoRoot "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Dependencies\$Platform\Microsoft.WindowsAppRuntime.1.8.msix" -$aumid = 'FilesDev_j4wp4nz5mtqsg!App' - -function Write-Step($msg) { Write-Host "==> $msg" -ForegroundColor Cyan } - -# --- Elevation check --- -$id = [System.Security.Principal.WindowsIdentity]::GetCurrent() -if (-not (New-Object System.Security.Principal.WindowsPrincipal($id)).IsInRole([System.Security.Principal.WindowsBuiltInRole]::Administrator)) { - throw 'Must run elevated. Re-launch PowerShell as Administrator.' 
-} - -# --- Stop service --- -Write-Step 'Stopping FilesSearchService (if running)' -Stop-Service FilesSearchService -Force -ErrorAction SilentlyContinue -Start-Sleep -Milliseconds 500 - -# --- Uninstall existing --- -Write-Step 'Uninstalling existing FilesDev' -Get-AppxPackage FilesDev -AllUsers -ErrorAction SilentlyContinue | - Remove-AppxPackage -AllUsers -ErrorAction SilentlyContinue -Start-Sleep -Seconds 1 - -# --- Build --- -if (-not $SkipBuild) { - Write-Step "Building $Configuration|$Platform" - Push-Location $repoRoot - try { - $args = @( - 'src/Files.App/Files.App.csproj', '-t:Build', - "-p:Platform=$Platform", - "-p:Configuration=$Configuration", - "-p:AppxBundlePlatforms=$Platform", - "-p:AppxPackageDir=$repoRoot\artifacts\AppxPackages\", - '-p:AppxBundle=Always', - '-p:UapAppxPackageBuildMode=SideloadOnly', - '-p:GenerateAppxPackageOnBuild=true', - '-p:AppxPackageSigningEnabled=true', - '-p:PackageCertificateKeyFile=src\Files.App\Files.App_TemporaryKey.pfx', - '-v:minimal' - ) - & $msbuild @args | Tee-Object -Variable buildLog | Out-Null - if ($LASTEXITCODE -ne 0) { - # First pass can fail on a clean tree with manifest-validation error - # (the Content Condition=Exists race). Retry once — the SearchService - # output exists on disk from the failed first attempt. 
- $manifestRace = $buildLog -match 'doesn''t exist in the package' - if ($manifestRace) { - Write-Host ' (manifest-validation race on first pass — retrying)' -ForegroundColor Yellow - & $msbuild @args | Out-Host - } - if ($LASTEXITCODE -ne 0) { - throw "Build failed with exit $LASTEXITCODE" - } - } - } finally { - Pop-Location - } -} - -# --- Install --- -Write-Step "Installing $(Split-Path -Leaf $bundle)" -if (-not (Test-Path $bundle)) { - throw "Bundle not found: $bundle" -} -Add-AppxPackage -Path $bundle -DependencyPath $depMsix -ErrorAction Stop -Start-Sleep -Seconds 1 - -$pkg = Get-AppxPackage FilesDev -AllUsers | Select-Object -First 1 -Write-Host " Installed: $($pkg.PackageFullName)" -ForegroundColor Green - -# --- Activate --- -if (-not $NoActivate) { - Write-Step 'Activating' - Start-Process explorer.exe "shell:appsFolder\$aumid" - - $sw = [System.Diagnostics.Stopwatch]::StartNew() - $seen = $false - while ($sw.Elapsed.TotalSeconds -lt 8) { - $p = Get-Process Files -ErrorAction SilentlyContinue - if ($p -and -not $seen) { - Write-Host (" Files.exe PID={0} caught at T+{1:F0}ms" -f ($p.Id -join ','), $sw.Elapsed.TotalMilliseconds) -ForegroundColor Green - $seen = $true - break - } - Start-Sleep -Milliseconds 50 - } - if (-not $seen) { - Write-Host ' Files.exe never observed. Run scripts\debug-activation.ps1 to diagnose.' 
-ForegroundColor Yellow - } -} - -Write-Step 'Done' From 2129108f3dd3907ff1485b1457059202200e85e7 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Tue, 19 May 2026 21:31:51 -0400 Subject: [PATCH 08/10] Docs: remove personal handoff notes and roadmap scratchpad --- docs/packaged-build-debug-notes.md | 199 ----------------------------- docs/search-roadmap.md | 76 ----------- 2 files changed, 275 deletions(-) delete mode 100644 docs/packaged-build-debug-notes.md delete mode 100644 docs/search-roadmap.md diff --git a/docs/packaged-build-debug-notes.md b/docs/packaged-build-debug-notes.md deleted file mode 100644 index 9d8e8839f828..000000000000 --- a/docs/packaged-build-debug-notes.md +++ /dev/null @@ -1,199 +0,0 @@ -# Packaged build + launch — open issues - -Snapshot of unresolved problems found while validating the packaged -(MSIX + SCM) path on `feature/csharp-search-service` as of 2026-05-13. -Pick up here next session. - -## What we proved works - -1. **MSIX builds end-to-end** via msbuild CLI (see "Reproducing the build" below). -2. **SCM picks up the `desktop6:Service` declaration** at install time: - `Get-Service FilesSearchService` → `Running`, `LocalSystem`, auto-start. -3. **Named-pipe DACL allows cross-context user → SYSTEM connect** after - the fix in `src/Files.SearchService/Program.cs:CreatePipeSecurity`: - added `PipeAccessRights.Synchronize` to the AuthenticatedUsers rule. - `NamedPipeClientStream` with `PipeOptions.Asynchronous` needs - Synchronize to wait on the pipe handle for overlapped I/O — - `ReadWrite` alone throws `UnauthorizedAccessException`. - -## Open issues - -### Issue 1 — Files.exe exits silently on packaged launch - -**Symptom.** Double-click Start menu icon → no window appears. Files.exe -process is created (event log shows AppX container creation + process -add) and torn down in the same second. No crash dump, no event log -error, no first-chance exception logged. 
- -**Diagnostic timeline.** - -| Time | State | Finding | -|---|---|---| -| First launch | DeploymentManager auto-init | `COMException 0x80040154 (REGDB_E_CLASSNOTREG)` activating `Microsoft.Windows.ApplicationModel.WindowsAppRuntime.DeploymentInitializeOptions`. Stack: `DeploymentManagerCS.AutoInitialize.Access`. | -| Mitigation 1 | Set `false` in `Files.App.csproj` | Confirmed the generated init code is no longer in `obj/`. The DeploymentManager crash is fixed. | -| Second launch | Different failure mode | Container created+destroyed same second. No managed exception, no WER dump (verified with WER LocalDumps registry set to capture *all* Files.exe crashes — `C:\CrashDumps` stays empty). | -| Mitigation 2 | Added file-based logging to `Program.cs` static constructor | Log file (`%TEMP%\files-startup.log`) **never written** — the process exits before the static constructor runs. | - -**Conclusion.** The remaining exit happens *before any managed code in -the Files.App assembly executes* — apphost or .NET runtime -initialization phase. The diagnostic logging in `Program.cs` static -ctor was reverted before commit since it never fired. - -**Hypotheses, ranked.** - -1. **Apphost can't find a required native dependency.** Possible - candidates: `Microsoft.WindowsAppRuntime.Bootstrap.dll`, - `Microsoft.ui.xaml.dll`, or one of the WinAppSDK projection DLLs. - The framework MSIX dependency *did* install (verified - `Microsoft.WindowsAppRuntime.1.8 8000.836.2153.0 X64` present), so - if a DLL is missing it's a load-path issue, not an absence issue. -2. **.NET 10 runtime config mismatch.** `Files.exe` is a - framework-dependent apphost; if `Files.runtimeconfig.json` points at - a runtime version not installed in the WindowsApps install - location, the apphost would error out. -3. **Self-instance redirect on stale kernel state.** `Program.cs:27-46` - opens a named semaphore `Files-{Env}-Instance` and exits if - `isNew=false`. 
A stuck kernel handle from a previously crashed - Files.exe could make this fire silently. **Ruled out** — the - diagnostic logging would have caught this, and it didn't fire, - meaning the exit is upstream of the static ctor. -4. **Single-Project MSIX vs. solution layout disagreement.** The csproj - has `true` but build is - driven from the .csproj directly, not from a separate `.wapproj`. - Possible config inconsistency between what MSBuild expects for - resource generation vs. what ends up in the AppxManifest. - -**Concrete next steps to try.** - -- Enable CLR startup ETW tracing: - `logman start clrstart -p "Microsoft-Windows-DotNETRuntime" 0x4 0x5 - -ets -o C:\clrstart.etl`, launch Files, stop the trace, open in PerfView. - This will show whether the runtime even starts. -- Inspect installed package layout for missing DLLs: - `Get-ChildItem 'C:\Program Files\WindowsApps\FilesDev_*' -Recurse - -Filter '*.dll'` vs. the build output. Diff for missing entries. -- Run `Files.exe` from inside a packaged-identity wrapper that captures - stderr. `Invoke-CommandInDesktopPackage` crashed itself on this - machine (Win10 19045 bug); use `psexec -i ` or a manual COM - activation via `IDesktopAppXActivator` from a small C# host instead. -- Check `Files.runtimeconfig.json` in the installed package against the - `.NET 10.0.7` runtime declared by WER (`CoreCLR Version: 10.0.726.21808`). - -**What we know it's not.** -- Not our search code. Reverting `AppLifecycleHelper.cs` to upstream - (drop the `Task.Run(SearchServiceManager.EnsureRunning)` line) and - rebuilding the bundle reproduces the same silent exit. Pre-existing. -- Not DACL/SCM. Those work — see "What we proved" above. -- Not the framework dependency. The MSIX install registers the package - with the correct `WindowsAppRuntime.1.8` framework, and resolves to - the installed `8000.836.2153.0` build. 
- -### Issue 2 — First MSBuild pass fails manifest validation - -**Symptom.** - -``` -MakeAppx : error : Manifest validation error: Line 101, Column 56, -Reason: The file name "SearchService\files-search-service.exe" -declared for element ... doesn't exist in the package. -MakeAppx : error : 0x80080204 - The specified package format is not valid -``` - -**Root cause.** In `src/Files.App/Files.App.csproj` the `` glob that stages the service -binary into the MSIX has a `Condition="Exists(...)"` predicate. MSBuild -evaluates `Exists()` during the static-evaluation phase, *before any -project is built.* On a clean tree, `Files.SearchService\bin\...` -doesn't exist yet, so the Content items get dropped — but the manifest -still references the path, and MakeAppx fails. - -A second `msbuild` invocation right after the failed one succeeds -because the SearchService output now exists on disk from the prior -attempt. **The build is non-deterministic on a clean tree.** - -**Fix.** Remove the `Condition="Exists(...)"` — the -`` on `Files.SearchService.csproj` (with -`ReferenceOutputAssembly="false"`) is already a build dependency, so -the output is guaranteed to exist by the time Content evaluates *if* -we let MSBuild order things correctly. Alternative: move the staging -into a `` block that copies -the binaries on demand. - -This wasn't fixed in the committed change because it requires testing -on a clean tree and we wanted to bank progress first. - -### Issue 3 — v143 platform toolset not installed - -**Symptom.** - -``` -The build tools for Visual Studio 2022 (Platform Toolset = 'v143') -cannot be found. To build using the v143 build tools, please install -Visual Studio 2022 build tools. -``` - -**Root cause.** The C++ launcher (`src/Files.App.Launcher/Files.App.Launcher.vcxproj`) -declares `v143`. This machine has VS 2026 -(toolset v145) installed; v143 isn't present. - -**Workarounds.** -1. Install the VS 2022 Build Tools side-by-side with VS 2026. -2. 
Edit the vcxproj to `v145` locally (matches `project_build_env` - memory note: "two upstream divergences in `Files.App.Launcher`"). - Don't commit this — it'd break CI which uses v143. - -Memory entry already exists at `project_build_env.md` covering the -related stdcpp20 + towupper divergences. Worth extending to mention -the toolset version pin. - -## Reproducing the build - -```powershell -# 1. Restore. -& "C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe" ` - src/Files.App/Files.App.csproj -t:Restore -p:Platform=x64 -p:Configuration=Release - -# 2. First build — may fail with the manifest validation error above. -& "C:\Program Files\Microsoft Visual Studio\18\Insiders\MSBuild\Current\Bin\MSBuild.exe" ` - src/Files.App/Files.App.csproj -t:Build ` - -p:Platform=x64 -p:Configuration=Release -p:AppxBundlePlatforms=x64 ` - -p:AppxPackageDir="$pwd\artifacts\AppxPackages\" ` - -p:AppxBundle=Always -p:UapAppxPackageBuildMode=SideloadOnly ` - -p:GenerateAppxPackageOnBuild=true -p:AppxPackageSigningEnabled=true ` - -p:PackageCertificateKeyFile="src\Files.App\Files.App_TemporaryKey.pfx" ` - -v:minimal - -# 3. If step 2 failed with manifest validation, run it again — second pass -# succeeds because Files.SearchService output now exists on disk. - -# 4. Install (admin PowerShell): -Add-AppxPackage -Path "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Files.App_4.1.0.0_x64.msixbundle" ` - -DependencyPath "artifacts\AppxPackages\Files.App_4.1.0.0_Test\Dependencies\x64\Microsoft.WindowsAppRuntime.1.8.msix" -``` - -## State of the validation - -- Search-service infrastructure: **proven** in packaged mode. -- Files.App launch from packaged install: **broken**, pre-existing, root - cause unknown. This is a ship-blocker for any release that uses the - packaged path, but does not block sending the PR upstream — the Files - team's CI builds packaged Files routinely and would not see this - machine-local failure. 
- -## Useful one-liners for next session - -```powershell -# Latest crashes for Files.exe (bypasses WER dedup if LocalDumps is set): -$base = "HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\Files.exe" -Get-ItemProperty $base -ls C:\CrashDumps - -# Force a fresh launch and watch for activation events: -Start-Process 'shell:appsFolder\FilesDev_j4wp4nz5mtqsg!App' -Start-Sleep 5 -Get-WinEvent -LogName 'Microsoft-Windows-AppModel-Runtime/Admin' -MaxEvents 10 | - Where-Object { $_.TimeCreated -gt (Get-Date).AddMinutes(-1) -and $_.Id -in 211,212,217 } - -# Real AUMID (publisher hash varies if cert changes): -Get-StartApps | Where-Object Name -like '*Files - Dev*' -``` diff --git a/docs/search-roadmap.md b/docs/search-roadmap.md deleted file mode 100644 index a87984267eff..000000000000 --- a/docs/search-roadmap.md +++ /dev/null @@ -1,76 +0,0 @@ -# Search rewrite — roadmap - -Status snapshot for the C# search service on `feature/csharp-search-service`. -`CLAUDE.md` has the constraints; `docs/csharp-search-service.md` has the -full architecture and file map. This file is just *where we are*. - -## Done - -- `Files.SearchAbstraction` — `ISearchProvider`, `SearchQuery`, `SearchResult`, - `HealthStatus` (`net10.0`, no Windows deps). -- `Files.LegacySearch` — `LegacySearchProvider` wraps `Windows.Storage.Search` - (AQS) behind `ISearchProvider`. Frozen reference. -- `Files.IndexedSearch.Client` — gRPC client over named pipe - (`\\.\pipe\files-search`); TCP loopback fallback via - `FILES_SEARCH_SERVICE_URL`. Stubs generated from - `src/Files.SearchService/proto/files_search.proto` (single source of truth). -- `Files.SearchService` — C# Windows Service. In-memory inverted index - (`Dictionary` posting lists, atomic swap on rebuild) + - trigram index for mid-string substrings. `DocStore` parallel arrays. - `IndexBootstrapper` does USN-or-fallback enumeration with warm-start - reconcile against `index.bin`. `ChangeWatcher` + `EventBatcher` 250ms - debounce. 
`ProcessThrottle` background priority + battery/fullscreen/CPU - polling. Kestrel gRPC on named pipe with DACL granting AuthenticatedUsers RW. -- `Files.App` — `SearchRouter` drop-in for `FolderSearch`. Settings UI toggle - `UseIndexedSearch` in Settings → Advanced. `SearchServiceManager` ensures - the service is running (SCM in packaged mode; HKCU\Run + direct launch - in dev). -- `Package.appxmanifest` — `desktop6:Service`, `StartAccount=localSystem`, - `StartType=auto`. -- Bench harness: `run-bench.ps1` (build → start service → run bench → - gate check). `naive-scan`, `legacy`, `indexed` providers in - `tests/Files.Search.Bench/`. - -## Bench, small corpus (50k files, 2026-05-12) - -`bench-results/baseline.json` — pinned. - -| Provider | TTFR p50 | TTFR p99 | Total p50 | Total p99 | -|--------------|---------:|---------:|----------:|----------:| -| legacy AQS\* | 2025 ms | — | 2380 ms | — | -| indexed | 11 ms | 88 ms | 40 ms | 210 ms | -| naive-scan | ~0 ms | 48 ms | 44 ms | 8329 ms | - -\* Legacy AQS measured on the 5k smoke run; full 50k legacy run deferred per -ADR 0003 (≥80 min wall time on a corpus outside the Windows Search Indexer -catalog tells us nothing new). - -**Gate result:** TTFR median 11 ms / 2025 ms = 0.5% (gate: ≤10%). ✓ - -## Next, in order - -See `memory/project_search_pr_punchlist.md` for the full P0/P1/P2 list -before sending to the Files team. Highlights: - -**P0 — blocking PR** -1. Validate packaged SCM path end-to-end (named pipe + LocalSystem). - Dev mode (TCP) works; packaged path never verified on this machine. -2. Commit `tests/Files.Search.Correctness/`, `tests/Files.Search.Bench/`, - and `tests/Files.Search.Probe/` (currently untracked). -**P1 — quality** -4. Index corruption recovery — on `LoadAsync` failure, delete `index.bin` - and fall through to fresh build (currently crashes on bad magic/version). -5. Refresh `_serviceAvailable` cache periodically (60s timer) so - service-came-back transitions are detected. -6. 
Root-cause the NRE in `BaseLayoutPage.cs:620` (band-aided with - `.Where(x => x is not null)`). -7. Surface service status (running, file count, indexing state, last - update) in Settings UI. - -**P2 — future scope** -- Token prefix matching (so `test` matches `testing` via tokens, not just - trigrams). -- Pagination / cursor for >200 results. -- Memory budget tuning (1.2 GB for 1M files; trigram index dominates). -- Content search foundation prep (filename-only today). -- Library and Home scope fan-out to the indexed provider. From 8c56f4e97e60ecb2df6c963ec87b14113c53f349 Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Tue, 26 May 2026 11:29:39 -0400 Subject: [PATCH 09/10] Build: restore upstream signing, publisher, and platform settings Reverts five stray local-dev artifacts on top of upstream files-community/Files without touching any search-service wiring or DeploymentManager bug-fix. - Package.appxmanifest: Publisher CN=Tommy -> CN=Files - Package.appxmanifest: strip UTF-8 BOM - Files.App.csproj: AppxBundlePlatforms restored to x86|x64|arm64 - Files.App.csproj: remove AppxPackageSigningEnabled + local cert thumbprint - Files.App.csproj: restore 3-line MSBuild element formatting --- src/Files.App/Files.App.csproj | 8 ++++---- src/Files.App/Package.appxmanifest | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Files.App/Files.App.csproj b/src/Files.App/Files.App.csproj index bccb72bed49d..e1f50f55e6f3 100644 --- a/src/Files.App/Files.App.csproj +++ b/src/Files.App/Files.App.csproj @@ -17,7 +17,7 @@ False Always False - x86|x64 + x86|x64|arm64 0 Enable app.manifest @@ -45,8 +45,6 @@ $(DefineConstants);DISABLE_XAML_GENERATED_MAIN - True - E4E9B3B381A8CA7392F519B9408E563869E57285 - + diff --git a/src/Files.App/Package.appxmanifest b/src/Files.App/Package.appxmanifest index 14fe8116c5ed..8fd84687f313 100644 --- a/src/Files.App/Package.appxmanifest +++ b/src/Files.App/Package.appxmanifest @@ -1,4 
+1,4 @@ - + From 02bd0ffccbe91ab234f46710d1d07d65fdb8899b Mon Sep 17 00:00:00 2001 From: Tommy Le <82196633+Wingingbump@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:07:02 -0400 Subject: [PATCH 10/10] Chore: strip fork-only artifacts for upstream PR Remove fork README, revert Files.App.Launcher build-env workarounds (towupper / stdcpp20), and drop the .claude/ gitignore entry so the diff against upstream contains only the search-service feature. --- .gitignore | 3 - README.md | 67 ------------------- .../Files.App.Launcher.vcxproj | 2 +- src/Files.App.Launcher/FilesLauncher.cpp | 4 +- 4 files changed, 3 insertions(+), 73 deletions(-) delete mode 100644 README.md diff --git a/.gitignore b/.gitignore index 2c9ecc8b8a06..eb1e46db0ea9 100644 --- a/.gitignore +++ b/.gitignore @@ -416,6 +416,3 @@ src/Files.App/Assets/FilesOpenDialog/Files.App.Launcher.exe.sha256 bench-results/* !bench-results/baseline.json .smoke/ - -# Claude Code local settings (per-user, not for the repo). -.claude/ diff --git a/README.md b/README.md deleted file mode 100644 index 120ada0b0ffd..000000000000 --- a/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# Files (search-rewrite fork) - -Fork of [files-community/Files](https://github.com/files-community/Files) -exploring a faster search backend. - -## What's different in this fork - -A separate C# Windows Service (`files-search-service.exe`) maintains an -in-memory inverted + trigram filename index over the user's home directory, -with a `ReadDirectoryChangesW` watcher and process throttling so it stays -out of the way. Files.App talks to it over gRPC via a new `ISearchProvider` -interface. The existing `Windows.Storage.Search` path is preserved as the -default provider; the new path is opt-in via the **Use indexed search** -toggle in Settings → Advanced (or `FILES_SEARCH_PROVIDER=Indexed`). - -On a 5,000-file benchmark, the indexed provider answers substring queries -**~595× faster** than the legacy fallback path. 
Big-O analysis projects the -gap to widen at larger scales (legacy is `O(N)` per query when the path -isn't in the Windows Search Indexer's catalog; indexed is `O(log N)` always). -See `docs/decisions/0003-bench-strategy-theoretical.md`. - -## Status - -**Working PoC on `feature/csharp-search-service`.** - -- ✅ C# search service: USN enumerator + inverted/trigram index + watcher + throttling -- ✅ C# abstraction, legacy wrapper, indexed gRPC client over named pipe -- ✅ Bench harness with JSON output -- ✅ Wired into Files.App via `SearchRouter`, default behavior unchanged -- ✅ Settings UI toggle in Settings → Advanced -- ⏳ Packaged SCM end-to-end validation, content indexing — see - `docs/search-roadmap.md` - -## Where to read - -- **`docs/csharp-search-service.md`** — full architecture: components, data - flow, file map. Start here if you're a maintainer. -- **`docs/search-roadmap.md`** — current state and what's next. -- **`docs/decisions/`** — ADRs for the technical choices. - -## Trying it locally - -```powershell -# 1. Generate the small corpus (one-time, ~2 GB): -dotnet run --project tests\corpora -- --preset small --out .bench\small - -# 2. Full bench: builds, starts the service, runs naive-scan + indexed, -# gate-checks against bench-results/baseline.json: -.\run-bench.ps1 - -# Or run the service manually in dev console mode: -$env:FILES_SEARCH_ROOT = ".bench\small" -$env:FILES_SEARCH_INDEX_DIR = ".bench\index" -dotnet run --project src\Files.SearchService -c Release - -# Then launch Files.App from VS; set the toggle in Settings → Advanced, -# or override with $env:FILES_SEARCH_PROVIDER = "Indexed". -``` - -Default users (no toggle, no env var) get the existing search path, -byte-identical to upstream. - -## Upstream - -For everything else — features, bug reports, releases — see the -upstream repo: . This fork -is scoped to the search exploration; we don't carry other changes. 
diff --git a/src/Files.App.Launcher/Files.App.Launcher.vcxproj b/src/Files.App.Launcher/Files.App.Launcher.vcxproj index 93862737d1f1..9fc73ecd9115 100644 --- a/src/Files.App.Launcher/Files.App.Launcher.vcxproj +++ b/src/Files.App.Launcher/Files.App.Launcher.vcxproj @@ -57,7 +57,7 @@ Level3 true true - stdcpp20 + stdcpp17 Windows diff --git a/src/Files.App.Launcher/FilesLauncher.cpp b/src/Files.App.Launcher/FilesLauncher.cpp index 07876e022a04..5b8108d7a482 100644 --- a/src/Files.App.Launcher/FilesLauncher.cpp +++ b/src/Files.App.Launcher/FilesLauncher.cpp @@ -278,8 +278,8 @@ size_t strifind(const std::wstring& strHaystack, const std::wstring& strNeedle) bool comparei(std::wstring stringA, std::wstring stringB) { - std::transform(stringA.begin(), stringA.end(), stringA.begin(), ::towupper); - std::transform(stringB.begin(), stringB.end(), stringB.begin(), ::towupper); + transform(stringA.begin(), stringA.end(), stringA.begin(), std::toupper); + transform(stringB.begin(), stringB.end(), stringB.begin(), std::toupper); return (stringA == stringB); }