From 2f9924289922fdc467ce6824291df05cfce84a7b Mon Sep 17 00:00:00 2001 From: Roman Fedorov Date: Mon, 22 Jun 2026 14:35:09 +0300 Subject: [PATCH 1/4] fix(cli): package languages/ corpus into the crate for crates.io --doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc corpus (`--doc HK` etc.) is embedded at build time by build.rs walking the repo-root `languages/` tree. That tree lives outside the crate, so it is not in the crates.io tarball — `cargo install code-ranker` from crates.io produced a binary with an EMPTY corpus and `--doc` reported "not embedded". Mirror the existing per-crate README copy: the publish workflow now copies `languages/` into a package-local `crates/code-ranker-cli/languages/` right before `cargo publish` (--allow-dirty tolerates it), and build.rs prefers that package-local copy, falling back to the repo-root tree for workspace/dev builds. No symlinks; the package-local copy is materialized only at publish time. Verified: `cargo package --list` includes all 66 corpus .md files; a build from the package-local copy serves `--doc HK`. Claude-Session: https://claude.ai/code/session_013R7NfedZh9uEkUQrSRGWR7 --- .github/workflows/crates-io.yml | 6 ++++++ crates/code-ranker-cli/build.rs | 32 +++++++++++++++++++------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/.github/workflows/crates-io.yml b/.github/workflows/crates-io.yml index a6459430..140e0162 100644 --- a/.github/workflows/crates-io.yml +++ b/.github/workflows/crates-io.yml @@ -37,6 +37,12 @@ jobs: # right before publishing; cargo auto-detects a package-root README.md # when no `readme` field is set. `--allow-dirty` tolerates the copy. for d in crates/*/; do cp README.md "$d/README.md"; done + # Same trick for the `--doc` corpus: the languages/ tree lives at the + # repo root (outside the crate, so not in the published tarball by + # default). 
Copy it into the CLI crate so `cargo publish` packs it and + # `cargo install code-ranker` embeds the full corpus; build.rs prefers + # this package-local copy over the repo-root tree. `--allow-dirty` covers it. + cp -R languages crates/code-ranker-cli/languages for c in code-ranker-plugin-api code-ranker-graph code-ranker-plugins code-ranker-viewer code-ranker; do echo "==> publishing $c" attempt=0 diff --git a/crates/code-ranker-cli/build.rs b/crates/code-ranker-cli/build.rs index f4f13931..d6ff3f83 100644 --- a/crates/code-ranker-cli/build.rs +++ b/crates/code-ranker-cli/build.rs @@ -5,13 +5,14 @@ //! can serve a principle's Markdown (e.g. `--doc HK`) from the binary itself with //! no filesystem at runtime. Dependency-free (no `include_dir` crate). //! -//! The corpus lives at the repo root (`../../languages`), OUTSIDE this crate, so it -//! is NOT in the published crate tarball. A workspace build (the prebuilt binaries -//! shipped via the installer / npm / PyPI / Docker / GitHub Release) finds it and -//! embeds the full corpus; an ISOLATED build (`cargo publish` verify, or -//! `cargo install code-ranker` from crates.io source) won't — so the corpus is -//! resolved best-effort and absence yields an EMPTY corpus (never a build failure). -//! `--doc` then reports "not embedded" on such builds; everything else works. +//! The single source of truth lives at the repo root (`../../languages`), OUTSIDE +//! this crate. So that `cargo install code-ranker` from crates.io still embeds the +//! corpus, the publish workflow copies that tree into a package-local `languages/` +//! right before `cargo publish` (mirroring the per-crate README copy) — and this +//! build script prefers that package-local copy, falling back to the repo-root tree +//! for workspace/dev builds. If NEITHER exists (an unexpected isolated build) the +//! corpus resolves best-effort to EMPTY (never a build failure); `--doc` then reports +//! "not embedded" while everything else works. 
use std::path::{Path, PathBuf}; use std::{env, fs}; @@ -20,9 +21,13 @@ fn main() { let manifest = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR"); let mut entries: Vec<(String, PathBuf)> = Vec::new(); - // Best-effort: a missing corpus (isolated/published build) is NOT an error — - // it must never break `cargo publish`/`cargo install`. See module docs. - match Path::new(&manifest).join("../../languages").canonicalize() { + // Prefer the package-local copy (present in the published tarball), else the + // repo-root tree (workspace/dev builds). Best-effort: a missing corpus is NOT + // an error — it must never break `cargo publish`/`cargo install`. See module docs. + let local = Path::new(&manifest).join("languages"); + let root = Path::new(&manifest).join("../../languages"); + let resolved = local.canonicalize().or_else(|_| root.canonicalize()); + match resolved { Ok(corpus) => { // Re-run when the tree changes (added/removed files) and on any file edit. println!("cargo:rerun-if-changed={}", corpus.display()); @@ -31,9 +36,10 @@ fn main() { } Err(_) => { println!( - "cargo:warning=languages/ corpus not found (isolated build, e.g. \ - `cargo install code-ranker` from crates.io) — embedding an empty corpus; \ - `--doc` will report \"not embedded\". Prebuilt binaries embed the full corpus." + "cargo:warning=languages/ corpus not found at ./languages or ../../languages \ + — embedding an empty corpus; `--doc` will report \"not embedded\". Published \ + builds carry a package-local copy (see crates-io.yml); workspace builds use the \ + repo-root tree." 
); } } From 75176da676c46686a36aacc553e14f0246f60c36 Mon Sep 17 00:00:00 2001 From: Roman Fedorov Date: Mon, 22 Jun 2026 14:35:21 +0300 Subject: [PATCH 2/4] docs(cel): fix mislabeled macro example, add metrics macro + dense-complexity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `wide_hub` example in cel-reference.md was titled "list macros" but used no macro — `deps.size() > 20` is just `fan_out > 20`. Replace it with a real `filter` macro (`wide_ext_hub`, counts external crates) and add a parallel metrics-context macro over a literal list, noting graph lists are checks-only. Add a worked "dense complexity" example: size-normalized metrics (cognitive/cyclomatic per 100 SLOC) plus a check that flags short-but-dense files relative to the repo's own distribution (both densities > p90, sloc < median). All snippets verified against the repo before documenting. Claude-Session: https://claude.ai/code/session_013R7NfedZh9uEkUQrSRGWR7 --- docs/customization/cel-reference.md | 46 ++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/docs/customization/cel-reference.md b/docs/customization/cel-reference.md index 1fca3c49..adf62934 100644 --- a/docs/customization/cel-reference.md +++ b/docs/customization/cel-reference.md @@ -291,10 +291,18 @@ message = "test/source ratio too high ({tloc}/{sloc})" when = 'depends_on("ext:sqlx")' message = "imports the sqlx crate directly" -# Check: list macros over the dependency set -[rules.checks.wide_hub] -when = "deps.size() > 20" -message = "depends on {fan_out} modules — a coupling hub" +# Check: a list-comprehension macro over the dependency set. `filter` is the +# macro; `size()` is the collection function that counts the result. (A bare +# `deps.size() > 20` needs no macro and just equals `fan_out > 20`.) 
+[rules.checks.wide_ext_hub] +when = 'deps.filter(d, d.startsWith("ext:")).size() > 20' +message = "{name}: depends on many external crates — a coupling hub" + +# Metric: the same macro in a formula. Graph lists (`deps`/`files`/…) are +# checks-only (§4.2), so a metric macro runs over a *literal* list — here the +# file's own complexity signals — counting how many exceed a floor. +[metrics.complexity_signals] +formula_cel = "[cyclomatic, cognitive, branches].filter(x, x > 10.0).size().double()" # Check: relative threshold (this node vs the project distribution) [rules.checks.complexity_outlier] @@ -304,6 +312,36 @@ message = "{name}: cyclomatic {cyclomatic} is in the project's worst 10%" # Metric: branch on path (blank the metric for generated code) [metrics.real_hk] formula_cel = 'path.contains("/generated/") ? 0.0 : hk' + +# Metrics: size-normalized complexity — branching *per 100 source lines*. A raw +# `cognitive`/`cyclomatic` count just tracks size; dividing by `sloc` measures +# DENSITY, in intuitive units (e.g. 42 = 42 points of cognitive load per 100 lines). +# Guard the divide (`sloc == 0 -> 0`). +[metrics.cognitive_per_100sloc] +formula_cel = "sloc > 0.0 ? cognitive / sloc * 100.0 : 0.0" + +[metrics.cyclomatic_per_100sloc] +formula_cel = "sloc > 0.0 ? cyclomatic / sloc * 100.0 : 0.0" + +# Check: a SHORT-but-DENSE file — the most complexity packed into the fewest lines, +# judged RELATIVE to this repo (no fixed number ports across codebases). Custom +# `[metrics]` are aggregatable, so we threshold each density against its own p90: +# 1. top-decile cognitive density cognitive_per_100sloc > p90 +# 2. top-decile branching density cyclomatic_per_100sloc > p90 +# 3. genuinely short sloc < project median → true density, not bulk +# (3) is what excludes large-and-dense files: a 200-line file can top the density +# deciles yet isn't "short". 
A multi-line `when` (TOML `'''…'''`) stays readable — +# CEL ignores the newlines; a node missing an attr just doesn't fire (never errors). +[rules.checks.dense_complexity] +when = ''' + cognitive_per_100sloc > agg('cognitive_per_100sloc', 'p90', 'not_empty') && + cyclomatic_per_100sloc > agg('cyclomatic_per_100sloc', 'p90', 'not_empty') && + sloc.double() < agg('sloc', 'p50', 'not_empty') +''' +message = "{name}: dense complexity — {cognitive} cognitive / {cyclomatic} cyclomatic packed into {sloc} sloc (top-decile density for this repo)" +why = "High branching crammed into few lines reads as clever but is hard to follow and test." +fix = "Extract the nested branches into named helpers — trade a few more lines for lower per-line complexity." +group = "SRP" ``` --- From 1ac785a2d9d55cf6ed8d54b31d3dc65c852226bd Mon Sep 17 00:00:00 2001 From: Roman Fedorov Date: Mon, 22 Jun 2026 14:37:37 +0300 Subject: [PATCH 3/4] release v3.0.2: bump version + sync doc version refs Claude-Session: https://claude.ai/code/session_013R7NfedZh9uEkUQrSRGWR7 --- Cargo.lock | 10 +++++----- Cargo.toml | 10 +++++----- docs/DESIGN.md | 2 +- docs/PRD.md | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87888066..3ee9db7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -268,7 +268,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "code-ranker" -version = "3.0.1" +version = "3.0.2" dependencies = [ "anyhow", "chrono", @@ -286,7 +286,7 @@ dependencies = [ [[package]] name = "code-ranker-graph" -version = "3.0.1" +version = "3.0.2" dependencies = [ "cel", "chrono", @@ -298,7 +298,7 @@ dependencies = [ [[package]] name = "code-ranker-plugin-api" -version = "3.0.1" +version = "3.0.2" dependencies = [ "anyhow", "chrono", @@ -309,7 +309,7 @@ dependencies = [ [[package]] name = "code-ranker-plugins" -version = "3.0.1" +version = "3.0.2" dependencies = [ "anyhow", "cargo_metadata", @@ -336,7 +336,7 @@ 
dependencies = [ [[package]] name = "code-ranker-viewer" -version = "3.0.1" +version = "3.0.2" dependencies = [ "anyhow", "code-ranker-graph", diff --git a/Cargo.toml b/Cargo.toml index 084180e3..de518e8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/*"] resolver = "3" [workspace.package] -version = "3.0.1" +version = "3.0.2" edition = "2024" rust-version = "1.88" license = "Apache-2.0" @@ -12,10 +12,10 @@ keywords = ["dependency-graph", "coupling", "refactoring", "code-quality", "stat categories = ["development-tools", "command-line-utilities"] [workspace.dependencies] -code-ranker-graph = { path = "crates/code-ranker-graph", version = "3.0.1" } -code-ranker-plugin-api = { path = "crates/code-ranker-plugin-api", version = "3.0.1" } -code-ranker-plugins = { path = "crates/code-ranker-plugins", version = "3.0.1" } -code-ranker-viewer = { path = "crates/code-ranker-viewer", version = "3.0.1" } +code-ranker-graph = { path = "crates/code-ranker-graph", version = "3.0.2" } +code-ranker-plugin-api = { path = "crates/code-ranker-plugin-api", version = "3.0.2" } +code-ranker-plugins = { path = "crates/code-ranker-plugins", version = "3.0.2" } +code-ranker-viewer = { path = "crates/code-ranker-viewer", version = "3.0.2" } anyhow = "1.0" cel = "0.13" diff --git a/docs/DESIGN.md b/docs/DESIGN.md index ffae8110..edfe910a 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -986,7 +986,7 @@ dictionaries with the structural graph and the computed cycles/stats: "workspace": "/Users/alice/projects/code-ranker", "target": "/Users/alice/projects/axum-api", "plugin": "rust", - "versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" }, + "versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" }, "roots": { "registry": "/Users/alice/.cargo/registry/src/index.crates.io-abc123", "target": "/Users/alice/projects/axum-api" diff --git a/docs/PRD.md b/docs/PRD.md index 81ea64c2..7e882dc6 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -269,7 +269,7 @@ bundles its 
semantics dictionaries with the structural graph and computed data "target": "/Users/alice/projects/axum-api", "plugin": "rust", "config_file": "/Users/alice/projects/axum-api/code-ranker.toml", - "versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" }, + "versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" }, "roots": { "registry": "/Users/alice/.cargo/registry/src/index.crates.io-abc123", "target": "/Users/alice/projects/axum-api" @@ -729,7 +729,7 @@ can render any language/metric set without hardcoding names. "workspace": "", "target": "", "plugin": "", - "versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" }, + "versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" }, "roots": { "target": "", "registry": "" }, "git": { "branch": "main", "commit": "a3f9c21b4d5e", "dirty_files": 0, "origin": "git@…:team/proj.git" }, "timings": [ { "stage": "rust", "ms": 0, "detail": "…" }, … ], From 32e9586fca32788fb11e7bca1f8e4574befdbeb3 Mon Sep 17 00:00:00 2001 From: Roman Fedorov Date: Mon, 22 Jun 2026 14:57:47 +0300 Subject: [PATCH 4/4] ci(codeql): advanced setup with per-language path gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace GitHub's default CodeQL setup (which re-analyzed every language on every PR) with an advanced workflow that scans a language only when that language's source actually changed. A `changes` job classifies the diff by file extension via dorny/paths-filter and emits a matrix of just the affected languages: - docs/golden-only PR (*.md, *.json, *.sarif, …) → analyze job skipped entirely - any *.rs change → rust (whole product) + actions - a *.cs fixture change → only csharp Keying on extension (not folder) is deliberate: golden snapshots and a plugin's own Rust files (e.g. 
languages/csharp/dialect.rs) don't trigger a pointless re-scan of that fixture language, while real code outside the fixtures stays covered (rust spans all *.rs, javascript-typescript includes the viewer assets, python includes .github/scripts). go uses build-mode autobuild (no buildless mode); all others use build-mode none. A weekly schedule forces a full scan. Requires default setup to be turned off (done via the code-scanning/default-setup API) so the two don't conflict. Claude-Session: https://claude.ai/code/session_013R7NfedZh9uEkUQrSRGWR7 --- .github/workflows/codeql.yml | 112 +++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..b80d0a7f --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,112 @@ +# CodeQL — advanced setup with per-language path gating. +# +# Replaces GitHub's *default* CodeQL setup so we can scan each language ONLY when +# that language's source actually changed. Most non-Rust languages exist here only +# as plugin test fixtures (`crates/.../languages/*/tests/sample/**`) that +# rarely change, yet default setup re-analyzed all of them on every PR. +# +# How the gating works: the `changes` job classifies the diff by *file extension* +# (CodeQL analyzes source, so the extension is the right signal), then emits a +# matrix of only the changed languages. Consequences of keying on extension: +# - golden snapshots (`*.json` / `*.sarif`) never match any language → no run; +# - a plugin's Rust file (e.g. `languages/csharp/dialect.rs`) is `rust`, not +# `csharp` → it triggers `rust`, not a pointless C# re-scan; +# - real first-party code outside the fixtures is covered too — `rust` spans all +# `**/*.rs`, `javascript-typescript` includes the viewer assets, `python` +# includes `.github/scripts/*.py`. +# A docs-only PR matches nothing → the whole analyze job is skipped. 
+# +# The weekly `schedule` (and manual dispatch) force a FULL scan of every language +# so coverage never drifts behind the gated PR runs. +# +# ⚠ REQUIRED ONE-TIME STEP: default setup must be turned OFF or it conflicts with +# this workflow (uploads get rejected). Disable it in Settings → Code security → +# "CodeQL analysis" → switch to Advanced, or run: +# gh api -X PATCH repos/ffedoroff/code-ranker/code-scanning/default-setup -f state=not-configured + +name: CodeQL + +on: + push: + branches: [main] + pull_request: + schedule: + - cron: '25 6 * * 1' # weekly Monday full scan + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: codeql-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # Classify the diff → which CodeQL languages need analysis this run. + changes: + name: Detect changed languages + runs-on: ubuntu-22.04 + outputs: + matrix: ${{ steps.matrix.outputs.matrix }} + any: ${{ steps.matrix.outputs.any }} + steps: + - uses: actions/checkout@v6 + - uses: dorny/paths-filter@v3 + id: filter + if: github.event_name == 'pull_request' || github.event_name == 'push' + with: + filters: | + rust: ['**/*.rs', '**/Cargo.toml', 'Cargo.lock'] + python: ['**/*.py'] + csharp: ['**/*.cs'] + go: ['**/*.go', '**/go.mod', '**/go.sum'] + c-cpp: ['**/*.c', '**/*.cc', '**/*.cpp', '**/*.cxx', '**/*.h', '**/*.hh', '**/*.hpp'] + javascript-typescript: ['**/*.js', '**/*.jsx', '**/*.mjs', '**/*.cjs', '**/*.ts', '**/*.tsx'] + actions: ['.github/workflows/**'] + + # Map the matched filters to a CodeQL matrix. `go` is the only language here + # that has no buildless mode, so it gets `autobuild`; everything else uses + # `build-mode: none` (the fixtures are not buildable). On schedule/dispatch + # we override to the full language set for a complete scan. 
+ - id: matrix + env: + CHANGED: ${{ steps.filter.outputs.changes }} + EVENT: ${{ github.event_name }} + run: | + set -euo pipefail + CHANGED="${CHANGED:-[]}" + [ -z "$CHANGED" ] && CHANGED='[]' + if [ "$EVENT" = "schedule" ] || [ "$EVENT" = "workflow_dispatch" ]; then + CHANGED='["actions","c-cpp","csharp","go","javascript-typescript","python","rust"]' + fi + matrix=$(jq -cn --argjson c "$CHANGED" \ + '{include: [ $c[] | {language: ., "build-mode": (if . == "go" then "autobuild" else "none" end)} ]}') + any=$(jq -rn --argjson c "$CHANGED" 'if ($c|length) > 0 then "true" else "false" end') + echo "matrix=$matrix" >> "$GITHUB_OUTPUT" + echo "any=$any" >> "$GITHUB_OUTPUT" + echo "Languages selected: $matrix" + + analyze: + name: Analyze (${{ matrix.language }}) + needs: changes + if: ${{ needs.changes.outputs.any == 'true' }} + runs-on: ubuntu-22.04 + permissions: + contents: read + security-events: write + packages: read + actions: read + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.changes.outputs.matrix) }} + steps: + - uses: actions/checkout@v6 + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{ matrix.language }}"