Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# CodeQL — advanced setup with per-language path gating.
#
# Replaces GitHub's *default* CodeQL setup so we can scan each language ONLY when
# that language's source actually changed. Most non-Rust languages exist here only
# as plugin test fixtures (`crates/.../languages/<lang>/tests/sample/**`) that
# rarely change, yet default setup re-analyzed all of them on every PR.
#
# How the gating works: the `changes` job classifies the diff by *file extension*
# (CodeQL analyzes source, so the extension is the right signal), then emits a
# matrix of only the changed languages. Consequences of keying on extension:
# - golden snapshots (`*.json` / `*.sarif`) never match any language → no run;
# - a plugin's Rust file (e.g. `languages/csharp/dialect.rs`) is `rust`, not
# `csharp` → it triggers `rust`, not a pointless C# re-scan;
# - real first-party code outside the fixtures is covered too — `rust` spans all
# `**/*.rs`, `javascript-typescript` includes the viewer assets, `python`
# includes `.github/scripts/*.py`.
# A docs-only PR matches nothing → the whole analyze job is skipped.
#
# The weekly `schedule` (and manual dispatch) force a FULL scan of every language
# so coverage never drifts behind the gated PR runs.
#
# ⚠ REQUIRED ONE-TIME STEP: default setup must be turned OFF or it conflicts with
# this workflow (uploads get rejected). Disable it in Settings → Code security →
# "CodeQL analysis" → switch to Advanced, or run:
# gh api -X PATCH repos/ffedoroff/code-ranker/code-scanning/default-setup -f state=not-configured

name: CodeQL

# Triggers: gated scans on pushes to main and on every PR; full scans weekly
# and on manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
  schedule:
    - cron: '25 6 * * 1'  # weekly Monday full scan
  workflow_dispatch:

# Least privilege by default; jobs widen this individually where needed.
permissions:
  contents: read

# Only superseded PR runs may be cancelled. A push run is gated on that push's
# own diff, so cancelling it (or letting a newer push replace it while queued)
# would leave the languages it touched unscanned on main until the weekly run;
# likewise a push must never cancel the weekly full scan. Non-PR events
# therefore get a unique group per run (`run_id`), while all runs of one PR
# share a group keyed on the PR number and cancel each other.
concurrency:
  group: codeql-${{ github.event_name }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

jobs:
  # Classify the diff → which CodeQL languages need analysis this run.
  changes:
    name: Detect changed languages
    runs-on: ubuntu-22.04
    # Job-level permissions replace the workflow-level default, so
    # `contents: read` is restated for checkout. `pull-requests: read` lets
    # dorny/paths-filter list a PR's changed files through the REST API
    # (its documented requirement; without it the step fails on private repos).
    permissions:
      contents: read
      pull-requests: read
    outputs:
      # JSON matrix object (`{"include": [...]}`) consumed by the analyze job.
      matrix: ${{ steps.matrix.outputs.matrix }}
      # "true" when at least one language was selected, otherwise "false".
      any: ${{ steps.matrix.outputs.any }}
    steps:
      - uses: actions/checkout@v6
      # Skipped on schedule/dispatch: those events scan every language anyway.
      - uses: dorny/paths-filter@v3
        id: filter
        if: github.event_name == 'pull_request' || github.event_name == 'push'
        with:
          filters: |
            rust: ['**/*.rs', '**/Cargo.toml', 'Cargo.lock']
            python: ['**/*.py']
            csharp: ['**/*.cs']
            go: ['**/*.go', '**/go.mod', '**/go.sum']
            c-cpp: ['**/*.c', '**/*.cc', '**/*.cpp', '**/*.cxx', '**/*.h', '**/*.hh', '**/*.hpp']
            javascript-typescript: ['**/*.js', '**/*.jsx', '**/*.mjs', '**/*.cjs', '**/*.ts', '**/*.tsx']
            actions: ['.github/workflows/**']

      # Map the matched filters to a CodeQL matrix. `go` is the only language here
      # that has no buildless mode, so it gets `autobuild`; everything else uses
      # `build-mode: none` (the fixtures are not buildable). On schedule/dispatch
      # we override to the full language set for a complete scan.
      - id: matrix
        env:
          # JSON array of matched filter names; empty when the filter step was skipped.
          CHANGED: ${{ steps.filter.outputs.changes }}
          EVENT: ${{ github.event_name }}
        run: |
          set -euo pipefail
          # `:-` covers both unset and empty, so no separate emptiness test is needed.
          CHANGED="${CHANGED:-[]}"
          if [ "$EVENT" = "schedule" ] || [ "$EVENT" = "workflow_dispatch" ]; then
            CHANGED='["actions","c-cpp","csharp","go","javascript-typescript","python","rust"]'
          fi
          matrix=$(jq -cn --argjson c "$CHANGED" \
            '{include: [ $c[] | {language: ., "build-mode": (if . == "go" then "autobuild" else "none" end)} ]}')
          any=$(jq -rn --argjson c "$CHANGED" 'if ($c|length) > 0 then "true" else "false" end')
          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
          echo "any=$any" >> "$GITHUB_OUTPUT"
          echo "Languages selected: $matrix"

  # Run CodeQL once per selected language; skipped entirely when nothing matched.
  analyze:
    name: Analyze (${{ matrix.language }})
    needs: changes
    # An empty `include` list is not a usable matrix, so gate on `any` first.
    if: ${{ needs.changes.outputs.any == 'true' }}
    runs-on: ubuntu-22.04
    permissions:
      contents: read
      security-events: write
      packages: read
      actions: read
    strategy:
      # One language failing must not cancel the other languages' scans.
      fail-fast: false
      matrix: ${{ fromJSON(needs.changes.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v6
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          # A distinct category per language keeps each matrix leg's results separate.
          category: "/language:${{ matrix.language }}"
6 changes: 6 additions & 0 deletions .github/workflows/crates-io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ jobs:
# right before publishing; cargo auto-detects a package-root README.md
# when no `readme` field is set. `--allow-dirty` tolerates the copy.
for d in crates/*/; do cp README.md "$d/README.md"; done
# Same trick for the `--doc` corpus: the languages/ tree lives at the
# repo root (outside the crate, so not in the published tarball by
# default). Copy it into the CLI crate so `cargo publish` packs it and
# `cargo install code-ranker` embeds the full corpus; build.rs prefers
# this package-local copy over the repo-root tree. `--allow-dirty` covers it.
cp -R languages crates/code-ranker-cli/languages
for c in code-ranker-plugin-api code-ranker-graph code-ranker-plugins code-ranker-viewer code-ranker; do
echo "==> publishing $c"
attempt=0
Expand Down
10 changes: 5 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ members = ["crates/*"]
resolver = "3"

[workspace.package]
version = "3.0.1"
version = "3.0.2"
edition = "2024"
rust-version = "1.88"
license = "Apache-2.0"
Expand All @@ -12,10 +12,10 @@ keywords = ["dependency-graph", "coupling", "refactoring", "code-quality", "stat
categories = ["development-tools", "command-line-utilities"]

[workspace.dependencies]
code-ranker-graph = { path = "crates/code-ranker-graph", version = "3.0.1" }
code-ranker-plugin-api = { path = "crates/code-ranker-plugin-api", version = "3.0.1" }
code-ranker-plugins = { path = "crates/code-ranker-plugins", version = "3.0.1" }
code-ranker-viewer = { path = "crates/code-ranker-viewer", version = "3.0.1" }
code-ranker-graph = { path = "crates/code-ranker-graph", version = "3.0.2" }
code-ranker-plugin-api = { path = "crates/code-ranker-plugin-api", version = "3.0.2" }
code-ranker-plugins = { path = "crates/code-ranker-plugins", version = "3.0.2" }
code-ranker-viewer = { path = "crates/code-ranker-viewer", version = "3.0.2" }

anyhow = "1.0"
cel = "0.13"
Expand Down
32 changes: 19 additions & 13 deletions crates/code-ranker-cli/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
//! can serve a principle's Markdown (e.g. `--doc HK`) from the binary itself with
//! no filesystem at runtime. Dependency-free (no `include_dir` crate).
//!
//! The corpus lives at the repo root (`../../languages`), OUTSIDE this crate, so it
//! is NOT in the published crate tarball. A workspace build (the prebuilt binaries
//! shipped via the installer / npm / PyPI / Docker / GitHub Release) finds it and
//! embeds the full corpus; an ISOLATED build (`cargo publish` verify, or
//! `cargo install code-ranker` from crates.io source) won't — so the corpus is
//! resolved best-effort and absence yields an EMPTY corpus (never a build failure).
//! `--doc` then reports "not embedded" on such builds; everything else works.
//! The single source of truth lives at the repo root (`../../languages`), OUTSIDE
//! this crate. So that `cargo install code-ranker` from crates.io still embeds the
//! corpus, the publish workflow copies that tree into a package-local `languages/`
//! right before `cargo publish` (mirroring the per-crate README copy) — and this
//! build script prefers that package-local copy, falling back to the repo-root tree
//! for workspace/dev builds. If NEITHER exists (an unexpected isolated build) the
//! corpus resolves best-effort to EMPTY (never a build failure); `--doc` then reports
//! "not embedded" while everything else works.

use std::path::{Path, PathBuf};
use std::{env, fs};
Expand All @@ -20,9 +21,13 @@ fn main() {
let manifest = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR");

let mut entries: Vec<(String, PathBuf)> = Vec::new();
// Best-effort: a missing corpus (isolated/published build) is NOT an error —
// it must never break `cargo publish`/`cargo install`. See module docs.
match Path::new(&manifest).join("../../languages").canonicalize() {
// Prefer the package-local copy (present in the published tarball), else the
// repo-root tree (workspace/dev builds). Best-effort: a missing corpus is NOT
// an error — it must never break `cargo publish`/`cargo install`. See module docs.
let local = Path::new(&manifest).join("languages");
let root = Path::new(&manifest).join("../../languages");
let resolved = local.canonicalize().or_else(|_| root.canonicalize());
match resolved {
Ok(corpus) => {
// Re-run when the tree changes (added/removed files) and on any file edit.
println!("cargo:rerun-if-changed={}", corpus.display());
Expand All @@ -31,9 +36,10 @@ fn main() {
}
Err(_) => {
println!(
"cargo:warning=languages/ corpus not found (isolated build, e.g. \
`cargo install code-ranker` from crates.io) — embedding an empty corpus; \
`--doc` will report \"not embedded\". Prebuilt binaries embed the full corpus."
"cargo:warning=languages/ corpus not found at ./languages or ../../languages \
— embedding an empty corpus; `--doc` will report \"not embedded\". Published \
builds carry a package-local copy (see crates-io.yml); workspace builds use the \
repo-root tree."
);
}
}
Expand Down
2 changes: 1 addition & 1 deletion docs/DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,7 @@ dictionaries with the structural graph and the computed cycles/stats:
"workspace": "/Users/alice/projects/code-ranker",
"target": "/Users/alice/projects/axum-api",
"plugin": "rust",
"versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" },
"versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" },
"roots": {
"registry": "/Users/alice/.cargo/registry/src/index.crates.io-abc123",
"target": "/Users/alice/projects/axum-api"
Expand Down
4 changes: 2 additions & 2 deletions docs/PRD.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ bundles its semantics dictionaries with the structural graph and computed data
"target": "/Users/alice/projects/axum-api",
"plugin": "rust",
"config_file": "/Users/alice/projects/axum-api/code-ranker.toml",
"versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" },
"versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" },
"roots": {
"registry": "/Users/alice/.cargo/registry/src/index.crates.io-abc123",
"target": "/Users/alice/projects/axum-api"
Expand Down Expand Up @@ -729,7 +729,7 @@ can render any language/metric set without hardcoding names.
"workspace": "<absolute-path>",
"target": "<absolute-path>",
"plugin": "<plugin-id>",
"versions": { "code-ranker": "3.0.1", "rustc": "1.78.0" },
"versions": { "code-ranker": "3.0.2", "rustc": "1.78.0" },
"roots": { "target": "<abs>", "registry": "<abs>" },
"git": { "branch": "main", "commit": "a3f9c21b4d5e", "dirty_files": 0, "origin": "git@…:team/proj.git" },
"timings": [ { "stage": "rust", "ms": 0, "detail": "…" }, … ],
Expand Down
46 changes: 42 additions & 4 deletions docs/customization/cel-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,18 @@ message = "test/source ratio too high ({tloc}/{sloc})"
when = 'depends_on("ext:sqlx")'
message = "imports the sqlx crate directly"

# Check: list macros over the dependency set
[rules.checks.wide_hub]
when = "deps.size() > 20"
message = "depends on {fan_out} modules — a coupling hub"
# Check: a list-comprehension macro over the dependency set. `filter` is the
# macro; `size()` is the collection function that counts the result. (A bare
# `deps.size() > 20` needs no macro and just equals `fan_out > 20`.)
[rules.checks.wide_ext_hub]
when = 'deps.filter(d, d.startsWith("ext:")).size() > 20'
message = "{name}: depends on many external crates — a coupling hub"

# Metric: the same macro in a formula. Graph lists (`deps`/`files`/…) are
# checks-only (§4.2), so a metric macro runs over a *literal* list — here the
# file's own complexity signals — counting how many exceed a floor.
[metrics.complexity_signals]
formula_cel = "[cyclomatic, cognitive, branches].filter(x, x > 10.0).size().double()"

# Check: relative threshold (this node vs the project distribution)
[rules.checks.complexity_outlier]
Expand All @@ -304,6 +312,36 @@ message = "{name}: cyclomatic {cyclomatic} is in the project's worst 10%"
# Metric: branch on path (blank the metric for generated code)
[metrics.real_hk]
formula_cel = 'path.contains("/generated/") ? 0.0 : hk'

# Metrics: size-normalized complexity — branching *per 100 source lines*. A raw
# `cognitive`/`cyclomatic` count just tracks size; dividing by `sloc` measures
# DENSITY, in intuitive units (e.g. 42 = 42 points of cognitive load per 100 lines).
# Guard the divide (`sloc == 0 -> 0`).
[metrics.cognitive_per_100sloc]
formula_cel = "sloc > 0.0 ? cognitive / sloc * 100.0 : 0.0"

[metrics.cyclomatic_per_100sloc]
formula_cel = "sloc > 0.0 ? cyclomatic / sloc * 100.0 : 0.0"

# Check: a SHORT-but-DENSE file — the most complexity packed into the fewest lines,
# judged RELATIVE to this repo (no fixed number ports across codebases). Custom
# `[metrics]` are aggregatable, so we threshold each density against its own p90:
# 1. top-decile cognitive density cognitive_per_100sloc > p90
# 2. top-decile branching density cyclomatic_per_100sloc > p90
# 3. genuinely short sloc < project median → true density, not bulk
# (3) is what excludes large-and-dense files: a 200-line file can top the density
# deciles yet isn't "short". A multi-line `when` (TOML `'''…'''`) stays readable —
# CEL ignores the newlines; a node missing an attr just doesn't fire (never errors).
[rules.checks.dense_complexity]
when = '''
cognitive_per_100sloc > agg('cognitive_per_100sloc', 'p90', 'not_empty') &&
cyclomatic_per_100sloc > agg('cyclomatic_per_100sloc', 'p90', 'not_empty') &&
sloc.double() < agg('sloc', 'p50', 'not_empty')
'''
message = "{name}: dense complexity — {cognitive} cognitive / {cyclomatic} cyclomatic packed into {sloc} sloc (top-decile density for this repo)"
why = "High branching crammed into few lines reads as clever but is hard to follow and test."
fix = "Extract the nested branches into named helpers — trade a few more lines for lower per-line complexity."
group = "SRP"
```

---
Expand Down
Loading