ffedoroff · ffedoroff · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
diff --git a/contrib/prompting-self-improve.md b/contrib/prompting-self-improve.md
@@ -66,9 +66,9 @@ of these and rebuild (see Setup) — all are baked into the binary:
   `crates/code-ranker-plugins/src/languages/<lang>/config.toml`).
 - **scaffolding** (intro / doc-note / task / focus prose) —
   `crates/code-ranker-graph/metrics/prompt.md`.
-- **the full reference doc** the agent reads via `docs <FOCUS>` —
+- **the full reference doc** the agent reads via `docs <lang> <FOCUS>` —
   `plugins/<lang>/<FOCUS>.md` (e.g. `ADP.md`), and the offline entry point
-  `plugins/base/AI.md` (`docs ai`).
+  `plugins/base/AI.md` (`docs <lang> ai`).
 
 Change the **smallest** lever that fixes the observed failure.
 
@@ -151,12 +151,12 @@ nothing eval-related is left in `PROJECT`.
 1. **Clean start.** `PROJECT` on `main`, working tree clean.
 2. **Fresh agent session**, model = `MODEL`, **empty context**. Bootstrap it with the
    offline playbook only — no extra hints: have it read
-   `code-ranker docs ai` (overview + catalog) and `docs <FOCUS>` (the deep
+   `code-ranker docs <lang> ai` (overview + catalog) and `docs <lang> <FOCUS>` (the deep
    doc). This is what a real user would do, so it tests the *prompt*, not your
    coaching.
-3. **BEFORE.** `code-ranker report . --output.html.path=$RUN/before.html --output.json.path=$RUN/before.json`.
+3. **BEFORE.** `code-ranker report . --plugins <lang> --output.html.path=$RUN/before.html --output.json.path=$RUN/before.json`.
 4. **Save the focused prompt** (orchestrator, for the record):
-   `code-ranker report . --prompt <FOCUS> > $RUN/prompt.md`
+   `code-ranker report . --plugins <lang> --prompt <FOCUS> > $RUN/prompt.md`
    — captures the exact fix-prompt this run used into `$RUN/prompt.md`, so prompt ↔
    behaviour stays correlatable across models.
 5. **Fix** (agent). Ask the agent to fix the single worst (`--top 1`) cycle and **let it
@@ -273,7 +273,7 @@ Layout (one build → one `<timestamp>_<CR_SHA>` folder → one subfolder per ru
 
 Each run is a **fresh session** of `MODEL` with **no carried context** — start a new
 one, never `--continue`/`--resume`. Keep `PROJECT` free of a code-ranker-specific
-`CLAUDE.md`/memory so only `docs ai` primes the agent; otherwise you're testing the
+`CLAUDE.md`/memory so only `docs <lang> ai` primes the agent; otherwise you're testing the
 priming, not the prompt.
 
 **Watch the agent's working directory.** Launch it *inside* `PROJECT` (the interactive
@@ -297,13 +297,13 @@ and note in `metrics.csv` which basis the run used.
 
   Then give it **one** opening message (the bootstrap), nothing else:
 
-  > Read `code-ranker docs ai`, then fix the worst `<FOCUS>` in this
+  > Read `code-ranker docs <lang> ai`, then fix the worst `<FOCUS>` in this
   > project. Show me the plan before changing code.
 
   Headless one-shot (scriptable, but weaker for the multi-step loop):
 
   ```sh
-  cd PROJECT && claude -p "Read \`code-ranker docs ai\`, then fix the worst <FOCUS>…" --model haiku
+  cd PROJECT && claude -p "Read \`code-ranker docs <lang> ai\`, then fix the worst <FOCUS>…" --model haiku
   ```
 
 - **Other agents** (Cursor, …): open a **New Chat** (not a continued thread), select
@@ -314,7 +314,7 @@ and note in `metrics.csv` which basis the run used.
 The transcript is the **primary tuning data** — it shows *where* a cheaper model
 diverged (skipped `docs`, picked the wrong cycle, hacked the metric). Save it raw,
 **verbatim, no summary**, into `$RUN/chat.*`. It must include the bootstrap
-(`docs ai` / `docs <FOCUS>` reads), the task, and **every** assistant turn — its
+(`docs <lang> ai` / `docs <lang> <FOCUS>` reads), the task, and **every** assistant turn — its
 reasoning **and** the tool calls (the `code-ranker` commands + their output), through
 the final fix and the test run.
 
@@ -360,7 +360,7 @@ Columns, grouped by objective (most are extractable from the run's artifacts; th
 | `api_duration_s` | cost | transcript | ↓ the **API-only subset** of `wall_s` (active model time, `result.duration_api_ms`). `wall_s − api_duration_s` ≈ local tool execution + queueing. Blank when there's no session `result` event (subagent log) |
 | `files_changed` | cost | diff | context — edit footprint (not better/worse alone) |
 | `loc_added` / `loc_removed` | cost | PROJECT branch `git diff --shortstat` | precise edit footprint; a fix far larger than the reference's is a smell (also catches committed litter) |
-| `read_doc_ai` / `read_doc_focus` | clarity | transcript | 1/0 — read `docs ai` / `docs <FOCUS>` |
+| `read_doc_ai` / `read_doc_focus` | clarity | transcript | 1/0 — read `docs <lang> ai` / `docs <lang> <FOCUS>` |
 | `doc_reread` | clarity | transcript | ↓ times a doc was read more than once (a re-read signals the prompt/doc wasn't clear the first time) |
 | `planned_before_edit` | clarity | transcript | 1/0 — proposed a plan before editing |
 | `used_generated_prompt` | adherence | transcript | 1/0 — actually fetched the tool's fix-prompt (`--prompt`) vs improvising |

diff --git a/crates/code-ranker-cli/src/config/rules.rs b/crates/code-ranker-cli/src/config/rules.rs
@@ -56,7 +56,9 @@ pub fn rule_doc(
         return Some(RuleDoc {
             title: c.label.clone(),
             why: c.description.clone(),
-            fix: c.remediation.clone(),
+            // Substitute `{lang}` in an authored remediation with the resolved language,
+            // so a `code-ranker docs {lang} ADP` pointer is runnable as printed.
+            fix: c.remediation.clone().map(|r| r.replace("{lang}", lang)),
         });
     }
     let metric = id.rsplit('.').next().unwrap_or(id);
@@ -66,11 +68,15 @@ pub fn rule_doc(
     // generates the AI fix-prompt for this metric, so the built-in catalog carries no
     // duplicated boilerplate and the command always names the correct subject
     // (`report --plugins <lang> --prompt <key>`).
-    let fix = s.remediation.clone().or_else(|| {
-        Some(format!(
-            "Run `code-ranker report --plugins {lang} --prompt {metric}` to generate an AI fix-prompt."
-        ))
-    });
+    let fix = s
+        .remediation
+        .clone()
+        .map(|r| r.replace("{lang}", lang))
+        .or_else(|| {
+            Some(format!(
+                "Run `code-ranker report --plugins {lang} --prompt {metric}` to generate an AI fix-prompt."
+            ))
+        });
     Some(RuleDoc {
         title: s.name.clone().or_else(|| s.label.clone()),
         why: s.description.clone(),

diff --git a/crates/code-ranker-cli/src/docs.rs b/crates/code-ranker-cli/src/docs.rs
@@ -76,53 +76,66 @@ pub(crate) fn run(
 
     // `docs <lang> ai` → the offline AI-agent playbook.
     if subject.is_some_and(|s| templates::normalize_id(s) == "ai") {
-        emit(templates::ai_doc()?);
+        emit(templates::ai_doc(language)?, language);
         return Ok(());
     }
 
     let specs = build_specs(language, cfg);
 
     let Some(subject) = subject else {
         // `docs <lang>`: the full subject catalog for that language.
-        print!(
-            "{}",
-            templates::with_trailing_newline(render_catalog(&specs, language, None))
-        );
+        emit(render_catalog(&specs, language, None), language);
         return Ok(());
     };
 
     // Every subject is matched on its normalized form (case/separator-insensitive),
     // so `fan_in`, `Fan-in`, and `FAN in` all resolve the same metric.
     let want = templates::normalize_id(subject);
     if want == "metrics" {
-        emit(render_metrics_index(&specs));
+        emit(render_metrics_index(&specs), language);
     } else if want == "principles" {
-        emit(render_principles_index(&specs));
+        emit(render_principles_index(&specs), language);
     } else if let Some(cat) = category_key(&specs, subject) {
-        emit(render_category(&specs, &cat));
+        emit(render_category(&specs, &cat), language);
     } else if let Some(p) = specs
         .principles
         .iter()
         .find(|p| templates::normalize_id(&p.id) == want)
     {
-        emit(render_principle(&specs, &p.id)?);
+        emit(render_principle(&specs, &p.id)?, language);
     } else if let Some(key) = specs
         .node_attributes
         .keys()
         .find(|k| templates::normalize_id(k) == want)
     {
-        emit(render_metric(&specs, key));
+        emit(render_metric(&specs, key), language);
     } else {
         // Unknown subject: print the catalog so the caller sees every option, then
         // fail (non-zero) — it was a real lookup miss, not a help request.
-        emit(render_catalog(&specs, language, Some(subject)));
+        emit(render_catalog(&specs, language, Some(subject)), language);
         bail!("unknown docs subject {subject:?} for language {language:?} — see the list above");
     }
     Ok(())
 }
 
-fn emit(md: String) {
-    print!("{}", templates::with_trailing_newline(md));
+fn emit(md: String, lang: &str) {
+    print!(
+        "{}",
+        templates::with_trailing_newline(localize_lang(md, lang))
+    );
+}
+
+/// Replace every instructional `<lang>` placeholder in a served per-language doc with
+/// the concrete language, so printed commands are runnable as-is (`docs rust hk`,
+/// `--plugins rust`). `base` is the language-agnostic catalog, so its `<lang>` stays a
+/// placeholder. Every `docs`-command hint is written with the literal `<lang>` token
+/// and localized here at emit time — one substitution point for the whole served doc.
+fn localize_lang(md: String, lang: &str) -> String {
+    if lang == "base" {
+        md
+    } else {
+        md.replace("<lang>", lang)
+    }
 }
 
 /// `base` (the language-agnostic catalog) or any registered plugin name.
@@ -401,31 +414,32 @@ fn principles_block(specs: &DocSpecs) -> String {
         .collect()
 }
 
-/// `docs metrics`: every metric, grouped by category.
+/// `docs <lang> metrics`: every metric, grouped by category. The `<lang>` hint is
+/// localized at emit time (concrete language, or kept generic for `base`).
 fn render_metrics_index(specs: &DocSpecs) -> String {
     format!(
-        "Metrics — print one with `code-ranker docs <metric>`:\n{}",
+        "Metrics — print one with `code-ranker docs <lang> <metric>`:\n{}",
         categories_block(specs)
     )
 }
 
-/// `docs principles`: every design principle.
+/// `docs <lang> principles`: every design principle.
 fn render_principles_index(specs: &DocSpecs) -> String {
     format!(
-        "Principles — print one with `code-ranker docs <ID>`:\n\n{}",
+        "Principles — print one with `code-ranker docs <lang> <ID>`:\n\n{}",
         principles_block(specs)
     )
 }
 
-/// `docs <category>`: the category's human label + description + its member metrics.
+/// `docs <lang> <category>`: the category's human label + description + its member metrics.
 fn render_category(specs: &DocSpecs, key: &str) -> String {
     // Single-category view: the human label is the title (the key was just typed),
     // so there is no `key: Label` echo.
     let mut out = category_label(specs, key);
     if let Some(d) = specs.groups.get(key).and_then(|g| g.description.as_deref()) {
         out.push_str(&format!("\n{d}"));
     }
-    out.push_str("\n\nMetrics — print one with `code-ranker docs <metric>`:\n");
+    out.push_str("\n\nMetrics — print one with `code-ranker docs <lang> <metric>`:\n");
     for (k, spec) in metrics_in_category(specs, key) {
         out.push_str(&format!("  - {k}: {}", metric_name(spec, k)));
         if let Some(d) = spec.description.as_deref() {

diff --git a/crates/code-ranker-cli/src/docs_test.rs b/crates/code-ranker-cli/src/docs_test.rs
@@ -134,6 +134,21 @@ fn principles_index_lists_each_principle() {
     assert!(out.contains("- TSR: Test Ratio"), "principle listed: {out}");
 }
 
+/// Index hints carry the generic `<lang>` token; `emit` → `localize_lang` makes it
+/// concrete for a real plugin and keeps `<lang>` for `base` (covered by
+/// `localize_lang_substitutes_concrete_language_but_not_base`).
+#[test]
+fn index_hints_use_generic_lang_placeholder() {
+    assert!(
+        render_metrics_index(&specs()).contains("`code-ranker docs <lang> <metric>`"),
+        "metrics index hint"
+    );
+    assert!(
+        render_principles_index(&specs()).contains("`code-ranker docs <lang> <ID>`"),
+        "principles index hint"
+    );
+}
+
 #[test]
 fn principles_block_reports_when_the_plugin_defines_none() {
     let mut s = specs();
@@ -237,6 +252,24 @@ fn build_specs_base_uses_neutral_catalog() {
     );
 }
 
+/// `localize_lang` makes `<lang>` placeholders concrete for a real language so printed
+/// commands are runnable; the language-agnostic `base` catalog keeps the placeholder.
+#[test]
+fn localize_lang_substitutes_concrete_language_but_not_base() {
+    assert_eq!(
+        localize_lang(
+            "`code-ranker docs <lang> hk` then `--plugins <lang>`".into(),
+            "rust",
+        ),
+        "`code-ranker docs rust hk` then `--plugins rust`"
+    );
+    assert_eq!(
+        localize_lang("--plugins <lang>".into(), "base"),
+        "--plugins <lang>",
+        "base keeps the generic placeholder"
+    );
+}
+
 /// With no language markers present, `languages_hint` lists every available
 /// language rather than the project's detected set.
 #[test]

diff --git a/crates/code-ranker-cli/src/recommend.rs b/crates/code-ranker-cli/src/recommend.rs
@@ -24,7 +24,9 @@ use code_ranker_plugin_api::{
 };
 use std::collections::HashMap;
 
-/// Select the `LanguageSnapshot` to use for recommendations.
+/// Select the `LanguageSnapshot` to use for recommendations. Returns the resolved
+/// language key (the name the snapshot stores it under) with its snapshot: callers
+/// need the key to render per-language commands in prompts (e.g. `code-ranker docs <lang> <id>`).
 ///
 /// Resolution order:
 /// 1. `--language` explicitly given → use that language or error.
@@ -37,23 +39,28 @@ pub fn resolve_language_snap<'a>(
     snap: &'a Snapshot,
     language: Option<&str>,
     id: Option<&str>,
-) -> Result<&'a LanguageSnapshot> {
+) -> Result<(&'a str, &'a LanguageSnapshot)> {
     // Explicit `--language` always wins. Resolve an alias (`js` → `javascript`)
     // to the canonical key the snapshot stores under.
     if let Some(lang) = language {
         let canon = crate::plugin::to_canonical(lang);
-        return snap.languages.get(&canon).with_context(|| {
-            let available: Vec<&str> = snap.languages.keys().map(String::as_str).collect();
-            format!(
-                "language {lang:?} not found in snapshot; available: {}",
-                available.join(", ")
-            )
-        });
+        return snap
+            .languages
+            .get_key_value(&canon)
+            .map(|(k, v)| (k.as_str(), v))
+            .with_context(|| {
+                let available: Vec<&str> = snap.languages.keys().map(String::as_str).collect();
+                format!(
+                    "language {lang:?} not found in snapshot; available: {}",
+                    available.join(", ")
+                )
+            });
     }
 
     // Single language: no ambiguity.
     if snap.languages.len() == 1 {
-        return Ok(snap.languages.values().next().expect("len==1"));
+        let (k, v) = snap.languages.iter().next().expect("len==1");
+        return Ok((k.as_str(), v));
     }
 
     // Multiple languages: try to resolve the id across all of them.
@@ -73,7 +80,13 @@ pub fn resolve_language_snap<'a>(
             .collect();
 
         match matches.as_slice() {
-            [one] => return Ok(snap.languages.get(*one).expect("key from languages")),
+            [one] => {
+                let (k, v) = snap
+                    .languages
+                    .get_key_value(*one)
+                    .expect("key from languages");
+                return Ok((k.as_str(), v));
+            }
             [] => {} // fall through to first-language default
             langs => anyhow::bail!(
                 "{focus_id:?} found in languages: {}; specify --language <name> to disambiguate",
@@ -84,8 +97,9 @@ pub fn resolve_language_snap<'a>(
 
     // Fall back to the first language (BTreeMap order, deterministic).
     snap.languages
-        .values()
+        .iter()
         .next()
+        .map(|(k, v)| (k.as_str(), v))
         .context("snapshot has no languages; regenerate the report with `code-ranker report`")
 }