From b9c593f1ceae515533d61559c16ee6ac482e2c78 Mon Sep 17 00:00:00 2001 From: VooDisss Date: Wed, 25 Feb 2026 12:03:20 +0200 Subject: [PATCH 1/3] feat: implement rpg_analyze_health tool - Code Health Meter Implements a new MCP tool providing code health metrics based on the Code Health Meter (CHM) framework from the research paper. ## New Files - health.rs: Graph health metrics (instability, centrality, god objects) - duplication.rs: Token-based (Rabin-Karp) and semantic (Jaccard) clone detection ## Key Features - Instability index: I = Ce / (Ca + Ce) - Degree centrality (normalized) - God object detection (high degree + extreme instability) - Rabin-Karp rolling hash for Type-1/Type-2 clone detection - Jaccard similarity on lifted features for Type-3/Type-4 detection - LLM-friendly output via TOON formatter ## Testing - 17 unit tests in duplication module - 6 unit tests in health module ## References - Inspired by: Khalfallah, B. H. (2025). Code Health Meter. ACM Trans. Softw. Eng. Methodol. https://doi.org/10.1145/3737670 --- crates/rpg-mcp/src/params.rs | 16 + crates/rpg-mcp/src/tools.rs | 33 + crates/rpg-nav/Cargo.toml | 1 + crates/rpg-nav/src/duplication.rs | 1071 +++++++++++++++++++++++++++++ crates/rpg-nav/src/health.rs | 587 ++++++++++++++++ crates/rpg-nav/src/lib.rs | 5 +- crates/rpg-nav/src/search.rs | 5 +- crates/rpg-nav/src/toon.rs | 150 ++++ 8 files changed, 1864 insertions(+), 4 deletions(-) create mode 100644 crates/rpg-nav/src/duplication.rs create mode 100644 crates/rpg-nav/src/health.rs diff --git a/crates/rpg-mcp/src/params.rs b/crates/rpg-mcp/src/params.rs index 34af32a..37bd1cc 100644 --- a/crates/rpg-mcp/src/params.rs +++ b/crates/rpg-mcp/src/params.rs @@ -210,3 +210,19 @@ pub(crate) struct SliceBetweenParams { /// Include entity metadata (name, file, features) in output pub(crate) include_metadata: Option, } + +/// Parameters for the `analyze_health` tool. 
+#[derive(Debug, Deserialize, JsonSchema)] +pub(crate) struct AnalyzeHealthParams { + /// Instability threshold above which entities are flagged as highly unstable (default: 0.7). + pub(crate) instability_threshold: Option, + /// Minimum total degree for god object detection (default: 10). + pub(crate) god_object_threshold: Option, + /// Run Rabin-Karp token-based clone detection (reads source files from disk, slower). Default: false. + pub(crate) include_duplication: Option, + /// Run Jaccard feature-based semantic clone detection (in-memory, fast). + /// Requires entities to have been lifted. Default: false. + pub(crate) include_semantic_duplication: Option, + /// Jaccard similarity threshold for semantic clone detection (default: 0.6). + pub(crate) semantic_similarity_threshold: Option, +} diff --git a/crates/rpg-mcp/src/tools.rs b/crates/rpg-mcp/src/tools.rs index d03e344..74a8292 100644 --- a/crates/rpg-mcp/src/tools.rs +++ b/crates/rpg-mcp/src/tools.rs @@ -2883,6 +2883,39 @@ impl RpgServer { Ok(result) } + + #[tool( + description = "Analyze code health metrics including coupling, instability, centrality, and potential god objects. Returns entities with architectural issues and recommendations for refactoring. Set include_duplication=true to detect code clones via Rabin-Karp fingerprinting (reads source files, slower). Set include_semantic_duplication=true to detect conceptual duplicates via Jaccard similarity on lifted features (in-memory, fast; requires entities to be lifted)." 
+ )] + async fn analyze_health( + &self, + Parameters(params): Parameters, + ) -> Result { + self.ensure_graph().await?; + let notice = self.staleness_notice().await; + let guard = self.graph.read().await; + let graph = guard.as_ref().unwrap(); + + let config = rpg_nav::health::HealthConfig { + instability_threshold: params.instability_threshold.unwrap_or(0.7), + god_object_degree_threshold: params.god_object_threshold.unwrap_or(10), + include_duplication: params.include_duplication.unwrap_or(false), + include_semantic_duplication: params.include_semantic_duplication.unwrap_or(false), + semantic_duplication_config: rpg_nav::duplication::SemanticDuplicationConfig { + similarity_threshold: params.semantic_similarity_threshold.unwrap_or(0.6), + ..Default::default() + }, + ..Default::default() + }; + + let report = rpg_nav::health::compute_health_full(graph, &self.project_root, &config); + + Ok(format!( + "{}{}", + notice, + rpg_nav::toon::format_health_report(&report) + )) + } } impl RpgServer { diff --git a/crates/rpg-nav/Cargo.toml b/crates/rpg-nav/Cargo.toml index f672d25..fec44c0 100644 --- a/crates/rpg-nav/Cargo.toml +++ b/crates/rpg-nav/Cargo.toml @@ -19,6 +19,7 @@ strsim.workspace = true toon-format.workspace = true serde.workspace = true serde_json.workspace = true +rayon.workspace = true fastembed = { workspace = true, optional = true } [dev-dependencies] diff --git a/crates/rpg-nav/src/duplication.rs b/crates/rpg-nav/src/duplication.rs new file mode 100644 index 0000000..d7ab017 --- /dev/null +++ b/crates/rpg-nav/src/duplication.rs @@ -0,0 +1,1071 @@ +//! Duplication detection via Rabin-Karp rolling hash fingerprinting. +//! +//! Implements the CHM (Code Health Meter) duplication analysis from paper.md §3.4: +//! - Tokenization: strip whitespace/comments, normalize identifiers +//! - Rolling hash: Rabin-Karp fingerprinting with configurable window size +//! - Clone detection: HashMap collision-based fingerprint matching +//! +//! 
This approach is language-agnostic and detects Type-1 (exact) and Type-2 (renamed) clones. + +use crate::search::jaccard_similarity; +use rpg_core::graph::{EntityKind, RPGraph}; +use std::collections::{HashMap, HashSet}; +use std::path::Path; + +/// Base multiplier for rolling hash (per paper: typically 256) +const HASH_BASE: u64 = 256; + +/// Large prime modulus to prevent overflow (per paper: 10^9 + 7) +const HASH_MOD: u64 = 1_000_000_007; + +/// Default window size in tokens for entity-level fingerprinting. +/// Lowered from 50 (file-level) to 20 to catch function-sized duplicates. +const DEFAULT_WINDOW_SIZE: usize = 20; + +/// Minimum duplicate length in tokens to report (filters noise). +/// Lowered from 30 (file-level) to 15 for entity-level snippets. +const MIN_DUPLICATE_TOKENS: usize = 15; + +/// A detected clone group with high similarity. +#[derive(Debug, Clone, serde::Serialize)] +pub struct CloneGroup { + /// Entity IDs participating in this clone group + pub entities: Vec, + /// Similarity coefficient (0.0 - 1.0) + pub similarity: f64, + /// Estimated duplicated token count + pub duplicated_tokens: usize, + /// File paths involved + pub files: Vec, +} + +/// Configuration for duplication detection. +#[derive(Debug, Clone)] +pub struct DuplicationConfig { + /// Window size in tokens for fingerprinting + pub window_size: usize, + /// Minimum tokens to consider as a duplicate + pub min_tokens: usize, + /// Minimum similarity threshold to report (0.0 - 1.0) + pub similarity_threshold: f64, +} + +impl Default for DuplicationConfig { + fn default() -> Self { + Self { + window_size: DEFAULT_WINDOW_SIZE, + min_tokens: MIN_DUPLICATE_TOKENS, + similarity_threshold: 0.7, + } + } +} + +/// A detected group of conceptual duplicates identified via feature-set Jaccard similarity. 
+#[derive(Debug, Clone, serde::Serialize)] +pub struct SemanticCloneGroup { + /// Entity IDs in this group + pub entities: Vec, + /// Jaccard similarity of feature sets: |A ∩ B| / |A ∪ B| + pub similarity: f64, + /// Shared features that caused the match + pub shared_features: Vec, + /// File paths (parallel to `entities`) + pub files: Vec, +} + +/// Configuration for semantic (feature-based Jaccard) duplication detection. +#[derive(Debug, Clone)] +pub struct SemanticDuplicationConfig { + /// Jaccard threshold above which pairs are flagged as conceptual duplicates (default: 0.6). + pub similarity_threshold: f64, + /// Minimum number of features an entity must have to participate (default: 1). + pub min_features: usize, + /// Skip pairs from the same source file — cross-file duplicates are more actionable (default: true). + pub skip_same_file: bool, + /// Skip features appearing in more than this many entities; too generic to be discriminative (default: 20). + pub max_feature_frequency: usize, + /// Maximum number of groups to return (default: 50). + pub max_results: usize, +} + +impl Default for SemanticDuplicationConfig { + fn default() -> Self { + Self { + similarity_threshold: 0.6, + min_features: 1, + skip_same_file: true, + max_feature_frequency: 20, + max_results: 50, + } + } +} + +/// Detect conceptual duplicates by comparing entity semantic feature sets via Jaccard similarity. +/// +/// Unlike token-based clone detection, this operates entirely on in-memory `entity.semantic_features` +/// (verb-object phrases from LLM lifting) and requires no disk I/O. +/// +/// Uses an inverted index to avoid O(n²) pair generation: only entity pairs sharing at +/// least one feature are considered candidates, reducing work dramatically on large graphs. 
+pub fn detect_semantic_duplicates( + graph: &RPGraph, + config: &SemanticDuplicationConfig, +) -> Vec { + // Step 1: Collect eligible entities (exclude Modules, require min_features) + let eligible: Vec<(&String, &str, &[String])> = graph + .entities + .iter() + .filter(|(_, e)| { + e.kind != EntityKind::Module && e.semantic_features.len() >= config.min_features + }) + .map(|(id, e)| { + let file = e.file.to_str().unwrap_or(""); + (id, file, e.semantic_features.as_slice()) + }) + .collect(); + + if eligible.len() < 2 { + return Vec::new(); + } + + // Step 2: Build inverted index: feature → Vec + // Skip features that appear in too many entities (too generic to be useful) + let mut feature_freq: HashMap<&str, usize> = HashMap::new(); + for (_, _, features) in &eligible { + for f in *features { + *feature_freq.entry(f.as_str()).or_insert(0) += 1; + } + } + + let mut inverted: HashMap<&str, Vec> = HashMap::new(); + for (idx, (_, _, features)) in eligible.iter().enumerate() { + for f in *features { + if feature_freq.get(f.as_str()).copied().unwrap_or(0) <= config.max_feature_frequency { + inverted.entry(f.as_str()).or_default().push(idx); + } + } + } + + // Step 3: Collect candidate pairs that share at least one feature + let mut shared_counts: HashMap<(usize, usize), usize> = HashMap::new(); + for indices in inverted.values() { + if indices.len() < 2 { + continue; + } + for i in 0..indices.len() { + for j in (i + 1)..indices.len() { + let a = indices[i].min(indices[j]); + let b = indices[i].max(indices[j]); + *shared_counts.entry((a, b)).or_insert(0) += 1; + } + } + } + + // Step 4: Compute exact Jaccard for candidates and filter by threshold + let mut groups: Vec = Vec::new(); + for ((a_idx, b_idx), shared_count) in &shared_counts { + let (a_id, a_file, a_features) = eligible[*a_idx]; + let (b_id, b_file, b_features) = eligible[*b_idx]; + + // Early bail: shared / max(|A|, |B|) is an upper bound on Jaccard + let upper_bound = *shared_count as f64 / 
a_features.len().max(b_features.len()) as f64; + if upper_bound < config.similarity_threshold { + continue; + } + + if config.skip_same_file && a_file == b_file { + continue; + } + + let a_set: HashSet<&str> = a_features.iter().map(|s| s.as_str()).collect(); + let b_set: HashSet<&str> = b_features.iter().map(|s| s.as_str()).collect(); + let sim = jaccard_similarity(&a_set, &b_set); + + if sim < config.similarity_threshold { + continue; + } + + let mut shared: Vec = a_set + .intersection(&b_set) + .map(|s| (*s).to_string()) + .collect(); + shared.sort(); + + groups.push(SemanticCloneGroup { + entities: vec![a_id.clone(), b_id.clone()], + similarity: (sim * 1000.0).round() / 1000.0, + shared_features: shared, + files: vec![a_file.to_string(), b_file.to_string()], + }); + } + + // Step 5: Sort by similarity descending, cap results + groups.sort_by(|a, b| { + b.similarity + .partial_cmp(&a.similarity) + .unwrap_or(std::cmp::Ordering::Equal) + }); + groups.truncate(config.max_results); + groups +} + +/// Token type for normalized code representation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum TokenType { + /// Identifier (variable, function, class name) - normalized + Identifier, + /// Keyword (if, else, fn, let, etc.) + Keyword, + /// Operator (+, -, *, /, =, etc.) + Operator, + /// Literal (number, string - replaced with placeholder) + Literal, + /// Punctuation ({, }, (, ), ;, etc.) + Punctuation, +} + +/// A normalized token for fingerprinting. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct Token { + kind: TokenType, + value: u64, +} + +/// Tokenize source code into normalized tokens. +/// +/// Per paper §3.4: strip whitespace/comments, normalize identifiers, +/// replace literals with placeholders for Type-2 clone detection. 
+fn tokenize(source: &str) -> Vec { + let mut tokens = Vec::new(); + let mut chars = source.chars().peekable(); + + while let Some(&ch) = chars.peek() { + match ch { + // Skip whitespace + ' ' | '\t' | '\n' | '\r' => { + chars.next(); + } + // Single-line comment + '/' if chars.clone().nth(1) == Some('/') => { + chars.next(); + chars.next(); + while let Some(&c) = chars.peek() { + if c == '\n' { + break; + } + chars.next(); + } + } + // Multi-line comment (Rust-style) + '/' if chars.clone().nth(1) == Some('*') => { + chars.next(); + chars.next(); + while let Some(&c) = chars.peek() { + if c == '*' && chars.clone().nth(1) == Some('/') { + chars.next(); + chars.next(); + break; + } + chars.next(); + } + } + // String literal + '"' | '\'' => { + let quote = ch; + chars.next(); + while let Some(&c) = chars.peek() { + chars.next(); + if c == quote { + break; + } + if c == '\\' { + chars.next(); + } + } + tokens.push(Token { + kind: TokenType::Literal, + value: hash_str("LIT"), + }); + } + // Number literal + '0'..='9' => { + while let Some(&c) = chars.peek() { + if c.is_ascii_digit() + || c == '.' + || c == 'x' + || c == 'X' + || c == 'e' + || c == 'E' + { + chars.next(); + } else { + break; + } + } + tokens.push(Token { + kind: TokenType::Literal, + value: hash_str("LIT"), + }); + } + // Identifier or keyword + 'a'..='z' | 'A'..='Z' | '_' => { + let mut ident = String::new(); + while let Some(&c) = chars.peek() { + if c.is_ascii_alphanumeric() || c == '_' { + ident.push(c); + chars.next(); + } else { + break; + } + } + let kind = if is_keyword(&ident) { + TokenType::Keyword + } else { + TokenType::Identifier + }; + // Normalize identifiers: hash by kind, not by name (Type-2 detection) + tokens.push(Token { + kind, + value: if kind == TokenType::Keyword { + hash_str(&ident) + } else { + hash_str("ID") + }, + }); + } + // Operators (multi-char first) + '<' | '>' | '=' | '!' 
| '&' | '|' | '+' | '-' | '*' | '/' | '%' | '^' => { + let mut op = String::new(); + op.push(chars.next().unwrap()); + // Check for two-char operators + if let Some(&c) = chars.peek() { + if matches!(c, '=' | '&' | '|' | '<' | '>' | '+') { + op.push(c); + chars.next(); + } + } + tokens.push(Token { + kind: TokenType::Operator, + value: hash_str(&op), + }); + } + // Punctuation + '{' | '}' | '(' | ')' | '[' | ']' | ';' | ':' | ',' | '.' | '#' | '@' | '~' | '?' => { + tokens.push(Token { + kind: TokenType::Punctuation, + value: hash_str(&ch.to_string()), + }); + chars.next(); + } + // Unknown - skip + _ => { + chars.next(); + } + } + } + + tokens +} + +/// Check if a string is a programming language keyword. +#[allow(clippy::match_same_arms)] +fn is_keyword(s: &str) -> bool { + matches!( + s, + // Rust + "fn" | "let" | "mut" | "const" | "static" | "pub" | "mod" | "use" | "crate" | "self" + | "Self" | "super" | "struct" | "enum" | "impl" | "trait" | "type" | "where" | "async" + | "await" | "move" | "ref" | "match" | "if" | "else" | "loop" | "while" | "for" | "in" + | "return" | "break" | "continue" | "unsafe" | "extern" | "dyn" | "as" + // TypeScript/JavaScript + | "function" | "var" | "class" | "interface" | "extends" | "implements" | "import" + | "export" | "from" | "default" | "new" | "this" | "typeof" | "instanceof" | "void" + | "null" | "undefined" | "true" | "false" | "try" | "catch" | "finally" | "throw" + | "switch" | "case" | "do" | "delete" | "yield" | "constructor" | "readonly" + // Python + | "def" | "lambda" | "pass" | "raise" | "except" | "with" | "assert" | "global" + | "nonlocal" | "print" | "elif" + // Go + | "package" | "go" | "chan" | "select" | "defer" | "fallthrough" | "goto" | "range" + | "map" | "make" | "append" | "copy" + // Java + | "public" | "private" | "protected" | "final" | "abstract" | "synchronized" + | "volatile" | "transient" | "native" | "strictfp" | "throws" + // C/C++ + | "int" | "char" | "float" | "double" | "long" | "short" | 
"unsigned" | "signed" + | "auto" | "register" | "inline" | "restrict" | "sizeof" | "typedef" + ) +} + +/// Hash a string to a u64 value. +fn hash_str(s: &str) -> u64 { + let mut hash: u64 = 0; + for byte in s.bytes() { + hash = (hash.wrapping_mul(HASH_BASE).wrapping_add(u64::from(byte))) % HASH_MOD; + } + hash +} + +/// Compute Rabin-Karp fingerprints for a token stream. +/// +/// Per paper Algorithm 4: slide a window of size w over tokens, +/// computing rolling hash for each window position. +fn compute_fingerprints(tokens: &[Token], window_size: usize) -> Vec { + if tokens.len() < window_size { + return Vec::new(); + } + + let mut fingerprints = Vec::with_capacity(tokens.len() - window_size + 1); + + // Pre-compute base^(window_size - 1) mod MOD for rolling hash. + // Use iterative modular exponentiation to avoid u64 overflow (256^49 >> u64::MAX). + let base_pow: u64 = (0..window_size - 1).fold(1u64, |acc, _| (acc * HASH_BASE) % HASH_MOD); + + // Compute initial window hash + let mut hash: u64 = 0; + for token in tokens.iter().take(window_size) { + hash = (hash.wrapping_mul(HASH_BASE).wrapping_add(token.value)) % HASH_MOD; + } + fingerprints.push(hash); + + // Roll the window + for i in window_size..tokens.len() { + // Remove leftmost token's contribution + let left_val = (tokens[i - window_size].value * base_pow) % HASH_MOD; + hash = (hash + HASH_MOD - left_val) % HASH_MOD; + // Add new token + hash = (hash.wrapping_mul(HASH_BASE).wrapping_add(tokens[i].value)) % HASH_MOD; + fingerprints.push(hash); + } + + fingerprints +} + +/// Entity with its source code and fingerprints. +#[derive(Debug)] +struct EntityFingerprints { + entity_id: String, + file: String, + fps: Vec, + token_count: usize, +} + +/// Detect duplication across entities in the graph. +/// +/// Per paper §3.4: compute fingerprints for each entity, store in HashMap, +/// find collisions indicating potential clones. 
+pub fn detect_duplication( + graph: &RPGraph, + project_root: &Path, + config: &DuplicationConfig, +) -> Vec { + use rayon::prelude::*; + + // Collect entities to analyze (skip Module entities) + let entities: Vec<_> = graph + .entities + .iter() + .filter(|(_, e)| e.kind != EntityKind::Module) + .collect(); + + // Phase 1: Cache file contents (read each file once, shared across entities) + let file_contents: HashMap = { + let unique_files: HashSet = entities + .iter() + .map(|(_, e)| project_root.join(&e.file)) + .collect(); + unique_files + .into_iter() + .filter_map(|p| std::fs::read_to_string(&p).ok().map(|s| (p, s))) + .collect() + }; + + // Phase 2: Per-entity tokenization using line ranges + let entity_fps: Vec = entities + .par_iter() + .filter_map(|(id, entity)| { + let file_path = project_root.join(&entity.file); + let source = file_contents.get(&file_path)?; + + // Extract only the entity's source lines (1-indexed → 0-indexed) + let lines: Vec<&str> = source.lines().collect(); + let start = entity.line_start.saturating_sub(1); + let end = entity.line_end.min(lines.len()); + if start >= end { + return None; + } + let entity_source = lines[start..end].join("\n"); + + let tokens = tokenize(&entity_source); + if tokens.len() < config.min_tokens { + return None; + } + + let fingerprints = compute_fingerprints(&tokens, config.window_size); + if fingerprints.is_empty() { + return None; + } + + Some(EntityFingerprints { + entity_id: (*id).to_string(), + file: entity.file.display().to_string(), + fps: fingerprints, + token_count: tokens.len(), + }) + }) + .collect(); + + // Build fingerprint -> entity mapping (find collisions) + let mut fingerprint_map: HashMap> = HashMap::new(); + for (idx, ef) in entity_fps.iter().enumerate() { + for &fp in &ef.fps { + fingerprint_map.entry(fp).or_default().push(idx); + } + } + + // Find entity pairs with high fingerprint overlap. 
+ // Deduplicate indices per fingerprint: the same entity can produce many + // matching windows for a single fingerprint value, so we must count each + // (entity_a, entity_b) pair at most once per fingerprint to keep + // similarity ≤ 1.0. + let mut pair_scores: HashMap<(usize, usize), usize> = HashMap::new(); + for indices in fingerprint_map.values() { + if indices.len() < 2 { + continue; + } + // Unique entity indices that share this fingerprint + let unique: Vec = { + let mut set: Vec = indices.clone(); + set.sort_unstable(); + set.dedup(); + set + }; + if unique.len() < 2 { + continue; + } + for i in 0..unique.len() { + for j in (i + 1)..unique.len() { + let a = unique[i]; // already sorted + let b = unique[j]; + *pair_scores.entry((a, b)).or_insert(0) += 1; + } + } + } + + // Convert to similarity and filter by threshold + let mut clone_groups: Vec = Vec::new(); + for ((a, b), shared) in pair_scores { + let ef_a = &entity_fps[a]; + let ef_b = &entity_fps[b]; + + // Duplication coefficient: shared fingerprints / min(fp_a, fp_b) + let min_fps = ef_a.fps.len().min(ef_b.fps.len()); + if min_fps == 0 { + continue; + } + + let similarity = shared as f64 / min_fps as f64; + if similarity < config.similarity_threshold { + continue; + } + + // Estimate duplicated tokens + let ratio = (shared as f64 / ef_a.fps.len().max(1) as f64).clamp(0.0, 1.0); + #[allow(clippy::cast_sign_loss)] // ratio is clamped to [0,1]; result is non-negative + let duplicated_tokens = (ratio * ef_a.token_count as f64).round() as usize; + + if duplicated_tokens < config.min_tokens { + continue; + } + + clone_groups.push(CloneGroup { + entities: vec![ef_a.entity_id.clone(), ef_b.entity_id.clone()], + similarity: (similarity * 1000.0).round() / 1000.0, + duplicated_tokens, + files: vec![ef_a.file.clone(), ef_b.file.clone()], + }); + } + + // Sort by similarity descending + clone_groups.sort_by(|a, b| { + b.similarity + .partial_cmp(&a.similarity) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + 
// Limit to top 50 groups to avoid overwhelming output + clone_groups.truncate(50); + + clone_groups +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tokenize_simple() { + let source = "fn foo() { let x = 1; }"; + let tokens = tokenize(source); + + // Should have tokens for: fn, foo, (, ), {, let, x, =, 1, ;, } + assert!(!tokens.is_empty()); + assert!(tokens.len() >= 8); + } + + #[test] + fn test_tokenize_normalizes_identifiers() { + let source1 = "let foo = 1;"; + let source2 = "let bar = 2;"; + + let tokens1 = tokenize(source1); + let tokens2 = tokenize(source2); + + // Both should have same token sequence (identifiers normalized to "ID") + let values1: Vec<_> = tokens1.iter().map(|t| t.value).collect(); + let values2: Vec<_> = tokens2.iter().map(|t| t.value).collect(); + + // Keywords and structure should match + assert_eq!(tokens1.len(), tokens2.len()); + assert_eq!(values1, values2); + } + + #[test] + fn test_tokenize_strips_comments() { + let source = "fn foo() { /* comment */ let x = 1; }\n// line comment\nlet y = 2;"; + let tokens = tokenize(source); + + // Comments should be stripped + let token_values: Vec<_> = tokens.iter().map(|t| t.value).collect(); + assert!(!token_values.iter().any(|&v| v == hash_str("comment"))); + } + + #[test] + fn test_tokenize_normalizes_literals() { + let source1 = "let x = 42;"; + let source2 = "let x = 999999;"; + + let tokens1 = tokenize(source1); + let tokens2 = tokenize(source2); + + // Literals should both be normalized to same value + let lit1 = tokens1.iter().find(|t| t.kind == TokenType::Literal); + let lit2 = tokens2.iter().find(|t| t.kind == TokenType::Literal); + + assert_eq!(lit1.map(|t| t.value), lit2.map(|t| t.value)); + } + + #[test] + fn test_fingerprints_deterministic() { + let source = "fn foo() { let x = 1; let y = 2; return x + y; }"; + let tokens = tokenize(source); + + let fp1 = compute_fingerprints(&tokens, 10); + let fp2 = compute_fingerprints(&tokens, 10); + + assert_eq!(fp1, fp2); 
+ } + + #[test] + fn test_fingerprints_empty_for_short_input() { + let source = "fn"; + let tokens = tokenize(source); + + let fp = compute_fingerprints(&tokens, 10); + + assert!(fp.is_empty()); + } + + #[test] + fn test_identical_code_high_similarity() { + let source = r" + fn calculate_total(items: &[Item]) -> f64 { + let mut total = 0.0; + for item in items { + total += item.price * item.quantity; + } + total + } + "; + + let tokens = tokenize(source); + let fps = compute_fingerprints(&tokens, DEFAULT_WINDOW_SIZE); + + // Same code should have matching fingerprints + let tokens2 = tokenize(source); + let fps2 = compute_fingerprints(&tokens2, DEFAULT_WINDOW_SIZE); + + assert_eq!(fps, fps2); + } + + #[test] + fn test_type2_clone_detection() { + // Type-2: same structure, renamed identifiers + let source1 = r" + fn process_data(input: &str) -> String { + let result = input.to_uppercase(); + result.trim().to_string() + } + "; + + let source2 = r" + fn handle_text(data: &str) -> String { + let output = data.to_uppercase(); + output.trim().to_string() + } + "; + + let tokens1 = tokenize(source1); + let tokens2 = tokenize(source2); + + // Structure should be identical after normalization + let values1: Vec<_> = tokens1.iter().map(|t| t.value).collect(); + let values2: Vec<_> = tokens2.iter().map(|t| t.value).collect(); + + assert_eq!( + values1, values2, + "Type-2 clones should normalize to same tokens" + ); + } + + #[test] + fn test_is_keyword() { + assert!(is_keyword("fn")); + assert!(is_keyword("let")); + assert!(is_keyword("function")); + assert!(is_keyword("class")); + assert!(is_keyword("def")); + assert!(!is_keyword("my_function")); + assert!(!is_keyword("MyClass")); + assert!(!is_keyword("variable_name")); + } + + // --- Semantic duplication tests --- + + use rpg_core::graph::{Entity, EntityDeps}; + use std::path::PathBuf; + + fn make_entity_with_features(id: &str, file: &str, features: Vec<&str>) -> Entity { + Entity { + id: id.to_string(), + kind: 
EntityKind::Function, + name: id.to_string(), + file: PathBuf::from(file), + line_start: 1, + line_end: 10, + parent_class: None, + semantic_features: features.into_iter().map(|s| s.to_string()).collect(), + feature_source: Some("llm".to_string()), + hierarchy_path: String::new(), + deps: EntityDeps::default(), + signature: None, + } + } + + #[test] + fn test_semantic_duplicates_identical_features() { + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "src/a.rs:process".to_string(), + make_entity_with_features( + "src/a.rs:process", + "src/a.rs", + vec!["validate input", "handle error"], + ), + ); + graph.entities.insert( + "src/b.rs:handle".to_string(), + make_entity_with_features( + "src/b.rs:handle", + "src/b.rs", + vec!["validate input", "handle error"], + ), + ); + + let config = SemanticDuplicationConfig { + similarity_threshold: 0.6, + skip_same_file: true, + ..Default::default() + }; + let groups = detect_semantic_duplicates(&graph, &config); + + assert_eq!(groups.len(), 1); + assert!((groups[0].similarity - 1.0).abs() < 0.001); + assert_eq!(groups[0].shared_features.len(), 2); + } + + #[test] + fn test_semantic_duplicates_skips_same_file() { + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "src/a.rs:foo".to_string(), + make_entity_with_features( + "src/a.rs:foo", + "src/a.rs", + vec!["validate input", "return result"], + ), + ); + graph.entities.insert( + "src/a.rs:bar".to_string(), + make_entity_with_features( + "src/a.rs:bar", + "src/a.rs", + vec!["validate input", "return result"], + ), + ); + + // skip_same_file=true should suppress the pair + let config = SemanticDuplicationConfig { + similarity_threshold: 0.5, + skip_same_file: true, + ..Default::default() + }; + assert!(detect_semantic_duplicates(&graph, &config).is_empty()); + + // skip_same_file=false should surface it + let config2 = SemanticDuplicationConfig { + similarity_threshold: 0.5, + skip_same_file: false, + ..Default::default() + }; + 
assert_eq!(detect_semantic_duplicates(&graph, &config2).len(), 1); + } + + #[test] + fn test_semantic_duplicates_skips_unlifted_entities() { + let mut graph = RPGraph::new("rust"); + // Unlifted entity (no features) — must not participate + graph.entities.insert( + "src/a.rs:empty".to_string(), + make_entity_with_features("src/a.rs:empty", "src/a.rs", vec![]), + ); + // Two lifted entities from different files with same feature + graph.entities.insert( + "src/b.rs:lifted_one".to_string(), + make_entity_with_features("src/b.rs:lifted_one", "src/b.rs", vec!["handle request"]), + ); + graph.entities.insert( + "src/c.rs:lifted_two".to_string(), + make_entity_with_features("src/c.rs:lifted_two", "src/c.rs", vec!["handle request"]), + ); + + let config = SemanticDuplicationConfig { + similarity_threshold: 0.9, + min_features: 1, + ..Default::default() + }; + let groups = detect_semantic_duplicates(&graph, &config); + + // Only the two lifted entities should match; the unlifted one must not appear + assert_eq!(groups.len(), 1); + assert!( + !groups[0].entities.contains(&"src/a.rs:empty".to_string()), + "unlifted entity must not appear in semantic clone groups" + ); + } + + // --- Per-entity token-based detection tests --- + + /// Helper: create an Entity with specific line range (for detect_duplication tests). + fn make_entity_at_lines( + id: &str, + file: &str, + line_start: usize, + line_end: usize, + ) -> Entity { + Entity { + id: id.to_string(), + kind: EntityKind::Function, + name: id.to_string(), + file: PathBuf::from(file), + line_start, + line_end, + parent_class: None, + semantic_features: Vec::new(), + feature_source: None, + hierarchy_path: String::new(), + deps: EntityDeps::default(), + signature: None, + } + } + + #[test] + fn test_detect_duplication_identical_functions() { + // Two files each containing the same function at known line ranges. + // detect_duplication should find them as a clone pair. 
+ let dir = tempfile::tempdir().unwrap(); + + let func_code = r#"fn looks_like_custom_hook(name: &str) -> bool { + if !name.starts_with("use") || name.len() <= 3 { + return false; + } + name.chars().nth(3).is_some_and(|c| c.is_ascii_uppercase()) +} +"#; + // File A: function at lines 1-6 + let file_a = dir.path().join("a.rs"); + std::fs::write(&file_a, func_code).unwrap(); + + // File B: preamble + same function at lines 3-8 + let file_b = dir.path().join("b.rs"); + std::fs::write( + &file_b, + format!("// preamble\nuse std::io;\n{}", func_code), + ) + .unwrap(); + + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "a.rs:looks_like_custom_hook".to_string(), + make_entity_at_lines("a.rs:looks_like_custom_hook", "a.rs", 1, 6), + ); + graph.entities.insert( + "b.rs:looks_like_custom_hook".to_string(), + make_entity_at_lines("b.rs:looks_like_custom_hook", "b.rs", 3, 8), + ); + + let config = DuplicationConfig { + window_size: 10, + min_tokens: 10, + similarity_threshold: 0.5, + }; + let groups = detect_duplication(&graph, dir.path(), &config); + + assert!( + !groups.is_empty(), + "identical functions across files must be detected as clones" + ); + assert!( + groups[0].similarity <= 1.0, + "similarity must not exceed 1.0, got {}", + groups[0].similarity + ); + assert!( + groups[0].similarity > 0.7, + "identical functions should have high similarity, got {}", + groups[0].similarity + ); + } + + #[test] + fn test_detect_duplication_similarity_bounded() { + // Ensure the dedup fix keeps similarity ≤ 1.0 even with many fingerprint collisions. 
+ let dir = tempfile::tempdir().unwrap(); + + // Two entities with the EXACT same source → shared fingerprints == min fingerprints + let source = "fn compute(x: i32, y: i32) -> i32 { let result = x + y; result * result }\n"; + let file_a = dir.path().join("x.rs"); + let file_b = dir.path().join("y.rs"); + std::fs::write(&file_a, source).unwrap(); + std::fs::write(&file_b, source).unwrap(); + + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "x.rs:compute".to_string(), + make_entity_at_lines("x.rs:compute", "x.rs", 1, 1), + ); + graph.entities.insert( + "y.rs:compute".to_string(), + make_entity_at_lines("y.rs:compute", "y.rs", 1, 1), + ); + + let config = DuplicationConfig { + window_size: 5, + min_tokens: 5, + similarity_threshold: 0.1, + }; + let groups = detect_duplication(&graph, dir.path(), &config); + + for group in &groups { + assert!( + group.similarity <= 1.0, + "similarity must be ≤ 1.0 after dedup fix, got {}", + group.similarity + ); + } + } + + #[test] + fn test_detect_duplication_no_clones_for_different_code() { + // Two completely different functions should NOT be reported as clones. 
+ let dir = tempfile::tempdir().unwrap(); + + let file_a = dir.path().join("add.rs"); + std::fs::write(&file_a, "fn add(a: i32, b: i32) -> i32 { a + b }\n").unwrap(); + + let file_b = dir.path().join("greet.rs"); + std::fs::write( + &file_b, + "fn greet(name: &str) -> String { format!(\"Hello, {}!\", name) }\n", + ) + .unwrap(); + + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "add.rs:add".to_string(), + make_entity_at_lines("add.rs:add", "add.rs", 1, 1), + ); + graph.entities.insert( + "greet.rs:greet".to_string(), + make_entity_at_lines("greet.rs:greet", "greet.rs", 1, 1), + ); + + let config = DuplicationConfig { + window_size: 5, + min_tokens: 5, + similarity_threshold: 0.7, + }; + let groups = detect_duplication(&graph, dir.path(), &config); + + assert!( + groups.is_empty(), + "completely different functions should not be reported as clones" + ); + } + + #[test] + fn test_detect_duplication_invalid_line_range() { + // Entity with line_start > line_end or beyond file length → no panic, just skipped. + let dir = tempfile::tempdir().unwrap(); + + let file_a = dir.path().join("short.rs"); + std::fs::write(&file_a, "fn tiny() {}\n").unwrap(); // 1 line + + let mut graph = RPGraph::new("rust"); + // line_start beyond file length + graph.entities.insert( + "short.rs:far".to_string(), + make_entity_at_lines("short.rs:far", "short.rs", 100, 200), + ); + // line_start = 0 (edge: saturating_sub converts to 0-indexed start of 0) + graph.entities.insert( + "short.rs:zero".to_string(), + make_entity_at_lines("short.rs:zero", "short.rs", 0, 1), + ); + + let config = DuplicationConfig::default(); + // Must not panic + let groups = detect_duplication(&graph, dir.path(), &config); + // No meaningful pairs expected from degenerate ranges + assert!(groups.is_empty() || groups.iter().all(|g| g.similarity <= 1.0)); + } + + #[test] + fn test_detect_duplication_missing_file() { + // Entity referencing a non-existent file → gracefully skipped, no panic. 
+ let dir = tempfile::tempdir().unwrap(); + + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "gone.rs:phantom".to_string(), + make_entity_at_lines("gone.rs:phantom", "gone.rs", 1, 10), + ); + + let config = DuplicationConfig::default(); + let groups = detect_duplication(&graph, dir.path(), &config); + assert!( + groups.is_empty(), + "missing files should be skipped, not cause errors" + ); + } +} diff --git a/crates/rpg-nav/src/health.rs b/crates/rpg-nav/src/health.rs new file mode 100644 index 0000000..c48f081 --- /dev/null +++ b/crates/rpg-nav/src/health.rs @@ -0,0 +1,587 @@ +//! Health analysis: coupling, instability, centrality, and god object detection. +//! +//! Implements the CHM (Code Health Meter) metrics from the paper: +//! - In-degree (afferent coupling, Ca) +//! - Out-degree (efferent coupling, Ce) +//! - Instability index I = Ce / (Ca + Ce) +//! - Degree centrality (normalized) +//! - God Object heuristic (high degree + extreme instability) + +use crate::duplication::{ + CloneGroup, DuplicationConfig, SemanticCloneGroup, SemanticDuplicationConfig, + detect_duplication, detect_semantic_duplicates, +}; +use rpg_core::graph::{EdgeKind, EntityKind, RPGraph}; +use serde::Serialize; +use std::collections::HashMap; +use std::path::Path; + +/// Edge kinds that represent dependency relationships (not structural containment). +const DEPENDENCY_EDGE_KINDS: &[EdgeKind] = &[ + EdgeKind::Imports, + EdgeKind::Invokes, + EdgeKind::Inherits, + EdgeKind::Composes, + EdgeKind::Renders, + EdgeKind::ReadsState, + EdgeKind::WritesState, + EdgeKind::Dispatches, + EdgeKind::DataFlow, +]; + +/// A health issue detected for an entity. +#[derive(Debug, Clone, Serialize, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum HealthIssue { + /// Entity has high total degree and extreme instability (god object). 
+ PotentialGodObject { + total_degree: usize, + instability: f64, + }, + /// Entity has high instability (> threshold), indicating it's dependent on many. + HighlyUnstable { instability: f64, out_degree: usize }, + /// Entity has very low instability (< 0.3), indicating it's depended on by many. + HighlyStable { instability: f64, in_degree: usize }, + /// Entity has high total degree (hub). + HubEntity { total_degree: usize }, +} + +/// Health metrics for a single entity. +#[derive(Debug, Clone, Serialize)] +pub struct EntityHealth { + pub entity_id: String, + pub name: String, + pub file: String, + pub kind: String, + /// Afferent coupling (Ca): number of incoming dependency edges. + pub in_degree: usize, + /// Efferent coupling (Ce): number of outgoing dependency edges. + pub out_degree: usize, + /// Instability index: Ce / (Ca + Ce). Range [0, 1]. + /// I ≈ 1: unstable (depends on many) + /// I ≈ 0: stable (depended on by many) + pub instability: f64, + /// Degree centrality: total_degree / (n - 1), where n = total entities. + pub centrality: f64, + /// Detected health issues for this entity. + #[serde(skip_serializing_if = "Vec::is_empty")] + pub issues: Vec, +} + +/// Aggregate health statistics for the codebase. +#[derive(Debug, Clone, Serialize)] +pub struct HealthSummary { + pub total_entities: usize, + pub analyzed_entities: usize, + pub total_dependency_edges: usize, + pub avg_in_degree: f64, + pub avg_out_degree: f64, + pub avg_instability: f64, + pub avg_centrality: f64, + pub god_object_count: usize, + pub highly_unstable_count: usize, + pub highly_stable_count: usize, + pub hub_count: usize, +} + +/// Complete health analysis report. 
+#[derive(Debug, Clone, Serialize)] +pub struct HealthReport { + pub summary: HealthSummary, + pub entities: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub duplicates: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub semantic_duplicates: Option>, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub top_unstable: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub top_god_objects: Vec, +} + +/// Configuration for health analysis. +#[derive(Debug, Clone)] +pub struct HealthConfig { + /// Instability threshold for flagging highly unstable entities. + pub instability_threshold: f64, + /// Minimum total degree to consider as a hub. + pub hub_threshold: usize, + /// Minimum total degree for god object detection. + pub god_object_degree_threshold: usize, + /// Instability extreme threshold for god object (must be > this or < 1-this). + pub god_object_instability_threshold: f64, + /// Maximum entities to include in top lists. + pub top_n: usize, + /// Include token-based duplication detection (reads source files from disk, slower). + pub include_duplication: bool, + /// Duplication detection config. + pub duplication_config: DuplicationConfig, + /// Include semantic duplication detection via Jaccard similarity on lifted features (in-memory, fast). + pub include_semantic_duplication: bool, + /// Semantic duplication detection config. + pub semantic_duplication_config: SemanticDuplicationConfig, +} + +impl Default for HealthConfig { + fn default() -> Self { + Self { + instability_threshold: 0.7, + hub_threshold: 8, + god_object_degree_threshold: 10, + god_object_instability_threshold: 0.7, + top_n: 10, + include_duplication: false, + duplication_config: DuplicationConfig::default(), + include_semantic_duplication: false, + semantic_duplication_config: SemanticDuplicationConfig::default(), + } + } +} + +/// Compute health metrics for all entities in the graph. 
+pub fn compute_health(graph: &RPGraph, config: &HealthConfig) -> HealthReport { + let total_entities = graph.entities.len(); + let n = total_entities; + let normalizer = if n > 1 { (n - 1) as f64 } else { 1.0 }; + + // Count dependency edges (exclude Contains) + let total_dependency_edges = graph + .edges + .iter() + .filter(|e| e.kind != EdgeKind::Contains) + .count(); + + // Compute in-degree and out-degree for each entity + let mut in_degrees: HashMap<&str, usize> = HashMap::with_capacity(total_entities); + let mut out_degrees: HashMap<&str, usize> = HashMap::with_capacity(total_entities); + + for edge in &graph.edges { + if !DEPENDENCY_EDGE_KINDS.contains(&edge.kind) { + continue; + } + *out_degrees.entry(edge.source.as_str()).or_insert(0) += 1; + *in_degrees.entry(edge.target.as_str()).or_insert(0) += 1; + } + + // Build entity health records + let mut entities: Vec = Vec::with_capacity(total_entities); + let mut god_object_count = 0usize; + let mut highly_unstable_count = 0usize; + let mut highly_stable_count = 0usize; + let mut hub_count = 0usize; + + for (id, entity) in &graph.entities { + // Skip Module entities (file-level) for analysis + if entity.kind == EntityKind::Module { + continue; + } + + let in_degree = *in_degrees.get(id.as_str()).unwrap_or(&0); + let out_degree = *out_degrees.get(id.as_str()).unwrap_or(&0); + let total_degree = in_degree + out_degree; + + // Instability: Ce / (Ca + Ce) + // Handle edge case where both are 0 + let instability = if total_degree == 0 { + 0.0 + } else { + out_degree as f64 / total_degree as f64 + }; + + // Degree centrality (normalized) + let centrality = total_degree as f64 / normalizer; + + // Detect issues + let mut issues = Vec::new(); + + // God Object: high degree + extreme instability + if total_degree >= config.god_object_degree_threshold + && (instability > config.god_object_instability_threshold + || instability < (1.0 - config.god_object_instability_threshold)) + { + 
issues.push(HealthIssue::PotentialGodObject { + total_degree, + instability, + }); + god_object_count += 1; + } + + // High instability + if instability > config.instability_threshold && out_degree > 0 { + issues.push(HealthIssue::HighlyUnstable { + instability, + out_degree, + }); + highly_unstable_count += 1; + } + + // High stability (depended on by many) + if instability < (1.0 - config.instability_threshold) && in_degree > 0 { + issues.push(HealthIssue::HighlyStable { + instability, + in_degree, + }); + highly_stable_count += 1; + } + + // Hub entity + if total_degree >= config.hub_threshold { + issues.push(HealthIssue::HubEntity { total_degree }); + hub_count += 1; + } + + entities.push(EntityHealth { + entity_id: id.clone(), + name: entity.name.clone(), + file: entity.file.display().to_string(), + kind: format!("{:?}", entity.kind).to_lowercase(), + in_degree, + out_degree, + instability: clean_float(instability), + centrality: clean_float(centrality), + issues, + }); + } + + // Compute summary statistics + let analyzed = entities.len(); + let total_in: usize = entities.iter().map(|e| e.in_degree).sum(); + let total_out: usize = entities.iter().map(|e| e.out_degree).sum(); + let total_instability: f64 = entities.iter().map(|e| e.instability).sum(); + let total_centrality: f64 = entities.iter().map(|e| e.centrality).sum(); + + let avg_in_degree = if analyzed > 0 { + total_in as f64 / analyzed as f64 + } else { + 0.0 + }; + let avg_out_degree = if analyzed > 0 { + total_out as f64 / analyzed as f64 + } else { + 0.0 + }; + let avg_instability = if analyzed > 0 { + total_instability / analyzed as f64 + } else { + 0.0 + }; + let avg_centrality = if analyzed > 0 { + total_centrality / analyzed as f64 + } else { + 0.0 + }; + + // Sort by instability for top unstable + let mut sorted_by_instability = entities.clone(); + sorted_by_instability.sort_by(|a, b| { + b.instability + .partial_cmp(&a.instability) + .unwrap_or(std::cmp::Ordering::Equal) + }); + let 
top_unstable: Vec = sorted_by_instability + .into_iter() + .filter(|e| e.instability > config.instability_threshold) + .take(config.top_n) + .collect(); + + // Sort by god object score for top god objects + let mut sorted_by_god = entities.clone(); + sorted_by_god.sort_by(|a, b| { + let a_score = a + .issues + .iter() + .filter(|i| matches!(i, HealthIssue::PotentialGodObject { .. })) + .count(); + let b_score = b + .issues + .iter() + .filter(|i| matches!(i, HealthIssue::PotentialGodObject { .. })) + .count(); + b_score.cmp(&a_score).then_with(|| { + let a_degree: usize = a.in_degree + a.out_degree; + let b_degree: usize = b.in_degree + b.out_degree; + b_degree.cmp(&a_degree) + }) + }); + let top_god_objects: Vec = sorted_by_god + .into_iter() + .filter(|e| { + e.issues + .iter() + .any(|i| matches!(i, HealthIssue::PotentialGodObject { .. })) + }) + .take(config.top_n) + .collect(); + + let summary = HealthSummary { + total_entities, + analyzed_entities: analyzed, + total_dependency_edges, + avg_in_degree: clean_float(avg_in_degree), + avg_out_degree: clean_float(avg_out_degree), + avg_instability: clean_float(avg_instability), + avg_centrality: clean_float(avg_centrality), + god_object_count, + highly_unstable_count, + highly_stable_count, + hub_count, + }; + + // Sort entities by entity_id for deterministic output + entities.sort_by(|a, b| a.entity_id.cmp(&b.entity_id)); + + HealthReport { + summary, + entities, + duplicates: None, + semantic_duplicates: None, + top_unstable, + top_god_objects, + } +} + +/// Compute health metrics with optional duplication detection. +/// This is the main entry point for MCP tool. 
+pub fn compute_health_full( + graph: &RPGraph, + project_root: &Path, + config: &HealthConfig, +) -> HealthReport { + let mut report = compute_health(graph, config); + + if config.include_duplication { + report.duplicates = Some(detect_duplication( + graph, + project_root, + &config.duplication_config, + )); + } + + if config.include_semantic_duplication { + report.semantic_duplicates = Some(detect_semantic_duplicates( + graph, + &config.semantic_duplication_config, + )); + } + + report +} + +/// Clean a float: NaN/Infinity → 0, round to 6 decimals. +fn clean_float(v: f64) -> f64 { + if v.is_nan() || v.is_infinite() { + return 0.0; + } + (v * 1_000_000.0).round() / 1_000_000.0 +} + +#[cfg(test)] +mod tests { + use super::*; + use rpg_core::graph::{DependencyEdge, Entity, EntityDeps}; + use std::path::PathBuf; + + fn make_entity(id: &str, name: &str, kind: EntityKind) -> Entity { + Entity { + id: id.to_string(), + kind, + name: name.to_string(), + file: PathBuf::from("src/lib.rs"), + line_start: 1, + line_end: 5, + parent_class: None, + semantic_features: vec![], + feature_source: None, + hierarchy_path: String::new(), + deps: EntityDeps::default(), + signature: None, + } + } + + fn make_test_graph() -> RPGraph { + // A -> B -> C (linear chain via Invokes) + // A -> C (direct edge) + // Total edges: A has out_degree=2, in_degree=0 + // B has out_degree=1, in_degree=1 + // C has out_degree=0, in_degree=2 + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "a".to_string(), + make_entity("a", "fn_a", EntityKind::Function), + ); + graph.entities.insert( + "b".to_string(), + make_entity("b", "fn_b", EntityKind::Function), + ); + graph.entities.insert( + "c".to_string(), + make_entity("c", "fn_c", EntityKind::Function), + ); + graph.edges = vec![ + DependencyEdge { + source: "a".to_string(), + target: "b".to_string(), + kind: EdgeKind::Invokes, + }, + DependencyEdge { + source: "a".to_string(), + target: "c".to_string(), + kind: EdgeKind::Invokes, + }, + 
DependencyEdge { + source: "b".to_string(), + target: "c".to_string(), + kind: EdgeKind::Invokes, + }, + ]; + graph.refresh_metadata(); + graph + } + + #[test] + fn test_compute_health_linear_chain() { + let graph = make_test_graph(); + let config = HealthConfig::default(); + let report = compute_health(&graph, &config); + + assert_eq!(report.summary.analyzed_entities, 3); + assert_eq!(report.summary.total_dependency_edges, 3); + + // Find entity A + let a = report.entities.iter().find(|e| e.entity_id == "a").unwrap(); + assert_eq!(a.in_degree, 0); + assert_eq!(a.out_degree, 2); + assert!((a.instability - 1.0).abs() < 0.001); // Fully unstable + + // Find entity C + let c = report.entities.iter().find(|e| e.entity_id == "c").unwrap(); + assert_eq!(c.in_degree, 2); + assert_eq!(c.out_degree, 0); + assert!((c.instability - 0.0).abs() < 0.001); // Fully stable + + // Find entity B + let b = report.entities.iter().find(|e| e.entity_id == "b").unwrap(); + assert_eq!(b.in_degree, 1); + assert_eq!(b.out_degree, 1); + assert!((b.instability - 0.5).abs() < 0.001); // Balanced + } + + #[test] + fn test_god_object_detection() { + let mut graph = RPGraph::new("rust"); + + // Create a god object with 12 edges + graph.entities.insert( + "god".to_string(), + make_entity("god", "GodClass", EntityKind::Class), + ); + + // Add many dependencies (8 outgoing + 4 incoming = 12 total) + for i in 0..8 { + let dep_id = format!("dep_{}", i); + graph.entities.insert( + dep_id.clone(), + make_entity(&dep_id, &dep_id, EntityKind::Function), + ); + graph.edges.push(DependencyEdge { + source: "god".to_string(), + target: dep_id, + kind: EdgeKind::Invokes, + }); + } + for i in 0..4 { + let caller_id = format!("caller_{}", i); + graph.entities.insert( + caller_id.clone(), + make_entity(&caller_id, &caller_id, EntityKind::Function), + ); + graph.edges.push(DependencyEdge { + source: caller_id, + target: "god".to_string(), + kind: EdgeKind::Invokes, + }); + } + + graph.refresh_metadata(); + + let 
config = HealthConfig { + god_object_degree_threshold: 10, + god_object_instability_threshold: 0.7, + ..Default::default() + }; + let report = compute_health(&graph, &config); + + // The god entity should be flagged + assert!(report.summary.god_object_count > 0 || report.summary.hub_count > 0); + + let god = report + .entities + .iter() + .find(|e| e.entity_id == "god") + .unwrap(); + assert_eq!(god.in_degree, 4); + assert_eq!(god.out_degree, 8); + assert_eq!(god.in_degree + god.out_degree, 12); + } + + #[test] + fn test_centrality_normalization() { + let graph = make_test_graph(); + let config = HealthConfig::default(); + let report = compute_health(&graph, &config); + + // Centrality should be <= 1.0 for all entities + for entity in &report.entities { + assert!(entity.centrality <= 1.0); + assert!(entity.centrality >= 0.0); + } + } + + #[test] + fn test_empty_graph() { + let graph = RPGraph::new("rust"); + let config = HealthConfig::default(); + let report = compute_health(&graph, &config); + + assert_eq!(report.summary.total_entities, 0); + assert_eq!(report.summary.analyzed_entities, 0); + assert!(report.entities.is_empty()); + } + + #[test] + fn test_skip_module_entities() { + let mut graph = RPGraph::new("rust"); + graph.entities.insert( + "mod1".to_string(), + make_entity("mod1", "module", EntityKind::Module), + ); + graph.entities.insert( + "fn1".to_string(), + make_entity("fn1", "function", EntityKind::Function), + ); + graph.refresh_metadata(); + + let config = HealthConfig::default(); + let report = compute_health(&graph, &config); + + // Only the function should be analyzed + assert_eq!(report.summary.analyzed_entities, 1); + assert_eq!(report.entities.len(), 1); + assert_eq!(report.entities[0].entity_id, "fn1"); + } + + #[test] + fn test_deterministic_output() { + let graph = make_test_graph(); + let config = HealthConfig::default(); + + let report1 = compute_health(&graph, &config); + let report2 = compute_health(&graph, &config); + + // Entities 
should be sorted by ID for deterministic output + assert_eq!(report1.entities.len(), report2.entities.len()); + for (e1, e2) in report1.entities.iter().zip(report2.entities.iter()) { + assert_eq!(e1.entity_id, e2.entity_id); + } + } +} diff --git a/crates/rpg-nav/src/lib.rs b/crates/rpg-nav/src/lib.rs index 6835de7..a883fa1 100644 --- a/crates/rpg-nav/src/lib.rs +++ b/crates/rpg-nav/src/lib.rs @@ -1,16 +1,19 @@ //! Navigation tools for querying the Repository Planning Graph. //! //! Provides SearchNode (intent-based discovery), FetchNode (entity details), -//! ExploreRPG (dependency traversal), and TOON serialization for LLM-optimized output. +//! ExploreRPG (dependency traversal), Health analysis, Duplication detection, +//! and TOON serialization for LLM-optimized output. pub mod context; pub mod dataflow; pub mod diff; +pub mod duplication; #[cfg(feature = "embeddings")] pub mod embeddings; pub mod explore; pub mod export; pub mod fetch; +pub mod health; pub mod impact; pub mod paths; pub mod planner; diff --git a/crates/rpg-nav/src/search.rs b/crates/rpg-nav/src/search.rs index 831c3bc..c785deb 100644 --- a/crates/rpg-nav/src/search.rs +++ b/crates/rpg-nav/src/search.rs @@ -231,9 +231,8 @@ pub fn search_with_params(graph: &RPGraph, params: &SearchParams) -> Vec, b: &HashSet<&str>) -> f64 { +/// is used for semantic duplication detection in duplication.rs. 
+pub(crate) fn jaccard_similarity(a: &HashSet<&str>, b: &HashSet<&str>) -> f64 { if a.is_empty() && b.is_empty() { return 0.0; } diff --git a/crates/rpg-nav/src/toon.rs b/crates/rpg-nav/src/toon.rs index 38d3c6a..2ebe598 100644 --- a/crates/rpg-nav/src/toon.rs +++ b/crates/rpg-nav/src/toon.rs @@ -704,6 +704,156 @@ fn clean_score(v: f64) -> f64 { (v * 1_000_000.0).round() / 1_000_000.0 } +// --------------------------------------------------------------------------- +// Health report output +// --------------------------------------------------------------------------- + +use crate::health::HealthReport; + +/// Format a health report as TOON for LLM consumption. +pub fn format_health_report(report: &HealthReport) -> String { + let mut output = String::new(); + + // Summary section + output.push_str("# Code Health Analysis\n\n"); + output.push_str(&format!( + "entities: {} ({} analyzed)\n", + report.summary.total_entities, report.summary.analyzed_entities + )); + output.push_str(&format!( + "dependency_edges: {}\n", + report.summary.total_dependency_edges + )); + output.push_str(&format!( + "avg_instability: {:.3}\n", + report.summary.avg_instability + )); + output.push_str(&format!( + "avg_centrality: {:.4}\n", + report.summary.avg_centrality + )); + output.push_str(&format!( + "god_objects: {}\n", + report.summary.god_object_count + )); + output.push_str(&format!( + "highly_unstable: {}\n", + report.summary.highly_unstable_count + )); + output.push_str(&format!( + "highly_stable: {}\n", + report.summary.highly_stable_count + )); + output.push_str(&format!("hubs: {}\n", report.summary.hub_count)); + + // Top unstable entities + if !report.top_unstable.is_empty() { + output.push_str("\n## Top Unstable Entities (I > 0.7)\n\n"); + for entity in &report.top_unstable { + output.push_str(&format!( + "- {} ({}) | instability={:.3} | in={} out={}\n", + entity.entity_id, + entity.kind, + entity.instability, + entity.in_degree, + entity.out_degree + )); + } + } + + // Top god 
objects + if !report.top_god_objects.is_empty() { + output.push_str("\n## God Object Candidates\n\n"); + for entity in &report.top_god_objects { + output.push_str(&format!( + "- {} ({}) | degree={} | instability={:.3}\n", + entity.entity_id, + entity.kind, + entity.in_degree + entity.out_degree, + entity.instability + )); + } + } + + // Duplication info if present + if let Some(ref dupes) = report.duplicates { + output.push_str("\n## Duplication Hotspots\n\n"); + if dupes.is_empty() { + output.push_str("No token-based clones detected.\n"); + } else { + for group in dupes.iter().take(10) { + output.push_str(&format!( + "- similarity={:.1}% | tokens={} | entities={}\n", + group.similarity * 100.0, + group.duplicated_tokens, + group.entities.len() + )); + } + } + } + + // Semantic duplication info if present + if let Some(ref sem_dupes) = report.semantic_duplicates { + if !sem_dupes.is_empty() { + output.push_str("\n## Semantic Duplication (Conceptual Clones)\n\n"); + output.push_str( + "Entities sharing similar intent (lifted feature overlap). \ + May indicate accidental duplication or a missing abstraction.\n\n", + ); + for group in sem_dupes.iter().take(10) { + output.push_str(&format!( + "- similarity={:.1}% | shared: [{}]\n", + group.similarity * 100.0, + group.shared_features.join(", ") + )); + for (id, file) in group.entities.iter().zip(group.files.iter()) { + output.push_str(&format!(" {} ({})\n", id, file)); + } + } + } + } + + // Recommendations + output.push_str("\n## Recommendations\n\n"); + if report.summary.god_object_count > 0 { + output.push_str(&format!( + "1. **Refactor god objects**: {} entities have high coupling. Consider extracting responsibilities.\n", + report.summary.god_object_count + )); + } + if report.summary.highly_unstable_count > report.summary.analyzed_entities / 3 { + output.push_str(&format!( + "2. **Reduce instability**: {} entities are highly unstable. 
Consider introducing stable abstractions.\n", + report.summary.highly_unstable_count + )); + } + if report.summary.hub_count > 0 { + output.push_str(&format!( + "3. **Review hub entities**: {} entities act as hubs. Ensure they have focused responsibilities.\n", + report.summary.hub_count + )); + } + if let Some(ref sem_dupes) = report.semantic_duplicates { + if !sem_dupes.is_empty() { + output.push_str(&format!( + "4. **Extract shared abstractions**: {} entity pairs share similar intent. \ + Consider introducing a shared interface or helper.\n", + sem_dupes.len() + )); + } + } + if report.summary.god_object_count == 0 + && report.summary.highly_unstable_count == 0 + && report.summary.hub_count == 0 + { + output.push_str( + "✅ No major architectural issues detected. The codebase shows good modularity.\n", + ); + } + + output +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- From 06813a08491961132f8fae0ff2907d35723533fb Mon Sep 17 00:00:00 2001 From: VooDisss Date: Wed, 25 Feb 2026 12:16:57 +0200 Subject: [PATCH 2/3] feat: Introduce initial duplication detection module and related documentation, configuration, and audit reports. 
--- crates/rpg-nav/src/duplication.rs | 25 ++++++---------- crates/rpg-nav/src/toon.rs | 48 +++++++++++++++---------------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/crates/rpg-nav/src/duplication.rs b/crates/rpg-nav/src/duplication.rs index d7ab017..d341b90 100644 --- a/crates/rpg-nav/src/duplication.rs +++ b/crates/rpg-nav/src/duplication.rs @@ -338,11 +338,11 @@ fn tokenize(source: &str) -> Vec { let mut op = String::new(); op.push(chars.next().unwrap()); // Check for two-char operators - if let Some(&c) = chars.peek() { - if matches!(c, '=' | '&' | '|' | '<' | '>' | '+') { - op.push(c); - chars.next(); - } + if let Some(&c) = chars.peek() + && matches!(c, '=' | '&' | '|' | '<' | '>' | '+') + { + op.push(c); + chars.next(); } tokens.push(Token { kind: TokenType::Operator, @@ -507,7 +507,7 @@ pub fn detect_duplication( } Some(EntityFingerprints { - entity_id: (*id).to_string(), + entity_id: (*id).clone(), file: entity.file.display().to_string(), fps: fingerprints, token_count: tokens.len(), @@ -867,12 +867,7 @@ mod tests { // --- Per-entity token-based detection tests --- /// Helper: create an Entity with specific line range (for detect_duplication tests). 
- fn make_entity_at_lines( - id: &str, - file: &str, - line_start: usize, - line_end: usize, - ) -> Entity { + fn make_entity_at_lines(id: &str, file: &str, line_start: usize, line_end: usize) -> Entity { Entity { id: id.to_string(), kind: EntityKind::Function, @@ -908,11 +903,7 @@ mod tests { // File B: preamble + same function at lines 3-8 let file_b = dir.path().join("b.rs"); - std::fs::write( - &file_b, - format!("// preamble\nuse std::io;\n{}", func_code), - ) - .unwrap(); + std::fs::write(&file_b, format!("// preamble\nuse std::io;\n{}", func_code)).unwrap(); let mut graph = RPGraph::new("rust"); graph.entities.insert( diff --git a/crates/rpg-nav/src/toon.rs b/crates/rpg-nav/src/toon.rs index 2ebe598..65f6f06 100644 --- a/crates/rpg-nav/src/toon.rs +++ b/crates/rpg-nav/src/toon.rs @@ -793,22 +793,22 @@ pub fn format_health_report(report: &HealthReport) -> String { } // Semantic duplication info if present - if let Some(ref sem_dupes) = report.semantic_duplicates { - if !sem_dupes.is_empty() { - output.push_str("\n## Semantic Duplication (Conceptual Clones)\n\n"); - output.push_str( - "Entities sharing similar intent (lifted feature overlap). \ - May indicate accidental duplication or a missing abstraction.\n\n", - ); - for group in sem_dupes.iter().take(10) { - output.push_str(&format!( - "- similarity={:.1}% | shared: [{}]\n", - group.similarity * 100.0, - group.shared_features.join(", ") - )); - for (id, file) in group.entities.iter().zip(group.files.iter()) { - output.push_str(&format!(" {} ({})\n", id, file)); - } + if let Some(ref sem_dupes) = report.semantic_duplicates + && !sem_dupes.is_empty() + { + output.push_str("\n## Semantic Duplication (Conceptual Clones)\n\n"); + output.push_str( + "Entities sharing similar intent (lifted feature overlap). 
\ + May indicate accidental duplication or a missing abstraction.\n\n", + ); + for group in sem_dupes.iter().take(10) { + output.push_str(&format!( + "- similarity={:.1}% | shared: [{}]\n", + group.similarity * 100.0, + group.shared_features.join(", ") + )); + for (id, file) in group.entities.iter().zip(group.files.iter()) { + output.push_str(&format!(" {} ({})\n", id, file)); } } } @@ -833,14 +833,14 @@ pub fn format_health_report(report: &HealthReport) -> String { report.summary.hub_count )); } - if let Some(ref sem_dupes) = report.semantic_duplicates { - if !sem_dupes.is_empty() { - output.push_str(&format!( - "4. **Extract shared abstractions**: {} entity pairs share similar intent. \ - Consider introducing a shared interface or helper.\n", - sem_dupes.len() - )); - } + if let Some(ref sem_dupes) = report.semantic_duplicates + && !sem_dupes.is_empty() + { + output.push_str(&format!( + "4. **Extract shared abstractions**: {} entity pairs share similar intent. \ + Consider introducing a shared interface or helper.\n", + sem_dupes.len() + )); } if report.summary.god_object_count == 0 && report.summary.highly_unstable_count == 0 From fcfe330ef325ccde18401f9255b42f05c07bc0a0 Mon Sep 17 00:00:00 2001 From: VooDisss Date: Wed, 25 Feb 2026 12:28:29 +0200 Subject: [PATCH 3/3] feat: Implement code health analysis with graph metrics and duplication detection, alongside new audit reports, documentation, and configuration files. 
---
 .../src/prompts/server_instructions.md        | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md
index c358e45..6292eb9 100644
--- a/crates/rpg-mcp/src/prompts/server_instructions.md
+++ b/crates/rpg-mcp/src/prompts/server_instructions.md
@@ -133,6 +133,39 @@ When using the RPG to understand or navigate a codebase (after lifting is comple
 - Use `context_pack` instead of search→fetch→explore chains (1 call vs 3-5, ~44% fewer tokens)
 - Use `impact_radius` for richer reachability analysis with edge paths (1 call vs multi-step explore)
 
+## HEALTH ANALYSIS
+
+Use `analyze_health` to assess architectural quality of the codebase. It computes
+instability, centrality, coupling metrics, and optionally detects code duplication.
+
+**When to use:** After lifting is complete, to identify refactoring targets, god objects,
+unstable modules, and duplicated code.
+
+**Parameters (all optional):**
+- `instability_threshold` (default 0.7) — flag entities with instability above this
+- `god_object_threshold` (default 10) — minimum degree to flag as god object
+- `include_duplication` (default false) — run Rabin-Karp token-based clone detection (reads source files, slower)
+- `include_semantic_duplication` (default false) — run Jaccard feature-based clone detection (in-memory, fast)
+- `semantic_similarity_threshold` (default 0.6) — Jaccard threshold for semantic clones
+
+**Output sections:**
+- Summary: entity count, edges, avg instability/centrality, god objects, hub count
+- God Object Candidates (degree ≥ threshold)
+- Top Unstable Entities (instability above `instability_threshold`, 0.7 by default)
+- Duplication Hotspots (when `include_duplication=true`) — token-level Type-1/Type-2 clones
+- Semantic Duplication (when `include_semantic_duplication=true`) — conceptual clones via lifted features
+- Recommendations for refactoring
+
+**Examples:**
+```json
+{} // baseline health (no 
duplication) +{"include_duplication": true} // + token-based clones +{"include_semantic_duplication": true} // + conceptual clones +{"include_duplication": true, "include_semantic_duplication": true} // both +{"god_object_threshold": 5, "instability_threshold": 0.5} // stricter thresholds +``` + ## TOOLS - **lifting_status**: Dashboard — coverage, per-area progress, unlifted files, NEXT STEP - **build_rpg**: Index the codebase (run once, instant) @@ -148,6 +181,7 @@ When using the RPG to understand or navigate a codebase (after lifting is comple - **context_pack**: Single-call search+fetch+explore. Searches, fetches source, expands neighbors, trims to token budget - **impact_radius**: BFS reachability with edge paths. Answers "what depends on X?" in one call. Traverses DataFlow edges for data lineage analysis - **plan_change**: Change planning — find relevant entities, dependency-safe modification order, impact radius, and related tests +- **analyze_health**: Architectural health analysis — instability, centrality, god objects, duplication detection (token + semantic) - **rpg_info**: Get codebase overview, statistics, and inter-area connectivity - **update_rpg**: Incrementally update after code changes - **reload_rpg**: Reload graph from disk