diff --git a/.coderabbit.yaml b/.coderabbit.yaml
new file mode 100644
index 0000000..f7b3a5d
--- /dev/null
+++ b/.coderabbit.yaml
@@ -0,0 +1,4 @@
+reviews:
+  request_changes_workflow: true
+  review_details: true
+  poem: false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..9dac4b1
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,34 @@
+name: Rust CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+permissions:
+  contents: read
+
+jobs:
+  rust:
+    name: Build, test, and format
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        run: rustup toolchain install stable --profile minimal --component rustfmt --component clippy
+
+      - name: Check formatting
+        run: cargo fmt --all -- --check
+
+      - name: Run Clippy
+        run: cargo clippy --locked --workspace --all-targets --all-features -- -D warnings -W clippy::too_many_lines -W clippy::too_many_arguments -W clippy::type_complexity -W clippy::excessive_nesting -W clippy::cognitive_complexity -W clippy::pedantic -W clippy::nursery -W clippy::cargo
+
+      - name: Build
+        run: cargo build --locked --all-targets
+
+      - name: Test
+        run: cargo test --locked --all-targets
diff --git a/.github/workflows/coderabbit-review.yml b/.github/workflows/coderabbit-review.yml
new file mode 100644
index 0000000..5cfc373
--- /dev/null
+++ b/.github/workflows/coderabbit-review.yml
@@ -0,0 +1,79 @@
+name: CodeRabbit Review Gate
+
+on:
+  pull_request_review:
+    types:
+      - submitted
+      - edited
+      - dismissed
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  coderabbit-review:
+    name: Validate CodeRabbit review
+    if: github.event.pull_request.draft == false && github.event.review.user.login == 'coderabbitai[bot]'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check CodeRabbit review state
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          node <<'NODE'
+          const token = process.env.GITHUB_TOKEN;
+          const [owner, repo] = process.env.GITHUB_REPOSITORY.split("/");
+          const prNumber = process.env.PR_NUMBER;
+          const headSha = process.env.PR_HEAD_SHA;
+
+          async function fetchReviews(page = 1, reviews = []) {
+            const url = `https://api.github.com/repos/${owner}/${repo}/pulls/${prNumber}/reviews?per_page=100&page=${page}`;
+            const response = await fetch(url, {
+              headers: {
+                Authorization: `Bearer ${token}`,
+                Accept: "application/vnd.github+json",
+                "X-GitHub-Api-Version": "2022-11-28",
+              },
+            });
+
+            if (!response.ok) {
+              const body = await response.text();
+              throw new Error(`GitHub review lookup failed: ${response.status} ${body}`);
+            }
+
+            const pageReviews = await response.json();
+            if (pageReviews.length === 0) {
+              return reviews;
+            }
+            return fetchReviews(page + 1, reviews.concat(pageReviews));
+          }
+
+          (async () => {
+            const reviews = await fetchReviews();
+            const codeRabbitReviews = reviews
+              .filter((review) => review.user?.login === "coderabbitai[bot]")
+              .filter((review) => review.commit_id === headSha)
+              .sort((left, right) => new Date(left.submitted_at) - new Date(right.submitted_at));
+
+            const latestReview = codeRabbitReviews.at(-1);
+            if (!latestReview) {
+              console.error(`CodeRabbit has not submitted a review for ${headSha}.`);
+              process.exit(1);
+            }
+
+            if (latestReview.state === "CHANGES_REQUESTED") {
+              console.error("CodeRabbit requested changes on this pull request.");
+              process.exit(1);
+            }
+
+            console.log(`CodeRabbit review state for ${headSha}: ${latestReview.state}`);
+          })().catch((error) => {
+            console.error(error);
+            process.exit(1);
+          });
+          NODE
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..9b718e5
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,19 @@
+# Agent Instructions
+
+These instructions apply to code agents working in this repository, including Codex.
+
+## Before finishing a change
+
+Run the repository verification commands from the workspace root and fix any issues before handing work back:
+
+```bash
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings -W clippy::too_many_lines -W clippy::too_many_arguments -W clippy::type_complexity -W clippy::excessive_nesting -W clippy::cognitive_complexity -W clippy::pedantic -W clippy::nursery -W clippy::cargo
+rtk cargo build --locked --all-targets
+```
+
+## Notes
+
+- Treat Clippy warnings as errors for generated or edited code.
+- Prefer changes that satisfy the repository `clippy.toml` configuration without adding `#[allow(...)]` attributes unless a maintainer explicitly asks for them.
+- If a command cannot be run in the current environment, call that out clearly in the handoff.
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..7b39a5f
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,61 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "codem8"
+version = "0.1.0"
+dependencies = [
+ "regex",
+ "xxhash-rust",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "regex"
+version = "1.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
+
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8cfb01c
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "codem8"
+version = "0.1.0"
+edition = "2021"
+license = "MIT"
+description = "A deterministic source code analysis CLI for duplicate code reports."
+repository = "https://github.com/b4prog/CodeM8"
+keywords = ["cli", "duplicate-detection", "source-code", "analysis"]
+categories = ["command-line-utilities", "development-tools"]
+
+[dependencies]
+regex = "1"
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
diff --git a/README.md b/README.md
index 93bf7bd..74ed36e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,153 @@
-# CodeM8
\ No newline at end of file
+# CodeM8
+
+CodeM8 is a Rust command-line application for deterministic source code reports.
+The initial report detects duplicated line-based code blocks in a repository:
+
+```bash
+codem8 --report-duplicate
+```
+
+The duplicate report is designed for both human developers and coding agents. It
+trims source lines, ignores empty lines, hashes normalized lines with XXH3
+128-bit, classifies syntax-only lines as block-only, groups repeated blocks, and
+prints a stable plain-text report sorted by duplicate weight.
+
+## Installation
+
+Install `codem8` from the GitHub source with Cargo:
+
+```bash
+cargo install --git https://github.com/b4prog/CodeM8 codem8
+```
+
+Build from a local checkout with Cargo:
+
+```bash
+cargo build --release
+```
+
+Install from a local checkout:
+
+```bash
+cargo install --path .
+```
+
+Run from the local checkout without installing:
+
+```bash
+cargo run -- --report-duplicate
+```
+
+## Usage
+
+Analyze every supported file type from the current directory:
+
+```bash
+codem8 --report-duplicate
+```
+
+Analyze multiple extensions:
+
+```bash
+codem8 --report-duplicate -file-extension=ts,tsx,js,jsx
+```
+
+Analyze an explicit list of files instead of recursively discovering files:
+
+```bash
+codem8 --report-duplicate -file-extension=ts,js -files=src/a.ts,src/b.js
+```
+
+Analyze files changed on the current local Git branch compared to the origin
+base branch:
+
+```bash
+codem8 --report-duplicate -git-branch
+```
+
+Include duplicate block metrics:
+
+```bash
+codem8 --report-duplicate -verbose
+```
+
+## Duplicate Report
+
+By default, CodeM8 analyzes every file extension in its language registry.
+Recursive discovery skips common irrelevant directories such as `.git`,
+`node_modules`, `target`, `dist`, `build`, `coverage`, `.next`, `.nuxt`,
+`.svelte-kit`, `.idea`, and `.vscode`. Symbolic links are not followed.
+
+Every non-empty line is normalized with Rust string trimming, so leading and
+trailing Unicode whitespace are removed before hashing and comparison. Empty
+trimmed lines are ignored. CodeM8 currently expects UTF-8 source files; invalid
+UTF-8 produces a clear error rather than lossy output.
+
+Use `-git-branch` to analyze only files changed on the current local branch
+compared to the origin base branch. CodeM8 resolves that base from `origin/HEAD`
+with `origin/main` and `origin/master` fallbacks. This includes committed,
+staged, unstaged, and untracked files that still exist in the worktree. The
+option requires a Git repository and cannot be combined with `-files`.
+
+Duplicate block weight is calculated as:
+
+```text
+(occurrences - 1) * duplicated_line_count * cumulative_normalized_character_count
+```
+
+Reports are sorted deterministically by descending weight, then by line count,
+character count, first location, and normalized block text.
+
+By default, each duplicate block prints the duplicated code before its
+locations. Use `-verbose` to also show weight, line count, and occurrence
+count. Character counts are used internally for scoring and sorting, but are
+not printed.
+
+## Language Heuristics
+
+CodeM8 includes a hard-coded registry of block-only line patterns for common
+languages and markup formats:
+
+- TypeScript / JavaScript
+- Rust
+- C / C++ / Objective-C
+- C#
+- Java / Kotlin / Scala
+- Go
+- Python
+- Ruby
+- PHP
+- Swift
+- Shell
+- PowerShell
+- HTML / XML
+- CSS / SCSS / Sass / Less
+- SQL
+- YAML / JSON / TOML
+
+Block-only lines, such as braces or closing tags, cannot start a duplicate by
+themselves. They can still be included inside a larger duplicated block when
+surrounding comparison lines match.
+
+## Development
+
+Run the full local verification set:
+
+```bash
+cargo fmt --all -- --check
+cargo clippy --workspace --all-targets --all-features -- -D warnings -W clippy::too_many_lines -W clippy::too_many_arguments -W clippy::type_complexity -W clippy::excessive_nesting -W clippy::cognitive_complexity -W clippy::pedantic -W clippy::nursery -W clippy::cargo
+cargo build --locked --all-targets
+cargo test --locked --all-targets
+```
+
+The repository includes GitHub Actions workflows for Rust CI and a CodeRabbit
+review gate. CI checks formatting, Clippy lints, the build, and tests on pushes
+to `main` and on pull requests. The gate runs when a CodeRabbit review is
+submitted, edited, or dismissed, and fails if it requests changes on the PR head.
+
+## Dependency Policy
+
+CodeM8 avoids external packages for functionality that is simple to implement
+and maintain directly. The first implementation uses two runtime dependencies:
+`xxhash-rust` for the required XXH3 128-bit hash implementation, and `regex`.
+Both crates are widely used and permissively licensed (BSL-1.0; MIT or Apache-2.0).
diff --git a/clippy.toml b/clippy.toml
new file mode 100644
index 0000000..770b51a
--- /dev/null
+++ b/clippy.toml
@@ -0,0 +1,5 @@
+too-many-lines-threshold = 80
+too-many-arguments-threshold = 5
+type-complexity-threshold = 200
+excessive-nesting-threshold = 4
+cognitive-complexity-threshold = 20
diff --git a/src/cli.rs b/src/cli.rs
new file mode 100644
index 0000000..9b3dc17
--- /dev/null
+++ b/src/cli.rs
@@ -0,0 +1,446 @@
+use std::fmt::Write as _;
+use std::path::PathBuf;
+
+use crate::error::{CodeM8Error, Result};
+use crate::language::supported_file_extensions;
+
+const CARGO_LOCK: &str = include_str!("../Cargo.lock");
+const HELP_TEXT_BODY: &str = "\
+USAGE:
+  codem8 help
+  codem8 --report-duplicate [OPTIONS]
+
+COMMANDS:
+  help
+      Display this detailed documentation.
+
+REQUIRED REPORT SWITCHES:
+  --report-duplicate
+      Analyze source files and print a duplicate code report.
+
+OPTIONS:
+  -file-extension=<extensions>
+      Comma-separated source file extensions to analyze.
+      Defaults to all extensions registered in LANGUAGE_PATTERNS.
+      Examples: -file-extension=ts,tsx,js,jsx
+
+  -files=<paths>
+      Comma-separated explicit files to analyze instead of recursively
+      discovering files from the current directory.
+      Example: -files=src/a.ts,src/b.js
+
+  -git-branch
+      Analyze files changed on the current local Git branch compared to the
+      origin base branch, including committed, staged, unstaged, and untracked
+      files. Cannot be combined with -files.
+
+  -verbose
+      Include duplicate block metrics in report output.
+
+DUPLICATE REPORT PURPOSE:
+  The duplicate report helps you find repeated code that may be worth
+  refactoring, reviewing, or consolidating. It lists each duplicated block with
+  the files and line ranges where it appears, making it easier to compare the
+  repeated code and decide whether it should stay duplicated.
+
+EXAMPLES:
+  codem8 --report-duplicate
+  codem8 --report-duplicate -file-extension=ts,tsx,js,jsx
+  codem8 --report-duplicate -file-extension=ts,js -files=src/a.ts,src/b.js
+  codem8 --report-duplicate -git-branch
+";
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct CargoLockPackage<'a> {
+    name: &'a str, // text between the quotes of the section's `name = "..."` line
+    version: &'a str, // text between the quotes of the section's `version = "..."` line
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum CliCommand {
+    Help, // produced when `help` or `-h` is the only argument
+    ReportDuplicate(CliConfig), // produced for every other argument list that parses
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CliConfig {
+    pub report_duplicate: bool, // set by `--report-duplicate`
+    pub verbose: bool, // set by `-verbose`
+    pub file_extensions: Vec<String>, // from `-file-extension=`, else all supported extensions
+    pub files: Option<Vec<PathBuf>>, // from `-files=`; `None` when the option is absent
+    pub git_branch: bool, // set by `-git-branch`; rejected together with `-files=`
+}
+
+/// Builds the help output: a version banner, a blank line, then the usage text.
+#[must_use]
+pub fn help_text() -> String {
+    let release = codem8_version_from_cargo_lock().unwrap_or("unknown");
+    let mut banner = String::new();
+    // Formatting into a `String` cannot fail, so the result is discarded.
+    let _ = writeln!(
+        banner,
+        "CodeM8 {release} - deterministic source code analysis reports."
+    );
+    [banner.as_str(), "\n", HELP_TEXT_BODY].concat()
+}
+
+/// Turns raw command-line arguments into a [`CliCommand`].
+///
+/// # Errors
+///
+/// Fails when the arguments are not a lone help request and `parse_args`
+/// rejects them.
+pub fn parse_command<I, S>(args: I) -> Result<CliCommand>
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+{
+    let args: Vec<String> = args.into_iter().map(Into::into).collect();
+    if matches!(args.as_slice(), [only] if is_help_argument(only)) {
+        return Ok(CliCommand::Help);
+    }
+    parse_args(args).map(CliCommand::ReportDuplicate)
+}
+
+/// Parses report arguments into a validated [`CliConfig`].
+///
+/// # Errors
+///
+/// Fails on unknown arguments, invalid or repeated `-file-extension=`/`-files=`,
+/// repeated `-git-branch`, `-git-branch` with `-files=`, or no `--report-duplicate`.
+pub fn parse_args<I, S>(args: I) -> Result<CliConfig>
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+{
+    let mut report_duplicate = false;
+    let mut verbose = false;
+    let mut file_extensions = None;
+    let mut files = None;
+    let mut git_branch = false;
+    for arg in args {
+        let arg = arg.into();
+        if arg == "--report-duplicate" {
+            report_duplicate = true; // repeating this switch is accepted
+        } else if arg == "-verbose" {
+            verbose = true; // repeating this switch is accepted
+        } else if arg == "-git-branch" {
+            if git_branch {
+                return Err(CodeM8Error::new(
+                    "git branch mode was provided more than once",
+                ));
+            }
+            git_branch = true;
+        } else if let Some(value) = arg.strip_prefix("-file-extension=") {
+            if file_extensions.is_some() {
+                return Err(CodeM8Error::new(
+                    "file extensions were provided more than once",
+                ));
+            }
+            file_extensions = Some(parse_file_extensions(value)?);
+        } else if let Some(value) = arg.strip_prefix("-files=") {
+            if files.is_some() {
+                return Err(CodeM8Error::new(
+                    "explicit files were provided more than once",
+                ));
+            }
+            files = Some(parse_file_list(value)?);
+        } else {
+            return Err(CodeM8Error::new(format!("unknown argument: {arg}")));
+        }
+    }
+    if !report_duplicate {
+        return Err(CodeM8Error::with_help(
+            "no report switch provided; pass --report-duplicate",
+        ));
+    }
+    if git_branch && files.is_some() {
+        return Err(CodeM8Error::new(
+            "git branch mode cannot be combined with explicit files",
+        ));
+    }
+    Ok(CliConfig {
+        report_duplicate,
+        verbose,
+        file_extensions: file_extensions.unwrap_or_else(supported_file_extensions),
+        files,
+        git_branch,
+    })
+}
+
+/// Parses a comma-separated extension list into trimmed, lowercased, unique entries.
+///
+/// # Errors
+///
+/// Returns an error when an extension is empty, starts with `.`, or contains a
+/// path separator.
+pub fn parse_file_extensions(value: &str) -> Result<Vec<String>> {
+    let mut extensions = Vec::new();
+    for raw_extension in value.split(',') {
+        let extension = raw_extension.trim();
+        if extension.is_empty() {
+            return Err(CodeM8Error::new("file extension values must not be empty"));
+        }
+        if extension.starts_with('.') {
+            return Err(CodeM8Error::new(format!(
+                "file extensions must not start with a dot: {extension}"
+            )));
+        }
+        if extension.contains('/') || extension.contains('\\') {
+            return Err(CodeM8Error::new(format!(
+                "file extensions must not contain path separators: {extension}"
+            )));
+        }
+        let extension = extension.to_ascii_lowercase();
+        if !extensions.contains(&extension) {
+            extensions.push(extension);
+        }
+    }
+    // `str::split` always yields at least one item, and every item is either
+    // rejected or recorded above, so `extensions` is never empty here and no
+    // separate emptiness check is needed.
+    Ok(extensions)
+}
+
+/// Parses a comma-separated list of explicit file paths, trimming each entry.
+///
+/// # Errors
+///
+/// Returns an error when any provided file path is empty.
+pub fn parse_file_list(value: &str) -> Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+    for raw_file in value.split(',') {
+        let file = raw_file.trim();
+        if file.is_empty() {
+            return Err(CodeM8Error::new("file path values must not be empty"));
+        }
+        files.push(PathBuf::from(file));
+    }
+    // `str::split` always yields at least one item, and every item is either
+    // rejected or pushed above, so `files` is never empty here and no separate
+    // emptiness check is needed.
+    Ok(files)
+}
+
+fn is_help_argument(arg: &str) -> bool {
+    ["help", "-h"].contains(&arg)
+}
+
+fn codem8_version_from_cargo_lock() -> Option<&'static str> {
+    let mut packages = cargo_lock_packages(CARGO_LOCK);
+    let own_entry = packages.find(|entry| entry.name == "codem8")?;
+    Some(own_entry.version)
+}
+
+fn cargo_lock_packages(lockfile: &str) -> impl Iterator<Item = CargoLockPackage<'_>> {
+    lockfile.split("[[package]]").filter_map(cargo_lock_package)
+}
+
+fn cargo_lock_package(section: &str) -> Option<CargoLockPackage<'_>> {
+    // A section missing either key, such as the file preamble, is not a package.
+    let fields = cargo_lock_value(section, "name").zip(cargo_lock_value(section, "version"));
+    fields.map(|(name, version)| CargoLockPackage { name, version })
+}
+
+fn cargo_lock_value<'a>(section: &'a str, key: &str) -> Option<&'a str> {
+    // Matches a `key = "value"` line and returns the text between the quotes.
+    section.lines().find_map(|line| {
+        let assigned = line.trim().strip_prefix(key)?.strip_prefix(" = \"")?;
+        assigned.strip_suffix('"')
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_help_command() {
+        let command = parse_command(["help"]).expect("help parses");
+        assert_eq!(command, CliCommand::Help);
+    }
+
+    #[test]
+    fn parses_short_help_option() {
+        let command = parse_command(["-h"]).expect("short help parses");
+        assert_eq!(command, CliCommand::Help);
+    }
+
+    #[test]
+    fn exposes_detailed_help_text() {
+        let help = help_text();
+        assert!(help.contains("USAGE:"));
+        assert!(help.contains("--report-duplicate"));
+        assert!(help.contains("-verbose"));
+        assert!(help.contains("-file-extension=<extensions>"));
+        assert!(help.contains("-files=<paths>"));
+        assert!(help.contains("-git-branch"));
+        assert!(!help.contains("--verbose"));
+        assert!(!help.contains("--file-extension=<extensions>"));
+        assert!(!help.contains("--files=<paths>"));
+        assert!(!help.contains("--git-branch"));
+        assert!(help.contains("helps you find repeated code"));
+        assert!(!help.contains("Duplicate weight"));
+    }
+
+    #[test]
+    fn help_text_includes_version_from_cargo_lock() {
+        let version = codem8_version_from_cargo_lock().expect("codem8 version exists");
+        assert!(help_text().starts_with(&format!("CodeM8 {version} - ")));
+    }
+
+    #[test]
+    fn extracts_package_versions_from_cargo_lock_sections() {
+        let lockfile = r#"
+[[package]]
+name = "dependency"
+version = "1.2.3"
+
+[[package]]
+name = "codem8"
+version = "0.4.2"
+"#;
+        let package = cargo_lock_packages(lockfile)
+            .find(|package| package.name == "codem8")
+            .expect("package exists");
+        assert_eq!(package.version, "0.4.2");
+    }
+
+    #[test]
+    fn parses_default_duplicate_report_config() {
+        let config = parse_args(["--report-duplicate"]).expect("config parses");
+        assert!(config.report_duplicate);
+        assert!(!config.verbose);
+        assert_eq!(config.file_extensions, supported_file_extensions());
+        assert_eq!(config.files, None);
+        assert!(!config.git_branch);
+    }
+
+    #[test]
+    fn parses_verbose_duplicate_report_config() {
+        let config = parse_args(["--report-duplicate", "-verbose"]).expect("config parses");
+        assert!(config.report_duplicate);
+        assert!(config.verbose);
+    }
+
+    #[test]
+    fn parses_git_branch_duplicate_report_config() {
+        let config = parse_args(["--report-duplicate", "-git-branch"]).expect("config parses");
+        assert!(config.git_branch);
+        assert_eq!(config.files, None);
+    }
+
+    #[test]
+    fn parses_extensions_case_insensitively_and_trims_whitespace() {
+        let extensions = parse_file_extensions(" ts, JS ,tsx,ts ").expect("extensions parse");
+        assert_eq!(extensions, ["ts", "js", "tsx"]);
+    }
+
+    #[test]
+    fn rejects_empty_extensions() {
+        let error = parse_file_extensions("ts,,js").expect_err("empty extension fails");
+        assert!(error.to_string().contains("must not be empty"));
+    }
+
+    #[test]
+    fn rejects_extensions_with_leading_dot() {
+        let error = parse_file_extensions(".ts").expect_err("dot-prefixed extension fails");
+        assert!(error.to_string().contains("must not start with a dot"));
+    }
+
+    #[test]
+    fn rejects_extensions_with_path_separators() {
+        let error = parse_file_extensions("src/ts").expect_err("path-like extension fails");
+        assert!(error
+            .to_string()
+            .contains("must not contain path separators"));
+    }
+
+    #[test]
+    fn rejects_missing_report_switch() {
+        let error = parse_args(["-file-extension=rs"]).expect_err("missing report fails");
+        assert!(error.to_string().contains("no report switch provided"));
+        assert!(error.should_show_help());
+    }
+
+    #[test]
+    fn rejects_unknown_arguments() {
+        let error = parse_args(["--report-duplicate", "--unknown"]).expect_err("unknown arg fails");
+        assert!(error.to_string().contains("unknown argument: --unknown"));
+        assert!(!error.should_show_help());
+    }
+
+    #[test]
+    fn rejects_double_dash_option_arguments() {
+        for option in [
+            "--help",
+            "--verbose",
+            "--file-extension=js",
+            "--files=src/a.ts",
+            "--git-branch",
+        ] {
+            let error =
+                parse_args(["--report-duplicate", option]).expect_err("double-dash option fails");
+            assert!(error
+                .to_string()
+                .contains(&format!("unknown argument: {option}")));
+        }
+    }
+
+    #[test]
+    fn rejects_repeated_file_extension_arguments() {
+        let error = parse_args([
+            "--report-duplicate",
+            "-file-extension=ts",
+            "-file-extension=js",
+        ])
+        .expect_err("repeated extensions fail");
+        assert!(error
+            .to_string()
+            .contains("file extensions were provided more than once"));
+    }
+
+    #[test]
+    fn rejects_repeated_explicit_file_arguments() {
+        let error = parse_args(["--report-duplicate", "-files=a.ts", "-files=b.ts"])
+            .expect_err("repeated explicit files fail");
+        assert!(error
+            .to_string()
+            .contains("explicit files were provided more than once"));
+    }
+
+    #[test]
+    fn rejects_repeated_git_branch_arguments() {
+        let error = parse_args(["--report-duplicate", "-git-branch", "-git-branch"])
+            .expect_err("repeated git branch mode fails");
+        assert!(error
+            .to_string()
+            .contains("git branch mode was provided more than once"));
+    }
+
+    #[test]
+    fn rejects_git_branch_with_explicit_files() {
+        let error = parse_args(["--report-duplicate", "-git-branch", "-files=a.ts"])
+            .expect_err("exclusive file modes fail");
+        assert!(error
+            .to_string()
+            .contains("git branch mode cannot be combined with explicit files"));
+    }
+
+    #[test]
+    fn parses_explicit_file_list() {
+        let files = parse_file_list("src/a.ts, ./src/b.ts").expect("files parse");
+        assert_eq!(
+            files,
+            [PathBuf::from("src/a.ts"), PathBuf::from("./src/b.ts")]
+        );
+    }
+
+    #[test]
+    fn rejects_empty_explicit_file_paths() {
+        let error = parse_file_list("src/a.ts, ").expect_err("empty explicit file fails");
+        assert!(error
+            .to_string()
+            .contains("file path values must not be empty"));
+    }
+}
diff --git a/src/discovery.rs b/src/discovery.rs
new file mode 100644
index 0000000..c48293a
--- /dev/null
+++ b/src/discovery.rs
@@ -0,0 +1,248 @@
+use std::collections::HashSet;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::error::{CodeM8Error, Result};
+use crate::model::SourceFile;
+use crate::paths::{format_path, normalize_display_path};
+
+const IGNORED_DIRECTORIES: &[&str] = &[
+    ".git",
+    "node_modules",
+    "target",
+    "dist",
+    "build",
+    "coverage",
+    ".next",
+    ".nuxt",
+    ".svelte-kit",
+    ".idea",
+    ".vscode",
+];
+
+/// Collects the source files to analyze, sorted by display path.
+///
+/// # Errors
+///
+/// Propagates failures from validating `explicit_files` or, when none are
+/// given, from walking the tree under `current_dir`.
+pub fn discover_source_files(
+    current_dir: &Path,
+    extensions: &[String],
+    explicit_files: Option<&[PathBuf]>,
+) -> Result<Vec<SourceFile>> {
+    let mut discovered = if let Some(files) = explicit_files {
+        discover_explicit_files(current_dir, extensions, files)?
+    } else {
+        let mut walked = Vec::new();
+        walk_directory(current_dir, current_dir, extensions, &mut walked)?;
+        walked
+    };
+    discovered.sort_by(|first, second| {
+        format_path(&first.display_path).cmp(&format_path(&second.display_path))
+    });
+    Ok(discovered)
+}
+
+fn discover_explicit_files(
+    current_dir: &Path,
+    extensions: &[String],
+    files: &[PathBuf],
+) -> Result<Vec<SourceFile>> {
+    let mut source_files = Vec::new();
+    let mut seen_paths = HashSet::new(); // canonical paths already recorded
+    for file in files {
+        let absolute_input = file.is_absolute();
+        let path = if absolute_input {
+            file.clone()
+        } else {
+            current_dir.join(file)
+        };
+        let metadata = fs::symlink_metadata(&path).map_err(|_| {
+            CodeM8Error::new(format!(
+                "explicit file does not exist: {}",
+                format_path(file)
+            ))
+        })?; // any metadata failure is reported as a missing file
+        if metadata.file_type().is_symlink() {
+            return Err(CodeM8Error::new(format!(
+                "explicit file is a symbolic link and will not be followed: {}",
+                format_path(file)
+            )));
+        }
+        if metadata.is_dir() {
+            return Err(CodeM8Error::new(format!(
+                "explicit file is a directory: {}",
+                format_path(file)
+            )));
+        }
+        if !metadata.is_file() {
+            return Err(CodeM8Error::new(format!(
+                "explicit path is not a file: {}",
+                format_path(file)
+            )));
+        }
+        let Some(extension) = selected_extension(&path, extensions) else {
+            continue; // files with an unselected extension are skipped, not rejected
+        };
+        let canonical_path = fs::canonicalize(&path)
+            .map_err(|error| CodeM8Error::io(&path, "canonicalize explicit file", &error))?;
+        if !seen_paths.insert(canonical_path.clone()) {
+            continue; // the same file was already listed under another spelling
+        }
+        let display_path = if absolute_input {
+            canonical_path
+                .strip_prefix(current_dir)
+                .map_or_else(|_| normalize_display_path(file), normalize_display_path)
+        } else {
+            normalize_display_path(file)
+        };
+        source_files.push(SourceFile {
+            path: canonical_path,
+            display_path,
+            extension,
+        });
+    }
+    Ok(source_files)
+}
+
+fn walk_directory(
+    root: &Path,
+    directory: &Path,
+    extensions: &[String],
+    source_files: &mut Vec<SourceFile>,
+) -> Result<()> {
+    let mut entries = fs::read_dir(directory)
+        .map_err(|error| CodeM8Error::io(directory, "read directory", &error))?
+        .collect::<std::result::Result<Vec<_>, _>>()
+        .map_err(|error| CodeM8Error::io(directory, "read directory entry", &error))?;
+    entries.sort_by(|left, right| {
+        left.file_name()
+            .to_string_lossy()
+            .cmp(&right.file_name().to_string_lossy())
+    }); // entries are visited in file-name order
+    for entry in entries {
+        let path = entry.path();
+        let file_type = entry
+            .file_type()
+            .map_err(|error| CodeM8Error::io(&path, "inspect path", &error))?;
+        if file_type.is_symlink() {
+            continue; // symbolic links are never followed
+        }
+        if file_type.is_dir() {
+            let directory_name = entry.file_name().to_string_lossy().to_ascii_lowercase();
+            if IGNORED_DIRECTORIES.contains(&directory_name.as_str()) {
+                continue; // ignored names are matched case-insensitively
+            }
+            walk_directory(root, &path, extensions, source_files)?;
+        } else if file_type.is_file() {
+            let Some(extension) = selected_extension(&path, extensions) else {
+                continue; // extension was not selected
+            };
+            let display_path = path
+                .strip_prefix(root)
+                .map_or_else(|_| normalize_display_path(&path), normalize_display_path);
+            source_files.push(SourceFile {
+                path,
+                display_path,
+                extension,
+            });
+        }
+    }
+    Ok(())
+}
+
+/// Returns the lowercased extension of `path` when it is one of `extensions`.
+///
+/// Matching ignores ASCII case. Yields `None` when the path has no extension,
+/// the extension is not valid UTF-8, or it is not listed in `extensions`.
+fn selected_extension(path: &Path, extensions: &[String]) -> Option<String> {
+    let lowered = path.extension()?.to_str()?.to_ascii_lowercase();
+    let is_selected = extensions
+        .iter()
+        .any(|candidate| candidate.eq_ignore_ascii_case(&lowered));
+    if is_selected {
+        Some(lowered)
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    use super::*;
+
+    /// Gives every temporary directory created by this process a distinct suffix.
+    static TEMP_COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+    /// Creates an empty, uniquely named directory under the system temp dir.
+    fn temp_dir(name: &str) -> PathBuf {
+        let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+        let directory_name = format!(
+            "codem8-discovery-{name}-{}-{id}",
+            std::process::id()
+        );
+        let directory = std::env::temp_dir().join(directory_name);
+        if directory.exists() {
+            fs::remove_dir_all(&directory).expect("remove stale test directory");
+        }
+        fs::create_dir_all(&directory).expect("create test directory");
+        directory
+    }
+
+    #[test]
+    fn recursively_discovers_matching_extensions_and_ignores_common_directories() {
+        let workspace = temp_dir("recursive");
+        let sources = workspace.join("src");
+        let build_output = workspace.join("target");
+        fs::create_dir_all(&sources).expect("create src");
+        fs::create_dir_all(&build_output).expect("create target");
+        fs::write(sources.join("a.TS"), "").expect("write ts");
+        fs::write(sources.join("b.js"), "").expect("write js");
+        fs::write(build_output.join("ignored.ts"), "").expect("write ignored");
+        let selected = ["ts".to_string()];
+        let found = discover_source_files(&workspace, &selected, None).expect("discover");
+        assert_eq!(found.len(), 1);
+        assert_eq!(format_path(&found[0].display_path), "src/a.TS");
+        fs::remove_dir_all(workspace).expect("cleanup");
+    }
+
+    #[test]
+    fn explicit_files_skip_unselected_extensions() {
+        let workspace = temp_dir("explicit-skip");
+        fs::write(workspace.join("a.ts"), "").expect("write ts");
+        fs::write(workspace.join("b.js"), "").expect("write js");
+        let selected = ["ts".to_string()];
+        let requested = [PathBuf::from("a.ts"), PathBuf::from("b.js")];
+        let found = discover_source_files(&workspace, &selected, Some(requested.as_slice()))
+            .expect("discover");
+        assert_eq!(found.len(), 1);
+        assert_eq!(format_path(&found[0].display_path), "a.ts");
+        fs::remove_dir_all(workspace).expect("cleanup");
+    }
+
+    #[test]
+    fn explicit_files_deduplicate_resolved_paths() {
+        let workspace = temp_dir("explicit-dedup");
+        fs::write(workspace.join("a.ts"), "").expect("write ts");
+        let absolute = fs::canonicalize(workspace.join("a.ts")).expect("canonicalize ts");
+        let selected = ["ts".to_string()];
+        // Three spellings of the same file: bare, dot-relative, and absolute.
+        let requested = [
+            PathBuf::from("a.ts"),
+            PathBuf::from(".").join("a.ts"),
+            absolute.clone(),
+        ];
+        let found = discover_source_files(&workspace, &selected, Some(requested.as_slice()))
+            .expect("discover");
+        assert_eq!(found.len(), 1);
+        assert_eq!(found[0].path, absolute);
+        assert_eq!(format_path(&found[0].display_path), "a.ts");
+        fs::remove_dir_all(workspace).expect("cleanup");
+    }
+
+    #[test]
+    fn explicit_files_reject_directories() {
+        let workspace = temp_dir("explicit-directory");
+        fs::create_dir_all(workspace.join("src")).expect("create explicit directory");
+        let selected = ["ts".to_string()];
+        let requested = [PathBuf::from("src")];
+        let failure = discover_source_files(&workspace, &selected, Some(requested.as_slice()))
+            .expect_err("directory explicit file fails");
+        let message = failure.to_string();
+        assert!(message.contains("explicit file is a directory: src"));
+        fs::remove_dir_all(workspace).expect("cleanup");
+    }
+}
diff --git a/src/duplicate.rs b/src/duplicate.rs
new file mode 100644
index 0000000..6414768
--- /dev/null
+++ b/src/duplicate.rs
@@ -0,0 +1,410 @@
+use std::cmp::Ordering;
+use std::collections::{BTreeSet, HashMap};
+use std::path::PathBuf;
+
+use crate::model::{DuplicateBlock, DuplicateOccurrence, LineEntry, LineStatus, ProcessedFile};
+use crate::paths::format_path;
+
+/// Position of a single line, expressed as indexes rather than line numbers.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+struct LineRef {
+    /// Index of the file within the `files` slice being analyzed.
+    file_index: usize,
+    /// Index of the line within that file's `lines` collection.
+    line_index: usize,
+}
+
+/// One location of a duplicate block, kept in a `BTreeSet` while grouping.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct OccurrenceKey {
+    /// Display path of the file that contains the occurrence.
+    file_path: PathBuf,
+    /// `format_path(&file_path)`; the path component used by the `Ord` impl.
+    file_path_key: String,
+    /// Line number of the first line of the occurrence.
+    start_line: usize,
+    /// Line number of the last line of the occurrence.
+    end_line: usize,
+}
+
+impl Ord for OccurrenceKey {
+    /// Orders occurrences by formatted path, then start line, then end line.
+    ///
+    /// `file_path` itself does not take part in the comparison.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let own = (&self.file_path_key, self.start_line, self.end_line);
+        let theirs = (&other.file_path_key, other.start_line, other.end_line);
+        own.cmp(&theirs)
+    }
+}
+
+impl PartialOrd for OccurrenceKey {
+    /// Delegates to `Ord::cmp`, so the partial order is always total.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// Finds blocks of normalized lines that occur at two or more locations.
+///
+/// Lines are bucketed by hash and then by exact normalized text, so lines that
+/// merely share a hash are never paired. Only lines whose status is
+/// `LineStatus::Comparison` seed a candidate; every pair of seeds with the same
+/// text is handed to `collect_candidates`. The returned blocks are ordered by
+/// `compare_duplicate_blocks`.
+pub fn detect_duplicate_blocks(files: &[ProcessedFile]) -> Vec<DuplicateBlock> {
+    let mut refs_by_hash: HashMap<u128, Vec<LineRef>> = HashMap::new();
+    for (file_index, file) in files.iter().enumerate() {
+        for (line_index, line) in file.lines.iter().enumerate() {
+            let position = LineRef {
+                file_index,
+                line_index,
+            };
+            refs_by_hash.entry(line.hash).or_default().push(position);
+        }
+    }
+
+    let mut grouped: HashMap<Vec<String>, BTreeSet<OccurrenceKey>> = HashMap::new();
+    for same_hash in refs_by_hash.values().filter(|refs| refs.len() >= 2) {
+        // Re-bucket by text so a hash collision cannot pair different lines.
+        let mut seeds_by_text: HashMap<String, Vec<LineRef>> = HashMap::new();
+        for &position in same_hash {
+            let line = line_at(files, position);
+            if line.status == LineStatus::Comparison {
+                seeds_by_text
+                    .entry(line.normalized_text.clone())
+                    .or_default()
+                    .push(position);
+            }
+        }
+        for seeds in seeds_by_text.values().filter(|seeds| seeds.len() >= 2) {
+            collect_candidates(files, seeds, &mut grouped);
+        }
+    }
+
+    let mut blocks: Vec<DuplicateBlock> = Vec::new();
+    for (normalized_lines, keys) in grouped {
+        if normalized_lines.is_empty() || keys.len() < 2 {
+            continue;
+        }
+        let occurrences: Vec<DuplicateOccurrence> = keys
+            .into_iter()
+            .map(|key| DuplicateOccurrence {
+                file_path: key.file_path,
+                start_line: key.start_line,
+                end_line: key.end_line,
+            })
+            .collect();
+        let character_count: u64 = normalized_lines
+            .iter()
+            .map(|line| line.chars().count() as u64)
+            .sum();
+        // Weight = (occurrences - 1) * line count * total character count.
+        let extra_occurrences = occurrences.len() as u64 - 1;
+        let weight = extra_occurrences * normalized_lines.len() as u64 * character_count;
+        blocks.push(DuplicateBlock {
+            normalized_lines,
+            occurrences,
+            weight,
+        });
+    }
+    blocks.sort_by(compare_duplicate_blocks);
+    blocks
+}
+
+/// A duplicate found by expanding one pair of matching lines.
+#[derive(Debug)]
+struct CandidateBlock {
+    /// Normalized text of every line in the block, read from the left occurrence.
+    normalized_lines: Vec<String>,
+    /// Location of the block around the left line of the pair.
+    left_occurrence: OccurrenceKey,
+    /// Location of the block around the right line of the pair.
+    right_occurrence: OccurrenceKey,
+}
+
+/// Expands every unordered pair of `comparison_refs` and records the results.
+///
+/// Pairs for which `expand_pair` yields nothing are skipped.
+fn collect_candidates(
+    files: &[ProcessedFile],
+    comparison_refs: &[LineRef],
+    blocks_by_lines: &mut HashMap<Vec<String>, BTreeSet<OccurrenceKey>>,
+) {
+    for (position, &left) in comparison_refs.iter().enumerate() {
+        // Only pair with later entries so each pair is visited once.
+        for &right in &comparison_refs[position + 1..] {
+            if let Some(candidate) = expand_pair(files, left, right) {
+                store_candidate(candidate, blocks_by_lines);
+            }
+        }
+    }
+}
+
+/// Adds both occurrences of `candidate` to the set keyed by its lines.
+///
+/// The set de-duplicates occurrences that compare equal under `Ord`.
+fn store_candidate(
+    candidate: CandidateBlock,
+    blocks_by_lines: &mut HashMap<Vec<String>, BTreeSet<OccurrenceKey>>,
+) {
+    let CandidateBlock {
+        normalized_lines,
+        left_occurrence,
+        right_occurrence,
+    } = candidate;
+    let bucket = blocks_by_lines.entry(normalized_lines).or_default();
+    bucket.insert(left_occurrence);
+    bucket.insert(right_occurrence);
+}
+
+/// Grows a pair of matching lines into the largest matching block around them.
+///
+/// Both sides are extended upward and downward in lockstep while the
+/// neighbouring lines have equal normalized text, regardless of line status.
+/// Returns `None` when `left` and `right` are the same line, or when both lie
+/// in the same file and the expanded ranges overlap.
+fn expand_pair(files: &[ProcessedFile], left: LineRef, right: LineRef) -> Option<CandidateBlock> {
+    if left == right {
+        return None;
+    }
+    let same_text = |left_at: usize, right_at: usize| {
+        line_text(files, left.file_index, left_at) == line_text(files, right.file_index, right_at)
+    };
+    let left_len = files[left.file_index].lines.len();
+    let right_len = files[right.file_index].lines.len();
+
+    // Extend toward the start of both files.
+    let (mut left_start, mut right_start) = (left.line_index, right.line_index);
+    while left_start > 0 && right_start > 0 && same_text(left_start - 1, right_start - 1) {
+        left_start -= 1;
+        right_start -= 1;
+    }
+
+    // Extend toward the end of both files.
+    let (mut left_end, mut right_end) = (left.line_index, right.line_index);
+    while left_end + 1 < left_len
+        && right_end + 1 < right_len
+        && same_text(left_end + 1, right_end + 1)
+    {
+        left_end += 1;
+        right_end += 1;
+    }
+
+    let same_file = left.file_index == right.file_index;
+    let ranges_overlap = left_start <= right_end && right_start <= left_end;
+    if same_file && ranges_overlap {
+        return None;
+    }
+    let normalized_lines: Vec<String> = files[left.file_index].lines[left_start..=left_end]
+        .iter()
+        .map(|line| line.normalized_text.clone())
+        .collect();
+    let left_occurrence = occurrence_for(files, left.file_index, left_start, left_end);
+    let right_occurrence = occurrence_for(files, right.file_index, right_start, right_end);
+    Some(CandidateBlock {
+        normalized_lines,
+        left_occurrence,
+        right_occurrence,
+    })
+}
+
+/// Builds the key for the lines `start_index..=end_index` of one file.
+///
+/// The indexes address the file's `lines`; the key stores the corresponding
+/// line numbers together with the file's display path.
+fn occurrence_for(
+    files: &[ProcessedFile],
+    file_index: usize,
+    start_index: usize,
+    end_index: usize,
+) -> OccurrenceKey {
+    let file = &files[file_index];
+    let first_line = &file.lines[start_index];
+    let last_line = &file.lines[end_index];
+    let file_path = file.source.display_path.clone();
+    let file_path_key = format_path(&file_path);
+    OccurrenceKey {
+        file_path,
+        file_path_key,
+        start_line: first_line.line_number,
+        end_line: last_line.line_number,
+    }
+}
+
+/// Resolves `line_ref` to its line entry; panics if either index is out of range.
+fn line_at(files: &[ProcessedFile], line_ref: LineRef) -> &LineEntry {
+    let file = &files[line_ref.file_index];
+    &file.lines[line_ref.line_index]
+}
+
+/// Returns the normalized text of one line; panics if an index is out of range.
+fn line_text(files: &[ProcessedFile], file_index: usize, line_index: usize) -> &str {
+    let line = &files[file_index].lines[line_index];
+    line.normalized_text.as_str()
+}
+
+/// Sort order for reported blocks.
+///
+/// Larger weight, line count, and character count come first. Remaining ties
+/// are broken ascending by the first occurrence's formatted path, its start
+/// line, and finally the block text.
+fn compare_duplicate_blocks(left: &DuplicateBlock, right: &DuplicateBlock) -> Ordering {
+    // Operands are swapped here so that bigger values sort earlier.
+    let by_size = right
+        .weight
+        .cmp(&left.weight)
+        .then_with(|| right.line_count().cmp(&left.line_count()))
+        .then_with(|| right.character_count().cmp(&left.character_count()));
+    if by_size != Ordering::Equal {
+        return by_size;
+    }
+    first_occurrence_key(left)
+        .cmp(&first_occurrence_key(right))
+        .then_with(|| first_occurrence_start_line(left).cmp(&first_occurrence_start_line(right)))
+        .then_with(|| normalized_block_text(left).cmp(&normalized_block_text(right)))
+}
+
+/// Formatted path of the block's first occurrence, or an empty string if none.
+fn first_occurrence_key(block: &DuplicateBlock) -> String {
+    let first = block.occurrences.first();
+    first.map_or_else(String::new, |occurrence| format_path(&occurrence.file_path))
+}
+
+/// Start line of the block's first occurrence, or `0` if there is none.
+fn first_occurrence_start_line(block: &DuplicateBlock) -> usize {
+    let first = block.occurrences.first();
+    first.map_or(0, |occurrence| occurrence.start_line)
+}
+
+/// Joins the block's normalized lines with newlines into a single string.
+fn normalized_block_text(block: &DuplicateBlock) -> String {
+    let lines = &block.normalized_lines;
+    lines.join("\n")
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::language::hash_normalized_line;
+    use crate::model::{LineEntry, ProcessedFile, SourceFile};
+
+    use super::*;
+
+    /// Builds a processed file whose lines are numbered from 1 in the given order.
+    fn processed_file(path: &str, extension: &str, lines: &[(&str, LineStatus)]) -> ProcessedFile {
+        let location = PathBuf::from(path);
+        let entries = lines
+            .iter()
+            .enumerate()
+            .map(|(offset, (text, status))| LineEntry {
+                file_path: location.clone(),
+                line_number: offset + 1,
+                normalized_text: (*text).to_string(),
+                hash: hash_normalized_line(text),
+                status: *status,
+            })
+            .collect();
+        ProcessedFile {
+            source: SourceFile {
+                path: location.clone(),
+                display_path: location.clone(),
+                extension: extension.to_string(),
+            },
+            lines: entries,
+        }
+    }
+
+    /// Builds a `ts` file in which every line has `LineStatus::Comparison`.
+    fn comparison_file(path: &str, texts: &[&str]) -> ProcessedFile {
+        let lines: Vec<(&str, LineStatus)> = texts
+            .iter()
+            .map(|text| (*text, LineStatus::Comparison))
+            .collect();
+        processed_file(path, "ts", &lines)
+    }
+
+    #[test]
+    fn groups_three_occurrences_of_the_same_block() {
+        let shared = ["const value = one;", "return value;"];
+        let files = vec![
+            comparison_file("a.ts", &shared),
+            comparison_file("b.ts", &shared),
+            comparison_file("c.ts", &shared),
+        ];
+        let blocks = detect_duplicate_blocks(&files);
+        assert_eq!(blocks.len(), 1);
+        assert_eq!(blocks[0].occurrences.len(), 3);
+        assert_eq!(
+            blocks[0].normalized_lines,
+            ["const value = one;", "return value;"]
+        );
+    }
+
+    #[test]
+    fn ignores_matching_hashes_with_different_text() {
+        let mut files = vec![
+            comparison_file("a.ts", &["const value = one;"]),
+            comparison_file("b.ts", &["const value = two;"]),
+        ];
+        // Force a hash collision between two lines with different text.
+        let colliding_hash = files[0].lines[0].hash;
+        files[1].lines[0].hash = colliding_hash;
+        let blocks = detect_duplicate_blocks(&files);
+        assert!(blocks.is_empty());
+    }
+
+    #[test]
+    fn sorts_duplicate_blocks_by_weight() {
+        let files = vec![
+            comparison_file(
+                "a.ts",
+                &[
+                    "const x = 1;",
+                    "const uniqueA = true;",
+                    "const longerValue = computeOne();",
+                    "return longerValue;",
+                ],
+            ),
+            comparison_file(
+                "b.ts",
+                &[
+                    "const x = 1;",
+                    "const uniqueB = true;",
+                    "const longerValue = computeOne();",
+                    "return longerValue;",
+                ],
+            ),
+        ];
+        let blocks = detect_duplicate_blocks(&files);
+        assert!(blocks.len() >= 2);
+        assert_eq!(
+            blocks[0].normalized_lines,
+            ["const longerValue = computeOne();", "return longerValue;"]
+        );
+        assert!(blocks[0].weight >= blocks[1].weight);
+    }
+
+    #[test]
+    fn ignores_single_line_duplicates_that_are_only_block_only_lines() {
+        let closing_brace = [("}", LineStatus::BlockOnly)];
+        let files = vec![
+            processed_file("a.ts", "ts", &closing_brace),
+            processed_file("b.ts", "ts", &closing_brace),
+        ];
+        let blocks = detect_duplicate_blocks(&files);
+        assert!(blocks.is_empty());
+    }
+
+    #[test]
+    fn includes_block_only_lines_inside_larger_duplicate_blocks() {
+        let lines = [
+            ("if (ready) {", LineStatus::Comparison),
+            ("}", LineStatus::BlockOnly),
+            ("return value;", LineStatus::Comparison),
+        ];
+        let files = vec![
+            processed_file("a.ts", "ts", &lines),
+            processed_file("b.ts", "ts", &lines),
+        ];
+        let blocks = detect_duplicate_blocks(&files);
+        assert_eq!(blocks.len(), 1);
+        assert_eq!(
+            blocks[0].normalized_lines,
+            ["if (ready) {", "}", "return value;"]
+        );
+    }
+
+    #[test]
+    fn rejects_overlapping_duplicate_ranges_in_the_same_file() {
+        let repeated = "const value = one;";
+        let files = vec![comparison_file("a.ts", &[repeated, repeated, repeated])];
+        let blocks = detect_duplicate_blocks(&files);
+        // A two-line block starting at both line 1 and line 2 would overlap itself.
+        let has_overlapping_block = blocks.iter().any(|block| {
+            let starts_at = |line: usize| {
+                block
+                    .occurrences
+                    .iter()
+                    .any(|occurrence| occurrence.start_line == line)
+            };
+            block.normalized_lines == ["const value = one;", "const value = one;"]
+                && starts_at(1)
+                && starts_at(2)
+        });
+        assert!(!has_overlapping_block);
+    }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..bcfe20b
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,50 @@
+use std::error::Error;
+use std::fmt;
+use std::io;
+use std::path::Path;
+
+use crate::paths::format_path;
+
+/// Result alias whose error type is always `CodeM8Error`.
+pub type Result<T> = std::result::Result<T, CodeM8Error>;
+
+/// Error carrying a display message and a flag reported by `should_show_help`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CodeM8Error {
+    /// Text written verbatim by the `Display` implementation.
+    message: String,
+    /// `true` only for errors built with `with_help`.
+    show_help: bool,
+}
+
+impl CodeM8Error {
+    /// Creates an error with the given message and the help flag cleared.
+    #[must_use]
+    pub fn new(message: impl Into<String>) -> Self {
+        let message = message.into();
+        Self {
+            message,
+            show_help: false,
+        }
+    }
+
+    /// Creates an error with the given message and the help flag set.
+    #[must_use]
+    pub fn with_help(message: impl Into<String>) -> Self {
+        Self {
+            show_help: true,
+            ..Self::new(message)
+        }
+    }
+
+    /// Creates an error describing a failed I/O `action` on `path`.
+    ///
+    /// The message has the form `could not <action> <path>: <error>`.
+    #[must_use]
+    pub fn io(path: &Path, action: &str, error: &io::Error) -> Self {
+        let message = format!("could not {action} {}: {error}", format_path(path));
+        Self::new(message)
+    }
+
+    /// Reports whether this error was built with `with_help`.
+    #[must_use]
+    pub const fn should_show_help(&self) -> bool {
+        self.show_help
+    }
+}
+
+impl fmt::Display for CodeM8Error {
+    /// Writes the stored message verbatim.
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { message, .. } = self;
+        formatter.write_str(message)
+    }
+}
+
+// Marker impl: no trait methods are overridden, so the `Error` defaults apply.
+impl Error for CodeM8Error {}
diff --git a/src/git.rs b/src/git.rs
new file mode 100644
index 0000000..965f0c9
--- /dev/null
+++ b/src/git.rs
@@ -0,0 +1,284 @@
+use std::collections::BTreeSet;
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::{Command, Output};
+
+use crate::error::{CodeM8Error, Result};
+
+/// Lists files changed on the current branch compared to the origin base branch.
+///
+/// The result is the sorted, de-duplicated union of four listings: files
+/// changed between the merge base and `HEAD`, staged changes, unstaged
+/// changes, and untracked files that are not ignored. The diff listings use a
+/// filter that leaves out deletions, and only paths accepted by
+/// `existing_file_path` are returned.
+///
+/// # Errors
+///
+/// Returns an error when `current_dir` is not inside a Git repository, the
+/// current branch cannot be resolved, or the origin base branch is missing.
+pub fn changed_files_against_origin(current_dir: &Path) -> Result<Vec<PathBuf>> {
+    let repo_root = repo_root(current_dir)?;
+    ensure_named_branch(&repo_root)?;
+    let origin_ref = origin_base_ref(&repo_root)?;
+    let merge_base = run_git_text(
+        &repo_root,
+        &["merge-base", &origin_ref, "HEAD"],
+        "find merge base with origin base branch",
+    )?;
+    let merge_base = merge_base.trim();
+    let changed_only = "--diff-filter=ACMRTUXB";
+    // Committed, staged, unstaged, then untracked; run in this order.
+    let listings: [&[&str]; 4] = [
+        &["diff", "--name-only", "-z", changed_only, merge_base, "HEAD"],
+        &["diff", "--name-only", "-z", "--cached", changed_only],
+        &["diff", "--name-only", "-z", changed_only],
+        &["ls-files", "--others", "--exclude-standard", "-z"],
+    ];
+    let mut paths = BTreeSet::new();
+    for args in listings {
+        collect_nul_paths(&repo_root, args, &mut paths)?;
+    }
+    let existing: Vec<PathBuf> = paths
+        .into_iter()
+        .filter_map(|path| existing_file_path(&repo_root, current_dir, &path))
+        .collect();
+    Ok(existing)
+}
+
+/// Resolves the top-level directory of the repository containing `current_dir`.
+///
+/// # Errors
+///
+/// Returns an error when git cannot be started, when `git rev-parse
+/// --show-toplevel` fails (reported as "not inside a git repository"), or when
+/// the printed path is not valid UTF-8.
+fn repo_root(current_dir: &Path) -> Result<PathBuf> {
+    let output = run_git_output(
+        current_dir,
+        &["rev-parse", "--show-toplevel"],
+        "find git repository",
+    )?;
+    if !output.status.success() {
+        return Err(CodeM8Error::new(
+            "git branch mode requires the current directory to be inside a git repository",
+        ));
+    }
+    let root = output_text(output.stdout, "parse git repository root")?;
+    // Drop only the line terminator git appends. A plain `trim()` would also
+    // remove whitespace that is genuinely part of the directory name.
+    Ok(PathBuf::from(root.trim_end_matches(['\r', '\n'])))
+}
+
+/// Fails when `HEAD` is detached, i.e. git abbreviates it to the literal `HEAD`.
+///
+/// # Errors
+///
+/// Returns an error when the branch name cannot be determined or when `HEAD`
+/// is detached.
+fn ensure_named_branch(repo_root: &Path) -> Result<()> {
+    let head = run_git_text(
+        repo_root,
+        &["rev-parse", "--abbrev-ref", "HEAD"],
+        "determine current git branch",
+    )?;
+    if head.trim() != "HEAD" {
+        return Ok(());
+    }
+    Err(CodeM8Error::new(
+        "git branch mode requires a named local branch, but HEAD is detached",
+    ))
+}
+
+/// Picks the first of `origin/HEAD`, `origin/main`, `origin/master` that resolves.
+///
+/// # Errors
+///
+/// Returns an error when none of the candidates resolves to a commit.
+fn origin_base_ref(repo_root: &Path) -> Result<String> {
+    let candidates = ["origin/HEAD", "origin/main", "origin/master"];
+    candidates
+        .iter()
+        .copied()
+        .find(|candidate| verify_origin_ref(repo_root, candidate))
+        .map(ToOwned::to_owned)
+        .ok_or_else(|| CodeM8Error::new("git branch mode could not resolve origin base branch"))
+}
+
+/// Reports whether `origin_ref` resolves to a commit in the repository.
+///
+/// A failure to start git is treated the same as an unresolved reference.
+fn verify_origin_ref(repo_root: &Path, origin_ref: &str) -> bool {
+    let commit_ref = format!("{origin_ref}^{{commit}}");
+    let verify_args = ["rev-parse", "--verify", commit_ref.as_str()];
+    let resolved = run_git_output(repo_root, &verify_args, "resolve origin base branch");
+    resolved.is_ok_and(|output| output.status.success())
+}
+
+/// Runs git with `args` and adds every NUL-separated path it prints to `paths`.
+///
+/// # Errors
+///
+/// Returns an error when git cannot be started or exits unsuccessfully.
+fn collect_nul_paths(repo_root: &Path, args: &[&str], paths: &mut BTreeSet<PathBuf>) -> Result<()> {
+    let action = "list changed git files";
+    let stdout = run_git_output(repo_root, args, action)
+        .and_then(|output| ensure_git_success(output, action))?;
+    paths.extend(nul_paths(&stdout));
+    Ok(())
+}
+
+/// Maps a repository-relative `path` to a path usable from `current_dir`.
+///
+/// Returns `None` unless the path currently names a regular file that is not
+/// a symlink. The result is relative to `current_dir` when `current_dir` is a
+/// prefix of the joined path, and absolute otherwise.
+///
+/// NOTE(review): the prefix check is purely lexical, so it presumably fails
+/// when `repo_root` and `current_dir` spell the same directory differently
+/// (for example through a symlink) — confirm against callers.
+fn existing_file_path(repo_root: &Path, current_dir: &Path, path: &Path) -> Option<PathBuf> {
+    let absolute = repo_root.join(path);
+    let metadata = fs::symlink_metadata(&absolute).ok()?;
+    let is_regular_file = metadata.is_file() && !metadata.file_type().is_symlink();
+    if !is_regular_file {
+        return None;
+    }
+    let relative = absolute
+        .strip_prefix(current_dir)
+        .ok()
+        .map(Path::to_path_buf);
+    Some(relative.unwrap_or(absolute))
+}
+
+/// Runs git with `args` and returns its standard output as UTF-8 text.
+///
+/// # Errors
+///
+/// Returns an error when git cannot be started, exits unsuccessfully, or
+/// prints output that is not valid UTF-8.
+fn run_git_text(current_dir: &Path, args: &[&str], action: &str) -> Result<String> {
+    run_git_output(current_dir, args, action)
+        .and_then(|output| ensure_git_success(output, action))
+        .and_then(|stdout| output_text(stdout, action))
+}
+
+/// Runs `git -C <current_dir> <args...>` and captures its output.
+///
+/// The exit status is not inspected here; callers decide how to treat it.
+///
+/// # Errors
+///
+/// Returns an error when the git process cannot be started.
+fn run_git_output(current_dir: &Path, args: &[&str], action: &str) -> Result<Output> {
+    let mut git = Command::new("git");
+    git.arg("-C").arg(current_dir).args(args);
+    git.output()
+        .map_err(|error| CodeM8Error::new(format!("could not {action}: {error}")))
+}
+
+/// Returns the captured standard output when git exited successfully.
+///
+/// # Errors
+///
+/// Returns an error containing git's trimmed standard error when the exit
+/// status is not successful.
+fn ensure_git_success(output: Output, action: &str) -> Result<Vec<u8>> {
+    if output.status.success() {
+        return Ok(output.stdout);
+    }
+    // Decode lossily: a strict conversion would replace git's own message
+    // with a UTF-8 decoding error whenever stderr holds non-UTF-8 bytes.
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    Err(CodeM8Error::new(format!(
+        "could not {action}: {}",
+        stderr.trim()
+    )))
+}
+
+/// Converts captured bytes to a `String`.
+///
+/// # Errors
+///
+/// Returns an error naming `action` when the bytes are not valid UTF-8.
+fn output_text(bytes: Vec<u8>, action: &str) -> Result<String> {
+    let decoded = String::from_utf8(bytes);
+    decoded.map_err(|error| CodeM8Error::new(format!("could not {action}: {error}")))
+}
+
+/// Splits NUL-separated output (as printed by git with `-z`) into paths.
+///
+/// Empty segments, such as the one after the trailing NUL, are dropped. The
+/// input is split on raw bytes so that, on Unix, file names that are not
+/// valid UTF-8 keep their exact bytes.
+fn nul_paths(bytes: &[u8]) -> Vec<PathBuf> {
+    bytes
+        .split(|byte| *byte == 0)
+        .filter(|raw| !raw.is_empty())
+        .map(path_from_git_bytes)
+        .collect()
+}
+
+/// Builds a path from the exact bytes git printed.
+#[cfg(unix)]
+fn path_from_git_bytes(raw: &[u8]) -> PathBuf {
+    use std::os::unix::ffi::OsStrExt;
+
+    PathBuf::from(std::ffi::OsStr::from_bytes(raw))
+}
+
+/// Builds a path from git output, replacing invalid UTF-8 where the platform
+/// has no byte-exact path representation.
+#[cfg(not(unix))]
+fn path_from_git_bytes(raw: &[u8]) -> PathBuf {
+    PathBuf::from(String::from_utf8_lossy(raw).into_owned())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::process::Command;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    use super::*;
+
+    /// Gives every temporary repository created by this process a distinct suffix.
+    static TEMP_COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+    /// Temporary directory that is removed again when the value is dropped.
+    struct TempGitRepo {
+        path: PathBuf,
+    }
+
+    impl TempGitRepo {
+        /// Creates an empty, uniquely named directory under the system temp dir.
+        fn new(name: &str) -> Self {
+            let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+            let path =
+                std::env::temp_dir().join(format!("codem8-git-{name}-{}-{id}", std::process::id()));
+            if path.exists() {
+                fs::remove_dir_all(&path).expect("remove stale test directory");
+            }
+            fs::create_dir_all(&path).expect("create test directory");
+            Self { path }
+        }
+
+        fn path(&self) -> &Path {
+            &self.path
+        }
+
+        /// Writes `contents` to `relative_path`, creating parent directories.
+        fn write(&self, relative_path: &str, contents: &str) {
+            let path = self.path.join(relative_path);
+            if let Some(parent) = path.parent() {
+                fs::create_dir_all(parent).expect("create parent directory");
+            }
+            fs::write(path, contents).expect("write test file");
+        }
+
+        /// Runs git inside the directory and panics if the command fails.
+        fn git(&self, args: &[&str]) {
+            let status = Command::new("git")
+                .arg("-C")
+                .arg(&self.path)
+                .args(args)
+                .status()
+                .expect("run git");
+            assert!(status.success(), "git command failed: {args:?}");
+        }
+
+        /// Stages everything and commits it with a fixed test identity.
+        fn commit(&self, message: &str) {
+            self.git(&["add", "."]);
+            self.git(&[
+                "-c",
+                "user.name=CodeM8 Test",
+                "-c",
+                "user.email=codem8@example.invalid",
+                // Keep the commit independent of a global signing setup.
+                "-c",
+                "commit.gpgsign=false",
+                "commit",
+                "-m",
+                message,
+            ]);
+        }
+    }
+
+    impl Drop for TempGitRepo {
+        fn drop(&mut self) {
+            let _ = fs::remove_dir_all(&self.path);
+        }
+    }
+
+    /// Reports whether a `git` executable can be started successfully.
+    fn git_is_available() -> bool {
+        Command::new("git")
+            .arg("--version")
+            .status()
+            .is_ok_and(|status| status.success())
+    }
+
+    #[test]
+    fn rejects_non_git_directory() {
+        // Without git the call fails with a spawn error instead of the
+        // "not a repository" message asserted below, so skip like the
+        // other test in this module does.
+        if !git_is_available() {
+            return;
+        }
+        let repo = TempGitRepo::new("non-repo");
+        let error = changed_files_against_origin(repo.path()).expect_err("non-repo fails");
+        assert!(error.to_string().contains("requires the current directory"));
+    }
+
+    #[test]
+    fn lists_committed_staged_unstaged_and_untracked_files() {
+        if !git_is_available() {
+            return;
+        }
+        let repo = TempGitRepo::new("changes");
+        repo.git(&["init"]);
+        repo.write("src/base.ts", "const value = one;\n");
+        repo.write("src/deleted.ts", "const value = deleted;\n");
+        repo.commit("initial");
+        repo.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]);
+        repo.git(&["branch", "-M", "feature"]);
+        repo.write("src/committed.ts", "const value = committed;\n");
+        repo.commit("branch change");
+        repo.git(&["update-ref", "refs/remotes/origin/feature", "HEAD"]);
+        repo.write("src/staged.ts", "const value = staged;\n");
+        repo.git(&["add", "src/staged.ts"]);
+        repo.write("src/base.ts", "const value = modified;\n");
+        repo.write("src/untracked.ts", "const value = untracked;\n");
+        fs::remove_file(repo.path().join("src/deleted.ts")).expect("delete tracked file");
+        let files = changed_files_against_origin(repo.path()).expect("list branch files");
+        assert_eq!(
+            files,
+            [
+                PathBuf::from("src/base.ts"),
+                PathBuf::from("src/committed.ts"),
+                PathBuf::from("src/staged.ts"),
+                PathBuf::from("src/untracked.ts"),
+            ]
+        );
+    }
+}
diff --git a/src/language.rs b/src/language.rs
new file mode 100644
index 0000000..636c8b1
--- /dev/null
+++ b/src/language.rs
@@ -0,0 +1,355 @@
+use std::collections::HashMap;
+use std::sync::OnceLock;
+
+use crate::model::LineStatus;
+use regex::Regex;
+
+#[derive(Debug, Clone, Copy)]
+pub struct LanguageLinePattern {
+    pub language_name: &'static str, // label of the table entry
+    pub extensions: &'static [&'static str], // file extensions, without the dot
+    pub duplicate_mitigation_pattern: &'static [char], // a line of only these + whitespace matches
+    pub duplicate_mitigation_lines: &'static [&'static str], // exact normalized lines that match
+    pub duplicate_mitigation_regexps: &'static [&'static str], // regex sources; must span the line
+}
+
+pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[
+    LanguageLinePattern {
+        language_name: "TypeScript / JavaScript",
+        extensions: &["ts", "tsx", "js", "jsx", "mjs", "cjs"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Rust",
+        extensions: &["rs"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &["///"],
+        duplicate_mitigation_regexps: &[
+            // Excludes short path or enum variant fragments. Example: Self::Ready,
+            r"^[A-Za-z0-9_]*::?\s*[A-Za-z0-9_]*[,]?$",
+            // Excludes bare identifiers with optional punctuation. Example: value,
+            r"^[A-Za-z0-9_]+\s*[.,]?$",
+            // Excludes simple method or field access lines. Example: .clone()
+            r"^\.?\s*[A-Za-z0-9_]+(?:\(\s*\)?)?$",
+            // Excludes incomplete let bindings split across lines. Example: let value =
+            r"^let\s+(?:mut\s+)?[A-Za-z0-9_]+\s*=$",
+            // Excludes simple public struct field declarations. Example: pub name: String,
+            r"^pub\s+[A-Za-z0-9_]*\s*:\s*[A-Za-z0-9_]*[,]?$",
+            // Excludes single-path use imports. Example: use crate::module;
+            r"^use\s+[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*;$",
+        ],
+    },
+    LanguageLinePattern {
+        language_name: "C / C++ / Objective-C",
+        extensions: &["c", "h", "cpp", "hpp", "cc", "hh", "cxx", "hxx", "m", "mm"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &["#else", "#endif"],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "C#",
+        extensions: &["cs"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &["#else", "#endif", "#endregion"],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Java / Kotlin / Scala",
+        extensions: &["java", "kt", "kts", "scala", "sc"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Go",
+        extensions: &["go"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Python",
+        extensions: &["py", "pyw"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Ruby",
+        extensions: &["rb"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &["end"],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "PHP",
+        extensions: &["php", "phtml"],
+        duplicate_mitigation_pattern: &[
+            '(', ')', ',', '/', ':', ';', '<', '>', '?', '[', ']', '{', '}',
+        ],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Swift",
+        extensions: &["swift"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "Shell",
+        extensions: &["sh", "bash", "zsh", "fish"],
+        duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'],
+        duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "PowerShell",
+        extensions: &["ps1", "psm1", "psd1"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '?', '[', ']', '{', '|', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "HTML / XML",
+        extensions: &["html", "htm", "xml", "xhtml", "svg"],
+        duplicate_mitigation_pattern: &['/', '<', '>'],
+        duplicate_mitigation_lines: &[
+            "</article>",
+            "</body>",
+            "</div>",
+            "</html>",
+            "</section>",
+            "</span>",
+        ],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "CSS / SCSS / Sass / Less",
+        extensions: &["css", "scss", "sass", "less"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &[],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "SQL",
+        extensions: &["sql"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';'],
+        duplicate_mitigation_lines: &["BEGIN", "END"],
+        duplicate_mitigation_regexps: &[],
+    },
+    LanguageLinePattern {
+        language_name: "YAML / JSON / TOML",
+        extensions: &["yaml", "yml", "json", "toml"],
+        duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'],
+        duplicate_mitigation_lines: &["jobs:", "on:"],
+        duplicate_mitigation_regexps: &[],
+    },
+];
+
+/// Lists every extension declared in `LANGUAGE_PATTERNS` once, in table order.
+#[must_use]
+pub fn supported_file_extensions() -> Vec<String> {
+    let mut unique: Vec<String> = Vec::new();
+    let languages = LANGUAGE_PATTERNS.iter();
+    for &candidate in languages.flat_map(|language| language.extensions) {
+        if unique.iter().all(|known| known != candidate) {
+            unique.push(candidate.to_owned());
+        }
+    }
+    unique
+}
+
+#[derive(Debug)]
+struct DuplicateMitigationLineRegistry {
+    by_extension: HashMap<&'static str, DuplicateMitigationPatterns>, // keyed by table extension
+}
+
+#[derive(Debug, Default)]
+struct DuplicateMitigationPatterns {
+    lines_by_hash: HashMap<u128, Vec<&'static str>>, // exact lines, bucketed by their hash
+    character_pattern: Vec<char>, // characters a character-only match may contain
+    regexps: Vec<Regex>, // compiled regexps, deduplicated by source text
+}
+
+static DUPLICATE_MITIGATION_LINE_REGISTRY: OnceLock<DuplicateMitigationLineRegistry> =
+    OnceLock::new(); // filled on the first `registry()` call
+
+#[must_use]
+pub fn hash_normalized_line(line: &str) -> u128 {
+    xxhash_rust::xxh3::xxh3_128(line.as_bytes()) // 128-bit XXH3 over the line's UTF-8 bytes
+}
+
+/// Classifies one normalized line of a file with the given extension.
+///
+/// The extension is ASCII-lowercased before lookup; extensions without a registry
+/// entry, and lines matching none of the entry's patterns, are `Comparison`.
+#[must_use]
+pub fn classify_line(extension: &str, normalized_line: &str, hash: u128) -> LineStatus {
+    let key = extension.to_ascii_lowercase();
+    match registry().by_extension.get(key.as_str()) {
+        Some(patterns) if patterns.matches_line(normalized_line, hash) => LineStatus::BlockOnly,
+        _ => LineStatus::Comparison,
+    }
+}
+
+/// Returns the shared registry of per-extension duplicate-mitigation patterns.
+///
+/// It is built from `LANGUAGE_PATTERNS` on the first call and reused afterwards.
+fn registry() -> &'static DuplicateMitigationLineRegistry {
+    DUPLICATE_MITIGATION_LINE_REGISTRY.get_or_init(build_registry)
+}
+
+/// Merges every language's lines, characters, and regexps into per-extension entries.
+fn build_registry() -> DuplicateMitigationLineRegistry {
+    let mut by_extension: HashMap<&'static str, DuplicateMitigationPatterns> = HashMap::new();
+    for language in LANGUAGE_PATTERNS {
+        for &extension in language.extensions {
+            let entry = by_extension.entry(extension).or_default();
+            let lines = language.duplicate_mitigation_lines;
+            register_duplicate_mitigation_lines(&mut entry.lines_by_hash, lines);
+            let characters = language.duplicate_mitigation_pattern;
+            register_duplicate_mitigation_pattern(&mut entry.character_pattern, characters);
+            let regexps = language.duplicate_mitigation_regexps;
+            register_duplicate_mitigation_regexps(&mut entry.regexps, regexps);
+        }
+    }
+    DuplicateMitigationLineRegistry { by_extension }
+}
+
+impl DuplicateMitigationPatterns {
+    /// True when an exact registered line, the character set, or a regexp matches.
+    fn matches_line(&self, normalized_line: &str, hash: u128) -> bool {
+        self.matches_registered_line(normalized_line, hash)
+            || matches_duplicate_mitigation_pattern(normalized_line, &self.character_pattern)
+            || matches_duplicate_mitigation_regexps(normalized_line, &self.regexps)
+    }
+
+    /// Looks up by hash, then compares the text so a colliding hash cannot match.
+    fn matches_registered_line(&self, normalized_line: &str, hash: u128) -> bool {
+        matches!(self.lines_by_hash.get(&hash), Some(lines) if lines.contains(&normalized_line))
+    }
+}
+
+/// Files each exact line under its hash so lookups can start from the hash.
+fn register_duplicate_mitigation_lines(
+    patterns_by_hash: &mut HashMap<u128, Vec<&'static str>>,
+    lines: &'static [&'static str],
+) {
+    for &text in lines {
+        // Lines are appended as given; repeated registrations are not deduplicated.
+        let bucket = patterns_by_hash.entry(hash_normalized_line(text));
+        bucket.or_default().push(text);
+    }
+}
+
+/// Appends each character that is not present yet, keeping first-seen order.
+fn register_duplicate_mitigation_pattern(
+    character_pattern: &mut Vec<char>,
+    characters: &'static [char],
+) {
+    for &candidate in characters {
+        let missing = !character_pattern.contains(&candidate);
+        character_pattern.extend(missing.then_some(candidate));
+    }
+}
+
+fn register_duplicate_mitigation_regexps(
+    regexps: &mut Vec<Regex>, // gains each pattern whose source text is not present yet
+    patterns: &'static [&'static str], // regex sources; one that fails to compile panics
+) {
+    for &pattern in patterns {
+        if !regexps.iter().any(|regexp| regexp.as_str() == pattern) {
+            regexps.push(Regex::new(pattern).expect("duplicate mitigation regexp must compile"));
+        }
+    }
+}
+
+/// Returns whether `line` consists solely of whitespace and `character_pattern` members.
+/// An empty `character_pattern` never matches.
+fn matches_duplicate_mitigation_pattern(line: &str, character_pattern: &[char]) -> bool {
+    let allowed = |symbol: char| symbol.is_whitespace() || character_pattern.contains(&symbol);
+    !character_pattern.is_empty() && line.chars().all(allowed)
+}
+
+/// Returns whether any regexp's leftmost match covers `line` from start to end.
+/// A regexp that only matches part of the line does not count.
+fn matches_duplicate_mitigation_regexps(line: &str, regexps: &[Regex]) -> bool {
+    let whole_line = (0, line.len());
+    let span = |regexp: &Regex| regexp.find(line).map(|found| (found.start(), found.end()));
+    regexps.iter().any(|regexp| span(regexp) == Some(whole_line))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn assigns_block_only_status_from_extension_specific_line_registry() {
+        let line = ".into_iter()";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("rs", line, hash), LineStatus::BlockOnly);
+    }
+
+    #[test]
+    fn assigns_comparison_status_for_meaningful_lines() {
+        let line = "const value = computeValue(input);";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("ts", line, hash), LineStatus::Comparison);
+    }
+
+    #[test]
+    fn verifies_text_after_hash_lookup() {
+        let hash = hash_normalized_line("}");
+        assert_eq!(
+            classify_line("ts", "not-a-brace", hash),
+            LineStatus::Comparison
+        );
+    }
+
+    #[test]
+    fn assigns_block_only_status_from_character_pattern() {
+        let line = "} \t);";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("ts", line, hash), LineStatus::BlockOnly);
+    }
+
+    #[test]
+    fn assigns_block_only_status_from_regexps() {
+        let line = ".update()";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("rs", line, hash), LineStatus::BlockOnly);
+    }
+
+    #[test]
+    fn regexps_must_match_the_full_line() {
+        let line = ".update()?.await";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("rs", line, hash), LineStatus::Comparison);
+    }
+
+    #[test]
+    fn ignores_character_pattern_for_unknown_extensions() {
+        let line = "});";
+        let hash = hash_normalized_line(line);
+        assert_eq!(classify_line("unknown", line, hash), LineStatus::Comparison);
+    }
+
+    #[test]
+    fn empty_character_pattern_does_not_match() {
+        assert!(!matches_duplicate_mitigation_pattern("}", &[]));
+    }
+
+    #[test]
+    fn collects_supported_file_extensions_from_language_patterns() {
+        let extensions = supported_file_extensions();
+        for language in LANGUAGE_PATTERNS {
+            for extension in language.extensions {
+                assert!(extensions.iter().any(|selected| selected == extension));
+            }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6656221
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,245 @@
+pub mod cli;
+pub mod discovery;
+pub mod duplicate;
+pub mod error;
+pub mod git;
+pub mod language;
+pub mod line;
+pub mod model;
+pub mod paths;
+pub mod report;
+
+use std::io::Write;
+use std::path::Path;
+
+use crate::error::{CodeM8Error, Result};
+
+/// Runs the CLI workflow and writes the help text or the duplicate report to `writer`.
+///
+/// # Errors
+///
+/// Returns an error when argument parsing, file discovery, file processing, or
+/// report writing fails.
+pub fn run<I, S, W>(args: I, current_dir: &Path, writer: &mut W) -> Result<()>
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+    W: Write,
+{
+    let config = match cli::parse_command(args)? {
+        cli::CliCommand::ReportDuplicate(config) => config,
+        cli::CliCommand::Help => {
+            writer
+                .write_all(cli::help_text().as_bytes())
+                .map_err(|error| {
+                    CodeM8Error::new(format!("could not write help output: {error}"))
+                })?;
+            return Ok(());
+        }
+    };
+    let git_branch_files = config
+        .git_branch
+        .then(|| git::changed_files_against_origin(current_dir))
+        .transpose()?;
+    let selected_files = git_branch_files.as_deref().or(config.files.as_deref()); // git first
+    let source_files =
+        discovery::discover_source_files(current_dir, &config.file_extensions, selected_files)?;
+    let processed_files = line::process_source_files(&source_files)?;
+    let duplicate_blocks = duplicate::detect_duplicate_blocks(&processed_files);
+    let scanned_files = (config.git_branch || config.files.is_some()).then(|| {
+        source_files
+            .iter()
+            .map(|source_file| source_file.display_path.clone())
+            .collect()
+    });
+    let report = report::DuplicateReport {
+        analyzed_files: source_files.len(),
+        analyzed_extensions: config.file_extensions,
+        scanned_files,
+        duplicate_blocks,
+    };
+    let rendered = report::render_duplicate_report(&report, config.verbose);
+    writer
+        .write_all(rendered.as_bytes())
+        .map_err(|error| CodeM8Error::new(format!("could not write report output: {error}")))?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use std::path::{Path, PathBuf};
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    use super::*;
+
+    static TEMP_COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+    struct TempProject {
+        path: PathBuf,
+    }
+
+    impl TempProject {
+        fn new(name: &str) -> Self {
+            let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+            let path =
+                std::env::temp_dir().join(format!("codem8-{name}-{}-{id}", std::process::id()));
+            if path.exists() {
+                fs::remove_dir_all(&path).expect("remove stale test directory");
+            }
+            fs::create_dir_all(&path).expect("create test directory");
+            Self { path }
+        }
+
+        fn write(&self, relative_path: &str, contents: &str) {
+            let path = self.path.join(relative_path);
+            if let Some(parent) = path.parent() {
+                fs::create_dir_all(parent).expect("create test parent directory");
+            }
+            fs::write(path, contents).expect("write test file");
+        }
+
+        fn path(&self) -> &Path {
+            &self.path
+        }
+    }
+
+    impl Drop for TempProject {
+        fn drop(&mut self) {
+            let _ = fs::remove_dir_all(&self.path);
+        }
+    }
+
+    fn run_in(project: &TempProject, args: &[&str]) -> std::result::Result<String, CodeM8Error> {
+        let mut output = Vec::new();
+        run(args.iter().copied(), project.path(), &mut output)?;
+        Ok(String::from_utf8(output).expect("report is UTF-8"))
+    }
+
+    #[test]
+    fn duplicate_report_snapshot_is_stable() {
+        let project = TempProject::new("snapshot");
+        project.write(
+            "src/a.ts",
+            "const value = computeValue(input);\nif (value === undefined) {\nreturn defaultValue;\n}\n",
+        );
+        project.write(
+            "src/b.ts",
+            "const value = computeValue(input);\nif (value === undefined) {\nreturn defaultValue;\n}\n",
+        );
+        let output = run_in(&project, &["--report-duplicate"]).expect("report succeeds");
+        let expected_extensions = language::supported_file_extensions().join(", ");
+        assert_eq!(
+            output,
+            [
+                "Duplicate Code Report\n",
+                "=====================\n",
+                "\n",
+                "Number of files scanned: 2\n",
+                "Analyzed extensions: ",
+                &expected_extensions,
+                "\n",
+                "Duplicate blocks found: 1\n",
+                "\n",
+                "#1\n",
+                "Code:\n",
+                "  const value = computeValue(input);\n",
+                "  if (value === undefined) {\n",
+                "  return defaultValue;\n",
+                "  }\n",
+                "\n",
+                "Locations:\n",
+                "- src/a.ts:1-4\n",
+                "- src/b.ts:1-4\n",
+            ]
+            .concat()
+        );
+    }
+
+    #[test]
+    fn verbose_duplicate_report_includes_metrics_without_characters() {
+        let project = TempProject::new("verbose");
+        project.write(
+            "src/a.ts",
+            "const value = computeValue(input);\nreturn value;\n",
+        );
+        project.write(
+            "src/b.ts",
+            "const value = computeValue(input);\nreturn value;\n",
+        );
+        let output =
+            run_in(&project, &["--report-duplicate", "-verbose"]).expect("report succeeds");
+        assert!(output.contains("Weight:"));
+        assert!(output.contains("Lines: 2"));
+        assert!(output.contains("Occurrences: 2"));
+        assert!(!output.contains("Characters:"));
+        assert!(
+            output.find("Code:").expect("code section exists")
+                < output.find("Locations:").expect("locations section exists")
+        );
+    }
+
+    #[test]
+    fn explicit_files_disable_recursive_discovery() {
+        let project = TempProject::new("explicit-files");
+        project.write("src/a.ts", "const value = one;\n");
+        project.write("src/b.ts", "const value = one;\n");
+        let output =
+            run_in(&project, &["--report-duplicate", "-files=src/a.ts"]).expect("report succeeds");
+        assert!(output.contains("Number of files scanned: 1"));
+        assert!(output.contains("Duplicate blocks found: 0"));
+    }
+
+    #[test]
+    fn verbose_explicit_files_report_lists_scanned_files() {
+        let project = TempProject::new("verbose-explicit-files");
+        project.write("src/a.ts", "const value = one;\n");
+        project.write("src/b.ts", "const value = one;\n");
+        let quiet_output =
+            run_in(&project, &["--report-duplicate", "-files=src/a.ts"]).expect("report succeeds");
+        assert!(!quiet_output.contains("Files scanned:"));
+        let verbose_output = run_in(
+            &project,
+            &["--report-duplicate", "-verbose", "-files=src/a.ts"],
+        )
+        .expect("report succeeds");
+        assert!(verbose_output.contains(
+            "Number of files scanned: 1\n\
+             Files scanned:\n\
+             - src/a.ts\n\
+             Analyzed extensions:"
+        ));
+    }
+
+    #[test]
+    fn custom_extensions_change_analyzed_files() {
+        let project = TempProject::new("custom-extensions");
+        project.write("src/a.js", "const value = one;\n");
+        project.write("src/b.js", "const value = one;\n");
+        let default_output = run_in(&project, &["--report-duplicate"]).expect("report succeeds");
+        assert!(default_output.contains("Number of files scanned: 2"));
+        assert!(default_output.contains("Duplicate blocks found: 1"));
+        let js_output = run_in(&project, &["--report-duplicate", "-file-extension=js"])
+            .expect("report succeeds");
+        assert!(js_output.contains("Number of files scanned: 2"));
+        assert!(js_output.contains("Duplicate blocks found: 1"));
+    }
+
+    #[test]
+    fn invalid_explicit_file_returns_a_clear_error() {
+        let project = TempProject::new("invalid-file");
+        let error = run_in(&project, &["--report-duplicate", "-files=missing.ts"])
+            .expect_err("missing explicit file fails");
+        assert!(error
+            .to_string()
+            .contains("explicit file does not exist: missing.ts"));
+    }
+
+    #[test]
+    fn help_command_prints_documentation() {
+        let project = TempProject::new("help");
+        let output = run_in(&project, &["help"]).expect("help succeeds");
+        assert!(output.contains("USAGE:"));
+        assert!(output.contains("--report-duplicate"));
+    }
+}
diff --git a/src/line.rs b/src/line.rs
new file mode 100644
index 0000000..92dc0f5
--- /dev/null
+++ b/src/line.rs
@@ -0,0 +1,118 @@
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+
+use crate::error::{CodeM8Error, Result};
+use crate::language::{classify_line, hash_normalized_line};
+use crate::model::{LineEntry, ProcessedFile, SourceFile};
+
+/// Processes the source files in order into normalized, classified line entries.
+///
+/// # Errors
+///
+/// Returns the first error raised when a file cannot be opened or read as UTF-8 text.
+pub fn process_source_files(source_files: &[SourceFile]) -> Result<Vec<ProcessedFile>> {
+    source_files.iter().map(process_source_file).collect()
+}
+
+/// Processes one source file into its normalized, classified lines.
+///
+/// # Errors
+///
+/// Returns an error when the file cannot be opened or read as UTF-8 text.
+pub fn process_source_file(source_file: &SourceFile) -> Result<ProcessedFile> {
+    let display_path = &source_file.display_path;
+    let file = File::open(&source_file.path)
+        .map_err(|error| CodeM8Error::io(display_path, "open file", &error))?;
+    let mut entries = Vec::new();
+    for (index, read) in BufReader::new(file).lines().enumerate() {
+        let raw = read.map_err(|error| {
+            CodeM8Error::new(format!(
+                "could not read {} as UTF-8 text: {error}",
+                crate::paths::format_path(display_path)
+            ))
+        })?;
+        // Blank lines are skipped but still counted, so line numbers match the file.
+        if let Some(normalized_text) = normalize_line(&raw) {
+            let hash = hash_normalized_line(&normalized_text);
+            let status = classify_line(&source_file.extension, &normalized_text, hash);
+            entries.push(LineEntry {
+                file_path: display_path.clone(),
+                line_number: index + 1,
+                normalized_text,
+                hash,
+                status,
+            });
+        }
+    }
+    Ok(ProcessedFile {
+        source: source_file.clone(),
+        lines: entries,
+    })
+}
+
+/// Trims surrounding whitespace and byte order marks; blank results become `None`.
+///
+/// `str::trim` keeps U+FEFF, which would leave a file's BOM glued to its first line.
+#[must_use]
+pub fn normalize_line(line: &str) -> Option<String> {
+    let is_padding = |character: char| character.is_whitespace() || character == '\u{feff}';
+    let normalized = line.trim_matches(is_padding);
+    (!normalized.is_empty()).then(|| normalized.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+
+    use crate::model::LineStatus;
+
+    use super::*;
+
+    #[test]
+    fn trims_unicode_whitespace_and_skips_empty_lines() {
+        assert_eq!(
+            normalize_line("\t value \u{2003}"),
+            Some("value".to_string())
+        );
+        assert_eq!(normalize_line(" \t "), None);
+    }
+
+    #[test]
+    fn processes_non_empty_lines_with_original_line_numbers() {
+        let path = std::env::temp_dir().join(format!("codem8-line-test-{}.ts", std::process::id()));
+        fs::write(&path, "  const value = 1;  \n\n   }\n").expect("write source file");
+        let source = SourceFile {
+            path: path.clone(),
+            display_path: "sample.ts".into(),
+            extension: "ts".to_string(),
+        };
+        let processed = process_source_file(&source).expect("process source file");
+        assert_eq!(processed.lines.len(), 2);
+        assert_eq!(processed.lines[0].line_number, 1);
+        assert_eq!(processed.lines[0].normalized_text, "const value = 1;");
+        assert_eq!(processed.lines[0].status, LineStatus::Comparison);
+        assert_eq!(processed.lines[1].line_number, 3);
+        assert_eq!(processed.lines[1].normalized_text, "}");
+        assert_eq!(processed.lines[1].status, LineStatus::BlockOnly);
+        fs::remove_file(path).expect("cleanup");
+    }
+
+    #[test]
+    fn returns_clear_error_for_invalid_utf8() {
+        let path = std::env::temp_dir().join(format!(
+            "codem8-line-invalid-utf8-{}.ts",
+            std::process::id()
+        ));
+        fs::write(&path, [0xff, b'\n']).expect("write invalid source file");
+        let source = SourceFile {
+            path: path.clone(),
+            display_path: "invalid.ts".into(),
+            extension: "ts".to_string(),
+        };
+        let error = process_source_file(&source).expect_err("invalid UTF-8 fails");
+        assert!(error
+            .to_string()
+            .contains("could not read invalid.ts as UTF-8 text"));
+        fs::remove_file(path).expect("cleanup");
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..a6e1e1d
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,28 @@
+use std::io::Write;
+use std::process::ExitCode;
+
+/// Runs the CLI against the current directory with locked standard output.
+///
+/// Failures are reported on standard error and produce a failing exit code.
+fn main() -> ExitCode {
+    let current_dir = match std::env::current_dir() {
+        Ok(directory) => directory,
+        Err(error) => {
+            eprintln!("error: could not determine current directory: {error}");
+            return ExitCode::FAILURE;
+        }
+    };
+    let mut stdout = std::io::stdout().lock();
+    let Err(error) = codem8::run(std::env::args().skip(1), &current_dir, &mut stdout) else {
+        // Flushing is best-effort: a flush failure does not change the exit code.
+        let _ = stdout.flush();
+        return ExitCode::SUCCESS;
+    };
+    eprintln!("error: {error}");
+    if error.should_show_help() {
+        // The error asks for usage help: print a blank line, then the help text.
+        eprintln!();
+        eprint!("{}", codem8::cli::help_text());
+    }
+    ExitCode::FAILURE
+}
diff --git a/src/model.rs b/src/model.rs
new file mode 100644
index 0000000..2a1b195
--- /dev/null
+++ b/src/model.rs
@@ -0,0 +1,58 @@
+use std::path::PathBuf;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LineStatus {
+    Comparison, // assigned when no duplicate-mitigation pattern matched the line
+    BlockOnly, // assigned when a duplicate-mitigation pattern for the extension matched
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SourceFile {
+    pub path: PathBuf, // location opened when the file is read
+    pub display_path: PathBuf, // path shown in reports and error messages
+    pub extension: String, // selects the language patterns used to classify lines
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LineEntry {
+    pub file_path: PathBuf, // copy of the source file's display path
+    pub line_number: usize, // 1-based position in the original file
+    pub normalized_text: String, // trimmed line text
+    pub hash: u128, // hash of `normalized_text`
+    pub status: LineStatus, // classification for the file's extension
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ProcessedFile {
+    pub source: SourceFile, // the file these lines were read from
+    pub lines: Vec<LineEntry>, // non-blank lines in file order
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DuplicateOccurrence {
+    pub file_path: PathBuf, // file containing this occurrence
+    pub start_line: usize, // presumably 1-based like `LineEntry::line_number` (TODO confirm)
+    pub end_line: usize, // presumably inclusive; reports show `start-end` (TODO confirm)
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DuplicateBlock {
+    pub normalized_lines: Vec<String>, // the block's text, one normalized line per entry
+    pub occurrences: Vec<DuplicateOccurrence>, // places where the block appears
+    pub weight: u64, // printed as `Weight:` in verbose reports; computed elsewhere
+}
+
+impl DuplicateBlock {
+    /// Number of lines in the block.
+    #[must_use]
+    pub const fn line_count(&self) -> usize {
+        self.normalized_lines.len()
+    }
+
+    /// Total Unicode scalar values (not bytes) across all lines of the block.
+    #[must_use]
+    pub fn character_count(&self) -> u64 {
+        let lines = self.normalized_lines.iter();
+        lines.map(|line| line.chars().count() as u64).sum()
+    }
+}
diff --git a/src/paths.rs b/src/paths.rs
new file mode 100644
index 0000000..f55926f
--- /dev/null
+++ b/src/paths.rs
@@ -0,0 +1,50 @@
+use std::path::{Component, Path, PathBuf};
+
+#[must_use]
+pub fn format_path(path: &Path) -> String {
+    path.to_string_lossy().replace('\\', "/") // lossy UTF-8 text with `\` shown as `/`
+}
+
+/// Normalizes a path for display by dropping its `.` components.
+///
+/// `..`, root, and prefix components are kept unchanged. A path left with no
+/// components becomes `.`.
+#[must_use]
+pub fn normalize_display_path(path: &Path) -> PathBuf {
+    let rebuilt: PathBuf = path
+        .components()
+        .filter(|component| !matches!(component, Component::CurDir))
+        .collect();
+    // An empty path would display as nothing, so stand in the current directory.
+    if rebuilt.as_os_str().is_empty() {
+        PathBuf::from(".")
+    } else {
+        rebuilt
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn formats_paths_with_forward_slashes() {
+        assert_eq!(
+            format_path(Path::new("src\\nested\\a.ts")),
+            "src/nested/a.ts"
+        );
+    }
+
+    #[test]
+    fn normalizes_display_paths_without_losing_parent_segments() {
+        assert_eq!(
+            normalize_display_path(Path::new("./src/../a.ts")),
+            PathBuf::from("src").join("..").join("a.ts")
+        );
+    }
+
+    #[test]
+    fn normalizes_empty_display_path_to_current_directory() {
+        assert_eq!(normalize_display_path(Path::new(".")), PathBuf::from("."));
+    }
+}
diff --git a/src/report.rs b/src/report.rs
new file mode 100644
index 0000000..eb7c42d
--- /dev/null
+++ b/src/report.rs
@@ -0,0 +1,187 @@
+use std::fmt::Write as _;
+use std::path::PathBuf;
+
+use crate::model::DuplicateBlock;
+use crate::paths::format_path;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DuplicateReport {
+    pub analyzed_files: usize,                 // printed as "Number of files scanned"
+    pub analyzed_extensions: Vec<String>,      // printed joined with ", "
+    pub scanned_files: Option<Vec<PathBuf>>,   // when `Some`, listed in verbose output
+    pub duplicate_blocks: Vec<DuplicateBlock>, // rendered in order as #1, #2, ...
+}
+
+/// Renders `report` as the plain-text "Duplicate Code Report".
+///
+/// The header states the `analyzed_files` count, the analyzed extensions joined
+/// with `", "`, and the number of duplicate blocks. Every block follows as a
+/// section numbered from `#1`, holding its code lines (indented two spaces)
+/// and one `- path:start-end` entry per occurrence.
+///
+/// When `verbose` is set, the output additionally contains the scanned-file
+/// list (if `report.scanned_files` is `Some`) and, per block, its weight,
+/// line count, and occurrence count.
+#[must_use]
+pub fn render_duplicate_report(report: &DuplicateReport, verbose: bool) -> String {
+    // Writing into a `String` never fails, so the `fmt::Result`s are dropped.
+    let mut text = String::from("Duplicate Code Report\n=====================\n\n");
+    let _ = writeln!(text, "Number of files scanned: {}", report.analyzed_files);
+
+    // The file list is a verbose-only extra and may be absent altogether.
+    if let Some(paths) = report.scanned_files.as_ref().filter(|_| verbose) {
+        text.push_str("Files scanned:\n");
+        for path in paths {
+            let _ = writeln!(text, "- {}", format_path(path));
+        }
+    }
+
+    let extensions = report.analyzed_extensions.join(", ");
+    let _ = writeln!(text, "Analyzed extensions: {extensions}");
+    let block_total = report.duplicate_blocks.len();
+    let _ = writeln!(text, "Duplicate blocks found: {block_total}");
+
+    // Sections are numbered from 1 and each starts after a blank line.
+    for (number, block) in (1_usize..).zip(&report.duplicate_blocks) {
+        let _ = writeln!(text, "\n#{number}");
+        // Per-block metrics appear only in verbose output.
+        if verbose {
+            let _ = writeln!(text, "Weight: {}", block.weight);
+            let _ = writeln!(text, "Lines: {}", block.line_count());
+            let _ = writeln!(text, "Occurrences: {}", block.occurrences.len());
+            text.push('\n');
+        }
+
+        text.push_str("Code:\n");
+        for code_line in &block.normalized_lines {
+            let _ = writeln!(text, "  {code_line}");
+        }
+
+        text.push_str("\nLocations:\n");
+        for occurrence in &block.occurrences {
+            let location = format_path(&occurrence.file_path);
+            let (first, last) = (occurrence.start_line, occurrence.end_line);
+            let _ = writeln!(text, "- {location}:{first}-{last}");
+        }
+    }
+
+    text
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+
+    use crate::model::{DuplicateBlock, DuplicateOccurrence};
+
+    use super::*;
+
+    /// Builds a two-file report without a scanned-file list whose only
+    /// duplicate block is the line `return value;` with weight 13.
+    fn single_block_report(
+        analyzed_extensions: Vec<String>,
+        occurrences: Vec<DuplicateOccurrence>,
+    ) -> DuplicateReport {
+        DuplicateReport {
+            analyzed_files: 2,
+            analyzed_extensions,
+            scanned_files: None,
+            duplicate_blocks: vec![DuplicateBlock {
+                normalized_lines: vec!["return value;".to_string()],
+                occurrences,
+                weight: 13,
+            }],
+        }
+    }
+
+    #[test]
+    fn renders_empty_report() {
+        let report = DuplicateReport {
+            analyzed_files: 0,
+            analyzed_extensions: vec![String::from("ts")],
+            scanned_files: None,
+            duplicate_blocks: vec![],
+        };
+        let expected = concat!(
+            "Duplicate Code Report\n=====================\n\n",
+            "Number of files scanned: 0\n",
+            "Analyzed extensions: ts\n",
+            "Duplicate blocks found: 0\n",
+        );
+        assert_eq!(render_duplicate_report(&report, false), expected);
+    }
+
+    #[test]
+    fn renders_duplicate_block_details() {
+        let report = single_block_report(
+            vec!["ts".to_string(), "js".to_string()],
+            vec![
+                DuplicateOccurrence {
+                    file_path: PathBuf::from("src/a.ts"),
+                    start_line: 1,
+                    end_line: 1,
+                },
+                DuplicateOccurrence {
+                    file_path: PathBuf::from("src/b.js"),
+                    start_line: 5,
+                    end_line: 5,
+                },
+            ],
+        );
+        let output = render_duplicate_report(&report, false);
+        assert!(output.contains("#1\n"));
+        for metric in ["Weight: 13", "Lines: 1", "Occurrences: 2", "Characters:"] {
+            assert!(!output.contains(metric), "unexpected metric: {metric}");
+        }
+        assert!(output.contains("- src/a.ts:1-1"));
+        assert!(output.contains("  return value;"));
+        let code_at = output.find("Code:").expect("code section exists");
+        let locations_at = output.find("Locations:").expect("locations section exists");
+        assert!(code_at < locations_at);
+    }
+
+    #[test]
+    fn renders_duplicate_block_metrics_in_verbose_mode() {
+        let report = single_block_report(
+            vec!["ts".to_string()],
+            vec![
+                DuplicateOccurrence {
+                    file_path: PathBuf::from("src/a.ts"),
+                    start_line: 1,
+                    end_line: 1,
+                },
+                DuplicateOccurrence {
+                    file_path: PathBuf::from("src/b.ts"),
+                    start_line: 2,
+                    end_line: 2,
+                },
+            ],
+        );
+        let output = render_duplicate_report(&report, true);
+        for metric in ["Weight: 13", "Lines: 1", "Occurrences: 2"] {
+            assert!(output.contains(metric), "missing metric: {metric}");
+        }
+        assert!(!output.contains("Characters:"));
+    }
+
+    #[test]
+    fn renders_scanned_file_list_only_in_verbose_mode() {
+        let report = DuplicateReport {
+            analyzed_files: 2,
+            analyzed_extensions: vec![String::from("ts")],
+            scanned_files: Some(vec!["src/a.ts".into(), "src/nested/b.ts".into()]),
+            duplicate_blocks: vec![],
+        };
+        let quiet_output = render_duplicate_report(&report, false);
+        assert!(!quiet_output.contains("Files scanned:"));
+        let verbose_output = render_duplicate_report(&report, true);
+        let expected_header = concat!(
+            "Number of files scanned: 2\n",
+            "Files scanned:\n",
+            "- src/a.ts\n",
+            "- src/nested/b.ts\n",
+            "Analyzed extensions: ts",
+        );
+        assert!(verbose_output.contains(expected_header));
+    }
+}