From ca2d92a74b97471d643d8c450a6d269d66b67c00 Mon Sep 17 00:00:00 2001 From: b4prog Date: Fri, 26 Jun 2026 08:26:07 +0200 Subject: [PATCH 1/4] [chore] bump CodeM8 minor version to 0.5.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e20583f..7fda882 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,7 +113,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codem8" -version = "0.4.0" +version = "0.5.0" dependencies = [ "clap", "ignore", diff --git a/Cargo.toml b/Cargo.toml index f45a274..f24c20d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codem8" -version = "0.4.0" +version = "0.5.0" edition = "2021" rust-version = "1.85" license = "MIT" From 52eb4748341417665742f3a9840dc55e9f625c96 Mon Sep 17 00:00:00 2001 From: b4prog Date: Fri, 26 Jun 2026 08:28:15 +0200 Subject: [PATCH 2/4] [fix] treat #[test] as a duplicate mitigation line --- src/language.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/language.rs b/src/language.rs index 202827a..64964cb 100644 --- a/src/language.rs +++ b/src/language.rs @@ -34,7 +34,7 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ language_name: "Rust", extensions: &["rs"], duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], - duplicate_mitigation_lines: &["///"], + duplicate_mitigation_lines: &["///", "#[test]"], duplicate_mitigation_regexps: &[ // Excludes short path or enum variant fragments. Example: Self::Ready, r"^[A-Za-z0-9_]*::?\s*[A-Za-z0-9_]*[,]?$", From 15224b12456ee286597a169823b62dbcfb0b286c Mon Sep 17 00:00:00 2001 From: b4prog Date: Fri, 26 Jun 2026 08:51:29 +0200 Subject: [PATCH 3/4] [refactor] split language pattern settings by language --- src/language.rs | 242 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 184 insertions(+), 58 deletions(-) diff --git a/src/language.rs b/src/language.rs index 64964cb..9544ade 100644 --- a/src/language.rs +++ b/src/language.rs @@ -15,8 +15,78 @@ pub struct LanguageLinePattern { pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ LanguageLinePattern { - language_name: "TypeScript / JavaScript", - extensions: &["ts", "tsx", "js", "jsx", "mjs", "cjs"], + language_name: "Bash", + extensions: &["bash"], + duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'], + duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "C", + extensions: &["c", "h"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + duplicate_mitigation_lines: &["#else", "#endif"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "C#", + extensions: &["cs"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + duplicate_mitigation_lines: &["#else", "#endif", "#endregion"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "C++", + extensions: &["cpp", "hpp", "cc", "hh", "cxx", "hxx"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + duplicate_mitigation_lines: &["#else", "#endif"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "CSS", + extensions: &["css"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], + duplicate_mitigation_lines: &[], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "Fish", + extensions: &["fish"], + duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'], + duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "Go", + extensions: &["go"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], + duplicate_mitigation_lines: &[], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "HTML", + extensions: &["html", "htm"], + duplicate_mitigation_pattern: &['/', '<', '>'], + duplicate_mitigation_lines: &[ + "", + "", + "", + "", + "", + "", + ], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "Java", + extensions: &["java"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + duplicate_mitigation_lines: &[], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "JavaScript", + extensions: &["js", "jsx", "mjs", "cjs"], duplicate_mitigation_pattern: &[ '&', '(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '|', '}', ], @@ -31,50 +101,39 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ ], }, LanguageLinePattern { - language_name: "Rust", - extensions: &["rs"], + language_name: "Kotlin", + extensions: &["kt", "kts"], duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], - duplicate_mitigation_lines: &["///", "#[test]"], - duplicate_mitigation_regexps: &[ - // Excludes short path or enum variant fragments. Example: Self::Ready, - r"^[A-Za-z0-9_]*::?\s*[A-Za-z0-9_]*[,]?$", - // Excludes bare identifiers with optional punctuation. Example: value, - r"^[A-Za-z0-9_]+\s*[.,]?$", - // Excludes simple method or field access lines. Example: .clone() - r"^\.?\s*[A-Za-z0-9_]+(?:\(\s*\)?)?$", - // Excludes incomplete let bindings split across lines. Example: let value = - r"^let\s+(?:mut\s+)?[A-Za-z0-9_]+\s*=$", - // Excludes simple public struct field declarations. Example: pub name: String, - r"^pub\s+[A-Za-z0-9_]*\s*:\s*[A-Za-z0-9_]*[,]?$", - // Excludes single-path use imports. Example: use crate::module; - r"^use\s+[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*;$", - ], + duplicate_mitigation_lines: &[], + duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "C / C++ / Objective-C", - extensions: &["c", "h", "cpp", "hpp", "cc", "hh", "cxx", "hxx", "m", "mm"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], - duplicate_mitigation_lines: &["#else", "#endif"], + language_name: "Less", + extensions: &["less"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], + duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "C#", - extensions: &["cs"], + language_name: "Objective-C", + extensions: &["m", "mm"], duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], - duplicate_mitigation_lines: &["#else", "#endif", "#endregion"], + duplicate_mitigation_lines: &["#else", "#endif"], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "Java / Kotlin / Scala", - extensions: &["java", "kt", "kts", "scala", "sc"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + language_name: "PHP", + extensions: &["php", "phtml"], + duplicate_mitigation_pattern: &[ + '(', ')', ',', '/', ':', ';', '<', '>', '?', '[', ']', '{', '}', + ], duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "Go", - extensions: &["go"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], + language_name: "PowerShell", + extensions: &["ps1", "psm1", "psd1"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '?', '[', ']', '{', '|', '}'], duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, @@ -93,38 +152,86 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "PHP", - extensions: &["php", "phtml"], - duplicate_mitigation_pattern: &[ - '(', ')', ',', '/', ':', ';', '<', '>', '?', '[', ']', '{', '}', + language_name: "Rust", + extensions: &["rs"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], + duplicate_mitigation_lines: &["///", "#[test]"], + duplicate_mitigation_regexps: &[ + // Excludes short path or enum variant fragments. Example: Self::Ready, + r"^[A-Za-z0-9_]*::?\s*[A-Za-z0-9_]*[,]?$", + // Excludes bare identifiers with optional punctuation. Example: value, + r"^[A-Za-z0-9_]+\s*[.,]?$", + // Excludes simple method or field access lines. Example: .clone() + r"^\.?\s*[A-Za-z0-9_]+(?:\(\s*\)?)?$", + // Excludes incomplete let bindings split across lines. Example: let value = + r"^let\s+(?:mut\s+)?[A-Za-z0-9_]+\s*=$", + // Excludes simple public struct field declarations. Example: pub name: String, + r"^pub\s+[A-Za-z0-9_]*\s*:\s*[A-Za-z0-9_]*[,]?$", + // Excludes single-path use imports. Example: use crate::module; + r"^use\s+[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z_][A-Za-z0-9_]*)*;$", ], + }, + LanguageLinePattern { + language_name: "Sass", + extensions: &["sass"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "Swift", - extensions: &["swift"], + language_name: "Scala", + extensions: &["scala", "sc"], duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, + LanguageLinePattern { + language_name: "SCSS", + extensions: &["scss"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], + duplicate_mitigation_lines: &[], + duplicate_mitigation_regexps: &[], + }, LanguageLinePattern { language_name: "Shell", - extensions: &["sh", "bash", "zsh", "fish"], + extensions: &["sh"], duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'], duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "PowerShell", - extensions: &["ps1", "psm1", "psd1"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '?', '[', ']', '{', '|', '}'], + language_name: "SQL", + extensions: &["sql"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';'], + duplicate_mitigation_lines: &["BEGIN", "END"], + duplicate_mitigation_regexps: &[], + }, + LanguageLinePattern { + language_name: "Swift", + extensions: &["swift"], + duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '}'], duplicate_mitigation_lines: &[], duplicate_mitigation_regexps: &[], }, LanguageLinePattern { - language_name: "HTML / XML", - extensions: &["html", "htm", "xml", "xhtml", "svg"], + language_name: "TypeScript", + extensions: &["ts", "tsx"], + duplicate_mitigation_pattern: &[ + '&', '(', ')', ',', ':', ';', '<', '>', '?', '[', ']', '{', '|', '}', + ], + duplicate_mitigation_lines: &["// @ts-nocheck"], + duplicate_mitigation_regexps: &[ + // Excludes single-line block comments used by generated files and tooling. Example: /* eslint-disable */ + r"^/\*.*\*/$", + // Excludes generated interface field declarations. Example: errors: InvalidInputError[] + r"^[A-Za-z_$][A-Za-z0-9_$]*\??:\s*(?:Scalars\['[A-Za-z]+'\]|[A-Z][A-Za-z0-9_$]*(?:\[\])?|[a-z]+(?:\[\])?|\([^)]*\))(?:\[\])?(?:\s*\|\s*(?:null|number|boolean|string))*[,]?$", + // Excludes generated GraphQL typename marker fields. Example: __typename: 'User' + r"^__typename:\s*'[A-Za-z_$][A-Za-z0-9_$]*'[,]?$", + ], + }, + LanguageLinePattern { + language_name: "XML", + extensions: &["xml", "xhtml", "svg"], duplicate_mitigation_pattern: &['/', '<', '>'], duplicate_mitigation_lines: &[ "", @@ -136,20 +243,6 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ ], duplicate_mitigation_regexps: &[], }, - LanguageLinePattern { - language_name: "CSS / SCSS / Sass / Less", - extensions: &["css", "scss", "sass", "less"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';', '[', ']', '{', '}'], - duplicate_mitigation_lines: &[], - duplicate_mitigation_regexps: &[], - }, - LanguageLinePattern { - language_name: "SQL", - extensions: &["sql"], - duplicate_mitigation_pattern: &['(', ')', ',', ':', ';'], - duplicate_mitigation_lines: &["BEGIN", "END"], - duplicate_mitigation_regexps: &[], - }, LanguageLinePattern { language_name: "YAML", extensions: &["yaml", "yml"], @@ -157,6 +250,13 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ duplicate_mitigation_lines: &["jobs:", "on:"], duplicate_mitigation_regexps: &[], }, + LanguageLinePattern { + language_name: "Zsh", + extensions: &["zsh"], + duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'], + duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"], + duplicate_mitigation_regexps: &[], + }, ]; #[must_use] @@ -389,4 +489,30 @@ mod tests { } } } + + #[test] + fn language_patterns_are_sorted_by_name() { + for pair in LANGUAGE_PATTERNS.windows(2) { + assert!( + pair[0].language_name.to_ascii_lowercase() + <= pair[1].language_name.to_ascii_lowercase() + ); + } + } + + #[test] + fn language_patterns_use_unique_extensions() { + let mut languages_by_extension = HashMap::new(); + for language in LANGUAGE_PATTERNS { + for extension in language.extensions { + let previous = languages_by_extension.insert(extension, language.language_name); + assert!( + previous.is_none(), + "{extension} belongs to both {} and {}", + previous.unwrap_or_default(), + language.language_name + ); + } + } + } } From af635dde24aa1732142cb32500d735af25557f56 Mon Sep 17 00:00:00 2001 From: b4prog Date: Fri, 26 Jun 2026 09:07:51 +0200 Subject: [PATCH 4/4] [fix] use Fish block terminator for duplicate mitigation --- src/language.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/language.rs b/src/language.rs index 9544ade..7997237 100644 --- a/src/language.rs +++ b/src/language.rs @@ -53,7 +53,7 @@ pub const LANGUAGE_PATTERNS: &[LanguageLinePattern] = &[ language_name: "Fish", extensions: &["fish"], duplicate_mitigation_pattern: &['&', '(', ')', ';', '[', ']', '{', '|', '}'], - duplicate_mitigation_lines: &["do", "done", "else", "fi", "then"], + duplicate_mitigation_lines: &["else", "end"], duplicate_mitigation_regexps: &[], }, LanguageLinePattern {