Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cpp-linter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ reqwest = "0.12.24"
semver = "1.0.27"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] }
tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread", "process"] }
tokio-macros = "2.5.0"
tokio-stream = "0.1.17"
which = "8.0.0"
Expand Down
72 changes: 40 additions & 32 deletions cpp-linter/src/clang_tools/clang_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

use std::{
fs,
process::Command,
sync::{Arc, Mutex, MutexGuard},
sync::{Arc, Mutex},
};

use anyhow::{Context, Result};
use anyhow::{Context, Result, anyhow};
use log::Level;
use serde::Deserialize;
use tokio::process::Command;

// project-specific crates/modules
use super::MakeSuggestions;
Expand Down Expand Up @@ -81,58 +81,61 @@ pub fn tally_format_advice(files: &[Arc<Mutex<FileObj>>]) -> u64 {
}

/// Run clang-format for a specific `file`, then parse and return its XML output.
pub fn run_clang_format(
file: &mut MutexGuard<FileObj>,
pub async fn run_clang_format(
file: &Arc<Mutex<FileObj>>,
clang_params: &ClangParams,
) -> Result<Vec<(log::Level, String)>> {
let mut cmd = Command::new(clang_params.clang_format_command.as_ref().unwrap());
let mut logs = vec![];
cmd.args(["--style", &clang_params.style]);
let ranges = file.get_ranges(&clang_params.lines_changed_only);
for range in &ranges {
cmd.arg(format!("--lines={}:{}", range.start(), range.end()));
}
let file_name = file.name.to_string_lossy().to_string();
cmd.arg(file.name.to_path_buf().as_os_str());
let program = clang_params.clang_format_command.as_ref().unwrap();
let (file_name, mut args, ranges) = {
let mut args = vec![];
let file = file
.lock()
.map_err(|e| anyhow!("Failed to lock mutex: {e:?}"))?;
args.extend(["--style".to_string(), clang_params.style.clone()]);
let ranges = file.get_ranges(&clang_params.lines_changed_only);
for range in &ranges {
args.push(format!("--lines={}:{}", range.start(), range.end()));
}
let file_name = file.name.to_string_lossy().to_string();
(file_name, args, ranges)
};
let mut cmd = Command::new(program);
cmd.args(&args);
let patched = if !clang_params.format_review {
None
} else {
logs.push((
Level::Info,
format!(
"Getting format fixes with \"{} {}\"",
clang_params
.clang_format_command
.as_ref()
.unwrap()
.to_str()
.unwrap_or_default(),
cmd.get_args()
.map(|a| a.to_string_lossy())
.collect::<Vec<_>>()
.join(" ")
"Getting format fixes with \"{} {} {}\"",
program.to_string_lossy(),
args.join(" "),
&file_name
),
));
cmd.arg(&file_name);
Some(
cmd.output()
.await
.with_context(|| format!("Failed to get fixes from clang-format: {file_name}"))?
.stdout,
)
};
cmd.arg("--output-replacements-xml");
args.extend(["--output-replacements-xml".to_string(), file_name.clone()]);
let mut cmd = Command::new(program);
cmd.args(&args);
logs.push((
log::Level::Info,
format!(
"Running \"{} {}\"",
cmd.get_program().to_string_lossy(),
cmd.get_args()
.map(|x| x.to_string_lossy())
.collect::<Vec<_>>()
.join(" ")
program.to_string_lossy(),
args.join(" ")
),
));
let output = cmd
.output()
.await
.with_context(|| format!("Failed to get replacements from clang-format: {file_name}"))?;
Comment on lines +103 to 139
Copy link
Contributor

@coderabbitai coderabbitai bot Dec 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Sequential command execution may contribute to performance regression.

The code creates and executes two separate tokio::process::Command instances sequentially (lines 103-124 for patched content, then 126-139 for XML replacements). Each command spawns a new clang-format process, and the second execution waits for the first to complete.

Given the PR objectives report a ~10s performance regression, this sequential pattern—repeated across all files—could be a significant contributor. Each file now requires:

  1. First command: get formatted output (if format_review is enabled)
  2. Second command: get XML replacements

Consider these alternatives:

  1. If both outputs are always needed, explore whether clang-format can provide both in a single invocation
  2. If not, consider spawning both commands concurrently using tokio::join! or tokio::try_join!
  3. Profile to determine if the two-process overhead is actually the bottleneck
// Example concurrent approach (if both commands are always needed):
let (patched_result, xml_result) = tokio::try_join!(
    async {
        // First command
    },
    async {
        // Second command  
    }
)?;

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. clang-format does not offer a mechanism that combines the 2 outputs
  2. using concurrency to run both tasks would produce a race condition. We need the formatted output first, before we restore the un-formatted content to get the XML replacements based on the un-formatted content.

In the future, we may just use formatted output and diff that with original content instead of parsing XML output and calculating the changed line numbers from the XML.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@2bndy5, thank you for the detailed clarification! I understand now—the sequential execution is necessary because the two commands operate on different file states. Since you need the formatted output first, then must restore the unformatted content before obtaining XML replacements based on that original state, concurrent execution would indeed introduce a race condition.

The future plan to use a diff-based approach instead of XML parsing sounds like a cleaner solution. That would eliminate the need for two separate clang-format invocations per file.

I appreciate you explaining this architectural constraint. Since the sequential pattern is intentional and necessary given clang-format's capabilities, the performance regression likely stems from other aspects of the tokio conversion (perhaps the async overhead itself or changes in how file I/O is handled).


✏️ Learnings added
Learnt from: 2bndy5
Repo: cpp-linter/cpp-linter-rs PR: 224
File: cpp-linter/src/clang_tools/clang_format.rs:103-139
Timestamp: 2025-12-11T20:19:05.142Z
Learning: In cpp-linter-rs, run_clang_format in cpp-linter/src/clang_tools/clang_format.rs must execute two clang-format commands sequentially when format_review is enabled: first to get formatted output, then to get XML replacements based on the original unformatted content. Concurrent execution would create a race condition because the commands operate on different file states. This sequential pattern is intentional and necessary given clang-format's capabilities.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: 2bndy5
Repo: cpp-linter/cpp-linter-rs PR: 101
File: cpp-linter/src/clang_tools/clang_format.rs:155-161
Timestamp: 2025-01-21T09:56:32.771Z
Learning: In cpp-linter-rs, the XML output being parsed is generated programmatically by clang-format tool. The only failure case for XML parsing is when clang-format produces a blank XML document, in which case falling back to empty results (using unwrap_or) is the desired behavior.

Learnt from: 2bndy5
Repo: cpp-linter/cpp-linter-rs PR: 208
File: cpp-linter/src/clang_tools/mod.rs:60-115
Timestamp: 2025-11-04T06:50:10.870Z
Learning: In the cpp-linter-rs project, path validation (such as checking whether a path is a file or directory) should be performed in CLI parsing (cpp-linter/src/cli/structs.rs) rather than in the tool lookup logic (cpp-linter/src/clang_tools/mod.rs). This maintains proper separation of concerns.

if !output.stderr.is_empty() || !output.status.success() {
logs.push((
Expand All @@ -155,7 +158,7 @@ pub fn run_clang_format(
};
format_advice.patched = patched;
if !format_advice.replacements.is_empty() {
let original_contents = fs::read(&file.name).with_context(|| {
let original_contents = fs::read(&file_name).with_context(|| {
format!(
"Failed to read file's original content before translating byte offsets: {file_name}",
)
Expand All @@ -178,7 +181,12 @@ pub fn run_clang_format(
}
format_advice.replacements = filtered_replacements;
}
file.format_advice = Some(format_advice);
{
let mut file = file
.lock()
.map_err(|e| anyhow!("Failed to lock mutex: {e:?}"))?;
file.format_advice = Some(format_advice);
}
Ok(logs)
}

Expand Down
116 changes: 63 additions & 53 deletions cpp-linter/src/clang_tools/clang_tidy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ use std::{
env::{consts::OS, current_dir},
fs,
path::PathBuf,
process::Command,
sync::{Arc, Mutex, MutexGuard},
sync::{Arc, Mutex},
};

// non-std crates
use anyhow::{Context, Result};
use anyhow::{Context, Result, anyhow};
use regex::Regex;
use serde::Deserialize;
use tokio::process::Command;

// project-specific modules/crates
use super::MakeSuggestions;
Expand Down Expand Up @@ -247,61 +247,70 @@ pub fn tally_tidy_advice(files: &[Arc<Mutex<FileObj>>]) -> u64 {
}

/// Run clang-tidy, then parse and return its output.
pub fn run_clang_tidy(
file: &mut MutexGuard<FileObj>,
pub async fn run_clang_tidy(
file: &Arc<Mutex<FileObj>>,
clang_params: &ClangParams,
) -> Result<Vec<(log::Level, std::string::String)>> {
let mut cmd = Command::new(clang_params.clang_tidy_command.as_ref().unwrap());
let mut logs = vec![];
if !clang_params.tidy_checks.is_empty() {
cmd.args(["-checks", &clang_params.tidy_checks]);
}
if let Some(db) = &clang_params.database {
cmd.args(["-p", &db.to_string_lossy()]);
}
for arg in &clang_params.extra_args {
cmd.args(["--extra-arg", format!("\"{}\"", arg).as_str()]);
}
let file_name = file.name.to_string_lossy().to_string();
let ranges = file.get_ranges(&clang_params.lines_changed_only);
if !ranges.is_empty() {
let filter = format!(
"[{{\"name\":{:?},\"lines\":{:?}}}]",
&file_name.replace('/', if OS == "windows" { "\\" } else { "/" }),
ranges
.iter()
.map(|r| [r.start(), r.end()])
.collect::<Vec<_>>()
);
cmd.args(["--line-filter", filter.as_str()]);
}
let (file_name, mut args) = {
let mut args = vec![];
let file = file
.lock()
.map_err(|e| anyhow!("Failed to lock mutex: {e:?}"))?;
let file_name = file.name.to_string_lossy().to_string();
if !clang_params.tidy_checks.is_empty() {
args.extend(["-checks".to_string(), clang_params.tidy_checks.to_owned()]);
}
if let Some(db) = &clang_params.database {
args.extend(["-p".to_string(), db.to_string_lossy().to_string()]);
}
for arg in &clang_params.extra_args {
args.extend(["--extra-arg".to_string(), format!("\"{}\"", arg)]);
}
let ranges = file.get_ranges(&clang_params.lines_changed_only);
if !ranges.is_empty() {
let filter = format!(
"[{{\"name\":{:?},\"lines\":{:?}}}]",
&file_name.replace('/', if OS == "windows" { "\\" } else { "/" }),
ranges
.iter()
.map(|r| [r.start(), r.end()])
.collect::<Vec<_>>()
);
args.extend(["--line-filter".to_string(), filter]);
}
(file_name, args)
};
let original_content = if !clang_params.tidy_review {
None
} else {
cmd.arg("--fix-errors");
Some(fs::read_to_string(&file.name).with_context(|| {
args.push("--fix-errors".to_string());
Some(fs::read_to_string(&file_name).with_context(|| {
format!(
"Failed to cache file's original content before applying clang-tidy changes: {}",
file_name.clone()
)
})?)
};
if !clang_params.style.is_empty() {
cmd.args(["--format-style", clang_params.style.as_str()]);
args.extend(["--format-style".to_string(), clang_params.style.to_owned()]);
}
cmd.arg(file.name.to_string_lossy().as_ref());
args.push(file_name.clone());
let program = clang_params.clang_tidy_command.as_ref().unwrap();
let mut cmd = Command::new(program);
cmd.args(&args);
logs.push((
log::Level::Info,
format!(
"Running \"{} {}\"",
cmd.get_program().to_string_lossy(),
cmd.get_args()
.map(|x| x.to_string_lossy())
.collect::<Vec<_>>()
.join(" ")
program.to_string_lossy(),
args.join(" ")
),
));
let output = cmd.output().unwrap();
let output = cmd
.output()
.await
.with_context(|| format!("Failed to run clang-tidy on file: {}", file_name.clone()))?;
logs.push((
log::Level::Debug,
format!(
Expand All @@ -318,22 +327,23 @@ pub fn run_clang_tidy(
),
));
}
file.tidy_advice = Some(parse_tidy_output(
&output.stdout,
&clang_params.database_json,
)?);
let mut tidy_advice = parse_tidy_output(&output.stdout, &clang_params.database_json)?;
if clang_params.tidy_review {
if let Some(tidy_advice) = &mut file.tidy_advice {
// cache file changes in a buffer and restore the original contents for further analysis
tidy_advice.patched =
Some(fs::read(&file_name).with_context(|| {
format!("Failed to read changes from clang-tidy: {file_name}")
})?);
}
// cache file changes in a buffer and restore the original contents for further analysis
tidy_advice.patched = Some(
fs::read(&file_name)
.with_context(|| format!("Failed to read changes from clang-tidy: {file_name}"))?,
);
// original_content is guaranteed to be Some() value at this point
fs::write(&file_name, original_content.unwrap())
.with_context(|| format!("Failed to restore file's original content: {file_name}"))?;
}
{
let mut file = file
.lock()
.map_err(|e| anyhow!("Failed to lock mutex: {e:?}"))?;
file.tidy_advice = Some(tidy_advice);
}
Ok(logs)
}

Expand Down Expand Up @@ -416,8 +426,8 @@ mod test {
)
}

#[test]
fn use_extra_args() {
#[tokio::test]
async fn use_extra_args() {
let exe_path = ClangTool::ClangTidy
.get_exe_path(
&RequestedVersion::from_str(
Expand All @@ -443,8 +453,8 @@ mod test {
clang_tidy_command: Some(exe_path),
clang_format_command: None,
};
let mut file_lock = arc_file.lock().unwrap();
let logs = run_clang_tidy(&mut file_lock, &clang_params)
let logs = run_clang_tidy(&arc_file, &clang_params)
.await
.unwrap()
.into_iter()
.filter_map(|(_lvl, msg)| {
Expand Down
Loading
Loading