Skip to content

Commit e8ddf5b

Browse files
authored
Merge pull request #170 from rostilos/1.5.5-rc
feat: Update issue classification logic to route unresolved snippets to AI for verification
2 parents fdb4bfd + 5fe5d1d commit e8ddf5b

3 files changed

Lines changed: 70 additions & 30 deletions

File tree

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/IssueReconciliationEngine.java

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,18 @@ public List<ContentClassification> classifyByContent(
360360

361361
results.add(new ContentClassification(
362362
issue, Classification.CONFIRMED, updatedLine, updatedLineHash));
363-
} else if (hasAnyReliableAnchor(issue)) {
364-
results.add(new ContentClassification(
365-
issue, Classification.RESOLVED, null, null));
366363
} else {
364+
// Content anchor not found — route to AI instead of auto-resolving.
365+
// The anchor (codeSnippet / lineHash) may be stale or imprecise:
366+
// - lineHash can be NULL (file content unavailable at ingestion time)
367+
// - codeSnippet from LLM may not be verbatim (diff markers, truncation)
368+
// When lineHash is null, only the (unreliable) snippet is checked.
369+
// Auto-resolving here causes mass false-positive resolutions.
370+
log.debug("Classify: content not found for {} issue (id={}) at line {} — "
371+
+ "routing to AI. hasSnippet={}, hasLineHash={}",
372+
scope, issue.getId(), currentLine,
373+
issue.getCodeSnippet() != null && !issue.getCodeSnippet().isBlank(),
374+
issue.getLineHash() != null);
367375
results.add(new ContentClassification(
368376
issue, Classification.NEEDS_AI, null, null));
369377
}
@@ -468,8 +476,12 @@ public List<SweepResult> sweepDeterministic(
468476
}
469477

470478
if (!contentFound) {
471-
results.add(new SweepResult(issue, true,
472-
"Code snippet/hash no longer found in file (deterministic sweep)"));
479+
// Don't auto-resolve: hash comparison alone is not reliable enough.
480+
// The codeSnippet from the LLM may not be verbatim, and lineHash
481+
// may be null (never computed at ingestion time). Prefer keeping
482+
// stale issues over falsely resolving valid ones.
483+
results.add(new SweepResult(issue, false,
484+
"Content anchor not found — skipped (not auto-resolved, requires AI verification)"));
473485
} else {
474486
results.add(new SweepResult(issue, false, "Content anchor still present"));
475487
}
@@ -611,10 +623,14 @@ public List<ContentClassification> classifyByContent(
611623

612624
results.add(new ContentClassification(
613625
issue, Classification.CONFIRMED, updatedLine, updatedLineHash));
614-
} else if (hasAnyReliableAnchor(issue)) {
615-
results.add(new ContentClassification(
616-
issue, Classification.RESOLVED, null, null));
617626
} else {
627+
// Content anchor not found — route to AI instead of auto-resolving.
628+
// Same reasoning as base classifyByContent: anchors can be stale/null.
629+
log.debug("Classify (AST): content not found for {} issue (id={}) at line {} — "
630+
+ "routing to AI. hasSnippet={}, hasLineHash={}",
631+
scope, issue.getId(), currentLine,
632+
issue.getCodeSnippet() != null && !issue.getCodeSnippet().isBlank(),
633+
issue.getLineHash() != null);
618634
results.add(new ContentClassification(
619635
issue, Classification.NEEDS_AI, null, null));
620636
}

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/branch/BranchIssueReconciliationService.java

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,30 @@ public int sweepDeterministicResolutions(
265265
request.getCommitHash(), filePath);
266266
}
267267
if (fileContent == null) {
268-
// File doesn't exist — resolve all issues for this file
269-
for (BranchIssue bi : fileIssues) {
270-
resolveIssue(bi, request.getCommitHash(), request.getSourcePrNumber(),
271-
"File no longer exists on branch (deterministic sweep)",
272-
"deterministic-sweep");
273-
resolvedCount++;
268+
// Content unavailable — verify file existence explicitly.
269+
// getFileContent returning null could be an API error/timeout/rate-limit,
270+
// not actual file deletion. Must confirm before resolving (fail-open).
271+
boolean confirmedDeleted = false;
272+
try {
273+
confirmedDeleted = !operationsService.checkFileExistsInBranch(
274+
client, vcsRepoInfo.getRepoWorkspace(), vcsRepoInfo.getRepoSlug(),
275+
request.getTargetBranchName(), filePath);
276+
} catch (Exception ex) {
277+
log.warn("Sweep: file existence check failed for {} — skipping (fail-open): {}",
278+
filePath, ex.getMessage());
279+
continue; // Don't resolve on error
280+
}
281+
if (confirmedDeleted) {
282+
for (BranchIssue bi : fileIssues) {
283+
resolveIssue(bi, request.getCommitHash(), request.getSourcePrNumber(),
284+
"File no longer exists on branch (deterministic sweep)",
285+
"deterministic-sweep");
286+
resolvedCount++;
287+
}
288+
} else {
289+
log.warn("Sweep: file {} exists but content unavailable — "
290+
+ "skipping {} issues (fail-open to prevent false resolution)",
291+
filePath, fileIssues.size());
274292
}
275293
continue;
276294
}

java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/service/IssueReconciliationEngineTest.java

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -253,16 +253,17 @@ void snippetFoundInContent_shouldBeConfirmed() {
253253
}
254254

255255
@Test
256-
void snippetGone_shouldBeResolved() {
256+
void snippetGone_shouldBeRoutedToAi() {
257257
String content = "alpha\ngamma\ndelta";
258258
var hashes = LineHashSequence.from(content);
259-
// Snippet "beta" at line 2 — but "beta" no longer exists
259+
// Snippet "beta" at line 2 — but "beta" no longer exists.
260+
// Should route to AI for verification, not auto-resolve.
260261
var results = engine.classifyByContent(
261262
List.of(issue(2, "beta")),
262263
hashes);
263264
assertThat(results).hasSize(1);
264265
assertThat(results.get(0).classification())
265-
.isEqualTo(IssueReconciliationEngine.Classification.RESOLVED);
266+
.isEqualTo(IssueReconciliationEngine.Classification.NEEDS_AI);
266267
}
267268

268269
@Test
@@ -279,16 +280,18 @@ void lineHashFoundButNoSnippet_shouldBeConfirmed() {
279280
}
280281

281282
@Test
282-
void lineHashGone_shouldBeResolved() {
283+
void lineHashGone_shouldBeRoutedToAi() {
283284
String content = "alpha\ngamma\ndelta";
284285
var hashes = LineHashSequence.from(content);
285286
String betaHash = LineHashSequence.hashLine("beta");
287+
// lineHash for "beta" no longer in file.
288+
// Should route to AI for verification, not auto-resolve.
286289
var results = engine.classifyByContent(
287290
List.of(issue(2, null, betaHash)),
288291
hashes);
289292
assertThat(results).hasSize(1);
290293
assertThat(results.get(0).classification())
291-
.isEqualTo(IssueReconciliationEngine.Classification.RESOLVED);
294+
.isEqualTo(IssueReconciliationEngine.Classification.NEEDS_AI);
292295
}
293296

294297
@Test
@@ -335,19 +338,18 @@ void contextHashChanged_blockScope_shouldBeNeedsAi() {
335338
void multipleIssues_mixedClassifications() {
336339
String content = "alpha\nbeta\ngamma\ndelta";
337340
var hashes = LineHashSequence.from(content);
338-
String betaHash = LineHashSequence.hashLine("beta");
339341

340342
var results = engine.classifyByContent(List.of(
341343
issue(2, "beta"), // CONFIRMED (snippet found)
342-
issue(3, "zzzz"), // RESOLVED (snippet gone)
344+
issue(3, "zzzz"), // NEEDS_AI (snippet gone — routed to AI)
343345
issueWithScope(1, null, IssueScope.FILE) // NEEDS_AI (FILE scope)
344346
), hashes);
345347

346348
assertThat(results).hasSize(3);
347349
assertThat(results.get(0).classification())
348350
.isEqualTo(IssueReconciliationEngine.Classification.CONFIRMED);
349351
assertThat(results.get(1).classification())
350-
.isEqualTo(IssueReconciliationEngine.Classification.RESOLVED);
352+
.isEqualTo(IssueReconciliationEngine.Classification.NEEDS_AI);
351353
assertThat(results.get(2).classification())
352354
.isEqualTo(IssueReconciliationEngine.Classification.NEEDS_AI);
353355
}
@@ -404,26 +406,29 @@ void snippetStillPresent_shouldNotResolve() {
404406
}
405407

406408
@Test
407-
void snippetGone_shouldResolve() {
409+
void snippetGone_shouldNotAutoResolve() {
408410
String content = "alpha\ngamma\ndelta";
409411
var hashes = LineHashSequence.from(content);
412+
// Snippet "beta" gone from file — but sweep should NOT auto-resolve.
413+
// Hash comparison alone is unreliable (LLM snippet not verbatim, null lineHash).
410414
var results = engine.sweepDeterministic(
411415
List.of(issue(2, "beta")),
412416
hashes);
413417
assertThat(results).hasSize(1);
414-
assertThat(results.get(0).resolved()).isTrue();
418+
assertThat(results.get(0).resolved()).isFalse();
415419
}
416420

417421
@Test
418-
void lineHashGone_shouldResolve() {
422+
void lineHashGone_shouldNotAutoResolve() {
419423
String content = "alpha\ngamma\ndelta";
420424
var hashes = LineHashSequence.from(content);
421425
String betaHash = LineHashSequence.hashLine("beta");
426+
// lineHash for "beta" gone — but sweep should NOT auto-resolve.
422427
var results = engine.sweepDeterministic(
423428
List.of(issue(2, null, betaHash)),
424429
hashes);
425430
assertThat(results).hasSize(1);
426-
assertThat(results.get(0).resolved()).isTrue();
431+
assertThat(results.get(0).resolved()).isFalse();
427432
}
428433

429434
@Test
@@ -454,17 +459,16 @@ void blockScope_someSnippetLinesPresent_shouldNotResolve() {
454459
void multipleIssues_mixedResults() {
455460
String content = "alpha\nbeta\ngamma";
456461
var hashes = LineHashSequence.from(content);
457-
String betaHash = LineHashSequence.hashLine("beta");
458462
String zzzzHash = LineHashSequence.hashLine("zzzz");
459463

460464
var results = engine.sweepDeterministic(List.of(
461465
issue(2, "beta"), // present → not resolved
462-
issue(2, null, zzzzHash) // gone → resolved
466+
issue(2, null, zzzzHash) // gone → still not auto-resolved (requires AI)
463467
), hashes);
464468

465469
assertThat(results).hasSize(2);
466470
assertThat(results.get(0).resolved()).isFalse();
467-
assertThat(results.get(1).resolved()).isTrue();
471+
assertThat(results.get(1).resolved()).isFalse();
468472
}
469473
}
470474

@@ -490,10 +494,12 @@ void classifyByContent_nullAst_shouldFallBackToBase() {
490494
void sweepDeterministic_nullAst_shouldFallBackToBase() {
491495
String content = "alpha\ngamma\ndelta";
492496
var hashes = LineHashSequence.from(content);
497+
// AST overload with null tree/resolver falls back to base method,
498+
// which no longer auto-resolves (anchor not found → skip, not resolve)
493499
var results = engine.sweepDeterministic(
494500
List.of(issue(2, "beta")), hashes, null, null);
495501
assertThat(results).hasSize(1);
496-
assertThat(results.get(0).resolved()).isTrue();
502+
assertThat(results.get(0).resolved()).isFalse();
497503
}
498504

499505
@Test

0 commit comments

Comments
 (0)