From 04e96763d5a37a733564ffd9545c436b1c148fbd Mon Sep 17 00:00:00 2001
From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com>
Date: Thu, 18 Jun 2026 17:42:45 +0100
Subject: [PATCH 1/4] Add detail to KNN no-match explanations

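Recompute the explained doc's score in explain() so that the "Not in top N
doc(s)" explanation states why the doc was left out: its score is below the
cutoff, it was excluded by the filter, it has no vector value, or it was lost
to a tie-break or an approximate-search (recall) miss.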
---
 lucene/CHANGES.txt                            |  2 +
 .../lucene/search/AbstractKnnVectorQuery.java | 59 ++++++++++++++++++-
 .../lucene/search/DocAndScoreQuery.java       | 44 ++++++++++++--
 .../search/BaseKnnVectorQueryTestCase.java    | 57 +++++++++++++++---
 4 files changed, 148 insertions(+), 14 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 42a6e8073576..f7857f33ea31 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -439,6 +439,8 @@ Improvements
 * GITHUB#16264: Check if merge is aborted before executing file integrity checks to avoid
   costly full-file checksums on segments when the merge has already been cancelled. (Tanguy Leroux)
 
+* GITHUB#16271: Add detail to KNN no-match explanations. (Jakub Slowinski)
+
 Optimizations
 ---------------------
 * GITHUB#16111: Optimize FieldExistsQuery.count() when all docs have the field. (Prithvi S)
diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
index 4bdd821ccba0..2218a4bb3953 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
@@ -149,9 +149,64 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
       topK = runSearchTasks(tasks, taskExecutor, perLeafResults, leafReaderContexts);
     }
     if (topK.scoreDocs.length == 0) {
-      return MatchNoDocsQuery.INSTANCE;
+      return new MatchNoDocsQuery("No documents matched the nearest-neighbor search");
     }
-    return DocAndScoreQuery.createDocAndScoreQuery(reader, topK, reentryCount);
+    return DocAndScoreQuery.createDocAndScoreQuery(
+        reader, topK, reentryCount, noMatchExplainer(topK, filterWeight));
+  }
+
+  /** Returns an explainer for docs absent from {@code topK}; requires a non-empty {@code topK}. */
+  private DocAndScoreQuery.NoMatchExplainer noMatchExplainer(TopDocs topK, Weight filterWeight) {
+    // Hits are ordered best-first, so the tail entry carries the lowest collected score.
+    final ScoreDoc[] hits = topK.scoreDocs;
+    final float minTopKScore = hits[hits.length - 1].score;
+    return (ctx, docId, k) -> explainNotCollected(ctx, docId, k, filterWeight, minTopKScore);
+  }
+
+  /** Recomputes {@code doc}'s score to say why it was not collected; null if it cannot score. */
+  private Explanation explainNotCollected(
+      LeafReaderContext context, int doc, int topN, Weight filterWeight, float minTopKScore)
+      throws IOException {
+    final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
+    final boolean hasVectors = fieldInfo != null && fieldInfo.getVectorDimension() != 0;
+    // No vector field, or no scorer for it, in this leaf: the caller uses its generic message.
+    final VectorScorer scorer = hasVectors ? createVectorScorer(context, fieldInfo) : null;
+    if (scorer == null) {
+      return null;
+    }
+    final String reason;
+    if (scorer.iterator().advance(doc) != doc) {
+      reason = "no vector value in field \"" + field + "\"";
+    } else if (filterWeight != null && docPassesFilter(filterWeight, context, doc) == false) {
+      reason = "excluded by filter";
+    } else {
+      final float score = scorer.score();
+      if (score < minTopKScore) {
+        reason = "score " + score + " < minTopKScore " + minTopKScore;
+      } else {
+        // At or above the cutoff yet absent: tie-break, recall miss, or rescoring.
+        reason =
+            "score "
+                + score
+                + " >= minTopKScore "
+                + minTopKScore
+                + " (tie-break or approximate-search miss)";
+      }
+    }
+    return Explanation.noMatch("Not in top " + topN + " doc(s): " + reason);
+  }
+
+  /** Returns whether {@code filterWeight} matches {@code doc} in the given leaf. */
+  private static boolean docPassesFilter(Weight filterWeight, LeafReaderContext context, int doc)
+      throws IOException {
+    final Scorer filterScorer = filterWeight.scorer(context);
+    if (filterScorer == null) {
+      return false; // the filter has no scorer for this leaf
+    }
+    final TwoPhaseIterator confirm = filterScorer.twoPhaseIterator();
+    return confirm == null
+        ? filterScorer.iterator().advance(doc) == doc
+        : confirm.approximation().advance(doc) == doc && confirm.matches();
   }
 
   private TopDocs runSearchTasks(
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
index cf71ca8d8c20..3dbd9d86bb17 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
@@ -29,6 +29,12 @@
 /** A query that wraps precomputed documents and scores */
 class DocAndScoreQuery extends Query {
 
+  /** Explains why a leaf-local doc is absent from the top N; null selects the generic message. */
+  @FunctionalInterface
+  interface NoMatchExplainer {
+    Explanation explain(LeafReaderContext context, int doc, int topN) throws IOException;
+  }
+
   private final int[] docs;
   private final float[] scores;
   private final float maxScore;
@@ -36,6 +42,8 @@ class DocAndScoreQuery extends Query {
   private final long visited;
   private final Object contextIdentity;
   private final int reentryCount;
+  // Only used in explain(). Omitted from equals()/hashCode().
+  private final NoMatchExplainer noMatchExplainer;
 
   /**
    * Constructor
@@ -60,6 +68,18 @@ class DocAndScoreQuery extends Query {
       long visited,
       Object contextIdentity,
       int reentryCount) {
+    this(docs, scores, maxScore, segmentStarts, visited, contextIdentity, reentryCount, null);
+  }
+
+  DocAndScoreQuery(
+      int[] docs,
+      float[] scores,
+      float maxScore,
+      int[] segmentStarts,
+      long visited,
+      Object contextIdentity,
+      int reentryCount,
+      NoMatchExplainer noMatchExplainer) {
     this.docs = docs;
     this.scores = scores;
     this.maxScore = maxScore;
@@ -67,9 +87,15 @@ class DocAndScoreQuery extends Query {
     this.visited = visited;
     this.contextIdentity = contextIdentity;
     this.reentryCount = reentryCount;
+    this.noMatchExplainer = noMatchExplainer;
   }
 
   static Query createDocAndScoreQuery(IndexReader reader, TopDocs topK, int reentryCount) {
+    return createDocAndScoreQuery(reader, topK, reentryCount, null); // no explainer
+  }
+
+  static Query createDocAndScoreQuery(
+      IndexReader reader, TopDocs topK, int reentryCount, NoMatchExplainer noMatchExplainer) {
     int len = topK.scoreDocs.length;
     assert len > 0;
     float maxScore = topK.scoreDocs[0].score;
@@ -88,7 +114,8 @@ static Query createDocAndScoreQuery(IndexReader reader, TopDocs topK, int reentr
         segmentStarts,
         topK.totalHits.value(),
         reader.getContext().id(),
-        reentryCount);
+        reentryCount,
+        noMatchExplainer);
   }
 
   static int[] findSegmentStarts(List<LeafReaderContext> leaves, int[] docs) {
@@ -121,12 +148,19 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
     }
     return new Weight(this) {
       @Override
-      public Explanation explain(LeafReaderContext context, int doc) {
+      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         int found = Arrays.binarySearch(docs, doc + context.docBase);
         if (found < 0) {
-          return Explanation.noMatch("not in top " + docs.length + " docs");
+          // Defer to the originating query for a richer reason, if available.
+          if (noMatchExplainer != null) {
+            Explanation enriched = noMatchExplainer.explain(context, doc, docs.length);
+            if (enriched != null) {
+              return enriched;
+            }
+          }
+          return Explanation.noMatch("Not in top " + docs.length + " doc(s)");
         }
-        return Explanation.match(scores[found] * boost, "within top " + docs.length + " docs");
+        return Explanation.match(scores[found] * boost, "Within top " + docs.length + " doc(s)");
       }
 
       @Override
@@ -218,7 +252,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
 
   @Override
   public String toString(String field) {
-    return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
+    return "DocAndScoreQuery[" + docs.length + " doc(s), maxScore=" + maxScore + "]";
   }
 
   @Override
diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index f2c7acd3f6d3..e98cdd51be4a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -169,6 +169,8 @@ public void testEmptyIndex() throws IOException {
       assertMatches(searcher, kvq, 0);
       Query q = searcher.rewrite(kvq);
       assertTrue(q instanceof MatchNoDocsQuery);
+      assertEquals(
+          "MatchNoDocsQuery(\"No documents matched the nearest-neighbor search\")", q.toString());
     }
   }
 
@@ -433,13 +435,16 @@ public void testExplain() throws IOException {
         // scores vary widely due to quantization
         assertEquals(1 / 2f, matched.getValue().doubleValue(), 0.5);
         assertEquals(0, matched.getDetails().length);
-        assertEquals("within top 3 docs", matched.getDescription());
+        assertEquals("Within top 3 doc(s)", matched.getDescription());
 
-        Explanation nomatch = searcher.explain(query, 5);
+        // Doc 0 ({0,0}) is farthest from the query {2,3}, so it is reliably ranked out.
+        Explanation nomatch = searcher.explain(query, 0);
         assertFalse(nomatch.isMatch());
         assertEquals(0f, nomatch.getValue());
         assertEquals(0, matched.getDetails().length);
-        assertEquals("not in top 3 docs", nomatch.getDescription());
+        assertTrue(
+            nomatch.getDescription(),
+            nomatch.getDescription().startsWith("Not in top 3 doc(s): score "));
       }
     }
   }
@@ -462,13 +467,50 @@ public void testExplainMultipleSegments() throws IOException {
         // scores vary widely due to quantization
         assertEquals(1 / 2f, matched.getValue().doubleValue(), 0.5);
         assertEquals(0, matched.getDetails().length);
-        assertEquals("within top 3 docs", matched.getDescription());
+        assertEquals("Within top 3 doc(s)", matched.getDescription());
 
-        Explanation nomatch = searcher.explain(query, 4);
+        // Doc 0 ({0,0}) is farthest from the query {2,3}, so it is reliably ranked out.
+        Explanation nomatch = searcher.explain(query, 0);
         assertFalse(nomatch.isMatch());
         assertEquals(0f, nomatch.getValue());
         assertEquals(0, matched.getDetails().length);
-        assertEquals("not in top 3 docs", nomatch.getDescription());
+        assertTrue(
+            nomatch.getDescription(),
+            nomatch.getDescription().startsWith("Not in top 3 doc(s): score "));
+      }
+    }
+  }
+
+  public void testExplainFiltered() throws IOException {
+    try (Directory d = newDirectoryForTest()) {
+      try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
+        for (int j = 0; j < 5; j++) {
+          Document doc = new Document();
+          doc.add(getKnnVectorField("field", new float[] {j, j}));
+          doc.add(new IntPoint("tag", j));
+          w.addDocument(doc);
+        }
+        // Doc 5 passes the filter (tag in range) but has no vector.
+        Document noVector = new Document();
+        noVector.add(new IntPoint("tag", 1));
+        w.addDocument(noVector);
+      }
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        // The filter keeps docs 0-2 and vector-less doc 5, so docs 3 and 4 cannot be collected.
+        Query filter = IntPoint.newRangeQuery("tag", 0, 2);
+        AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {2, 3}, 3, filter);
+
+        // Doc 4 has a vector but fails the filter.
+        Explanation filtered = searcher.explain(query, 4);
+        assertFalse(filtered.isMatch());
+        assertEquals("Not in top 3 doc(s): excluded by filter", filtered.getDescription());
+
+        // Doc 5 passes the filter but has no vector, so blame the missing vector, not the filter.
+        Explanation noVector = searcher.explain(query, 5);
+        assertFalse(noVector.isMatch());
+        assertEquals(
+            "Not in top 3 doc(s): no vector value in field \"field\"", noVector.getDescription());
       }
     }
   }
@@ -1072,7 +1114,8 @@ void assertDocScoreQueryToString(Query query) {
     // Since a forceMerge could occur in this test, we must not assert that a specific doc_id is
     // matched
     // But that instead the string format is expected and that the max score is 1.0
-    assertTrue(queryString.matches("DocAndScoreQuery\\[\\d+,...]\\[\\d+.\\d+,...],1.0"));
+    assertTrue(
+        queryString, queryString.matches("DocAndScoreQuery\\[\\d+ doc\\(s\\), maxScore=1.0]"));
   }
 
   /**

From dd1a52efb0a76a7865b8cf79dd9fe390f29af43a Mon Sep 17 00:00:00 2001
From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:16:26 +0100
Subject: [PATCH 2/4] Add testExplainTieBreak

---
 .../search/BaseKnnVectorQueryTestCase.java    | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index e98cdd51be4a..7fcf44afebdd 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -515,6 +515,42 @@ public void testExplainFiltered() throws IOException {
     }
   }
 
+  public void testExplainTieBreak() throws IOException {
+    try (Directory d = newDirectoryForTest()) {
+      // All five docs share one vector, so they all score equally. With top k = 3, two are dropped
+      // due to tie-break.
+      try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig())) {
+        for (int j = 0; j < 5; j++) {
+          Document doc = new Document();
+          doc.add(getKnnVectorField("field", new float[] {1, 1}));
+          w.addDocument(doc);
+        }
+        w.forceMerge(1);
+      }
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        AbstractKnnVectorQuery query = getKnnVectorQuery("field", new float[] {1, 1}, 3);
+
+        Set<Integer> collected = new HashSet<>();
+        for (ScoreDoc sd : searcher.search(query, 3).scoreDocs) {
+          collected.add(sd.doc);
+        }
+        // Lowest doc id the search left out; with five tied docs and k = 3 one must exist.
+        int dropped = 0;
+        while (dropped < 5 && collected.contains(dropped)) {
+          dropped++;
+        }
+        // Fail clearly here rather than passing an out-of-range doc id to explain().
+        assertTrue("expected a dropped doc, collected=" + collected, dropped < 5);
+        Explanation nomatch = searcher.explain(query, dropped);
+        assertFalse(nomatch.isMatch());
+        String description = nomatch.getDescription();
+        assertTrue(description, description.startsWith("Not in top 3 doc(s): score "));
+        assertTrue(description, description.endsWith(" (tie-break or approximate-search miss)"));
+      }
+    }
+  }
+
   /** Test that when vectors are abnormally distributed among segments, we still find the top K */
   public void testSkewedIndex() throws IOException {
     /* We have to choose the numbers carefully here so that some segment has more than the expected

From 5e03bf0193db38eba41b69f2cc0969049ea60b7f Mon Sep 17 00:00:00 2001
From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com>
Date: Sun, 21 Jun 2026 11:07:51 +0100
Subject: [PATCH 3/4] Only allow noMatchExplainer.explain to be called for the
 reader the query was built against

---
 .../java/org/apache/lucene/search/DocAndScoreQuery.java    | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
index 3dbd9d86bb17..d4dcea8a2d41 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreQuery.java
@@ -25,6 +25,7 @@
 import java.util.Objects;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.ReaderUtil;
 
 /** A query that wraps precomputed documents and scores */
 class DocAndScoreQuery extends Query {
@@ -151,8 +152,10 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
       public Explanation explain(LeafReaderContext context, int doc) throws IOException {
         int found = Arrays.binarySearch(docs, doc + context.docBase);
         if (found < 0) {
-          // Defer to the originating query for a richer reason, if available.
-          if (noMatchExplainer != null) {
+          // Defer to the originating query for a richer reason, but only when this leaf belongs
+          // to the reader this query was built against.
+          if (noMatchExplainer != null
+              && ReaderUtil.getTopLevelContext(context).id() == contextIdentity) {
             Explanation enriched = noMatchExplainer.explain(context, doc, docs.length);
             if (enriched != null) {
               return enriched;

From 68c75ab7beead5bb703ed1f3f0b4c40c0eff1f9b Mon Sep 17 00:00:00 2001
From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com>
Date: Mon, 22 Jun 2026 16:01:38 +0100
Subject: [PATCH 4/4] Move GITHUB#16271 changelog entry to Lucene 10.6.0

---
 lucene/CHANGES.txt | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f7857f33ea31..58d4df5c7574 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -287,7 +287,7 @@ New Features
 
 Improvements
 ---------------------
-(No changes)
+* GITHUB#16271: Add detail to KNN no-match explanations. (Jakub Slowinski)
 
 Optimizations
 ---------------------
@@ -439,8 +439,6 @@ Improvements
 * GITHUB#16264: Check if merge is aborted before executing file integrity checks to avoid
   costly full-file checksums on segments when the merge has already been cancelled. (Tanguy Leroux)
 
-* GITHUB#16271: Add detail to KNN no-match explanations. (Jakub Slowinski)
-
 Optimizations
 ---------------------
 * GITHUB#16111: Optimize FieldExistsQuery.count() when all docs have the field. (Prithvi S)