apache · Tim-Brooks · Jun 10, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -287,7 +287,8 @@ New Features
 
 Improvements
 ---------------------
-(No changes)
+
+* GITHUB#16269: Only build the term-vectors per-field for fields that store term vectors. (Tim Brooks)
 
 Optimizations
 ---------------------

diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -42,12 +42,18 @@
 
 final class FreqProxTermsWriter extends TermsHash {
 
+  // The term vectors consumer is the downstream consumer of the terms interned here: it reuses
+  // our term byte pool and only buffers fields that store term vectors. It must be non-null:
+  // flush() always calls it. The base class tracks this same instance as nextTermsHash.
+  private final TermVectorsConsumer termVectors;
+
   FreqProxTermsWriter(
       final IntBlockPool.Allocator intBlockAllocator,
       final ByteBlockPool.Allocator byteBlockAllocator,
       Counter bytesUsed,
-      TermsHash termVectors) {
+      TermVectorsConsumer termVectors) {
     super(intBlockAllocator, byteBlockAllocator, bytesUsed, termVectors);
+    this.termVectors = termVectors;
   }
 
   private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
@@ -79,14 +85,15 @@ private void applyDeletes(SegmentWriteState state, Fields fields) throws IOExcep
     }
   }
 
-  @Override
   public void flush(
       Map<String, TermsHashPerField> fieldsToFlush,
       final SegmentWriteState state,
       Sorter.DocMap sortMap,
       NormsProducer norms)
       throws IOException {
-    super.flush(fieldsToFlush, state, sortMap, norms);
+    // Flush the per-document term vectors first (they were buffered as each document finished),
+    // then write the postings gathered per-field below.
+    termVectors.flush(state, sortMap);
 
     // Gather all fields that saw any postings:
     List<FreqProxTermsWriterPerField> allFields = new ArrayList<>();
@@ -136,8 +143,11 @@ public Terms terms(final String field) {
 
   @Override
   public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) {
-    return new FreqProxTermsWriterPerField(
-        invertState, this, fieldInfo, nextTermsHash.addField(invertState, fieldInfo));
+    // Only build the downstream term-vectors per-field when the field actually stores term vectors.
+    // hasTermVectors() is fixed at field-init time and is immutable for the segment.
+    TermsHashPerField termVectorsPerField =
+        fieldInfo.hasTermVectors() ? termVectors.addField(invertState, fieldInfo) : null;
+    return new FreqProxTermsWriterPerField(invertState, this, fieldInfo, termVectorsPerField);
   }
 
   static class SortingTerms extends FilterLeafReader.FilterTerms {

diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -46,14 +46,14 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
       FieldInvertState invertState,
       TermsHash termsHash,
       FieldInfo fieldInfo,
-      TermsHashPerField nextPerField) {
+      TermsHashPerField termVectorsPerField) {
     super(
         fieldInfo.getIndexOptions().subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) ? 2 : 1,
         termsHash.intPool,
         termsHash.bytePool,
         termsHash.termBytePool,
         termsHash.bytesUsed,
-        nextPerField,
+        termVectorsPerField,
         fieldInfo.name,
         fieldInfo.getIndexOptions());
     this.fieldState = invertState;
@@ -62,6 +62,8 @@ final class FreqProxTermsWriterPerField extends TermsHashPerField {
     hasProx = indexOptions.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
     hasOffsets = indexOptions.subsumes(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     isTermDoc = fieldInfo.isTermDocField();
+    // The downstream term-vectors per-field exists iff the field stores term vectors.
+    assert (getTermVectorsPerField() != null) == fieldInfo.hasTermVectors();
   }
 
   @Override
@@ -73,12 +75,11 @@ void finish() throws IOException {
   }
 
   @Override
-  boolean start(IndexableField f, boolean first) {
+  void start(IndexableField f, boolean first) {
     super.start(f, first);
     termFreqAtt = fieldState.termFreqAttribute;
     payloadAttribute = fieldState.payloadAttribute;
     offsetAttribute = fieldState.offsetAttribute;
-    return true;
   }
 
   void writeProx(int termID, int proxCode) {

diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
@@ -84,8 +84,8 @@ final class IndexingChain implements Accountable {
   final Counter bytesUsed = Counter.newCounter();
   final FieldInfos.Builder fieldInfos;
 
-  // Writes postings and term vectors:
-  final TermsHash termsHash;
+  // Writes postings, and flushes the downstream term-vectors consumer:
+  final FreqProxTermsWriter termsHash;
   // Shared pool for doc-value terms
   final ByteBlockPool docValuesBytePool;
   // Shared scratch buffers for dense points encoding
@@ -592,7 +592,7 @@ void processDocument(
     // analyzer is free to reuse TokenStream across fields
     // (i.e., we cannot have more than one TokenStream
     // running "at once"):
-    termsHash.startDocument();
+    termVectorsWriter.startDocument();
     startStoredFields(docID);
     try {
       // Handle the parent field first (before document fields). Its schema was already
@@ -663,9 +663,9 @@ void processDocument(
           fields[i].finish(docID);
         }
         finishStoredFields();
-        // TODO: for broken docs, optimize termsHash.finishDocument
+        // TODO: for broken docs, optimize termVectorsWriter.finishDocument
         try {
-          termsHash.finishDocument(docID);
+          termVectorsWriter.finishDocument(docID);
         } catch (Throwable th) {
           // Must abort, on the possibility that on-disk term
           // vectors are now corrupt:
@@ -886,7 +886,7 @@ private void processRowColumns(int baseDocID, int numDocs, Iterable<Column> colu
       int indexedFieldCount = 0;
 
       if (hasInverted) {
-        termsHash.startDocument();
+        termVectorsWriter.startDocument();
       }
       if (hasStored) {
         startStoredFields(segDocID);
@@ -925,7 +925,7 @@ private void processRowColumns(int baseDocID, int numDocs, Iterable<Column> colu
           }
           if (hasInverted) {
             try {
-              termsHash.finishDocument(segDocID);
+              termVectorsWriter.finishDocument(segDocID);
             } catch (Throwable th) {
               abortingExceptionConsumer.accept(th);
               throw th;
@@ -1478,6 +1478,9 @@ private static void updateDocFieldSchema(
     if (fieldType.indexOptions() != IndexOptions.NONE) {
       schema.setIndexOptions(
           fieldType.indexOptions(), fieldType.omitNorms(), fieldType.storeTermVectors());
+      if (fieldType.storeTermVectors() == false) {
+        verifyNoTermVectorOptionsWithoutVectors(fieldName, fieldType);
+      }
     } else {
       // TODO: should this be checked when a fieldType is created?
       verifyUnIndexedFieldType(fieldName, fieldType);
@@ -1540,6 +1543,31 @@ private static void verifyUnIndexedFieldType(String name, IndexableFieldType ft)
     }
   }
 
+  /**
+   * For an indexed field that does not store term vectors: throws IllegalArgumentException
+   * (naming the field) if it requests term-vector offsets, positions or payloads.
+   */
+  private static void verifyNoTermVectorOptionsWithoutVectors(String name, IndexableFieldType ft) {
+    if (ft.storeTermVectorOffsets()) {
+      throw new IllegalArgumentException(
+          "cannot index term vector offsets when term vectors are not indexed (field=\""
+              + name
+              + "\")");
+    }
+    if (ft.storeTermVectorPositions()) {
+      throw new IllegalArgumentException(
+          "cannot index term vector positions when term vectors are not indexed (field=\""
+              + name
+              + "\")");
+    }
+    if (ft.storeTermVectorPayloads()) {
+      throw new IllegalArgumentException(
+          "cannot index term vector payloads when term vectors are not indexed (field=\""
+              + name
+              + "\")");
+    }
+  }
+
   private static void validateMaxVectorDimension(
       String fieldName, int vectorDim, int maxVectorDim) {
     if (vectorDim > maxVectorDim) {

diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java
@@ -18,9 +18,7 @@
 
 import java.io.IOException;
 import java.util.Iterator;
-import java.util.Map;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
@@ -51,13 +49,8 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
   }
 
   @Override
-  void flush(
-      Map<String, TermsHashPerField> fieldsToFlush,
-      final SegmentWriteState state,
-      Sorter.DocMap sortMap,
-      NormsProducer norms)
-      throws IOException {
-    super.flush(fieldsToFlush, state, sortMap, norms);
+  void flush(final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
+    super.flush(state, sortMap);
     if (tmpDirectory != null) {
       TermVectorsReader reader =
           TEMP_TERM_VECTORS_FORMAT.vectorsReader(

diff --git a/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
@@ -18,9 +18,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Map;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.TermVectorsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FlushInfo;
@@ -68,13 +66,7 @@ class TermVectorsConsumer extends TermsHash {
     this.codec = codec;
   }
 
-  @Override
-  void flush(
-      Map<String, TermsHashPerField> fieldsToFlush,
-      final SegmentWriteState state,
-      Sorter.DocMap sortMap,
-      NormsProducer norms)
-      throws IOException {
+  void flush(final SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
     if (writer != null) {
       int numDocs = state.segmentInfo.maxDoc();
       assert numDocs > 0;
@@ -113,7 +105,7 @@ void setHasVectors() {
     hasVectors = true;
   }
 
-  @Override
+  /** Writes this document's term vectors. Called per document by {@link IndexingChain}. */
   void finishDocument(int docID) throws IOException {
 
     if (!hasVectors) {
@@ -173,7 +165,7 @@ void addFieldToFlush(TermVectorsConsumerPerField fieldToFlush) {
     perFields[numVectorFields++] = fieldToFlush;
   }
 
-  @Override
+  /** Resets per-document state. Called per document by {@link IndexingChain}. */
   void startDocument() {
     resetFields();
     numVectorFields = 0;